Merge branch 'main' into add-batches

Matthew Farrellee 2025-08-13 07:33:41 -04:00
commit 95a3ecdffc
67 changed files with 1158 additions and 424 deletions

@@ -28,7 +28,7 @@ runs:
# Install llama-stack-client-python based on the client-version input
if [ "${{ inputs.client-version }}" = "latest" ]; then
echo "Installing latest llama-stack-client-python from main branch"
- uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
+ uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
elif [ "${{ inputs.client-version }}" = "published" ]; then
echo "Installing published llama-stack-client-python from PyPI"
uv pip install llama-stack-client

@@ -52,7 +52,8 @@ jobs:
run: |
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
- TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
+ TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
+ sed 's|tests/integration/||' |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT

@@ -164,9 +164,9 @@ jobs:
ENABLE_WEAVIATE: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'true' || '' }}
WEAVIATE_CLUSTER_URL: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'localhost:8080' || '' }}
run: |
- uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
+ uv run pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
tests/integration/vector_io \
- --embedding-model sentence-transformers/all-MiniLM-L6-v2
+ --embedding-model inline::sentence-transformers/all-MiniLM-L6-v2
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}

@@ -1,13 +1,82 @@
- # Contributing to Llama-Stack
+ # Contributing to Llama Stack
We want to make contributing to this project as easy and transparent as
possible.
## Set up your development environment
We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments.
You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).
You can install the dependencies by running:
```bash
cd llama-stack
uv sync --group dev
uv pip install -e .
source .venv/bin/activate
```
```{note}
You can use a specific version of Python with `uv` by adding the `--python <version>` flag (e.g. `--python 3.12`).
Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`.
For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/).
```
Note that you can create a dotenv file `.env` that includes necessary environment variables:
```
LLAMA_STACK_BASE_URL=http://localhost:8321
LLAMA_STACK_CLIENT_LOG=debug
LLAMA_STACK_PORT=8321
LLAMA_STACK_CONFIG=<provider-name>
TAVILY_SEARCH_API_KEY=
BRAVE_SEARCH_API_KEY=
```
And then use this dotenv file when running client SDK tests via the following:
```bash
uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py --text-model=meta-llama/Llama-3.1-8B-Instruct
```
### Pre-commit Hooks
We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running:
```bash
uv run pre-commit install
```
After that, pre-commit hooks will run automatically before each commit.
Alternatively, if you don't want to install the pre-commit hooks, you can run the checks manually by running:
```bash
uv run pre-commit run --all-files
```
```{caution}
Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
```
## Discussions -> Issues -> Pull Requests
We actively welcome your pull requests. However, please read the following. This is heavily inspired by [Ghostty](https://github.com/ghostty-org/ghostty/blob/main/CONTRIBUTING.md).
If in doubt, please open a [discussion](https://github.com/meta-llama/llama-stack/discussions); we can always convert that to an issue later.
### Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Meta has a [bounty program](http://facebook.com/whitehat/info) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
### Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Meta's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
**I'd like to contribute!**
If you are new to the project, start by looking at the issues tagged with "good first issue". If you're interested
@@ -51,93 +120,15 @@ Please avoid picking up too many issues at once. This helps you stay focused and
Please keep pull requests (PRs) small and focused. If you have a large set of changes, consider splitting them into logically grouped, smaller PRs to facilitate review and testing.
- > [!TIP]
- > As a general guideline:
- > - Experienced contributors should try to keep no more than 5 open PRs at a time.
- > - New contributors are encouraged to have only one open PR at a time until they're familiar with the codebase and process.
+ ```{tip}
+ As a general guideline:
+ - Experienced contributors should try to keep no more than 5 open PRs at a time.
+ - New contributors are encouraged to have only one open PR at a time until they're familiar with the codebase and process.
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Meta's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Meta has a [bounty program](http://facebook.com/whitehat/info) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Set up your development environment
We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments.
You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).
You can install the dependencies by running:
```bash
cd llama-stack
uv sync --group dev
uv pip install -e .
source .venv/bin/activate
```
- > [!NOTE]
+ ## Repository guidelines
> You can use a specific version of Python with `uv` by adding the `--python <version>` flag (e.g. `--python 3.12`)
> Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`.
> For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/).
- Note that you can create a dotenv file `.env` that includes necessary environment variables:
+ ### Coding Style
```
LLAMA_STACK_BASE_URL=http://localhost:8321
LLAMA_STACK_CLIENT_LOG=debug
LLAMA_STACK_PORT=8321
LLAMA_STACK_CONFIG=<provider-name>
TAVILY_SEARCH_API_KEY=
BRAVE_SEARCH_API_KEY=
```
And then use this dotenv file when running client SDK tests via the following:
```bash
uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py --text-model=meta-llama/Llama-3.1-8B-Instruct
```
## Pre-commit Hooks
We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running:
```bash
uv run pre-commit install
```
After that, pre-commit hooks will run automatically before each commit.
Alternatively, if you don't want to install the pre-commit hooks, you can run the checks manually by running:
```bash
uv run pre-commit run --all-files
```
> [!CAUTION]
> Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
## Running tests
You can find the Llama Stack testing documentation [here](https://github.com/meta-llama/llama-stack/blob/main/tests/README.md).
## Adding a new dependency to the project
To add a new dependency to the project, you can use the `uv` command. For example, to add `foo` to the project, you can run:
```bash
uv add foo
uv sync
```
## Coding Style
* Comments should provide meaningful insights into the code. Avoid filler comments that simply
describe the next step, as they create unnecessary clutter, same goes for docstrings.
@@ -159,6 +150,10 @@ uv sync
* When possible, use keyword arguments only when calling functions.
* Llama Stack utilizes [custom Exception classes](llama_stack/apis/common/errors.py) for certain Resources that should be used where applicable.
### License
By contributing to Llama, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.
## Common Tasks
Some tips about common tasks you work on while contributing to Llama Stack:
@@ -210,8 +205,4 @@ If you modify or add new API endpoints, update the API documentation accordingly
uv run ./docs/openapi_generator/run_openapi_generator.sh
```
The generated API documentation will be available in `docs/_static/`. Make sure to review the changes before committing.
## License
By contributing to Llama, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.

@@ -1,5 +1,8 @@
# Llama Stack
<a href="https://trendshift.io/repositories/11824" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11824" alt="meta-llama%2Fllama-stack | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
-----
[![PyPI version](https://img.shields.io/pypi/v/llama_stack.svg)](https://pypi.org/project/llama_stack/)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/)
[![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
@@ -9,6 +12,7 @@
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
### ✨🎉 Llama 4 Support 🎉✨
We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
@@ -179,3 +183,17 @@ Please checkout our [Documentation](https://llama-stack.readthedocs.io/en/latest
Check out our client SDKs for connecting to a Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.
You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.
## 🌟 GitHub Star History
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=meta-llama/llama-stack&type=Date)](https://www.star-history.com/#meta-llama/llama-stack&Date)
## ✨ Contributors
Thanks to all of our amazing contributors!
<a href="https://github.com/meta-llama/llama-stack/graphs/contributors">
<img src="https://contrib.rocks/image?repo=meta-llama/llama-stack" />
</a>

docs/_static/js/keyboard_shortcuts.js vendored Normal file
@@ -0,0 +1,14 @@
document.addEventListener('keydown', function(event) {
// command+K or ctrl+K
if ((event.metaKey || event.ctrlKey) && event.key === 'k') {
event.preventDefault();
document.querySelector('.search-input, .search-field, input[name="q"]').focus();
}
// forward slash
if (event.key === '/' &&
!event.target.matches('input, textarea, select')) {
event.preventDefault();
document.querySelector('.search-input, .search-field, input[name="q"]').focus();
}
});

@@ -8293,28 +8293,60 @@
"type": "array",
"items": {
"type": "object",
- "additionalProperties": {
-   "oneOf": [
-     {
-       "type": "null"
-     },
-     {
-       "type": "boolean"
-     },
-     {
-       "type": "number"
-     },
-     {
-       "type": "string"
-     },
-     {
-       "type": "array"
-     },
-     {
-       "type": "object"
-     }
-   ]
- }
+ "properties": {
+   "attributes": {
+     "type": "object",
+     "additionalProperties": {
+       "oneOf": [
+         {
+           "type": "null"
+         },
+         {
+           "type": "boolean"
+         },
+         {
+           "type": "number"
+         },
+         {
+           "type": "string"
+         },
+         {
+           "type": "array"
+         },
+         {
+           "type": "object"
+         }
+       ]
+     },
+     "description": "(Optional) Key-value attributes associated with the file"
+   },
+   "file_id": {
+     "type": "string",
+     "description": "Unique identifier of the file containing the result"
+   },
+   "filename": {
+     "type": "string",
+     "description": "Name of the file containing the result"
+   },
+   "score": {
+     "type": "number",
+     "description": "Relevance score for this search result (between 0 and 1)"
+   },
+   "text": {
+     "type": "string",
+     "description": "Text content of the search result"
+   }
+ },
+ "additionalProperties": false,
+ "required": [
+   "attributes",
+   "file_id",
+   "filename",
+   "score",
+   "text"
+ ],
+ "title": "OpenAIResponseOutputMessageFileSearchToolCallResults",
+ "description": "Search results returned by the file search operation."
},
"description": "(Optional) Search results returned by the file search operation"
}
@@ -8515,6 +8547,13 @@
"$ref": "#/components/schemas/OpenAIResponseInputTool"
}
},
"include": {
"type": "array",
"items": {
"type": "string"
},
"description": "(Optional) Additional fields to include in the response."
},
"max_infer_iters": { "max_infer_iters": {
"type": "integer" "type": "integer"
} }

@@ -6021,14 +6021,44 @@ components:
type: array
items:
  type: object
- additionalProperties:
-   oneOf:
-     - type: 'null'
-     - type: boolean
-     - type: number
-     - type: string
-     - type: array
-     - type: object
+ properties:
+   attributes:
+     type: object
+     additionalProperties:
+       oneOf:
+         - type: 'null'
+         - type: boolean
+         - type: number
+         - type: string
+         - type: array
+         - type: object
+     description: >-
+       (Optional) Key-value attributes associated with the file
+   file_id:
+     type: string
+     description: >-
+       Unique identifier of the file containing the result
+   filename:
+     type: string
+     description: Name of the file containing the result
+   score:
+     type: number
+     description: >-
+       Relevance score for this search result (between 0 and 1)
+   text:
+     type: string
+     description: Text content of the search result
+ additionalProperties: false
+ required:
+   - attributes
+   - file_id
+   - filename
+   - score
+   - text
+ title: >-
+   OpenAIResponseOutputMessageFileSearchToolCallResults
+ description: >-
+   Search results returned by the file search operation.
description: >-
  (Optional) Search results returned by the file search operation
additionalProperties: false
@@ -6188,6 +6218,12 @@ components:
type: array
items:
  $ref: '#/components/schemas/OpenAIResponseInputTool'
include:
type: array
items:
type: string
description: >-
(Optional) Additional fields to include in the response.
max_infer_iters:
  type: integer
additionalProperties: false

@@ -111,7 +111,7 @@ name = "llama-stack-api-weather"
version = "0.1.0"
description = "Weather API for Llama Stack"
readme = "README.md"
- requires-python = ">=3.10"
+ requires-python = ">=3.12"
dependencies = ["llama-stack", "pydantic"]
[build-system]
@@ -231,7 +231,7 @@ name = "llama-stack-provider-kaze"
version = "0.1.0"
description = "Kaze weather provider for Llama Stack"
readme = "README.md"
- requires-python = ">=3.10"
+ requires-python = ">=3.12"
dependencies = ["llama-stack", "pydantic", "aiohttp"]
[build-system]

@@ -131,6 +131,7 @@ html_static_path = ["../_static"]
def setup(app):
    app.add_css_file("css/my_theme.css")
    app.add_js_file("js/detect_theme.js")
    app.add_js_file("js/keyboard_shortcuts.js")
def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
    url = f"https://hub.docker.com/r/llamastack/{text}"

@@ -2,14 +2,28 @@
```{include} ../../../CONTRIBUTING.md
```
- See the [Adding a New API Provider](new_api_provider.md) which describes how to add new API providers to the Stack.
+ ## Adding a New Provider
See the [Adding a New API Provider Page](new_api_provider.md) which describes how to add new API providers to the Stack.
See the [Vector Database Page](new_vector_database.md) which describes how to add a new vector database with Llama Stack.
See the [External Provider Page](../providers/external/index.md) which describes how to add external providers to the Stack.
```{toctree}
:maxdepth: 1
:hidden:
new_api_provider
- testing
+ new_vector_database
```
## Testing
See the [Test Page](testing.md) which describes how to test your changes.
```{toctree}
:maxdepth: 1
:hidden:
:caption: Testing
testing
```

@@ -0,0 +1,75 @@
# Adding a New Vector Database
This guide will walk you through the process of adding a new vector database to Llama Stack.
> **_NOTE:_** Here's an example Pull Request of the [Milvus Vector Database Provider](https://github.com/meta-llama/llama-stack/pull/1467).
Vector Database providers are used to store and retrieve vector embeddings. Vector databases are not limited to vector
search but can support keyword and hybrid search. Additionally, vector databases can also support operations like
filtering, sorting, and aggregating vectors.
## Steps to Add a New Vector Database Provider
1. **Choose the Database Type**: Determine if your vector database is a remote service, inline, or both.
- Remote databases make requests to external services, while inline databases execute locally. Some providers support both.
2. **Implement the Provider**: Create a new provider class that inherits from `VectorDatabaseProvider` and implements the required methods.
- Implement methods for vector storage, retrieval, search, and any additional features your database supports.
- You will need to implement the following methods for `YourVectorIndex`:
- `YourVectorIndex.create()`
- `YourVectorIndex.initialize()`
- `YourVectorIndex.add_chunks()`
- `YourVectorIndex.delete_chunk()`
- `YourVectorIndex.query_vector()`
- `YourVectorIndex.query_keyword()`
- `YourVectorIndex.query_hybrid()`
- You will need to implement the following methods for `YourVectorIOAdapter`:
- `YourVectorIOAdapter.initialize()`
- `YourVectorIOAdapter.shutdown()`
- `YourVectorIOAdapter.list_vector_dbs()`
- `YourVectorIOAdapter.register_vector_db()`
- `YourVectorIOAdapter.unregister_vector_db()`
- `YourVectorIOAdapter.insert_chunks()`
- `YourVectorIOAdapter.query_chunks()`
- `YourVectorIOAdapter.delete_chunks()`
3. **Add to Registry**: Register your provider in the appropriate registry file.
- Update {repopath}`llama_stack/providers/registry/vector_io.py` to include your new provider.
```python
from llama_stack.providers.registry.specs import InlineProviderSpec
from llama_stack.providers.registry.api import Api
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::milvus",
pip_packages=["pymilvus>=2.4.10"],
module="llama_stack.providers.inline.vector_io.milvus",
config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
description="",
),
```
4. **Add Tests**: Create unit tests and integration tests for your provider in the `tests/` directory.
- Unit Tests
- By following the structure of the class methods, you will be able to easily run unit and integration tests for your database.
1. You have to configure the tests for your provider in `/tests/unit/providers/vector_io/conftest.py`.
2. Update the `vector_provider` fixture to include your provider if it is an inline provider.
3. Create a `your_vectorprovider_index` fixture that initializes your vector index.
4. Create a `your_vectorprovider_adapter` fixture that initializes your vector adapter.
5. Add your provider to the `vector_io_providers` fixture dictionary.
- Please follow the naming convention of `your_vectorprovider_index` and `your_vectorprovider_adapter` as the tests require this to execute properly.
- Integration Tests
- Integration tests are located in {repopath}`tests/integration`. These tests use the python client-SDK APIs (from the `llama_stack_client` package) to test functionality.
- The two sets of integration tests are:
- `tests/integration/vector_io/test_vector_io.py`: This file tests registration, insertion, and retrieval.
- `tests/integration/vector_io/test_openai_vector_stores.py`: These tests are for OpenAI-compatible vector stores and test the OpenAI API compatibility.
- You will need to update `skip_if_provider_doesnt_support_openai_vector_stores` to include your provider as well as `skip_if_provider_doesnt_support_openai_vector_stores_search` to test the appropriate search functionality.
- Running the tests in the GitHub CI
- You will need to update the `.github/workflows/integration-vector-io-tests.yml` file to include your provider.
- If your provider is a remote provider, you will also have to add a container to spin up and run it in the action.
- Updating the pyproject.toml
- If you are adding tests for the `inline` provider you will have to update the `unit` group.
- `uv add new_pip_package --group unit`
- If you are adding tests for the `remote` provider you will have to update the `test` group, which is used in the GitHub CI for integration tests.
- `uv add new_pip_package --group test`
5. **Update Documentation**: Please update the documentation for end users
- Generate the provider documentation by running {repopath}`./scripts/provider_codegen.py`.
- Update the autogenerated content in the registry/vector_io.py file with information about your provider. Please see other providers for examples.

@@ -1,6 +1,8 @@
- # Testing Llama Stack
- Tests are of three different kinds:
- - Unit tests
- - Provider focused integration tests
- - Client SDK tests
+ ```{include} ../../../tests/README.md
+ ```
+ ```{include} ../../../tests/unit/README.md
+ ```
+ ```{include} ../../../tests/integration/README.md
+ ```

@@ -226,7 +226,7 @@ uv init
name = "llama-stack-provider-ollama"
version = "0.1.0"
description = "Ollama provider for Llama Stack"
- requires-python = ">=3.10"
+ requires-python = ">=3.12"
dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"]
```

@@ -35,6 +35,7 @@ remote_runpod
remote_sambanova
remote_tgi
remote_together
remote_vertexai
remote_vllm
remote_watsonx
```

@@ -0,0 +1,40 @@
# remote::vertexai
## Description
Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:
• Enterprise-grade security: Uses Google Cloud's security controls and IAM
• Better integration: Seamless integration with other Google Cloud services
• Advanced features: Access to additional Vertex AI features like model tuning and monitoring
• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys
Configuration:
- Set VERTEX_AI_PROJECT environment variable (required)
- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
- Use Google Cloud Application Default Credentials or service account key
Authentication Setup:
Option 1 (Recommended): gcloud auth application-default login
Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path
Available Models:
- vertex_ai/gemini-2.0-flash
- vertex_ai/gemini-2.5-flash
- vertex_ai/gemini-2.5-pro
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI |
| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |
## Sample Configuration
```yaml
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
```
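As a rough usage sketch (not part of this change), a stack running with this provider can be queried through the Python client against one of the Gemini models listed above. The base URL, model alias, and exact client call below are assumptions about your local setup, not guarantees from this diff:

```python
# Hedged sketch: chat completion against a Vertex AI-backed model via a running
# Llama Stack server. Assumes `llama-stack-client` is installed and the server
# is listening on localhost:8321 with the remote::vertexai provider enabled.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.inference.chat_completion(
    model_id="vertex_ai/gemini-2.0-flash",  # one of the models listed above
    messages=[{"role": "user", "content": "Say hello from Vertex AI"}],
)
print(response.completion_message.content)
```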

@@ -12,6 +12,18 @@ That means you'll get fast and efficient vector retrieval.
- Lightweight and easy to use
- Fully integrated with Llama Stack
- GPU support
- **Vector search** - FAISS supports pure vector similarity search using embeddings
## Search Modes
**Supported:**
- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings
**Not Supported:**
- **Keyword Search** (`mode="keyword"`): Not supported by FAISS
- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS
> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality.
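For reference, a vector-mode query against a FAISS-backed store uses the same client call shown for other vector providers; a minimal sketch (client and vector store creation are assumed to exist already):

```python
# Vector search sketch for a FAISS-backed vector store (client setup assumed)
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="What is machine learning?",
    search_mode="vector",  # the only mode FAISS supports
    max_num_results=5,
)
```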
## Usage

@@ -11,6 +11,7 @@ That means you're not limited to storing vectors in memory or in a separate serv
- Easy to use
- Fully integrated with Llama Stack
- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations)
## Usage
@@ -101,6 +102,92 @@ vector_io:
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
## Search Modes
Milvus supports three different search modes for both inline and remote configurations:
### Vector Search
Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.
```python
# Vector search example
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="What is machine learning?",
search_mode="vector",
max_num_results=5,
)
```
### Keyword Search
Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.
```python
# Keyword search example
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="Python programming language",
search_mode="keyword",
max_num_results=5,
)
```
### Hybrid Search
Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
#### Basic Hybrid Search
```python
# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0)
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="neural networks in Python",
search_mode="hybrid",
max_num_results=5,
)
```
**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009).
#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker
RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results.
```python
# Hybrid search with custom RRF parameters
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="neural networks in Python",
search_mode="hybrid",
max_num_results=5,
ranking_options={
"ranker": {
"type": "rrf",
"impact_factor": 100.0, # Higher values give more weight to top-ranked results
}
},
)
```
#### Hybrid Search with Weighted Ranker
Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods.
```python
# Hybrid search with weighted ranker
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="neural networks in Python",
search_mode="hybrid",
max_num_results=5,
ranking_options={
"ranker": {
"type": "weighted",
"alpha": 0.7, # 70% vector search, 30% keyword search
}
},
)
```
For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md).
## Documentation
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.

@@ -706,6 +706,7 @@ class Agents(Protocol):
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a new OpenAI response.
@@ -713,6 +714,7 @@ class Agents(Protocol):
:param input: Input message(s) to create the response.
:param model: The underlying LLM used for completions.
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
:param include: (Optional) Additional fields to include in the response.
:returns: An OpenAIResponseObject.
"""
...
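As an illustrative call against this signature: the new `include` argument is passed alongside the existing parameters. The implementation object and the include value shown are assumptions for illustration, not defined by this diff:

```python
# Hypothetical sketch of the extended call; `impl` is some Agents implementation
# and "file_search_call.results" is an assumed example include value.
response = await impl.create_openai_response(
    input="What is Llama Stack?",
    model="meta-llama/Llama-3.1-8B-Instruct",
    include=["file_search_call.results"],
)
```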

@@ -170,6 +170,23 @@ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
type: Literal["web_search_call"] = "web_search_call"
class OpenAIResponseOutputMessageFileSearchToolCallResults(BaseModel):
"""Search results returned by the file search operation.
:param attributes: (Optional) Key-value attributes associated with the file
:param file_id: Unique identifier of the file containing the result
:param filename: Name of the file containing the result
:param score: Relevance score for this search result (between 0 and 1)
:param text: Text content of the search result
"""
attributes: dict[str, Any]
file_id: str
filename: str
score: float
text: str
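To make the new schema concrete, here is a hedged construction example for the model above (the field values are invented for illustration):

```python
# Illustrative only: building one typed file-search result entry
result = OpenAIResponseOutputMessageFileSearchToolCallResults(
    attributes={},
    file_id="file_abc123",
    filename="quarterly_report.md",
    score=0.87,
    text="Revenue grew 12% quarter over quarter...",
)
```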
@json_schema_type
class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
"""File search tool call output message for OpenAI responses.
@@ -185,7 +202,7 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
queries: list[str]
status: str
type: Literal["file_search_call"] = "file_search_call"
- results: list[dict[str, Any]] | None = None
+ results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
@json_schema_type

@@ -67,5 +67,14 @@ class SessionNotFoundError(ValueError):
class ConflictError(ValueError):
    """raised when an operation cannot be performed due to a conflict with the current state"""
-   def __init__(self, message: str) -> None:
+   pass
class ModelTypeError(TypeError):
"""raised when a model is present but not the correct type"""
def __init__(self, model_name: str, model_type: str, expected_model_type: str) -> None:
message = (
f"Model '{model_name}' is of type '{model_type}' rather than the expected type '{expected_model_type}'"
)
super().__init__(message)
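For clarity, a short sketch of how callers are expected to raise the new error; the helper and model names are illustrative, but the import path and message format come from this diff:

```python
from llama_stack.apis.common.errors import ModelTypeError

def ensure_llm(model_name: str, model_type: str) -> None:
    # Mirrors the router's new check: reject non-LLM models up front
    if model_type != "llm":
        raise ModelTypeError(model_name, model_type, "llm")

ensure_llm("all-MiniLM-L6-v2", "embedding")
# TypeError: Model 'all-MiniLM-L6-v2' is of type 'embedding' rather than the expected type 'llm'
```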

@@ -91,7 +91,7 @@ def get_provider_dependencies(
def print_pip_install_help(config: BuildConfig):
-   normal_deps, special_deps = get_provider_dependencies(config)
+   normal_deps, special_deps, _ = get_provider_dependencies(config)
cprint(
f"Please install needed dependencies using the following commands:\n\nuv pip install {' '.join(normal_deps)}",

@@ -18,7 +18,7 @@ from llama_stack.apis.common.content_types import (
InterleavedContent,
InterleavedContentItem,
)
- from llama_stack.apis.common.errors import ModelNotFoundError
+ from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.inference import (
BatchChatCompletionResponse,
BatchCompletionResponse,
@@ -65,7 +65,7 @@ from llama_stack.providers.datatypes import HealthResponse, HealthStatus, Routin
from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack.providers.utils.telemetry.tracing import get_current_span
- logger = get_logger(name=__name__, category="core")
+ logger = get_logger(name=__name__, category="inference")
class InferenceRouter(Inference):
@@ -177,6 +177,15 @@ class InferenceRouter(Inference):
encoded = self.formatter.encode_content(messages)
return len(encoded.tokens) if encoded and encoded.tokens else 0
async def _get_model(self, model_id: str, expected_model_type: str) -> Model:
"""takes a model id and gets model after ensuring that it is accessible and of the correct type"""
model = await self.routing_table.get_model(model_id)
if model is None:
raise ModelNotFoundError(model_id)
if model.model_type != expected_model_type:
raise ModelTypeError(model_id, model.model_type, expected_model_type)
return model
async def chat_completion(
self,
model_id: str,
@@ -195,11 +204,7 @@
)
if sampling_params is None:
sampling_params = SamplingParams()
- model = await self.routing_table.get_model(model_id)
- if model is None:
-     raise ModelNotFoundError(model_id)
- if model.model_type == ModelType.embedding:
-     raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
+ model = await self._get_model(model_id, ModelType.llm)
if tool_config:
if tool_choice and tool_choice != tool_config.tool_choice:
raise ValueError("tool_choice and tool_config.tool_choice must match")
@@ -301,11 +306,7 @@
logger.debug(
f"InferenceRouter.completion: {model_id=}, {stream=}, {content=}, {sampling_params=}, {response_format=}",
)
- model = await self.routing_table.get_model(model_id)
- if model is None:
-     raise ModelNotFoundError(model_id)
- if model.model_type == ModelType.embedding:
-     raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
+ model = await self._get_model(model_id, ModelType.llm)
provider = await self.routing_table.get_provider_impl(model_id)
params = dict(
model_id=model_id,
@@ -355,11 +356,7 @@
task_type: EmbeddingTaskType | None = None,
) -> EmbeddingsResponse:
logger.debug(f"InferenceRouter.embeddings: {model_id}")
- model = await self.routing_table.get_model(model_id)
- if model is None:
-     raise ModelNotFoundError(model_id)
- if model.model_type == ModelType.llm:
-     raise ValueError(f"Model '{model_id}' is an LLM model and does not support embeddings")
+ await self._get_model(model_id, ModelType.embedding)
provider = await self.routing_table.get_provider_impl(model_id)
return await provider.embeddings(
model_id=model_id,
@@ -395,12 +392,7 @@
logger.debug(
f"InferenceRouter.openai_completion: {model=}, {stream=}, {prompt=}",
)
- model_obj = await self.routing_table.get_model(model)
- if model_obj is None:
-     raise ModelNotFoundError(model)
- if model_obj.model_type == ModelType.embedding:
-     raise ValueError(f"Model '{model}' is an embedding model and does not support completions")
+ model_obj = await self._get_model(model, ModelType.llm)
params = dict(
model=model_obj.identifier,
prompt=prompt,
@@ -476,11 +468,7 @@
logger.debug(
f"InferenceRouter.openai_chat_completion: {model=}, {stream=}, {messages=}",
)
- model_obj = await self.routing_table.get_model(model)
- if model_obj is None:
-     raise ModelNotFoundError(model)
- if model_obj.model_type == ModelType.embedding:
-     raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions")
+ model_obj = await self._get_model(model, ModelType.llm)
# Use the OpenAI client for a bit of extra input validation without
# exposing the OpenAI client itself as part of our API surface
@@ -567,12 +555,7 @@
logger.debug(
f"InferenceRouter.openai_embeddings: {model=}, input_type={type(input)}, {encoding_format=}, {dimensions=}",
)
- model_obj = await self.routing_table.get_model(model)
- if model_obj is None:
-     raise ModelNotFoundError(model)
- if model_obj.model_type != ModelType.embedding:
-     raise ValueError(f"Model '{model}' is not an embedding model")
+ model_obj = await self._get_model(model, ModelType.embedding)
params = dict(
model=model_obj.identifier,
input=input,
@@ -871,4 +854,5 @@
model=model.identifier,
object="chat.completion",
)
logger.debug(f"InferenceRouter.completion_response: {final_response}")
await self.store.store_chat_completion(final_response, messages)

@@ -63,6 +63,8 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
async def get_provider_impl(self, model_id: str) -> Any:
model = await lookup_model(self, model_id)
if model.provider_id not in self.impls_by_provider_id:
    raise ValueError(f"Provider {model.provider_id} not found in the routing table")
return self.impls_by_provider_id[model.provider_id]
async def register_model(

@@ -124,10 +124,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
return toolgroup
async def unregister_toolgroup(self, toolgroup_id: str) -> None:
- tool_group = await self.get_tool_group(toolgroup_id)
- if tool_group is None:
-     raise ToolGroupNotFoundError(toolgroup_id)
- await self.unregister_object(tool_group)
+ await self.unregister_object(await self.get_tool_group(toolgroup_id))
async def shutdown(self) -> None:
pass

@@ -8,7 +8,7 @@ from typing import Any
from pydantic import TypeAdapter
- from llama_stack.apis.common.errors import ModelNotFoundError, VectorStoreNotFoundError
+ from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError, VectorStoreNotFoundError
from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs
@@ -66,7 +66,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
if model is None:
raise ModelNotFoundError(embedding_model)
if model.model_type != ModelType.embedding:
- raise ValueError(f"Model {embedding_model} is not an embedding model")
+ raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
if "embedding_dimension" not in model.metadata:
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
vector_db_data = {

@@ -14,6 +14,7 @@ distribution_spec:
- provider_type: remote::openai
- provider_type: remote::anthropic
- provider_type: remote::gemini
- provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
- provider_type: inline::sentence-transformers

@@ -65,6 +65,11 @@ providers:
provider_type: remote::gemini
config:
  api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
  provider_type: remote::vertexai
  config:
    project: ${env.VERTEX_AI_PROJECT:=}
    location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
  provider_type: remote::groq
  config:

@@ -14,6 +14,7 @@ distribution_spec:
- provider_type: remote::openai
- provider_type: remote::anthropic
- provider_type: remote::gemini
- provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
- provider_type: inline::sentence-transformers

@@ -65,6 +65,11 @@ providers:
provider_type: remote::gemini
config:
  api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
  provider_type: remote::vertexai
  config:
    project: ${env.VERTEX_AI_PROJECT:=}
    location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
  provider_type: remote::groq
  config:

@@ -56,6 +56,7 @@ ENABLED_INFERENCE_PROVIDERS = [
"fireworks",
"together",
"gemini",
"vertexai",
"groq",
"sambanova",
"anthropic",
@@ -71,6 +72,7 @@ INFERENCE_PROVIDER_IDS = {
"tgi": "${env.TGI_URL:+tgi}",
"cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
"nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
"vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
}
@@ -246,6 +248,14 @@ def get_distribution_template() -> DistributionTemplate:
"",
"Gemini API Key",
),
"VERTEX_AI_PROJECT": (
"",
"Google Cloud Project ID for Vertex AI",
),
"VERTEX_AI_LOCATION": (
"us-central1",
"Google Cloud Location for Vertex AI",
),
"SAMBANOVA_API_KEY": ( "SAMBANOVA_API_KEY": (
"", "",
"SambaNova API Key", "SambaNova API Key",

@@ -32,6 +32,7 @@ CATEGORIES = [
"tools",
"client",
"telemetry",
"openai_responses",
]
# Initialize category levels with default level
@@ -99,7 +100,8 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
Dict[str, int]: A dictionary mapping categories to their log levels.
"""
category_levels = {}
- for pair in env_config.split(";"):
+ delimiter = ","
+ for pair in env_config.split(delimiter):
if not pair.strip():
continue
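To illustrate the change: category log-level overrides are now comma-separated `category=level` pairs rather than semicolon-separated. A minimal standalone sketch of the new format (this mimics the parsing shown above; the real parser also maps level names to logging constants, which is omitted here):

```python
# Standalone sketch of the comma-separated "category=level" format
def parse_pairs(env_config: str) -> dict[str, str]:
    levels: dict[str, str] = {}
    for pair in env_config.split(","):  # "," is the new delimiter
        if not pair.strip():
            continue
        category, _, level = pair.partition("=")
        levels[category.strip()] = level.strip()
    return levels

print(parse_pairs("core=debug,openai_responses=info"))
# {'core': 'debug', 'openai_responses': 'info'}
```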

@@ -236,6 +236,7 @@ class ChatFormat:
arguments_json=json.dumps(tool_arguments),
)
)
content = ""
return RawMessage(
role="assistant",

@@ -327,10 +327,21 @@ class MetaReferenceAgentsImpl(Agents):
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
- input, model, instructions, previous_response_id, store, stream, temperature, text, tools, max_infer_iters
+ input,
model,
instructions,
previous_response_id,
store,
stream,
temperature,
text,
tools,
include,
max_infer_iters,
)
async def list_openai_responses(

@@ -38,6 +38,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageContent,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFileSearchToolCallResults,
OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseOutputMessageWebSearchToolCall,
@@ -333,6 +334,7 @@ class OpenAIResponsesImpl:
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
max_infer_iters: int | None = 10,
):
stream = bool(stream)
@@ -486,8 +488,12 @@
# Convert collected chunks to complete response
if chat_response_tool_calls:
tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
# when there are tool calls, we need to clear the content
chat_response_content = []
else:
tool_calls = None
assistant_message = OpenAIAssistantMessageParam(
content="".join(chat_response_content),
tool_calls=tool_calls,
@@ -826,12 +832,13 @@
text = result.metadata["chunks"][i] if "chunks" in result.metadata else None
score = result.metadata["scores"][i] if "scores" in result.metadata else None
message.results.append(
- {
-     "file_id": doc_id,
-     "filename": doc_id,
-     "text": text,
-     "score": score,
- }
+ OpenAIResponseOutputMessageFileSearchToolCallResults(
+     file_id=doc_id,
+     filename=doc_id,
+     text=text,
+     score=score,
+     attributes={},
+ )
)
if error_exc or (result.error_code and result.error_code > 0) or result.error_message:
message.status = "failed"

@@ -15,6 +15,7 @@ from llama_stack.apis.safety import (
RunShieldResponse,
Safety,
SafetyViolation,
ShieldStore,
ViolationLevel,
)
from llama_stack.apis.shields import Shield
@@ -32,6 +33,8 @@ PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
shield_store: ShieldStore
def __init__(self, config: PromptGuardConfig, _deps) -> None:
self.config = config
@@ -53,7 +56,7 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
self,
shield_id: str,
messages: list[Message],
- params: dict[str, Any] = None,
+ params: dict[str, Any],
) -> RunShieldResponse:
shield = await self.shield_store.get_shield(shield_id)
if not shield:
@@ -61,6 +64,9 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
return await self.shield.run(messages)
async def run_moderation(self, input: str | list[str], model: str):
raise NotImplementedError("run_moderation not implemented for PromptGuard")
class PromptGuardShield:
def __init__(
@@ -117,8 +123,10 @@ class PromptGuardShield:
elif self.config.guard_type == PromptGuardType.jailbreak.value and score_malicious > self.threshold:
violation = SafetyViolation(
violation_level=ViolationLevel.ERROR,
- violation_type=f"prompt_injection:malicious={score_malicious}",
- violation_return_message="Sorry, I cannot do this.",
+ user_message="Sorry, I cannot do this.",
+ metadata={
"violation_type": f"prompt_injection:malicious={score_malicious}",
},
) )
return RunShieldResponse(violation=violation) return RunShieldResponse(violation=violation)

View file

@ -33,6 +33,7 @@ from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ( from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorDBWithIndex,
) )
@ -128,11 +129,12 @@ class FaissIndex(EmbeddingIndex):
# Save updated index # Save updated index
await self._save_index() await self._save_index()
async def delete_chunk(self, chunk_id: str) -> None: async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
if chunk_id not in self.chunk_ids: chunk_ids = [c.chunk_id for c in chunks_for_deletion]
if not set(chunk_ids).issubset(self.chunk_ids):
return return
async with self.chunk_id_lock: def remove_chunk(chunk_id: str):
index = self.chunk_ids.index(chunk_id) index = self.chunk_ids.index(chunk_id)
self.index.remove_ids(np.array([index])) self.index.remove_ids(np.array([index]))
@ -146,6 +148,10 @@ class FaissIndex(EmbeddingIndex):
self.chunk_by_index = new_chunk_by_index self.chunk_by_index = new_chunk_by_index
self.chunk_ids.pop(index) self.chunk_ids.pop(index)
async with self.chunk_id_lock:
for chunk_id in chunk_ids:
remove_chunk(chunk_id)
await self._save_index() await self._save_index()
async def query_vector( async def query_vector(
@ -174,7 +180,9 @@ class FaissIndex(EmbeddingIndex):
k: int, k: int,
score_threshold: float, score_threshold: float,
) -> QueryChunksResponse: ) -> QueryChunksResponse:
raise NotImplementedError("Keyword search is not supported in FAISS") raise NotImplementedError(
"Keyword search is not supported - underlying DB FAISS does not support this search mode"
)
async def query_hybrid( async def query_hybrid(
self, self,
@ -185,7 +193,9 @@ class FaissIndex(EmbeddingIndex):
reranker_type: str, reranker_type: str,
reranker_params: dict[str, Any] | None = None, reranker_params: dict[str, Any] | None = None,
) -> QueryChunksResponse: ) -> QueryChunksResponse:
raise NotImplementedError("Hybrid search is not supported in FAISS") raise NotImplementedError(
"Hybrid search is not supported - underlying DB FAISS does not support this search mode"
)
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
@ -293,8 +303,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
return await index.query_chunks(query, params) return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunk_ids: list[str]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a faiss index""" """Delete chunks from a faiss index"""
faiss_index = self.cache[store_id].index faiss_index = self.cache[store_id].index
for chunk_id in chunk_ids: await faiss_index.delete_chunks(chunks_for_deletion)
await faiss_index.delete_chunk(chunk_id)

View file

@ -31,6 +31,7 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV
from llama_stack.providers.utils.memory.vector_store import ( from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_RRF, RERANKER_TYPE_RRF,
RERANKER_TYPE_WEIGHTED, RERANKER_TYPE_WEIGHTED,
ChunkForDeletion,
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorDBWithIndex,
) )
@ -426,34 +427,36 @@ class SQLiteVecIndex(EmbeddingIndex):
return QueryChunksResponse(chunks=chunks, scores=scores) return QueryChunksResponse(chunks=chunks, scores=scores)
async def delete_chunk(self, chunk_id: str) -> None: async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Remove a chunk from the SQLite vector store.""" """Remove a chunk from the SQLite vector store."""
chunk_ids = [c.chunk_id for c in chunks_for_deletion]
def _delete_chunk(): def _delete_chunks():
connection = _create_sqlite_connection(self.db_path) connection = _create_sqlite_connection(self.db_path)
cur = connection.cursor() cur = connection.cursor()
try: try:
cur.execute("BEGIN TRANSACTION") cur.execute("BEGIN TRANSACTION")
# Delete from metadata table # Delete from metadata table
cur.execute(f"DELETE FROM {self.metadata_table} WHERE id = ?", (chunk_id,)) placeholders = ",".join("?" * len(chunk_ids))
cur.execute(f"DELETE FROM {self.metadata_table} WHERE id IN ({placeholders})", chunk_ids)
# Delete from vector table # Delete from vector table
cur.execute(f"DELETE FROM {self.vector_table} WHERE id = ?", (chunk_id,)) cur.execute(f"DELETE FROM {self.vector_table} WHERE id IN ({placeholders})", chunk_ids)
# Delete from FTS table # Delete from FTS table
cur.execute(f"DELETE FROM {self.fts_table} WHERE id = ?", (chunk_id,)) cur.execute(f"DELETE FROM {self.fts_table} WHERE id IN ({placeholders})", chunk_ids)
connection.commit() connection.commit()
except Exception as e: except Exception as e:
connection.rollback() connection.rollback()
logger.error(f"Error deleting chunk {chunk_id}: {e}") logger.error(f"Error deleting chunks: {e}")
raise raise
finally: finally:
cur.close() cur.close()
connection.close() connection.close()
await asyncio.to_thread(_delete_chunk) await asyncio.to_thread(_delete_chunks)
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
@ -551,12 +554,10 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
raise VectorStoreNotFoundError(vector_db_id) raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params) return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunk_ids: list[str]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a sqlite_vec index.""" """Delete chunks from a sqlite_vec index."""
index = await self._get_and_cache_vector_db_index(store_id) index = await self._get_and_cache_vector_db_index(store_id)
if not index: if not index:
raise VectorStoreNotFoundError(store_id) raise VectorStoreNotFoundError(store_id)
for chunk_id in chunk_ids: await index.index.delete_chunks(chunks_for_deletion)
# Use the index's delete_chunk method
await index.index.delete_chunk(chunk_id)

View file

@ -213,6 +213,36 @@ def available_providers() -> list[ProviderSpec]:
description="Google Gemini inference provider for accessing Gemini models and Google's AI services.", description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
), ),
), ),
remote_provider_spec(
api=Api.inference,
adapter=AdapterSpec(
adapter_type="vertexai",
pip_packages=["litellm", "google-cloud-aiplatform"],
module="llama_stack.providers.remote.inference.vertexai",
config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:
- Enterprise-grade security: Uses Google Cloud's security controls and IAM
- Better integration: Seamless integration with other Google Cloud services
- Advanced features: Access to additional Vertex AI features like model tuning and monitoring
- Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys
Configuration:
- Set VERTEX_AI_PROJECT environment variable (required)
- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
- Use Google Cloud Application Default Credentials or service account key
Authentication Setup:
Option 1 (Recommended): gcloud auth application-default login
Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path
Available Models:
- vertex_ai/gemini-2.0-flash
- vertex_ai/gemini-2.5-flash
- vertex_ai/gemini-2.5-pro""",
),
),
remote_provider_spec( remote_provider_spec(
api=Api.inference, api=Api.inference,
adapter=AdapterSpec( adapter=AdapterSpec(

View file

@ -45,6 +45,18 @@ That means you'll get fast and efficient vector retrieval.
- Lightweight and easy to use - Lightweight and easy to use
- Fully integrated with Llama Stack - Fully integrated with Llama Stack
- GPU support - GPU support
- **Vector search** - FAISS supports pure vector similarity search using embeddings
## Search Modes
**Supported:**
- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings
**Not Supported:**
- **Keyword Search** (`mode="keyword"`): Not supported by FAISS
- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS
> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality.
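For illustration, querying a FAISS-backed store uses the same vector-store search call as the other providers; the sketch below assumes an existing `client` (for example a `LlamaStackClient`) and a previously created `vector_store`, mirroring the Milvus examples later in this file.
```python
# Vector search example (the only mode FAISS supports)
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="What is machine learning?",
    search_mode="vector",
    max_num_results=5,
)
```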
## Usage ## Usage
@ -330,6 +342,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
""", """,
), ),
api_dependencies=[Api.inference], api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
), ),
InlineProviderSpec( InlineProviderSpec(
api=Api.vector_io, api=Api.vector_io,
@ -338,6 +351,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
module="llama_stack.providers.inline.vector_io.chroma", module="llama_stack.providers.inline.vector_io.chroma",
config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig", config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
api_dependencies=[Api.inference], api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
description=""" description="""
[Chroma](https://www.trychroma.com/) is an inline and remote vector [Chroma](https://www.trychroma.com/) is an inline and remote vector
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
@ -452,6 +466,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
""", """,
), ),
api_dependencies=[Api.inference], api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
), ),
InlineProviderSpec( InlineProviderSpec(
api=Api.vector_io, api=Api.vector_io,
@ -535,6 +550,7 @@ That means you're not limited to storing vectors in memory or in a separate serv
- Easy to use - Easy to use
- Fully integrated with Llama Stack - Fully integrated with Llama Stack
- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations)
## Usage ## Usage
@ -625,6 +641,92 @@ vector_io:
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
- **`client_key_path`**: Path to the **client private key** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS).
## Search Modes
Milvus supports three different search modes for both inline and remote configurations:
### Vector Search
Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.
```python
# Vector search example
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="What is machine learning?",
search_mode="vector",
max_num_results=5,
)
```
### Keyword Search
Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.
```python
# Keyword search example
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="Python programming language",
search_mode="keyword",
max_num_results=5,
)
```
### Hybrid Search
Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
#### Basic Hybrid Search
```python
# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0)
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="neural networks in Python",
search_mode="hybrid",
max_num_results=5,
)
```
**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009).
#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker
RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results.
```python
# Hybrid search with custom RRF parameters
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="neural networks in Python",
search_mode="hybrid",
max_num_results=5,
ranking_options={
"ranker": {
"type": "rrf",
"impact_factor": 100.0, # Higher values give more weight to top-ranked results
}
},
)
```
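For reference, a minimal sketch of the fusion itself, assuming `impact_factor` plays the role of the constant k in the RRF formula from the paper cited above (ranks start at 1; the function name is illustrative):
```python
# Reciprocal Rank Fusion: a result's fused score is the sum of 1 / (impact_factor + rank)
# over the rankings (vector and keyword) in which it appears.
def rrf_score(ranks: list[int], impact_factor: float = 60.0) -> float:
    return sum(1.0 / (impact_factor + rank) for rank in ranks)


# A chunk ranked 1st by vector search and 3rd by keyword search:
print(rrf_score([1, 3]))  # ~0.0323
```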
#### Hybrid Search with Weighted Ranker
Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods.
```python
# Hybrid search with weighted ranker
search_response = client.vector_stores.search(
vector_store_id=vector_store.id,
query="neural networks in Python",
search_mode="hybrid",
max_num_results=5,
ranking_options={
"ranker": {
"type": "weighted",
"alpha": 0.7, # 70% vector search, 30% keyword search
}
},
)
```
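As a sketch of what the weighted ranker computes, assuming the two scores are already normalized and `alpha` is applied to the vector-search score as the comment above indicates:
```python
# Weighted ranker: convex combination of the two normalized scores.
def weighted_score(vector_score: float, keyword_score: float, alpha: float = 0.7) -> float:
    return alpha * vector_score + (1 - alpha) * keyword_score


print(weighted_score(vector_score=0.9, keyword_score=0.4))  # 0.75 with alpha=0.7
```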
For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md).
## Documentation ## Documentation
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
@ -632,6 +734,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
""", """,
), ),
api_dependencies=[Api.inference], api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
), ),
InlineProviderSpec( InlineProviderSpec(
api=Api.vector_io, api=Api.vector_io,

View file

@ -235,6 +235,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
llama_model = self.get_llama_model(request.model) llama_model = self.get_llama_model(request.model)
if isinstance(request, ChatCompletionRequest): if isinstance(request, ChatCompletionRequest):
# TODO: tools are never added to the request, so we need to add them here
if media_present or not llama_model: if media_present or not llama_model:
input_dict["messages"] = [ input_dict["messages"] = [
await convert_message_to_openai_dict(m, download=True) for m in request.messages await convert_message_to_openai_dict(m, download=True) for m in request.messages
@ -378,6 +379,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
# Fireworks chat completions OpenAI-compatible API does not support # Fireworks chat completions OpenAI-compatible API does not support
# tool calls properly. # tool calls properly.
llama_model = self.get_llama_model(model_obj.provider_resource_id) llama_model = self.get_llama_model(model_obj.provider_resource_id)
if llama_model: if llama_model:
return await OpenAIChatCompletionToLlamaStackMixin.openai_chat_completion( return await OpenAIChatCompletionToLlamaStackMixin.openai_chat_completion(
self, self,
@ -431,4 +433,5 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
user=user, user=user,
) )
logger.debug(f"fireworks params: {params}")
return await self._get_openai_client().chat.completions.create(model=model_obj.provider_resource_id, **params) return await self._get_openai_client().chat.completions.create(model=model_obj.provider_resource_id, **params)

View file

@ -457,9 +457,6 @@ class OllamaInferenceAdapter(
user: str | None = None, user: str | None = None,
) -> OpenAIEmbeddingsResponse: ) -> OpenAIEmbeddingsResponse:
model_obj = await self._get_model(model) model_obj = await self._get_model(model)
if model_obj.model_type != ModelType.embedding:
raise ValueError(f"Model {model} is not an embedding model")
if model_obj.provider_resource_id is None: if model_obj.provider_resource_id is None:
raise ValueError(f"Model {model} has no provider_resource_id set") raise ValueError(f"Model {model} has no provider_resource_id set")

View file

@ -0,0 +1,15 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .config import VertexAIConfig
async def get_adapter_impl(config: VertexAIConfig, _deps):
from .vertexai import VertexAIInferenceAdapter
impl = VertexAIInferenceAdapter(config)
await impl.initialize()
return impl

View file

@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from pydantic import BaseModel, Field
from llama_stack.schema_utils import json_schema_type
class VertexAIProviderDataValidator(BaseModel):
vertex_project: str | None = Field(
default=None,
description="Google Cloud project ID for Vertex AI",
)
vertex_location: str | None = Field(
default=None,
description="Google Cloud location for Vertex AI (e.g., us-central1)",
)
@json_schema_type
class VertexAIConfig(BaseModel):
project: str = Field(
description="Google Cloud project ID for Vertex AI",
)
location: str = Field(
default="us-central1",
description="Google Cloud location for Vertex AI",
)
@classmethod
def sample_run_config(
cls,
project: str = "${env.VERTEX_AI_PROJECT:=}",
location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
**kwargs,
) -> dict[str, Any]:
return {
"project": project,
"location": location,
}
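For orientation only, a small sketch of how this config can be exercised directly; the project ID below is a placeholder, not a value taken from this change:
```python
from llama_stack.providers.remote.inference.vertexai import VertexAIConfig

# Placeholder project ID for illustration; location falls back to the field default
config = VertexAIConfig(project="my-gcp-project")
print(config.location)  # "us-central1"

# sample_run_config() emits env-var placeholders that a run config can substitute
print(VertexAIConfig.sample_run_config())
# {"project": "${env.VERTEX_AI_PROJECT:=}", "location": "${env.VERTEX_AI_LOCATION:=us-central1}"}
```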

View file

@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry,
)
# Vertex AI model IDs with vertex_ai/ prefix as required by litellm
LLM_MODEL_IDS = [
"vertex_ai/gemini-2.0-flash",
"vertex_ai/gemini-2.5-flash",
"vertex_ai/gemini-2.5-pro",
]
SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]()
MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES

View file

@ -0,0 +1,52 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.apis.inference import ChatCompletionRequest
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
LiteLLMOpenAIMixin,
)
from .config import VertexAIConfig
from .models import MODEL_ENTRIES
class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
def __init__(self, config: VertexAIConfig) -> None:
LiteLLMOpenAIMixin.__init__(
self,
MODEL_ENTRIES,
litellm_provider_name="vertex_ai",
api_key_from_config=None, # Vertex AI uses ADC, not API keys
provider_data_api_key_field="vertex_project", # Use project for validation
)
self.config = config
def get_api_key(self) -> str:
# Vertex AI doesn't use API keys, it uses Application Default Credentials
# Return empty string to let litellm handle authentication via ADC
return ""
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
# Get base parameters from parent
params = await super()._get_params(request)
# Add Vertex AI specific parameters
provider_data = self.get_request_provider_data()
if provider_data:
if getattr(provider_data, "vertex_project", None):
params["vertex_project"] = provider_data.vertex_project
if getattr(provider_data, "vertex_location", None):
params["vertex_location"] = provider_data.vertex_location
else:
params["vertex_project"] = self.config.project
params["vertex_location"] = self.config.location
# Remove api_key since Vertex AI uses ADC
params.pop("api_key", None)
return params

View file

@ -26,6 +26,7 @@ from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ( from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorDBWithIndex,
) )
@ -115,8 +116,10 @@ class ChromaIndex(EmbeddingIndex):
) -> QueryChunksResponse: ) -> QueryChunksResponse:
raise NotImplementedError("Keyword search is not supported in Chroma") raise NotImplementedError("Keyword search is not supported in Chroma")
async def delete_chunk(self, chunk_id: str) -> None: async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
raise NotImplementedError("delete_chunk is not supported in Chroma") """Delete a single chunk from the Chroma collection by its ID."""
ids = [f"{chunk.document_id}:{chunk.chunk_id}" for chunk in chunks_for_deletion]
await maybe_await(self.collection.delete(ids=ids))
async def query_hybrid( async def query_hybrid(
self, self,
@ -144,6 +147,7 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.cache = {} self.cache = {}
self.kvstore: KVStore | None = None self.kvstore: KVStore | None = None
self.vector_db_store = None self.vector_db_store = None
self.files_api = files_api
async def initialize(self) -> None: async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore) self.kvstore = await kvstore_impl(self.config.kvstore)
@ -227,5 +231,10 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.cache[vector_db_id] = index self.cache[vector_db_id] = index
return index return index
async def delete_chunks(self, store_id: str, chunk_ids: list[str]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma") """Delete chunks from a Chroma vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
if not index:
raise ValueError(f"Vector DB {store_id} not found")
await index.index.delete_chunks(chunks_for_deletion)

View file

@ -28,6 +28,7 @@ from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ( from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_WEIGHTED, RERANKER_TYPE_WEIGHTED,
ChunkForDeletion,
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorDBWithIndex,
) )
@ -287,14 +288,17 @@ class MilvusIndex(EmbeddingIndex):
return QueryChunksResponse(chunks=filtered_chunks, scores=filtered_scores) return QueryChunksResponse(chunks=filtered_chunks, scores=filtered_scores)
async def delete_chunk(self, chunk_id: str) -> None: async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Remove a chunk from the Milvus collection.""" """Remove a chunk from the Milvus collection."""
chunk_ids = [c.chunk_id for c in chunks_for_deletion]
try: try:
# Use IN clause with square brackets and single quotes for VARCHAR field
chunk_ids_str = ", ".join(f"'{chunk_id}'" for chunk_id in chunk_ids)
await asyncio.to_thread( await asyncio.to_thread(
self.client.delete, collection_name=self.collection_name, filter=f'chunk_id == "{chunk_id}"' self.client.delete, collection_name=self.collection_name, filter=f"chunk_id in [{chunk_ids_str}]"
) )
except Exception as e: except Exception as e:
logger.error(f"Error deleting chunk {chunk_id} from Milvus collection {self.collection_name}: {e}") logger.error(f"Error deleting chunks from Milvus collection {self.collection_name}: {e}")
raise raise
@ -420,12 +424,10 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
return await index.query_chunks(query, params) return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunk_ids: list[str]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a milvus vector store.""" """Delete a chunk from a milvus vector store."""
index = await self._get_and_cache_vector_db_index(store_id) index = await self._get_and_cache_vector_db_index(store_id)
if not index: if not index:
raise VectorStoreNotFoundError(store_id) raise VectorStoreNotFoundError(store_id)
for chunk_id in chunk_ids: await index.index.delete_chunks(chunks_for_deletion)
# Use the index's delete_chunk method
await index.index.delete_chunk(chunk_id)

View file

@ -27,6 +27,7 @@ from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ( from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorDBWithIndex,
) )
@ -163,10 +164,11 @@ class PGVectorIndex(EmbeddingIndex):
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute(f"DROP TABLE IF EXISTS {self.table_name}") cur.execute(f"DROP TABLE IF EXISTS {self.table_name}")
async def delete_chunk(self, chunk_id: str) -> None: async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Remove a chunk from the PostgreSQL table.""" """Remove a chunk from the PostgreSQL table."""
chunk_ids = [c.chunk_id for c in chunks_for_deletion]
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute(f"DELETE FROM {self.table_name} WHERE id = %s", (chunk_id,)) cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,))
class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
@ -275,12 +277,10 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api) self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
return self.cache[vector_db_id] return self.cache[vector_db_id]
async def delete_chunks(self, store_id: str, chunk_ids: list[str]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a PostgreSQL vector store.""" """Delete a chunk from a PostgreSQL vector store."""
index = await self._get_and_cache_vector_db_index(store_id) index = await self._get_and_cache_vector_db_index(store_id)
if not index: if not index:
raise VectorStoreNotFoundError(store_id) raise VectorStoreNotFoundError(store_id)
for chunk_id in chunk_ids: await index.index.delete_chunks(chunks_for_deletion)
# Use the index's delete_chunk method
await index.index.delete_chunk(chunk_id)

View file

@ -29,6 +29,7 @@ from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig a
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ( from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorDBWithIndex,
) )
@ -88,15 +89,16 @@ class QdrantIndex(EmbeddingIndex):
await self.client.upsert(collection_name=self.collection_name, points=points) await self.client.upsert(collection_name=self.collection_name, points=points)
async def delete_chunk(self, chunk_id: str) -> None: async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Remove a chunk from the Qdrant collection.""" """Remove a chunk from the Qdrant collection."""
chunk_ids = [convert_id(c.chunk_id) for c in chunks_for_deletion]
try: try:
await self.client.delete( await self.client.delete(
collection_name=self.collection_name, collection_name=self.collection_name,
points_selector=models.PointIdsList(points=[convert_id(chunk_id)]), points_selector=models.PointIdsList(points=chunk_ids),
) )
except Exception as e: except Exception as e:
log.error(f"Error deleting chunk {chunk_id} from Qdrant collection {self.collection_name}: {e}") log.error(f"Error deleting chunks from Qdrant collection {self.collection_name}: {e}")
raise raise
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
@ -264,12 +266,14 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
) -> VectorStoreFileObject: ) -> VectorStoreFileObject:
# Qdrant doesn't allow multiple clients to access the same storage path simultaneously. # Qdrant doesn't allow multiple clients to access the same storage path simultaneously.
async with self._qdrant_lock: async with self._qdrant_lock:
await super().openai_attach_file_to_vector_store(vector_store_id, file_id, attributes, chunking_strategy) return await super().openai_attach_file_to_vector_store(
vector_store_id, file_id, attributes, chunking_strategy
)
async def delete_chunks(self, store_id: str, chunk_ids: list[str]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a Qdrant vector store.""" """Delete chunks from a Qdrant vector store."""
index = await self._get_and_cache_vector_db_index(store_id) index = await self._get_and_cache_vector_db_index(store_id)
if not index: if not index:
raise ValueError(f"Vector DB {store_id} not found") raise ValueError(f"Vector DB {store_id} not found")
for chunk_id in chunk_ids:
await index.index.delete_chunk(chunk_id) await index.index.delete_chunks(chunks_for_deletion)

View file

@ -26,6 +26,7 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import (
OpenAIVectorStoreMixin, OpenAIVectorStoreMixin,
) )
from llama_stack.providers.utils.memory.vector_store import ( from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorDBWithIndex,
) )
@ -67,6 +68,7 @@ class WeaviateIndex(EmbeddingIndex):
data_objects.append( data_objects.append(
wvc.data.DataObject( wvc.data.DataObject(
properties={ properties={
"chunk_id": chunk.chunk_id,
"chunk_content": chunk.model_dump_json(), "chunk_content": chunk.model_dump_json(),
}, },
vector=embeddings[i].tolist(), vector=embeddings[i].tolist(),
@ -79,10 +81,11 @@ class WeaviateIndex(EmbeddingIndex):
# TODO: make this async friendly # TODO: make this async friendly
collection.data.insert_many(data_objects) collection.data.insert_many(data_objects)
async def delete_chunk(self, chunk_id: str) -> None: async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True) sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True)
collection = self.client.collections.get(sanitized_collection_name) collection = self.client.collections.get(sanitized_collection_name)
collection.data.delete_many(where=Filter.by_property("id").contains_any([chunk_id])) chunk_ids = [chunk.chunk_id for chunk in chunks_for_deletion]
collection.data.delete_many(where=Filter.by_property("chunk_id").contains_any(chunk_ids))
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True) sanitized_collection_name = sanitize_collection_name(self.collection_name, weaviate_format=True)
@ -307,10 +310,10 @@ class WeaviateVectorIOAdapter(
return await index.query_chunks(query, params) return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunk_ids: list[str]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
sanitized_collection_name = sanitize_collection_name(store_id, weaviate_format=True) sanitized_collection_name = sanitize_collection_name(store_id, weaviate_format=True)
index = await self._get_and_cache_vector_db_index(sanitized_collection_name) index = await self._get_and_cache_vector_db_index(sanitized_collection_name)
if not index: if not index:
raise ValueError(f"Vector DB {sanitized_collection_name} not found") raise ValueError(f"Vector DB {sanitized_collection_name} not found")
await index.delete(chunk_ids) await index.index.delete_chunks(chunks_for_deletion)

View file

@ -70,7 +70,7 @@ from openai.types.chat.chat_completion_chunk import (
from openai.types.chat.chat_completion_content_part_image_param import ( from openai.types.chat.chat_completion_content_part_image_param import (
ImageURL as OpenAIImageURL, ImageURL as OpenAIImageURL,
) )
from openai.types.chat.chat_completion_message_tool_call_param import ( from openai.types.chat.chat_completion_message_tool_call import (
Function as OpenAIFunction, Function as OpenAIFunction,
) )
from pydantic import BaseModel from pydantic import BaseModel

View file

@ -6,7 +6,6 @@
import asyncio import asyncio
import json import json
import logging
import mimetypes import mimetypes
import time import time
import uuid import uuid
@ -37,10 +36,15 @@ from llama_stack.apis.vector_io import (
VectorStoreSearchResponse, VectorStoreSearchResponse,
VectorStoreSearchResponsePage, VectorStoreSearchResponsePage,
) )
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
content_from_data_and_mime_type,
make_overlapped_chunks,
)
logger = logging.getLogger(__name__) logger = get_logger(__name__, category="vector_io")
# Constants for OpenAI vector stores # Constants for OpenAI vector stores
CHUNK_MULTIPLIER = 5 CHUNK_MULTIPLIER = 5
@ -154,8 +158,8 @@ class OpenAIVectorStoreMixin(ABC):
self.openai_vector_stores = await self._load_openai_vector_stores() self.openai_vector_stores = await self._load_openai_vector_stores()
@abstractmethod @abstractmethod
async def delete_chunks(self, store_id: str, chunk_ids: list[str]) -> None: async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a vector store.""" """Delete chunks from a vector store."""
pass pass
@abstractmethod @abstractmethod
@ -614,7 +618,7 @@ class OpenAIVectorStoreMixin(ABC):
) )
vector_store_file_object.status = "completed" vector_store_file_object.status = "completed"
except Exception as e: except Exception as e:
logger.error(f"Error attaching file to vector store: {e}") logger.exception("Error attaching file to vector store")
vector_store_file_object.status = "failed" vector_store_file_object.status = "failed"
vector_store_file_object.last_error = VectorStoreFileLastError( vector_store_file_object.last_error = VectorStoreFileLastError(
code="server_error", code="server_error",
@ -767,7 +771,21 @@ class OpenAIVectorStoreMixin(ABC):
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
chunks = [Chunk.model_validate(c) for c in dict_chunks] chunks = [Chunk.model_validate(c) for c in dict_chunks]
await self.delete_chunks(vector_store_id, [str(c.chunk_id) for c in chunks if c.chunk_id])
# Create ChunkForDeletion objects with both chunk_id and document_id
chunks_for_deletion = []
for c in chunks:
if c.chunk_id:
document_id = c.metadata.get("document_id") or (
c.chunk_metadata.document_id if c.chunk_metadata else None
)
if document_id:
chunks_for_deletion.append(ChunkForDeletion(chunk_id=str(c.chunk_id), document_id=document_id))
else:
logger.warning(f"Chunk {c.chunk_id} has no document_id, skipping deletion")
if chunks_for_deletion:
await self.delete_chunks(vector_store_id, chunks_for_deletion)
store_info = self.openai_vector_stores[vector_store_id].copy() store_info = self.openai_vector_stores[vector_store_id].copy()

View file

@ -16,6 +16,7 @@ from urllib.parse import unquote
import httpx import httpx
import numpy as np import numpy as np
from numpy.typing import NDArray from numpy.typing import NDArray
from pydantic import BaseModel
from llama_stack.apis.common.content_types import ( from llama_stack.apis.common.content_types import (
URL, URL,
@ -34,6 +35,18 @@ from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class ChunkForDeletion(BaseModel):
"""Information needed to delete a chunk from a vector store.
:param chunk_id: The ID of the chunk to delete
:param document_id: The ID of the document this chunk belongs to
"""
chunk_id: str
document_id: str
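A minimal sketch of how callers build these objects before handing them to a provider's `delete_chunks`, mirroring the construction added in `openai_vector_store_mixin.py` above; the IDs are placeholders:
```python
# Placeholders for illustration only
chunks_for_deletion = [
    ChunkForDeletion(chunk_id="chunk-001", document_id="doc-abc"),
    ChunkForDeletion(chunk_id="chunk-002", document_id="doc-abc"),
]

# Each provider index now deletes the whole batch in one call, e.g.:
#     await index.delete_chunks(chunks_for_deletion)
```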
# Constants for reranker types # Constants for reranker types
RERANKER_TYPE_RRF = "rrf" RERANKER_TYPE_RRF = "rrf"
RERANKER_TYPE_WEIGHTED = "weighted" RERANKER_TYPE_WEIGHTED = "weighted"
@ -232,7 +245,7 @@ class EmbeddingIndex(ABC):
raise NotImplementedError() raise NotImplementedError()
@abstractmethod @abstractmethod
async def delete_chunk(self, chunk_id: str): async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]):
raise NotImplementedError() raise NotImplementedError()
@abstractmethod @abstractmethod

View file

@ -175,7 +175,7 @@ const handleSubmitWithContent = async (content: string) => {
return ( return (
<div className="flex flex-col h-full max-w-4xl mx-auto"> <div className="flex flex-col h-full max-w-4xl mx-auto">
<div className="mb-4 flex justify-between items-center"> <div className="mb-4 flex justify-between items-center">
<h1 className="text-2xl font-bold">Chat Playground</h1> <h1 className="text-2xl font-bold">Chat Playground (Completions)</h1>
<div className="flex gap-2"> <div className="flex gap-2">
<Select value={selectedModel} onValueChange={setSelectedModel} disabled={isModelsLoading || isGenerating}> <Select value={selectedModel} onValueChange={setSelectedModel} disabled={isModelsLoading || isGenerating}>
<SelectTrigger className="w-[180px]"> <SelectTrigger className="w-[180px]">

View file

@ -6,6 +6,8 @@ import {
MoveUpRight, MoveUpRight,
Database, Database,
MessageCircle, MessageCircle,
Settings2,
Compass,
} from "lucide-react"; } from "lucide-react";
import Link from "next/link"; import Link from "next/link";
import { usePathname } from "next/navigation"; import { usePathname } from "next/navigation";
@ -22,15 +24,16 @@ import {
SidebarMenuItem, SidebarMenuItem,
SidebarHeader, SidebarHeader,
} from "@/components/ui/sidebar"; } from "@/components/ui/sidebar";
// Extracted Chat Playground item
const chatPlaygroundItem = {
title: "Chat Playground",
url: "/chat-playground",
icon: MessageCircle,
};
// Removed Chat Playground from log items const createItems = [
const logItems = [ {
title: "Chat Playground",
url: "/chat-playground",
icon: MessageCircle,
},
];
const manageItems = [
{ {
title: "Chat Completions", title: "Chat Completions",
url: "/logs/chat-completions", url: "/logs/chat-completions",
@ -53,77 +56,96 @@ const logItems = [
}, },
]; ];
const optimizeItems: { title: string; url: string; icon: React.ElementType }[] = [
{
title: "Evaluations",
url: "",
icon: Compass,
},
{
title: "Fine-tuning",
url: "",
icon: Settings2,
},
];
interface SidebarItem {
title: string;
url: string;
icon: React.ElementType;
}
export function AppSidebar() { export function AppSidebar() {
const pathname = usePathname(); const pathname = usePathname();
return ( const renderSidebarItems = (items: SidebarItem[]) => {
<Sidebar> return items.map((item) => {
<SidebarHeader> const isActive = pathname.startsWith(item.url);
<Link href="/">Llama Stack</Link> return (
</SidebarHeader> <SidebarMenuItem key={item.title}>
<SidebarContent> <SidebarMenuButton
{/* Chat Playground as its own section */} asChild
<SidebarGroup> className={cn(
<SidebarGroupContent> "justify-start",
<SidebarMenu> isActive &&
<SidebarMenuItem> "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
)}
>
<Link href={item.url}>
<item.icon
className={cn(
isActive && "text-gray-900 dark:text-gray-100",
"mr-2 h-4 w-4",
)}
/>
<span>{item.title}</span>
</Link>
</SidebarMenuButton>
</SidebarMenuItem>
);
});
};
return (
<Sidebar>
<SidebarHeader>
<Link href="/">Llama Stack</Link>
</SidebarHeader>
<SidebarContent>
<SidebarGroup>
<SidebarGroupLabel>Create</SidebarGroupLabel>
<SidebarGroupContent>
<SidebarMenu>{renderSidebarItems(createItems)}</SidebarMenu>
</SidebarGroupContent>
</SidebarGroup>
<SidebarGroup>
<SidebarGroupLabel>Manage</SidebarGroupLabel>
<SidebarGroupContent>
<SidebarMenu>{renderSidebarItems(manageItems)}</SidebarMenu>
</SidebarGroupContent>
</SidebarGroup>
<SidebarGroup>
<SidebarGroupLabel>Optimize</SidebarGroupLabel>
<SidebarGroupContent>
<SidebarMenu>
{optimizeItems.map((item) => (
<SidebarMenuItem key={item.title}>
<SidebarMenuButton <SidebarMenuButton
asChild disabled
className={cn( className="justify-start opacity-60 cursor-not-allowed"
"justify-start",
pathname.startsWith(chatPlaygroundItem.url) &&
"bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
)}
> >
<Link href={chatPlaygroundItem.url}> <item.icon className="mr-2 h-4 w-4" />
<chatPlaygroundItem.icon <span>{item.title}</span>
className={cn( <span className="ml-2 text-xs text-gray-500">(Coming Soon)</span>
pathname.startsWith(chatPlaygroundItem.url) && "text-gray-900 dark:text-gray-100",
"mr-2 h-4 w-4",
)}
/>
<span>{chatPlaygroundItem.title}</span>
</Link>
</SidebarMenuButton> </SidebarMenuButton>
</SidebarMenuItem> </SidebarMenuItem>
</SidebarMenu> ))}
</SidebarGroupContent> </SidebarMenu>
</SidebarGroup> </SidebarGroupContent>
</SidebarGroup>
{/* Logs section */} </SidebarContent>
<SidebarGroup> </Sidebar>
<SidebarGroupLabel>Logs</SidebarGroupLabel>
<SidebarGroupContent>
<SidebarMenu>
{logItems.map((item) => {
const isActive = pathname.startsWith(item.url);
return (
<SidebarMenuItem key={item.title}>
<SidebarMenuButton
asChild
className={cn(
"justify-start",
isActive &&
"bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
)}
>
<Link href={item.url}>
<item.icon
className={cn(
isActive && "text-gray-900 dark:text-gray-100",
"mr-2 h-4 w-4",
)}
/>
<span>{item.title}</span>
</Link>
</SidebarMenuButton>
</SidebarMenuItem>
);
})}
</SidebarMenu>
</SidebarGroupContent>
</SidebarGroup>
</SidebarContent>
</Sidebar>
); );
} }

View file

@ -33,7 +33,7 @@ dependencies = [
"jsonschema", "jsonschema",
"llama-stack-client>=0.2.17", "llama-stack-client>=0.2.17",
"llama-api-client>=0.1.2", "llama-api-client>=0.1.2",
"openai>=1.66", "openai>=1.99.6",
"prompt-toolkit", "prompt-toolkit",
"python-dotenv", "python-dotenv",
"python-jose[cryptography]", "python-jose[cryptography]",
@ -266,7 +266,6 @@ exclude = [
"^llama_stack/providers/inline/post_training/common/validator\\.py$", "^llama_stack/providers/inline/post_training/common/validator\\.py$",
"^llama_stack/providers/inline/safety/code_scanner/", "^llama_stack/providers/inline/safety/code_scanner/",
"^llama_stack/providers/inline/safety/llama_guard/", "^llama_stack/providers/inline/safety/llama_guard/",
"^llama_stack/providers/inline/safety/prompt_guard/",
"^llama_stack/providers/inline/scoring/basic/", "^llama_stack/providers/inline/scoring/basic/",
"^llama_stack/providers/inline/scoring/braintrust/", "^llama_stack/providers/inline/scoring/braintrust/",
"^llama_stack/providers/inline/scoring/llm_as_judge/", "^llama_stack/providers/inline/scoring/llm_as_judge/",

View file

@ -16,13 +16,10 @@ MCP_TOOLGROUP_ID = "mcp::localmcp"
def default_tools(): def default_tools():
"""Default tools for backward compatibility.""" """Default tools for backward compatibility."""
from mcp import types
from mcp.server.fastmcp import Context from mcp.server.fastmcp import Context
async def greet_everyone( async def greet_everyone(url: str, ctx: Context) -> str:
url: str, ctx: Context return "Hello, world!"
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
return [types.TextContent(type="text", text="Hello, world!")]
async def get_boiling_point(liquid_name: str, celsius: bool = True) -> int: async def get_boiling_point(liquid_name: str, celsius: bool = True) -> int:
""" """
@ -45,7 +42,6 @@ def default_tools():
def dependency_tools(): def dependency_tools():
"""Tools with natural dependencies for multi-turn testing.""" """Tools with natural dependencies for multi-turn testing."""
from mcp import types
from mcp.server.fastmcp import Context from mcp.server.fastmcp import Context
async def get_user_id(username: str, ctx: Context) -> str: async def get_user_id(username: str, ctx: Context) -> str:
@ -106,7 +102,7 @@ def dependency_tools():
else: else:
access = "no" access = "no"
return [types.TextContent(type="text", text=access)] return access
async def get_experiment_id(experiment_name: str, ctx: Context) -> str: async def get_experiment_id(experiment_name: str, ctx: Context) -> str:
""" """
@ -245,7 +241,6 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
try: try:
yield {"server_url": server_url} yield {"server_url": server_url}
finally: finally:
print("Telling SSE server to exit")
server_instance.should_exit = True server_instance.should_exit = True
time.sleep(0.5) time.sleep(0.5)
@ -269,4 +264,3 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
AppStatus.should_exit = False AppStatus.should_exit = False
AppStatus.should_exit_event = None AppStatus.should_exit_event = None
print("SSE server exited")

View file

@ -3,7 +3,7 @@ name = "llama-stack-api-weather"
version = "0.1.0" version = "0.1.0"
description = "Weather API for Llama Stack" description = "Weather API for Llama Stack"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.12"
dependencies = ["llama-stack", "pydantic"] dependencies = ["llama-stack", "pydantic"]
[build-system] [build-system]

View file

@ -3,7 +3,7 @@ name = "llama-stack-provider-kaze"
version = "0.1.0" version = "0.1.0"
description = "Kaze weather provider for Llama Stack" description = "Kaze weather provider for Llama Stack"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.12"
dependencies = ["llama-stack", "pydantic", "aiohttp"] dependencies = ["llama-stack", "pydantic", "aiohttp"]
[build-system] [build-system]

View file

@ -270,7 +270,7 @@ def openai_client(client_with_models):
@pytest.fixture(params=["openai_client", "client_with_models"]) @pytest.fixture(params=["openai_client", "client_with_models"])
def compat_client(request, client_with_models): def compat_client(request, client_with_models):
if isinstance(client_with_models, LlamaStackAsLibraryClient): if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
# OpenAI client expects a server, so unless we also rewrite OpenAI client's requests # OpenAI client expects a server, so unless we also rewrite OpenAI client's requests
# to go via the Stack library client (which itself rewrites requests to be served inline), # to go via the Stack library client (which itself rewrites requests to be served inline),
# we cannot do this. # we cannot do this.

View file

@ -34,6 +34,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
"remote::runpod", "remote::runpod",
"remote::sambanova", "remote::sambanova",
"remote::tgi", "remote::tgi",
"remote::vertexai",
): ):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.") pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")

View file

@ -29,6 +29,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
"remote::openai", "remote::openai",
"remote::anthropic", "remote::anthropic",
"remote::gemini", "remote::gemini",
"remote::vertexai",
"remote::groq", "remote::groq",
"remote::sambanova", "remote::sambanova",
) )

View file

@ -137,7 +137,7 @@ test_response_multi_turn_tool_execution:
server_url: "<FILLED_BY_TEST_RUNNER>" server_url: "<FILLED_BY_TEST_RUNNER>"
output: "yes" output: "yes"
- case_id: "experiment_results_lookup" - case_id: "experiment_results_lookup"
input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me what you found." input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius."
tools: tools:
- type: mcp - type: mcp
server_label: "localmcp" server_label: "localmcp"
@ -149,7 +149,7 @@ test_response_multi_turn_tool_execution_streaming:
test_params: test_params:
case: case:
- case_id: "user_permissions_workflow" - case_id: "user_permissions_workflow"
input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step." input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response."
tools: tools:
- type: mcp - type: mcp
server_label: "localmcp" server_label: "localmcp"
@ -157,7 +157,7 @@ test_response_multi_turn_tool_execution_streaming:
stream: true stream: true
output: "no" output: "no"
- case_id: "experiment_analysis_streaming" - case_id: "experiment_analysis_streaming"
input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Please stream your analysis process." input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process."
tools: tools:
- type: mcp - type: mcp
server_label: "localmcp" server_label: "localmcp"

View file

@ -363,6 +363,9 @@ def test_response_non_streaming_file_search_empty_vector_store(request, compat_c
ids=case_id_generator, ids=case_id_generator,
) )
def test_response_non_streaming_mcp_tool(request, compat_client, text_model_id, case): def test_response_non_streaming_mcp_tool(request, compat_client, text_model_id, case):
if not isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("in-process MCP server is only supported in library client")
with make_mcp_server() as mcp_server_info: with make_mcp_server() as mcp_server_info:
tools = case["tools"] tools = case["tools"]
for tool in tools: for tool in tools:
@ -485,8 +488,11 @@ def test_response_non_streaming_multi_turn_image(request, compat_client, text_mo
responses_test_cases["test_response_multi_turn_tool_execution"]["test_params"]["case"], responses_test_cases["test_response_multi_turn_tool_execution"]["test_params"]["case"],
ids=case_id_generator, ids=case_id_generator,
) )
def test_response_non_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case): def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
"""Test multi-turn tool execution where multiple MCP tool calls are performed in sequence.""" """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
if not isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("in-process MCP server is only supported in library client")
with make_mcp_server(tools=dependency_tools()) as mcp_server_info: with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
tools = case["tools"] tools = case["tools"]
# Replace the placeholder URL with the actual server URL # Replace the placeholder URL with the actual server URL
@ -541,8 +547,11 @@ def test_response_non_streaming_multi_turn_tool_execution(request, compat_client
responses_test_cases["test_response_multi_turn_tool_execution_streaming"]["test_params"]["case"], responses_test_cases["test_response_multi_turn_tool_execution_streaming"]["test_params"]["case"],
ids=case_id_generator, ids=case_id_generator,
) )
async def test_response_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case): def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
"""Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence.""" """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
if not isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("in-process MCP server is only supported in library client")
with make_mcp_server(tools=dependency_tools()) as mcp_server_info: with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
tools = case["tools"] tools = case["tools"]
# Replace the placeholder URL with the actual server URL # Replace the placeholder URL with the actual server URL
@ -634,7 +643,7 @@ async def test_response_streaming_multi_turn_tool_execution(request, compat_clie
}, },
], ],
) )
def test_response_text_format(request, compat_client, text_model_id, text_format): def test_response_text_format(compat_client, text_model_id, text_format):
if isinstance(compat_client, LlamaStackAsLibraryClient): if isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("Responses API text format is not yet supported in library client.") pytest.skip("Responses API text format is not yet supported in library client.")
@ -653,7 +662,7 @@ def test_response_text_format(request, compat_client, text_model_id, text_format
@pytest.fixture @pytest.fixture
def vector_store_with_filtered_files(request, compat_client, text_model_id, tmp_path_factory): def vector_store_with_filtered_files(compat_client, text_model_id, tmp_path_factory):
"""Create a vector store with multiple files that have different attributes for filtering tests.""" """Create a vector store with multiple files that have different attributes for filtering tests."""
if isinstance(compat_client, LlamaStackAsLibraryClient): if isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("Responses API file search is not yet supported in library client.") pytest.skip("Responses API file search is not yet supported in library client.")
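
The hunks above converge on a single gating pattern for the MCP cases: skip unless the test runs against the in-process library client, then stand up `make_mcp_server()` and point each tool at it before creating the response. A minimal sketch of that flow follows; `make_mcp_server` and `dependency_tools` are the helpers referenced in the hunks, while the shape of `mcp_server_info`, the tool-config fields, and the `responses.create(...)` call are assumptions for illustration, not verified signatures.

```python
# Rough sketch of the library-client gating plus in-process MCP wiring used above.
# The dict shape of `mcp_server_info` and the tool config fields are assumed.
import pytest

from llama_stack.core.library_client import LlamaStackAsLibraryClient


def run_mcp_case(compat_client, text_model_id, case, make_mcp_server, dependency_tools):
    if not isinstance(compat_client, LlamaStackAsLibraryClient):
        pytest.skip("in-process MCP server is only supported in library client")

    with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
        tools = case["tools"]
        for tool in tools:
            if tool.get("type") == "mcp":
                # Replace the placeholder URL with the actual in-process server URL.
                tool["server_url"] = mcp_server_info["server_url"]

        response = compat_client.responses.create(
            model=text_model_id,
            input=case["input"],
            tools=tools,
        )
        assert response is not None
```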

View file

@ -9,10 +9,11 @@ import time
from io import BytesIO from io import BytesIO
import pytest import pytest
from llama_stack_client import BadRequestError, LlamaStackClient from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.apis.vector_io import Chunk from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -475,9 +476,6 @@ def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client
"""Test OpenAI vector store attach file.""" """Test OpenAI vector store attach file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
# Create a vector store # Create a vector store
@ -526,9 +524,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
"""Test OpenAI vector store attach files on creation.""" """Test OpenAI vector store attach files on creation."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
# Create some files and attach them to the vector store # Create some files and attach them to the vector store
@ -582,9 +577,6 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
"""Test OpenAI vector store list files.""" """Test OpenAI vector store list files."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
# Create a vector store # Create a vector store
@ -597,16 +589,20 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
file_buffer.name = f"openai_test_{i}.txt" file_buffer.name = f"openai_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants") file = compat_client.files.create(file=file_buffer, purpose="assistants")
compat_client.vector_stores.files.create( response = compat_client.vector_stores.files.create(
vector_store_id=vector_store.id, vector_store_id=vector_store.id,
file_id=file.id, file_id=file.id,
) )
assert response is not None
assert response.status == "completed", (
f"Failed to attach file {file.id} to vector store {vector_store.id}: {response=}"
)
file_ids.append(file.id) file_ids.append(file.id)
files_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id) files_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id)
assert files_list assert files_list
assert files_list.object == "list" assert files_list.object == "list"
assert files_list.data assert files_list.data is not None
assert not files_list.has_more assert not files_list.has_more
assert len(files_list.data) == 3 assert len(files_list.data) == 3
assert set(file_ids) == {file.id for file in files_list.data} assert set(file_ids) == {file.id for file in files_list.data}
@ -642,12 +638,13 @@ def test_openai_vector_store_list_files_invalid_vector_store(compat_client_with_
"""Test OpenAI vector store list files with invalid vector store ID.""" """Test OpenAI vector store list files with invalid vector store ID."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
if isinstance(compat_client, LlamaStackAsLibraryClient):
errors = ValueError
else:
errors = (BadRequestError, OpenAIBadRequestError)
with pytest.raises((BadRequestError, OpenAIBadRequestError)): with pytest.raises(errors):
compat_client.vector_stores.files.list(vector_store_id="abc123") compat_client.vector_stores.files.list(vector_store_id="abc123")
@ -655,9 +652,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
"""Test OpenAI vector store retrieve file contents.""" """Test OpenAI vector store retrieve file contents."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
# Create a vector store # Create a vector store
@ -685,9 +679,15 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
file_id=file.id, file_id=file.id,
) )
assert file_contents assert file_contents is not None
assert file_contents.content[0]["type"] == "text" assert len(file_contents.content) == 1
assert file_contents.content[0]["text"] == test_content.decode("utf-8") content = file_contents.content[0]
# llama-stack-client returns a model, openai-python is a badboy and returns a dict
if not isinstance(content, dict):
content = content.model_dump()
assert content["type"] == "text"
assert content["text"] == test_content.decode("utf-8")
assert file_contents.filename == file_name assert file_contents.filename == file_name
assert file_contents.attributes == attributes assert file_contents.attributes == attributes
@ -696,9 +696,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
"""Test OpenAI vector store delete file.""" """Test OpenAI vector store delete file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
# Create a vector store # Create a vector store
@ -751,9 +748,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
"""Test OpenAI vector store delete file removes from vector store.""" """Test OpenAI vector store delete file removes from vector store."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
# Create a vector store # Create a vector store
@ -792,9 +786,6 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
"""Test OpenAI vector store update file.""" """Test OpenAI vector store update file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
# Create a vector store # Create a vector store
@ -840,9 +831,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
""" """
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores compat_client = compat_client_with_empty_stores
# Create a vector store with files # Create a vector store with files
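
The vector-store hunks above repeat two compatibility idioms: invalid IDs surface as `ValueError` from the in-process library client but as `BadRequestError` from the HTTP clients, and retrieved file contents arrive as pydantic models from llama-stack-client but as plain dicts from openai-python. A small hedged sketch of both idioms, with the client wiring left as a placeholder:

```python
# Illustrative helpers mirroring the compatibility handling in the updated tests;
# `compat_client` is whatever client fixture the test is parametrized with.
from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError

from llama_stack.core.library_client import LlamaStackAsLibraryClient


def expected_bad_request_errors(compat_client):
    # The library client raises plain ValueError; HTTP clients raise their own error types.
    if isinstance(compat_client, LlamaStackAsLibraryClient):
        return ValueError
    return (BadRequestError, OpenAIBadRequestError)


def first_text_content(file_contents) -> str:
    # llama-stack-client returns a model, openai-python returns a dict; normalize to a dict.
    content = file_contents.content[0]
    if not isinstance(content, dict):
        content = content.model_dump()
    assert content["type"] == "text"
    return content["text"]
```

Used as `with pytest.raises(expected_bad_request_errors(compat_client)): ...`, this keeps one test body working against all three clients.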

uv.lock generated (97 changes)
View file

@ -476,7 +476,7 @@ wheels = [
[[package]] [[package]]
name = "chromadb" name = "chromadb"
version = "1.0.15" version = "1.0.16"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "bcrypt" }, { name = "bcrypt" },
@ -507,13 +507,13 @@ dependencies = [
{ name = "typing-extensions" }, { name = "typing-extensions" },
{ name = "uvicorn", extra = ["standard"] }, { name = "uvicorn", extra = ["standard"] },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/ad/e2/0653b2e539db5512d2200c759f1bc7f9ef5609fe47f3c7d24b82f62dc00f/chromadb-1.0.15.tar.gz", hash = "sha256:3e910da3f5414e2204f89c7beca1650847f2bf3bd71f11a2e40aad1eb31050aa", size = 1218840, upload-time = "2025-07-02T17:07:09.875Z" } sdist = { url = "https://files.pythonhosted.org/packages/15/2a/5b7e793d2a27c425e9f1813e9cb965b70e9bda08b69ee15a10e07dc3e59a/chromadb-1.0.16.tar.gz", hash = "sha256:3c864b5beb5e131bdc1f83c0b63a01ec481c6ee52028f088563ffba8478478e1", size = 1241545, upload-time = "2025-08-08T00:25:41.414Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/85/5a/866c6f0c2160cbc8dca0cf77b2fb391dcf435b32a58743da1bc1a08dc442/chromadb-1.0.15-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:51791553014297798b53df4e043e9c30f4e8bd157647971a6bb02b04bfa65f82", size = 18838820, upload-time = "2025-07-02T17:07:07.632Z" }, { url = "https://files.pythonhosted.org/packages/a3/9d/bffcc814272c9b7982551803b2d45b77f39eeea1b9e965c00c05ee81c649/chromadb-1.0.16-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:144163ce7ca4f4448684d5d0c13ebb37c4d68490ecb60967a95d05cea30e0d2d", size = 18942157, upload-time = "2025-08-08T00:25:38.459Z" },
{ url = "https://files.pythonhosted.org/packages/e1/18/ff9b58ab5d334f5ecff7fdbacd6761bac467176708fa4d2500ae7c048af0/chromadb-1.0.15-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:48015803c0631c3a817befc276436dc084bb628c37fd4214047212afb2056291", size = 18057131, upload-time = "2025-07-02T17:07:05.15Z" }, { url = "https://files.pythonhosted.org/packages/58/4e/de0086f3cbcfd667d75d112bb546386803ab5335599bf7099272a675e98b/chromadb-1.0.16-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:4ebcc5894e6fbb6b576452bbf4659746bfe58d9daf99a18363364e9497434bd2", size = 18147831, upload-time = "2025-08-08T00:25:35.546Z" },
{ url = "https://files.pythonhosted.org/packages/31/49/74e34cc5aeeb25aff2c0ede6790b3671e14c1b91574dd8f98d266a4c5aad/chromadb-1.0.15-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b73cd6fb32fcdd91c577cca16ea6112b691d72b441bb3f2140426d1e79e453a", size = 18595284, upload-time = "2025-07-02T17:06:59.102Z" }, { url = "https://files.pythonhosted.org/packages/0e/7f/a8aff4ce96281bcb9731d10b2554f41963dd0b47acb4f90a78b2b7c4f199/chromadb-1.0.16-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:937051fc3aae94f7c171503d8f1f7662820aacc75acf45f28d3656c75c5ff1f8", size = 18682195, upload-time = "2025-08-08T00:25:29.654Z" },
{ url = "https://files.pythonhosted.org/packages/cb/33/190df917a057067e37f8b48d082d769bed8b3c0c507edefc7b6c6bb577d0/chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:479f1b401af9e7c20f50642ffb3376abbfd78e2b5b170429f7c79eff52e367db", size = 19526626, upload-time = "2025-07-02T17:07:02.163Z" }, { url = "https://files.pythonhosted.org/packages/a3/9c/2a97d0257176aae472dff6f1ef1b7050449f384e420120e0f31d2d8f532f/chromadb-1.0.16-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0f5c5ad0c59154a9cab1506b857bab8487b588352e668cf1222c54bb9d52daa", size = 19635695, upload-time = "2025-08-08T00:25:32.68Z" },
{ url = "https://files.pythonhosted.org/packages/a1/30/6890da607358993f87a01e80bcce916b4d91515ce865f07dc06845cb472f/chromadb-1.0.15-cp39-abi3-win_amd64.whl", hash = "sha256:e0cb3b93fdc42b1786f151d413ef36299f30f783a30ce08bf0bfb12e552b4190", size = 19520490, upload-time = "2025-07-02T17:07:11.559Z" }, { url = "https://files.pythonhosted.org/packages/96/8a/f7e810f3cbdc9186ba4e649dc32711b7ab2c23aba37cf61175f731d22293/chromadb-1.0.16-cp39-abi3-win_amd64.whl", hash = "sha256:2528c01bd8b3facca9d0e1ffac866767c386b94604df484fc792ee891c86e09a", size = 19641144, upload-time = "2025-08-08T00:25:43.446Z" },
] ]
[[package]] [[package]]
@ -1632,10 +1632,10 @@ test = [
{ name = "pypdf" }, { name = "pypdf" },
{ name = "requests" }, { name = "requests" },
{ name = "sqlalchemy", extra = ["asyncio"] }, { name = "sqlalchemy", extra = ["asyncio"] },
{ name = "torch", version = "2.7.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
{ name = "torch", version = "2.7.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" },
{ name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
{ name = "torchvision", version = "0.22.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "transformers" }, { name = "transformers" },
{ name = "weaviate-client" }, { name = "weaviate-client" },
] ]
@ -1674,7 +1674,7 @@ requires-dist = [
{ name = "llama-api-client", specifier = ">=0.1.2" }, { name = "llama-api-client", specifier = ">=0.1.2" },
{ name = "llama-stack-client", specifier = ">=0.2.17" }, { name = "llama-stack-client", specifier = ">=0.2.17" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.17" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.17" },
{ name = "openai", specifier = ">=1.66" }, { name = "openai", specifier = ">=1.99.6" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
{ name = "pandas", marker = "extra == 'ui'" }, { name = "pandas", marker = "extra == 'ui'" },
@ -2301,7 +2301,7 @@ wheels = [
[[package]] [[package]]
name = "openai" name = "openai"
version = "1.98.0" version = "1.99.6"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "anyio" }, { name = "anyio" },
@ -2313,9 +2313,9 @@ dependencies = [
{ name = "tqdm" }, { name = "tqdm" },
{ name = "typing-extensions" }, { name = "typing-extensions" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/d8/9d/52eadb15c92802711d6b6cf00df3a6d0d18b588f4c5ba5ff210c6419fc03/openai-1.98.0.tar.gz", hash = "sha256:3ee0fcc50ae95267fd22bd1ad095ba5402098f3df2162592e68109999f685427", size = 496695, upload-time = "2025-07-30T12:48:03.701Z" } sdist = { url = "https://files.pythonhosted.org/packages/11/45/38a87bd6949236db5ae3132f41d5861824702b149f86d2627d6900919103/openai-1.99.6.tar.gz", hash = "sha256:f48f4239b938ef187062f3d5199a05b69711d8b600b9a9b6a3853cd271799183", size = 505364, upload-time = "2025-08-09T15:20:54.438Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/a8/fe/f64631075b3d63a613c0d8ab761d5941631a470f6fa87eaaee1aa2b4ec0c/openai-1.98.0-py3-none-any.whl", hash = "sha256:b99b794ef92196829120e2df37647722104772d2a74d08305df9ced5f26eae34", size = 767713, upload-time = "2025-07-30T12:48:01.264Z" }, { url = "https://files.pythonhosted.org/packages/d6/dd/9aa956485c2856346b3181542fbb0aea4e5b457fa7a523944726746da8da/openai-1.99.6-py3-none-any.whl", hash = "sha256:e40d44b2989588c45ce13819598788b77b8fb80ba2f7ae95ce90d14e46f1bd26", size = 786296, upload-time = "2025-08-09T15:20:51.95Z" },
] ]
[[package]] [[package]]
@ -4310,7 +4310,7 @@ wheels = [
[[package]] [[package]]
name = "torch" name = "torch"
version = "2.7.1" version = "2.8.0"
source = { registry = "https://download.pytorch.org/whl/cpu" } source = { registry = "https://download.pytorch.org/whl/cpu" }
resolution-markers = [ resolution-markers = [
"python_full_version >= '3.13' and sys_platform == 'darwin'", "python_full_version >= '3.13' and sys_platform == 'darwin'",
@ -4326,14 +4326,14 @@ dependencies = [
{ name = "typing-extensions", marker = "sys_platform == 'darwin'" }, { name = "typing-extensions", marker = "sys_platform == 'darwin'" },
] ]
wheels = [ wheels = [
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:7b4f8b2b83bd08f7d399025a9a7b323bdbb53d20566f1e0d584689bb92d82f9a" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:a47b7986bee3f61ad217d8a8ce24605809ab425baf349f97de758815edd2ef54" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:95af97e7b2cecdc89edc0558962a51921bf9c61538597dbec6b7cc48d31e2e13" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:fbe2e149c5174ef90d29a5f84a554dfaf28e003cb4f61fa2c8c024c17ec7ca58" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7ecd868a086468e1bcf74b91db425c1c2951a9cfcd0592c4c73377b7e42485ae" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:057efd30a6778d2ee5e2374cd63a63f63311aa6f33321e627c655df60abdd390" },
] ]
[[package]] [[package]]
name = "torch" name = "torch"
version = "2.7.1+cpu" version = "2.8.0+cpu"
source = { registry = "https://download.pytorch.org/whl/cpu" } source = { registry = "https://download.pytorch.org/whl/cpu" }
resolution-markers = [ resolution-markers = [
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@ -4351,21 +4351,24 @@ dependencies = [
{ name = "typing-extensions", marker = "sys_platform != 'darwin'" }, { name = "typing-extensions", marker = "sys_platform != 'darwin'" },
] ]
wheels = [ wheels = [
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3bf2db5adf77b433844f080887ade049c4705ddf9fe1a32023ff84ff735aa5ad" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:0e34e276722ab7dd0dffa9e12fe2135a9b34a0e300c456ed7ad6430229404eb5" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:8f8b3cfc53010a4b4a3c7ecb88c212e9decc4f5eeb6af75c3c803937d2d60947" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:610f600c102386e581327d5efc18c0d6edecb9820b4140d26163354a99cd800d" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:0bc887068772233f532b51a3e8c8cfc682ae62bef74bf4e0c53526c8b9e4138f" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:cb9a8ba8137ab24e36bf1742cb79a1294bd374db570f09fc15a5e1318160db4e" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:a2618775f32eb4126c5b2050686da52001a08cffa331637d9cf51c8250931e00" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:2be20b2c05a0cce10430cc25f32b689259640d273232b2de357c35729132256d" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:eb17646792ac4374ffc87e42369f45d21eff17c790868963b90483ef0b6db4ef" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:99fc421a5d234580e45957a7b02effbf3e1c884a5dd077afc85352c77bf41434" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:84ea1f6a1d15663037d01b121d6e33bb9da3c90af8e069e5072c30f413455a57" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:8b5882276633cf91fe3d2d7246c743b94d44a7e660b27f1308007fdb1bb89f7d" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:b66f77f6f67317344ee083aa7ac4751a14395fcb38060d564bf513978d267153" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a5064b5e23772c8d164068cc7c12e01a75faf7b948ecd95a0d4007d7487e5f25" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:56136a2aca6707df3c8811e46ea2d379eaafd18e656e2fd51e8e4d0ca995651b" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f81dedb4c6076ec325acc3b47525f9c550e5284a18eae1d9061c543f7b6e7de" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:355614185a2aea7155f9c88a20bfd49de5f3063866f3cf9b2f21b6e9e59e31e0" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:e1ee1b2346ade3ea90306dfbec7e8ff17bc220d344109d189ae09078333b0856" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:464bca1bc9452f2ccd676514688896e66b9488f2a0268ecd3ac497cf09c5aac1" }, { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:64c187345509f2b1bb334feed4666e2c781ca381874bde589182f81247e61f88" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:af81283ac671f434b1b25c95ba295f270e72db1fad48831eb5e4748ff9840041" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:a9dbb6f64f63258bc811e2c0c99640a81e5af93c531ad96e95c5ec777ea46dab" },
{ url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:6d93a7165419bc4b2b907e859ccab0dea5deeab261448ae9a5ec5431f14c0e64" },
] ]
[[package]] [[package]]
name = "torchvision" name = "torchvision"
version = "0.22.1" version = "0.23.0"
source = { registry = "https://download.pytorch.org/whl/cpu" } source = { registry = "https://download.pytorch.org/whl/cpu" }
resolution-markers = [ resolution-markers = [
"python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
@ -4376,21 +4379,21 @@ resolution-markers = [
dependencies = [ dependencies = [
{ name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
{ name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
{ name = "torch", version = "2.7.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
{ name = "torch", version = "2.7.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
] ]
wheels = [ wheels = [
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:153f1790e505bd6da123e21eee6e83e2e155df05c0fe7d56347303067d8543c5" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e0e2c04a91403e8dd3af9756c6a024a1d9c0ed9c0d592a8314ded8f4fe30d440" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:964414eef19459d55a10e886e2fca50677550e243586d1678f65e3f6f6bac47a" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6dd7c4d329a0e03157803031bc856220c6155ef08c26d4f5bbac938acecf0948" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c3ae3319624c43cc8127020f46c14aa878406781f0899bb6283ae474afeafbf" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1c37e325e09a184b730c3ef51424f383ec5745378dc0eca244520aca29722600" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:4a614a6a408d2ed74208d0ea6c28a2fbb68290e9a7df206c5fef3f0b6865d307" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2f7fd6c15f3697e80627b77934f77705f3bc0e98278b989b2655de01f6903e1d" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:043d9e35ed69c2e586aff6eb9e2887382e7863707115668ac9d140da58f42cba" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:2df618e1143805a7673aaf82cb5720dd9112d4e771983156aaf2ffff692eebf9" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:27142bcc8a984227a6dcf560985e83f52b82a7d3f5fe9051af586a2ccc46ef26" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2a3299d2b1d5a7aed2d3b6ffb69c672ca8830671967eb1cee1497bacd82fe47b" },
] ]
[[package]] [[package]]
name = "torchvision" name = "torchvision"
version = "0.22.1+cpu" version = "0.23.0+cpu"
source = { registry = "https://download.pytorch.org/whl/cpu" } source = { registry = "https://download.pytorch.org/whl/cpu" }
resolution-markers = [ resolution-markers = [
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@ -4399,15 +4402,15 @@ resolution-markers = [
dependencies = [ dependencies = [
{ name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "torch", version = "2.7.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
] ]
wheels = [ wheels = [
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b5fa7044bd82c6358e8229351c98070cf3a7bf4a6e89ea46352ae6c65745ef94" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ae459d4509d3b837b978dc6c66106601f916b6d2cda75c137e3f5f48324ce1da" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:433cb4dbced7291f17064cea08ac1e5aebd02ec190e1c207d117ad62a8961f2b" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:a651ccc540cf4c87eb988730c59c2220c52b57adc276f044e7efb9830fa65a1d" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a93c21f18c33a819616b3dda7655aa4de40b219682c654175b6bbeb65ecc2e5f" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:dea90a67d60a5366b0358a0b8d6bf267805278697d6fd950cf0e31139e56d1be" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:34c914ad4728b81848ac802c5fc5eeb8de8ff4058cc59c1463a74ce4f4fbf0d8" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:82928788025170c62e7df1120dcdc0cd175bfc31c08374613ce6d1a040bc0cda" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ab7ae82529887c704c1b5d1d5198f65dc777d04fc3858b374503a6deedb82b19" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:474d77adbbbed5166db3e5636b4b4ae3399c66ef5bfa12536e254b32259c90c0" },
{ url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:b2d1c4bdbfd8e6c779dc810a6171b56224f1332fc46986810d4081bed1633804" }, { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:8d6a47e23d7896f0ef9aa7ea7179eb6324e82438aa66d19884c2020d0646b104" },
] ]
[[package]] [[package]]