Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-15 14:08:00 +00:00)

Merge branch 'meta-llama:main' into dell-distro

Commit 839cc911ac: 42 changed files with 786 additions and 186 deletions
README.md (18 lines changed)

@@ -1,5 +1,8 @@
 # Llama Stack

+<a href="https://trendshift.io/repositories/11824" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11824" alt="meta-llama%2Fllama-stack | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+
+-----
 [](https://pypi.org/project/llama_stack/)
 [](https://pypi.org/project/llama-stack/)
 [](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)

@@ -9,6 +12,7 @@
 [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

 ### ✨🎉 Llama 4 Support 🎉✨
 We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.

@@ -179,3 +183,17 @@ Please checkout our [Documentation](https://llama-stack.readthedocs.io/en/latest
 Check out our client SDKs for connecting to a Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.

 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.

+## 🌟 GitHub Star History
+## Star History
+
+[](https://www.star-history.com/#meta-llama/llama-stack&Date)
+
+## ✨ Contributors
+
+Thanks to all of our amazing contributors!
+
+<a href="https://github.com/meta-llama/llama-stack/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=meta-llama/llama-stack" />
+</a>
docs/_static/js/keyboard_shortcuts.js (new file, 14 lines, vendored)

```javascript
document.addEventListener('keydown', function(event) {
  // command+K or ctrl+K
  if ((event.metaKey || event.ctrlKey) && event.key === 'k') {
    event.preventDefault();
    document.querySelector('.search-input, .search-field, input[name="q"]').focus();
  }

  // forward slash
  if (event.key === '/' &&
      !event.target.matches('input, textarea, select')) {
    event.preventDefault();
    document.querySelector('.search-input, .search-field, input[name="q"]').focus();
  }
});
```
@@ -111,7 +111,7 @@ name = "llama-stack-api-weather"
 version = "0.1.0"
 description = "Weather API for Llama Stack"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["llama-stack", "pydantic"]

 [build-system]

@@ -231,7 +231,7 @@ name = "llama-stack-provider-kaze"
 version = "0.1.0"
 description = "Kaze weather provider for Llama Stack"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["llama-stack", "pydantic", "aiohttp"]

 [build-system]
@@ -131,6 +131,7 @@ html_static_path = ["../_static"]
 def setup(app):
     app.add_css_file("css/my_theme.css")
     app.add_js_file("js/detect_theme.js")
+    app.add_js_file("js/keyboard_shortcuts.js")


 def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
     url = f"https://hub.docker.com/r/llamastack/{text}"
@@ -2,14 +2,28 @@
 ```{include} ../../../CONTRIBUTING.md
 ```

-See the [Adding a New API Provider](new_api_provider.md) which describes how to add new API providers to the Stack.
+## Testing
+
+See the [Test Page](testing.md) which describes how to test your changes.
+
+```{toctree}
+:maxdepth: 1
+:hidden:
+:caption: Testing
+
+testing
+```
+
+## Adding a New Provider
+
+See the [Adding a New API Provider Page](new_api_provider.md) which describes how to add new API providers to the Stack.
+
+See the [Vector Database Page](new_vector_database.md) which describes how to add a new vector database with Llama Stack.
+
+See the [External Provider Page](../providers/external/index.md) which describes how to add external providers to the Stack.

 ```{toctree}
 :maxdepth: 1
 :hidden:

 new_api_provider
-testing
+new_vector_database
 ```
docs/source/contributing/new_vector_database.md (new file, 75 lines)

# Adding a New Vector Database

This guide will walk you through the process of adding a new vector database to Llama Stack.

> **_NOTE:_** Here's an example Pull Request of the [Milvus Vector Database Provider](https://github.com/meta-llama/llama-stack/pull/1467).

Vector Database providers are used to store and retrieve vector embeddings. Vector databases are not limited to vector
search but can support keyword and hybrid search. Additionally, vector databases can also support operations like
filtering, sorting, and aggregating vectors.

## Steps to Add a New Vector Database Provider
1. **Choose the Database Type**: Determine if your vector database is a remote service, inline, or both.
   - Remote databases make requests to external services, while inline databases execute locally. Some providers support both.
2. **Implement the Provider**: Create a new provider class that inherits from `VectorDatabaseProvider` and implements the required methods (a minimal skeleton is sketched below).
   - Implement methods for vector storage, retrieval, search, and any additional features your database supports.
   - You will need to implement the following methods for `YourVectorIndex`:
     - `YourVectorIndex.create()`
     - `YourVectorIndex.initialize()`
     - `YourVectorIndex.add_chunks()`
     - `YourVectorIndex.delete_chunk()`
     - `YourVectorIndex.query_vector()`
     - `YourVectorIndex.query_keyword()`
     - `YourVectorIndex.query_hybrid()`
   - You will need to implement the following methods for `YourVectorIOAdapter`:
     - `YourVectorIOAdapter.initialize()`
     - `YourVectorIOAdapter.shutdown()`
     - `YourVectorIOAdapter.list_vector_dbs()`
     - `YourVectorIOAdapter.register_vector_db()`
     - `YourVectorIOAdapter.unregister_vector_db()`
     - `YourVectorIOAdapter.insert_chunks()`
     - `YourVectorIOAdapter.query_chunks()`
     - `YourVectorIOAdapter.delete_chunks()`
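A minimal shape sketch of these two classes follows. The class and method names mirror the lists above; the signatures and the `Any` placeholders are illustrative assumptions, so match the real index and `VectorIO` interfaces in the llama_stack source when implementing a provider.

```python
# Shape sketch only: method names come from this guide, signatures are assumptions.
from typing import Any


class YourVectorIndex:
    """Wraps a single index/collection in your database."""

    @classmethod
    async def create(cls, *args: Any, **kwargs: Any) -> "YourVectorIndex":
        ...  # construct and initialize an index

    async def initialize(self) -> None:
        ...  # open connections, create collections/schemas

    async def add_chunks(self, chunks: list[Any], embeddings: Any) -> None:
        ...  # store chunk text plus embeddings

    async def delete_chunk(self, chunk_id: str) -> None:
        ...  # remove a single chunk

    async def query_vector(self, embedding: Any, k: int, score_threshold: float) -> Any:
        ...  # pure vector similarity search

    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> Any:
        ...  # keyword / full-text search (raise NotImplementedError if unsupported)

    async def query_hybrid(
        self,
        embedding: Any,
        query_string: str,
        k: int,
        score_threshold: float,
        reranker_type: str,
        reranker_params: dict[str, Any] | None = None,
    ) -> Any:
        ...  # combine vector and keyword results (RRF, weighted, ...)


class YourVectorIOAdapter:
    """Routes vector_io API calls to the right YourVectorIndex."""

    async def initialize(self) -> None: ...
    async def shutdown(self) -> None: ...
    async def list_vector_dbs(self) -> list[Any]: ...
    async def register_vector_db(self, vector_db: Any) -> None: ...
    async def unregister_vector_db(self, vector_db_id: str) -> None: ...
    async def insert_chunks(self, vector_db_id: str, chunks: list[Any]) -> None: ...
    async def query_chunks(self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None) -> Any: ...
    async def delete_chunks(self, vector_db_id: str, chunk_ids: list[str]) -> None: ...
```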
3. **Add to Registry**: Register your provider in the appropriate registry file.
   - Update {repopath}`llama_stack/providers/registry/vector_io.py` to include your new provider.
```python
from llama_stack.providers.registry.specs import InlineProviderSpec
from llama_stack.providers.registry.api import Api

InlineProviderSpec(
    api=Api.vector_io,
    provider_type="inline::milvus",
    pip_packages=["pymilvus>=2.4.10"],
    module="llama_stack.providers.inline.vector_io.milvus",
    config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
    api_dependencies=[Api.inference],
    optional_api_dependencies=[Api.files],
    description="",
),
```
4. **Add Tests**: Create unit tests and integration tests for your provider in the `tests/` directory.
   - Unit Tests
     - By following the structure of the class methods, you will be able to easily run unit and integration tests for your database.
       1. You have to configure the tests for your provider in `/tests/unit/providers/vector_io/conftest.py`.
       2. Update the `vector_provider` fixture to include your provider if it is an inline provider.
       3. Create a `your_vectorprovider_index` fixture that initializes your vector index.
       4. Create a `your_vectorprovider_adapter` fixture that initializes your vector adapter.
       5. Add your provider to the `vector_io_providers` fixture dictionary.
     - Please follow the naming convention of `your_vectorprovider_index` and `your_vectorprovider_adapter`, as the tests require this to execute properly (see the fixture sketch after this step).
   - Integration Tests
     - Integration tests are located in {repopath}`tests/integration`. These tests use the Python client-SDK APIs (from the `llama_stack_client` package) to test functionality.
     - The two sets of integration tests are:
       - `tests/integration/vector_io/test_vector_io.py`: This file tests registration, insertion, and retrieval.
       - `tests/integration/vector_io/test_openai_vector_stores.py`: These tests are for OpenAI-compatible vector stores and test the OpenAI API compatibility.
     - You will need to update `skip_if_provider_doesnt_support_openai_vector_stores` to include your provider, as well as `skip_if_provider_doesnt_support_openai_vector_stores_search` to test the appropriate search functionality.
   - Running the tests in the GitHub CI
     - You will need to update the `.github/workflows/integration-vector-io-tests.yml` file to include your provider.
     - If your provider is a remote provider, you will also have to add a container to spin up and run it in the action.
   - Updating the pyproject.toml
     - If you are adding tests for the `inline` provider you will have to update the `unit` group.
       - `uv add new_pip_package --group unit`
     - If you are adding tests for the `remote` provider you will have to update the `test` group, which is used in the GitHub CI for integration tests.
       - `uv add new_pip_package --group test`
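As a rough illustration of the fixture wiring described above, the conftest additions for a hypothetical provider called `yourdb` might look like the following; the fixture bodies and the `yourdb` name are placeholders, so follow the existing entries in `/tests/unit/providers/vector_io/conftest.py` for the exact pattern.

```python
# Hypothetical conftest.py additions for a provider called "yourdb". The fixture
# names follow the your_vectorprovider_index / your_vectorprovider_adapter convention.
import pytest


@pytest.fixture
def yourdb_index():
    # build and return an initialized YourVectorIndex for the unit tests
    ...


@pytest.fixture
def yourdb_adapter(yourdb_index):
    # build and return an initialized YourVectorIOAdapter for the unit tests
    ...


# Then add "yourdb" to the vector_provider / vector_io_providers fixtures so the
# shared unit tests pick it up.
```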
5. **Update Documentation**: Please update the documentation for end users.
   - Generate the provider documentation by running {repopath}`./scripts/provider_codegen.py`.
   - Update the autogenerated content in the registry/vector_io.py file with information about your provider. Please see other providers for examples.
@@ -1,6 +1,8 @@
-# Testing Llama Stack
+```{include} ../../../tests/README.md
+```

-Tests are of three different kinds:
-- Unit tests
-- Provider focused integration tests
-- Client SDK tests
+```{include} ../../../tests/unit/README.md
+```
+
+```{include} ../../../tests/integration/README.md
+```
@@ -226,7 +226,7 @@ uv init
 name = "llama-stack-provider-ollama"
 version = "0.1.0"
 description = "Ollama provider for Llama Stack"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"]
 ```
@@ -29,6 +29,7 @@ remote_runpod
 remote_sambanova
 remote_tgi
 remote_together
+remote_vertexai
 remote_vllm
 remote_watsonx
 ```
docs/source/providers/inference/remote_vertexai.md (new file, 40 lines)

# remote::vertexai

## Description

Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:

• Enterprise-grade security: Uses Google Cloud's security controls and IAM
• Better integration: Seamless integration with other Google Cloud services
• Advanced features: Access to additional Vertex AI features like model tuning and monitoring
• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys

Configuration:
- Set VERTEX_AI_PROJECT environment variable (required)
- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
- Use Google Cloud Application Default Credentials or service account key

Authentication Setup:
Option 1 (Recommended): gcloud auth application-default login
Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path

Available Models:
- vertex_ai/gemini-2.0-flash
- vertex_ai/gemini-2.5-flash
- vertex_ai/gemini-2.5-pro

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI |
| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |

## Sample Configuration

```yaml
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
```
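For illustration, once the provider is enabled (with `VERTEX_AI_PROJECT` set and ADC credentials available), a chat completion against one of the models listed above might look like the sketch below. The `base_url` and the exact call shape are assumptions based on standard `llama_stack_client` usage, not part of this provider's documentation.

```python
# Sketch: chat completion against a running Llama Stack server with remote::vertexai enabled.
# Assumes VERTEX_AI_PROJECT is set and `gcloud auth application-default login` has been run.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

response = client.inference.chat_completion(
    model_id="vertex_ai/gemini-2.0-flash",
    messages=[{"role": "user", "content": "Say hello from Vertex AI"}],
)
print(response.completion_message.content)
```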
@@ -12,6 +12,18 @@ That means you'll get fast and efficient vector retrieval.
 - Lightweight and easy to use
 - Fully integrated with Llama Stack
 - GPU support
+- **Vector search** - FAISS supports pure vector similarity search using embeddings
+
+## Search Modes
+
+**Supported:**
+- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings
+
+**Not Supported:**
+- **Keyword Search** (`mode="keyword"`): Not supported by FAISS
+- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS
+
+> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality.

 ## Usage
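Since only vector mode is available with FAISS, a search against a FAISS-backed store would use `search_mode="vector"`. This sketch mirrors the `client.vector_stores.search` examples in the Milvus section below and assumes a `client` and a registered `vector_store` already exist.

```python
# Vector-only search against a FAISS-backed vector store (sketch; `client` is a
# LlamaStackClient and `vector_store` was created with the FAISS provider).
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="What is machine learning?",
    search_mode="vector",  # "keyword" and "hybrid" are not supported by FAISS
    max_num_results=5,
)
```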
@@ -11,6 +11,7 @@ That means you're not limited to storing vectors in memory or in a separate serv
 - Easy to use
 - Fully integrated with Llama Stack
+- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations)

 ## Usage

@@ -101,6 +102,92 @@ vector_io:
 - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
 - **`client_key_path`**: Path to the **client private key** file (required for mTLS).
+
+## Search Modes
+
+Milvus supports three different search modes for both inline and remote configurations:
+
+### Vector Search
+Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.
+
+```python
+# Vector search example
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="What is machine learning?",
+    search_mode="vector",
+    max_num_results=5,
+)
+```
+
+### Keyword Search
+Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.
+
+```python
+# Keyword search example
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="Python programming language",
+    search_mode="keyword",
+    max_num_results=5,
+)
+```
+
+### Hybrid Search
+Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
+
+#### Basic Hybrid Search
+```python
+# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0)
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks in Python",
+    search_mode="hybrid",
+    max_num_results=5,
+)
+```
+
+**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009).
+
+#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker
+RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results.
+
+```python
+# Hybrid search with custom RRF parameters
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks in Python",
+    search_mode="hybrid",
+    max_num_results=5,
+    ranking_options={
+        "ranker": {
+            "type": "rrf",
+            "impact_factor": 100.0,  # Higher values give more weight to top-ranked results
+        }
+    },
+)
+```
+
+#### Hybrid Search with Weighted Ranker
+Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods.
+
+```python
+# Hybrid search with weighted ranker
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks in Python",
+    search_mode="hybrid",
+    max_num_results=5,
+    ranking_options={
+        "ranker": {
+            "type": "weighted",
+            "alpha": 0.7,  # 70% vector search, 30% keyword search
+        }
+    },
+)
+```
+
+For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md).

 ## Documentation
 See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
@@ -62,3 +62,13 @@ class SessionNotFoundError(ValueError):
     def __init__(self, session_name: str) -> None:
         message = f"Session '{session_name}' not found or access denied."
         super().__init__(message)
+
+
+class ModelTypeError(TypeError):
+    """raised when a model is present but not the correct type"""
+
+    def __init__(self, model_name: str, model_type: str, expected_model_type: str) -> None:
+        message = (
+            f"Model '{model_name}' is of type '{model_type}' rather than the expected type '{expected_model_type}'"
+        )
+        super().__init__(message)
@@ -18,7 +18,7 @@ from llama_stack.apis.common.content_types import (
     InterleavedContent,
     InterleavedContentItem,
 )
-from llama_stack.apis.common.errors import ModelNotFoundError
+from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
 from llama_stack.apis.inference import (
     BatchChatCompletionResponse,
     BatchCompletionResponse,

@@ -177,6 +177,15 @@ class InferenceRouter(Inference):
         encoded = self.formatter.encode_content(messages)
         return len(encoded.tokens) if encoded and encoded.tokens else 0
+
+    async def _get_model(self, model_id: str, expected_model_type: str) -> Model:
+        """takes a model id and gets model after ensuring that it is accessible and of the correct type"""
+        model = await self.routing_table.get_model(model_id)
+        if model is None:
+            raise ModelNotFoundError(model_id)
+        if model.model_type != expected_model_type:
+            raise ModelTypeError(model_id, model.model_type, expected_model_type)
+        return model
+
     async def chat_completion(
         self,
         model_id: str,

@@ -195,11 +204,7 @@ class InferenceRouter(Inference):
         )
         if sampling_params is None:
             sampling_params = SamplingParams()
-        model = await self.routing_table.get_model(model_id)
-        if model is None:
-            raise ModelNotFoundError(model_id)
-        if model.model_type == ModelType.embedding:
-            raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
+        model = await self._get_model(model_id, ModelType.llm)
         if tool_config:
             if tool_choice and tool_choice != tool_config.tool_choice:
                 raise ValueError("tool_choice and tool_config.tool_choice must match")

@@ -301,11 +306,7 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.completion: {model_id=}, {stream=}, {content=}, {sampling_params=}, {response_format=}",
         )
-        model = await self.routing_table.get_model(model_id)
-        if model is None:
-            raise ModelNotFoundError(model_id)
-        if model.model_type == ModelType.embedding:
-            raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
+        model = await self._get_model(model_id, ModelType.llm)
         provider = await self.routing_table.get_provider_impl(model_id)
         params = dict(
             model_id=model_id,

@@ -355,11 +356,7 @@ class InferenceRouter(Inference):
         task_type: EmbeddingTaskType | None = None,
     ) -> EmbeddingsResponse:
         logger.debug(f"InferenceRouter.embeddings: {model_id}")
-        model = await self.routing_table.get_model(model_id)
-        if model is None:
-            raise ModelNotFoundError(model_id)
-        if model.model_type == ModelType.llm:
-            raise ValueError(f"Model '{model_id}' is an LLM model and does not support embeddings")
+        await self._get_model(model_id, ModelType.embedding)
         provider = await self.routing_table.get_provider_impl(model_id)
         return await provider.embeddings(
             model_id=model_id,

@@ -395,12 +392,7 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_completion: {model=}, {stream=}, {prompt=}",
         )
-        model_obj = await self.routing_table.get_model(model)
-        if model_obj is None:
-            raise ModelNotFoundError(model)
-        if model_obj.model_type == ModelType.embedding:
-            raise ValueError(f"Model '{model}' is an embedding model and does not support completions")
-
+        model_obj = await self._get_model(model, ModelType.llm)
         params = dict(
             model=model_obj.identifier,
             prompt=prompt,

@@ -476,11 +468,7 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_chat_completion: {model=}, {stream=}, {messages=}",
         )
-        model_obj = await self.routing_table.get_model(model)
-        if model_obj is None:
-            raise ModelNotFoundError(model)
-        if model_obj.model_type == ModelType.embedding:
-            raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions")
+        model_obj = await self._get_model(model, ModelType.llm)

         # Use the OpenAI client for a bit of extra input validation without
         # exposing the OpenAI client itself as part of our API surface

@@ -567,12 +555,7 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_embeddings: {model=}, input_type={type(input)}, {encoding_format=}, {dimensions=}",
         )
-        model_obj = await self.routing_table.get_model(model)
-        if model_obj is None:
-            raise ModelNotFoundError(model)
-        if model_obj.model_type != ModelType.embedding:
-            raise ValueError(f"Model '{model}' is not an embedding model")
-
+        model_obj = await self._get_model(model, ModelType.embedding)
         params = dict(
             model=model_obj.identifier,
             input=input,
@@ -124,10 +124,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
         return toolgroup

     async def unregister_toolgroup(self, toolgroup_id: str) -> None:
-        tool_group = await self.get_tool_group(toolgroup_id)
-        if tool_group is None:
-            raise ToolGroupNotFoundError(toolgroup_id)
-        await self.unregister_object(tool_group)
+        await self.unregister_object(await self.get_tool_group(toolgroup_id))

     async def shutdown(self) -> None:
         pass
@@ -8,7 +8,7 @@ from typing import Any

 from pydantic import TypeAdapter

-from llama_stack.apis.common.errors import ModelNotFoundError, VectorStoreNotFoundError
+from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError, VectorStoreNotFoundError
 from llama_stack.apis.models import ModelType
 from llama_stack.apis.resource import ResourceType
 from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs

@@ -66,7 +66,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
         if model is None:
             raise ModelNotFoundError(embedding_model)
         if model.model_type != ModelType.embedding:
-            raise ValueError(f"Model {embedding_model} is not an embedding model")
+            raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
         if "embedding_dimension" not in model.metadata:
             raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
         vector_db_data = {
@@ -14,6 +14,7 @@ distribution_spec:
   - provider_type: remote::openai
   - provider_type: remote::anthropic
   - provider_type: remote::gemini
+  - provider_type: remote::vertexai
   - provider_type: remote::groq
   - provider_type: remote::sambanova
   - provider_type: inline::sentence-transformers
@@ -65,6 +65,11 @@ providers:
     provider_type: remote::gemini
     config:
       api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -14,6 +14,7 @@ distribution_spec:
   - provider_type: remote::openai
   - provider_type: remote::anthropic
   - provider_type: remote::gemini
+  - provider_type: remote::vertexai
   - provider_type: remote::groq
   - provider_type: remote::sambanova
   - provider_type: inline::sentence-transformers
@@ -65,6 +65,11 @@ providers:
     provider_type: remote::gemini
     config:
       api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -56,6 +56,7 @@ ENABLED_INFERENCE_PROVIDERS = [
     "fireworks",
     "together",
     "gemini",
+    "vertexai",
     "groq",
     "sambanova",
     "anthropic",

@@ -71,6 +72,7 @@ INFERENCE_PROVIDER_IDS = {
     "tgi": "${env.TGI_URL:+tgi}",
     "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
     "nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
+    "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
 }

@@ -246,6 +248,14 @@ def get_distribution_template() -> DistributionTemplate:
             "",
             "Gemini API Key",
         ),
+        "VERTEX_AI_PROJECT": (
+            "",
+            "Google Cloud Project ID for Vertex AI",
+        ),
+        "VERTEX_AI_LOCATION": (
+            "us-central1",
+            "Google Cloud Location for Vertex AI",
+        ),
         "SAMBANOVA_API_KEY": (
             "",
             "SambaNova API Key",
@@ -99,7 +99,8 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
         Dict[str, int]: A dictionary mapping categories to their log levels.
     """
     category_levels = {}
-    for pair in env_config.split(";"):
+    delimiter = ","
+    for pair in env_config.split(delimiter):
         if not pair.strip():
             continue
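In other words, the logging environment configuration is now split on commas rather than semicolons. Assuming the existing category=level pair syntax implied by the surrounding parser, a configuration string would be handled roughly like this sketch; the example category names are illustrative.

```python
# Sketch of the comma-delimited format accepted after this change.
env_config = "core=debug,server=info"

category_levels = {}
for pair in env_config.split(","):
    if not pair.strip():
        continue
    category, _, level = pair.partition("=")
    category_levels[category.strip()] = level.strip()

print(category_levels)  # {'core': 'debug', 'server': 'info'}
```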
@@ -15,6 +15,7 @@ from llama_stack.apis.safety import (
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    ShieldStore,
     ViolationLevel,
 )
 from llama_stack.apis.shields import Shield

@@ -32,6 +33,8 @@ PROMPT_GUARD_MODEL = "Prompt-Guard-86M"


 class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
+    shield_store: ShieldStore
+
     def __init__(self, config: PromptGuardConfig, _deps) -> None:
         self.config = config

@@ -53,7 +56,7 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
         self,
         shield_id: str,
         messages: list[Message],
-        params: dict[str, Any] = None,
+        params: dict[str, Any],
     ) -> RunShieldResponse:
         shield = await self.shield_store.get_shield(shield_id)
         if not shield:

@@ -61,6 +64,9 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):

         return await self.shield.run(messages)

+    async def run_moderation(self, input: str | list[str], model: str):
+        raise NotImplementedError("run_moderation not implemented for PromptGuard")
+

 class PromptGuardShield:
     def __init__(

@@ -117,8 +123,10 @@ class PromptGuardShield:
         elif self.config.guard_type == PromptGuardType.jailbreak.value and score_malicious > self.threshold:
             violation = SafetyViolation(
                 violation_level=ViolationLevel.ERROR,
-                violation_type=f"prompt_injection:malicious={score_malicious}",
-                violation_return_message="Sorry, I cannot do this.",
+                user_message="Sorry, I cannot do this.",
+                metadata={
+                    "violation_type": f"prompt_injection:malicious={score_malicious}",
+                },
             )

         return RunShieldResponse(violation=violation)
@@ -174,7 +174,9 @@ class FaissIndex(EmbeddingIndex):
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Keyword search is not supported in FAISS")
+        raise NotImplementedError(
+            "Keyword search is not supported - underlying DB FAISS does not support this search mode"
+        )

     async def query_hybrid(
         self,

@@ -185,7 +187,9 @@ class FaissIndex(EmbeddingIndex):
         reranker_type: str,
         reranker_params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Hybrid search is not supported in FAISS")
+        raise NotImplementedError(
+            "Hybrid search is not supported - underlying DB FAISS does not support this search mode"
+        )


 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
@@ -213,6 +213,36 @@ def available_providers() -> list[ProviderSpec]:
             description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
         ),
     ),
+    remote_provider_spec(
+        api=Api.inference,
+        adapter=AdapterSpec(
+            adapter_type="vertexai",
+            pip_packages=["litellm", "google-cloud-aiplatform"],
+            module="llama_stack.providers.remote.inference.vertexai",
+            config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
+            provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
+            description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:
+
+• Enterprise-grade security: Uses Google Cloud's security controls and IAM
+• Better integration: Seamless integration with other Google Cloud services
+• Advanced features: Access to additional Vertex AI features like model tuning and monitoring
+• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys
+
+Configuration:
+- Set VERTEX_AI_PROJECT environment variable (required)
+- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
+- Use Google Cloud Application Default Credentials or service account key
+
+Authentication Setup:
+Option 1 (Recommended): gcloud auth application-default login
+Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path
+
+Available Models:
+- vertex_ai/gemini-2.0-flash
+- vertex_ai/gemini-2.5-flash
+- vertex_ai/gemini-2.5-pro""",
+        ),
+    ),
     remote_provider_spec(
         api=Api.inference,
         adapter=AdapterSpec(
The same additions are repeated for a second vector_io documentation file in this changeset:

@@ -45,6 +45,18 @@ That means you'll get fast and efficient vector retrieval.
(The FAISS "Search Modes" block shown above is added here as well.)

@@ -535,6 +547,7 @@ That means you're not limited to storing vectors in memory or in a separate serv
(The Milvus "Supports all search modes" bullet shown above is added here as well.)

@@ -625,6 +638,92 @@ vector_io:
(The Milvus "Search Modes" section, with the vector, keyword, and hybrid search examples shown above, is added here as well.)
@@ -13,7 +13,9 @@ LLM_MODEL_IDS = [
     "gemini-1.5-flash",
     "gemini-1.5-pro",
     "gemini-2.0-flash",
+    "gemini-2.0-flash-lite",
     "gemini-2.5-flash",
+    "gemini-2.5-flash-lite",
     "gemini-2.5-pro",
 ]
@@ -457,9 +457,6 @@ class OllamaInferenceAdapter(
         user: str | None = None,
     ) -> OpenAIEmbeddingsResponse:
         model_obj = await self._get_model(model)
-        if model_obj.model_type != ModelType.embedding:
-            raise ValueError(f"Model {model} is not an embedding model")
-
         if model_obj.provider_resource_id is None:
             raise ValueError(f"Model {model} has no provider_resource_id set")
llama_stack/providers/remote/inference/vertexai/__init__.py (new file, 15 lines)

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .config import VertexAIConfig


async def get_adapter_impl(config: VertexAIConfig, _deps):
    from .vertexai import VertexAIInferenceAdapter

    impl = VertexAIInferenceAdapter(config)
    await impl.initialize()
    return impl
```
llama_stack/providers/remote/inference/vertexai/config.py (new file, 45 lines)

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from pydantic import BaseModel, Field

from llama_stack.schema_utils import json_schema_type


class VertexAIProviderDataValidator(BaseModel):
    vertex_project: str | None = Field(
        default=None,
        description="Google Cloud project ID for Vertex AI",
    )
    vertex_location: str | None = Field(
        default=None,
        description="Google Cloud location for Vertex AI (e.g., us-central1)",
    )


@json_schema_type
class VertexAIConfig(BaseModel):
    project: str = Field(
        description="Google Cloud project ID for Vertex AI",
    )
    location: str = Field(
        default="us-central1",
        description="Google Cloud location for Vertex AI",
    )

    @classmethod
    def sample_run_config(
        cls,
        project: str = "${env.VERTEX_AI_PROJECT:=}",
        location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
        **kwargs,
    ) -> dict[str, Any]:
        return {
            "project": project,
            "location": location,
        }
```
llama_stack/providers/remote/inference/vertexai/models.py (new file, 20 lines)

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.utils.inference.model_registry import (
    ProviderModelEntry,
)

# Vertex AI model IDs with vertex_ai/ prefix as required by litellm
LLM_MODEL_IDS = [
    "vertex_ai/gemini-2.0-flash",
    "vertex_ai/gemini-2.5-flash",
    "vertex_ai/gemini-2.5-pro",
]

SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]()

MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES
```
llama_stack/providers/remote/inference/vertexai/vertexai.py (new file, 52 lines)

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from llama_stack.apis.inference import ChatCompletionRequest
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
    LiteLLMOpenAIMixin,
)

from .config import VertexAIConfig
from .models import MODEL_ENTRIES


class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
    def __init__(self, config: VertexAIConfig) -> None:
        LiteLLMOpenAIMixin.__init__(
            self,
            MODEL_ENTRIES,
            litellm_provider_name="vertex_ai",
            api_key_from_config=None,  # Vertex AI uses ADC, not API keys
            provider_data_api_key_field="vertex_project",  # Use project for validation
        )
        self.config = config

    def get_api_key(self) -> str:
        # Vertex AI doesn't use API keys, it uses Application Default Credentials
        # Return empty string to let litellm handle authentication via ADC
        return ""

    async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
        # Get base parameters from parent
        params = await super()._get_params(request)

        # Add Vertex AI specific parameters
        provider_data = self.get_request_provider_data()
        if provider_data:
            if getattr(provider_data, "vertex_project", None):
                params["vertex_project"] = provider_data.vertex_project
            if getattr(provider_data, "vertex_location", None):
                params["vertex_location"] = provider_data.vertex_location
        else:
            params["vertex_project"] = self.config.project
            params["vertex_location"] = self.config.location

        # Remove api_key since Vertex AI uses ADC
        params.pop("api_key", None)

        return params
```
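Because the adapter reads `vertex_project` and `vertex_location` from request provider data when present, a caller could in principle override the configured project per request. A hedged sketch, assuming the provider-data mechanism used by other remote providers (verify the exact argument name against the client version you run):

```python
# Hypothetical per-request override of the Vertex AI project/location via provider data.
# The provider_data argument (serialized into the X-LlamaStack-Provider-Data header) is
# an assumption based on how other remote providers accept per-request credentials.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(
    base_url="http://localhost:8321",
    provider_data={"vertex_project": "my-gcp-project", "vertex_location": "us-east1"},
)
```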
@@ -70,7 +70,7 @@ from openai.types.chat.chat_completion_chunk import (
 from openai.types.chat.chat_completion_content_part_image_param import (
     ImageURL as OpenAIImageURL,
 )
-from openai.types.chat.chat_completion_message_tool_call_param import (
+from openai.types.chat.chat_completion_message_tool_call import (
     Function as OpenAIFunction,
 )
 from pydantic import BaseModel
@ -9,7 +9,9 @@ import contextvars
|
||||||
import logging
|
import logging
|
||||||
import queue
|
import queue
|
||||||
import random
|
import random
|
||||||
|
import sys
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
@@ -30,6 +32,16 @@ from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value
 
 logger = get_logger(__name__, category="core")
 
+# Fallback logger that does NOT propagate to TelemetryHandler to avoid recursion
+_fallback_logger = logging.getLogger("llama_stack.telemetry.background")
+if not _fallback_logger.handlers:
+    _fallback_logger.propagate = False
+    _fallback_logger.setLevel(logging.ERROR)
+    _fallback_handler = logging.StreamHandler(sys.stderr)
+    _fallback_handler.setLevel(logging.ERROR)
+    _fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
+    _fallback_logger.addHandler(_fallback_handler)
+
 
 INVALID_SPAN_ID = 0x0000000000000000
 INVALID_TRACE_ID = 0x00000000000000000000000000000000
@@ -79,19 +91,32 @@ def generate_trace_id() -> str:
 CURRENT_TRACE_CONTEXT = contextvars.ContextVar("trace_context", default=None)
 BACKGROUND_LOGGER = None
 
+LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0
+
 
 class BackgroundLogger:
     def __init__(self, api: Telemetry, capacity: int = 100000):
         self.api = api
-        self.log_queue = queue.Queue(maxsize=capacity)
+        self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
         self.worker_thread = threading.Thread(target=self._process_logs, daemon=True)
         self.worker_thread.start()
+        self._last_queue_full_log_time: float = 0.0
+        self._dropped_since_last_notice: int = 0
 
     def log_event(self, event):
         try:
             self.log_queue.put_nowait(event)
         except queue.Full:
-            logger.error("Log queue is full, dropping event")
+            # Aggregate drops and emit at most once per interval via fallback logger
+            self._dropped_since_last_notice += 1
+            current_time = time.time()
+            if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS:
+                _fallback_logger.error(
+                    "Log queue is full; dropped %d events since last notice",
+                    self._dropped_since_last_notice,
+                )
+                self._last_queue_full_log_time = current_time
+                self._dropped_since_last_notice = 0
 
     def _process_logs(self):
         while True:
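The queue-full path in `BackgroundLogger` no longer logs every dropped event through the telemetry logger (which could recurse into the handler being serviced); it counts drops and reports them through a non-propagating stderr logger at most once per `LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS`. A standalone sketch of the same pattern, with hypothetical names and a shorter interval for illustration:

```python
import logging
import queue
import sys
import time

# Illustrative constants/names; the real module uses a 60s interval and module-level state.
REPORT_INTERVAL_SECONDS = 5.0

fallback = logging.getLogger("example.dropped_events")
fallback.propagate = False  # keep drop reports out of the normal handler chain
fallback.addHandler(logging.StreamHandler(sys.stderr))
fallback.setLevel(logging.ERROR)


class DropCountingQueue:
    def __init__(self, capacity: int) -> None:
        self.q: queue.Queue = queue.Queue(maxsize=capacity)
        self._dropped = 0
        self._last_report = 0.0

    def put(self, item) -> None:
        try:
            self.q.put_nowait(item)
        except queue.Full:
            # Count the drop; emit a single aggregated error at most once per interval.
            self._dropped += 1
            now = time.time()
            if now - self._last_report >= REPORT_INTERVAL_SECONDS:
                fallback.error("queue full; dropped %d items since last notice", self._dropped)
                self._last_report = now
                self._dropped = 0
```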
@@ -175,7 +175,7 @@ const handleSubmitWithContent = async (content: string) => {
   return (
     <div className="flex flex-col h-full max-w-4xl mx-auto">
       <div className="mb-4 flex justify-between items-center">
-        <h1 className="text-2xl font-bold">Chat Playground</h1>
+        <h1 className="text-2xl font-bold">Chat Playground (Completions)</h1>
         <div className="flex gap-2">
           <Select value={selectedModel} onValueChange={setSelectedModel} disabled={isModelsLoading || isGenerating}>
             <SelectTrigger className="w-[180px]">
@@ -6,6 +6,8 @@ import {
   MoveUpRight,
   Database,
   MessageCircle,
+  Settings2,
+  Compass,
 } from "lucide-react";
 import Link from "next/link";
 import { usePathname } from "next/navigation";
@@ -22,15 +24,16 @@ import {
   SidebarMenuItem,
   SidebarHeader,
 } from "@/components/ui/sidebar";
-// Extracted Chat Playground item
-const chatPlaygroundItem = {
-  title: "Chat Playground",
-  url: "/chat-playground",
-  icon: MessageCircle,
-};
 
-// Removed Chat Playground from log items
-const logItems = [
+const createItems = [
+  {
+    title: "Chat Playground",
+    url: "/chat-playground",
+    icon: MessageCircle,
+  },
+];
+
+const manageItems = [
   {
     title: "Chat Completions",
     url: "/logs/chat-completions",
@@ -53,77 +56,96 @@ const logItems = [
   },
 ];
 
+const optimizeItems: { title: string; url: string; icon: React.ElementType }[] = [
+  {
+    title: "Evaluations",
+    url: "",
+    icon: Compass,
+  },
+  {
+    title: "Fine-tuning",
+    url: "",
+    icon: Settings2,
+  },
+];
+
+interface SidebarItem {
+  title: string;
+  url: string;
+  icon: React.ElementType;
+}
+
 export function AppSidebar() {
   const pathname = usePathname();
 
-  return (
-    <Sidebar>
-      <SidebarHeader>
-        <Link href="/">Llama Stack</Link>
-      </SidebarHeader>
-      <SidebarContent>
-        {/* Chat Playground as its own section */}
-        <SidebarGroup>
-          <SidebarGroupContent>
-            <SidebarMenu>
-              <SidebarMenuItem>
+  const renderSidebarItems = (items: SidebarItem[]) => {
+    return items.map((item) => {
+      const isActive = pathname.startsWith(item.url);
+      return (
+        <SidebarMenuItem key={item.title}>
+          <SidebarMenuButton
+            asChild
+            className={cn(
+              "justify-start",
+              isActive &&
+                "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
+            )}
+          >
+            <Link href={item.url}>
+              <item.icon
+                className={cn(
+                  isActive && "text-gray-900 dark:text-gray-100",
+                  "mr-2 h-4 w-4",
+                )}
+              />
+              <span>{item.title}</span>
+            </Link>
+          </SidebarMenuButton>
+        </SidebarMenuItem>
+      );
+    });
+  };
+
+  return (
+    <Sidebar>
+      <SidebarHeader>
+        <Link href="/">Llama Stack</Link>
+      </SidebarHeader>
+      <SidebarContent>
+        <SidebarGroup>
+          <SidebarGroupLabel>Create</SidebarGroupLabel>
+          <SidebarGroupContent>
+            <SidebarMenu>{renderSidebarItems(createItems)}</SidebarMenu>
+          </SidebarGroupContent>
+        </SidebarGroup>
+
+        <SidebarGroup>
+          <SidebarGroupLabel>Manage</SidebarGroupLabel>
+          <SidebarGroupContent>
+            <SidebarMenu>{renderSidebarItems(manageItems)}</SidebarMenu>
+          </SidebarGroupContent>
+        </SidebarGroup>
+
+        <SidebarGroup>
+          <SidebarGroupLabel>Optimize</SidebarGroupLabel>
+          <SidebarGroupContent>
+            <SidebarMenu>
+              {optimizeItems.map((item) => (
+              <SidebarMenuItem key={item.title}>
                 <SidebarMenuButton
-                  asChild
-                  className={cn(
-                    "justify-start",
-                    pathname.startsWith(chatPlaygroundItem.url) &&
-                      "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
-                  )}
+                  disabled
+                  className="justify-start opacity-60 cursor-not-allowed"
                 >
-                  <Link href={chatPlaygroundItem.url}>
-                    <chatPlaygroundItem.icon
-                      className={cn(
-                        pathname.startsWith(chatPlaygroundItem.url) && "text-gray-900 dark:text-gray-100",
-                        "mr-2 h-4 w-4",
-                      )}
-                    />
-                    <span>{chatPlaygroundItem.title}</span>
-                  </Link>
+                  <item.icon className="mr-2 h-4 w-4" />
+                  <span>{item.title}</span>
+                  <span className="ml-2 text-xs text-gray-500">(Coming Soon)</span>
                 </SidebarMenuButton>
               </SidebarMenuItem>
-            </SidebarMenu>
-          </SidebarGroupContent>
-        </SidebarGroup>
-
-        {/* Logs section */}
-        <SidebarGroup>
-          <SidebarGroupLabel>Logs</SidebarGroupLabel>
-          <SidebarGroupContent>
-            <SidebarMenu>
-              {logItems.map((item) => {
-                const isActive = pathname.startsWith(item.url);
-                return (
-                  <SidebarMenuItem key={item.title}>
-                    <SidebarMenuButton
-                      asChild
-                      className={cn(
-                        "justify-start",
-                        isActive &&
-                          "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
-                      )}
-                    >
-                      <Link href={item.url}>
-                        <item.icon
-                          className={cn(
-                            isActive && "text-gray-900 dark:text-gray-100",
-                            "mr-2 h-4 w-4",
-                          )}
-                        />
-                        <span>{item.title}</span>
-                      </Link>
-                    </SidebarMenuButton>
-                  </SidebarMenuItem>
-                );
-              })}
-            </SidebarMenu>
-          </SidebarGroupContent>
-        </SidebarGroup>
-      </SidebarContent>
-    </Sidebar>
+              ))}
+            </SidebarMenu>
+          </SidebarGroupContent>
+        </SidebarGroup>
+      </SidebarContent>
+    </Sidebar>
   );
 }
@@ -33,7 +33,7 @@ dependencies = [
     "jsonschema",
     "llama-stack-client>=0.2.17",
     "llama-api-client>=0.1.2",
-    "openai>=1.66",
+    "openai>=1.99.6",
    "prompt-toolkit",
    "python-dotenv",
    "python-jose[cryptography]",
@@ -266,7 +266,6 @@ exclude = [
    "^llama_stack/providers/inline/post_training/common/validator\\.py$",
    "^llama_stack/providers/inline/safety/code_scanner/",
    "^llama_stack/providers/inline/safety/llama_guard/",
-    "^llama_stack/providers/inline/safety/prompt_guard/",
    "^llama_stack/providers/inline/scoring/basic/",
    "^llama_stack/providers/inline/scoring/braintrust/",
    "^llama_stack/providers/inline/scoring/llm_as_judge/",
@ -3,7 +3,7 @@ name = "llama-stack-api-weather"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "Weather API for Llama Stack"
|
description = "Weather API for Llama Stack"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.12"
|
||||||
dependencies = ["llama-stack", "pydantic"]
|
dependencies = ["llama-stack", "pydantic"]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
|
@ -3,7 +3,7 @@ name = "llama-stack-provider-kaze"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "Kaze weather provider for Llama Stack"
|
description = "Kaze weather provider for Llama Stack"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.12"
|
||||||
dependencies = ["llama-stack", "pydantic", "aiohttp"]
|
dependencies = ["llama-stack", "pydantic", "aiohttp"]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
|
@@ -34,6 +34,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
         "remote::runpod",
         "remote::sambanova",
         "remote::tgi",
+        "remote::vertexai",
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
 
@@ -29,6 +29,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
         "remote::openai",
         "remote::anthropic",
         "remote::gemini",
+        "remote::vertexai",
         "remote::groq",
         "remote::sambanova",
     )
uv.lock (generated, 97 lines changed)
@@ -476,7 +476,7 @@ wheels = [
 
 [[package]]
 name = "chromadb"
-version = "1.0.15"
+version = "1.0.16"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "bcrypt" },
@@ -507,13 +507,13 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "uvicorn", extra = ["standard"] },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ad/e2/0653b2e539db5512d2200c759f1bc7f9ef5609fe47f3c7d24b82f62dc00f/chromadb-1.0.15.tar.gz", hash = "sha256:3e910da3f5414e2204f89c7beca1650847f2bf3bd71f11a2e40aad1eb31050aa", size = 1218840, upload-time = "2025-07-02T17:07:09.875Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/2a/5b7e793d2a27c425e9f1813e9cb965b70e9bda08b69ee15a10e07dc3e59a/chromadb-1.0.16.tar.gz", hash = "sha256:3c864b5beb5e131bdc1f83c0b63a01ec481c6ee52028f088563ffba8478478e1", size = 1241545, upload-time = "2025-08-08T00:25:41.414Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/85/5a/866c6f0c2160cbc8dca0cf77b2fb391dcf435b32a58743da1bc1a08dc442/chromadb-1.0.15-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:51791553014297798b53df4e043e9c30f4e8bd157647971a6bb02b04bfa65f82", size = 18838820, upload-time = "2025-07-02T17:07:07.632Z" },
-    { url = "https://files.pythonhosted.org/packages/e1/18/ff9b58ab5d334f5ecff7fdbacd6761bac467176708fa4d2500ae7c048af0/chromadb-1.0.15-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:48015803c0631c3a817befc276436dc084bb628c37fd4214047212afb2056291", size = 18057131, upload-time = "2025-07-02T17:07:05.15Z" },
-    { url = "https://files.pythonhosted.org/packages/31/49/74e34cc5aeeb25aff2c0ede6790b3671e14c1b91574dd8f98d266a4c5aad/chromadb-1.0.15-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b73cd6fb32fcdd91c577cca16ea6112b691d72b441bb3f2140426d1e79e453a", size = 18595284, upload-time = "2025-07-02T17:06:59.102Z" },
-    { url = "https://files.pythonhosted.org/packages/cb/33/190df917a057067e37f8b48d082d769bed8b3c0c507edefc7b6c6bb577d0/chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:479f1b401af9e7c20f50642ffb3376abbfd78e2b5b170429f7c79eff52e367db", size = 19526626, upload-time = "2025-07-02T17:07:02.163Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/30/6890da607358993f87a01e80bcce916b4d91515ce865f07dc06845cb472f/chromadb-1.0.15-cp39-abi3-win_amd64.whl", hash = "sha256:e0cb3b93fdc42b1786f151d413ef36299f30f783a30ce08bf0bfb12e552b4190", size = 19520490, upload-time = "2025-07-02T17:07:11.559Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/9d/bffcc814272c9b7982551803b2d45b77f39eeea1b9e965c00c05ee81c649/chromadb-1.0.16-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:144163ce7ca4f4448684d5d0c13ebb37c4d68490ecb60967a95d05cea30e0d2d", size = 18942157, upload-time = "2025-08-08T00:25:38.459Z" },
+    { url = "https://files.pythonhosted.org/packages/58/4e/de0086f3cbcfd667d75d112bb546386803ab5335599bf7099272a675e98b/chromadb-1.0.16-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:4ebcc5894e6fbb6b576452bbf4659746bfe58d9daf99a18363364e9497434bd2", size = 18147831, upload-time = "2025-08-08T00:25:35.546Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/7f/a8aff4ce96281bcb9731d10b2554f41963dd0b47acb4f90a78b2b7c4f199/chromadb-1.0.16-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:937051fc3aae94f7c171503d8f1f7662820aacc75acf45f28d3656c75c5ff1f8", size = 18682195, upload-time = "2025-08-08T00:25:29.654Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/9c/2a97d0257176aae472dff6f1ef1b7050449f384e420120e0f31d2d8f532f/chromadb-1.0.16-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0f5c5ad0c59154a9cab1506b857bab8487b588352e668cf1222c54bb9d52daa", size = 19635695, upload-time = "2025-08-08T00:25:32.68Z" },
+    { url = "https://files.pythonhosted.org/packages/96/8a/f7e810f3cbdc9186ba4e649dc32711b7ab2c23aba37cf61175f731d22293/chromadb-1.0.16-cp39-abi3-win_amd64.whl", hash = "sha256:2528c01bd8b3facca9d0e1ffac866767c386b94604df484fc792ee891c86e09a", size = 19641144, upload-time = "2025-08-08T00:25:43.446Z" },
 ]
 
 [[package]]
@@ -1632,10 +1632,10 @@ test = [
     { name = "pypdf" },
     { name = "requests" },
     { name = "sqlalchemy", extra = ["asyncio"] },
-    { name = "torch", version = "2.7.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.7.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" },
-    { name = "torchvision", version = "0.22.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
-    { name = "torchvision", version = "0.22.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+    { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" },
+    { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
+    { name = "torchvision", version = "0.23.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
     { name = "transformers" },
     { name = "weaviate-client" },
 ]
@@ -1674,7 +1674,7 @@ requires-dist = [
     { name = "llama-api-client", specifier = ">=0.1.2" },
     { name = "llama-stack-client", specifier = ">=0.2.17" },
     { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.17" },
-    { name = "openai", specifier = ">=1.66" },
+    { name = "openai", specifier = ">=1.99.6" },
     { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
     { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
     { name = "pandas", marker = "extra == 'ui'" },
@@ -2301,7 +2301,7 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "1.98.0"
+version = "1.99.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -2313,9 +2313,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/d8/9d/52eadb15c92802711d6b6cf00df3a6d0d18b588f4c5ba5ff210c6419fc03/openai-1.98.0.tar.gz", hash = "sha256:3ee0fcc50ae95267fd22bd1ad095ba5402098f3df2162592e68109999f685427", size = 496695, upload-time = "2025-07-30T12:48:03.701Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/11/45/38a87bd6949236db5ae3132f41d5861824702b149f86d2627d6900919103/openai-1.99.6.tar.gz", hash = "sha256:f48f4239b938ef187062f3d5199a05b69711d8b600b9a9b6a3853cd271799183", size = 505364, upload-time = "2025-08-09T15:20:54.438Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a8/fe/f64631075b3d63a613c0d8ab761d5941631a470f6fa87eaaee1aa2b4ec0c/openai-1.98.0-py3-none-any.whl", hash = "sha256:b99b794ef92196829120e2df37647722104772d2a74d08305df9ced5f26eae34", size = 767713, upload-time = "2025-07-30T12:48:01.264Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/dd/9aa956485c2856346b3181542fbb0aea4e5b457fa7a523944726746da8da/openai-1.99.6-py3-none-any.whl", hash = "sha256:e40d44b2989588c45ce13819598788b77b8fb80ba2f7ae95ce90d14e46f1bd26", size = 786296, upload-time = "2025-08-09T15:20:51.95Z" },
 ]
 
 [[package]]
@@ -4310,7 +4310,7 @@ wheels = [
 
 [[package]]
 name = "torch"
-version = "2.7.1"
+version = "2.8.0"
 source = { registry = "https://download.pytorch.org/whl/cpu" }
 resolution-markers = [
     "python_full_version >= '3.13' and sys_platform == 'darwin'",
@@ -4326,14 +4326,14 @@ dependencies = [
     { name = "typing-extensions", marker = "sys_platform == 'darwin'" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:7b4f8b2b83bd08f7d399025a9a7b323bdbb53d20566f1e0d584689bb92d82f9a" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:95af97e7b2cecdc89edc0558962a51921bf9c61538597dbec6b7cc48d31e2e13" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7ecd868a086468e1bcf74b91db425c1c2951a9cfcd0592c4c73377b7e42485ae" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:a47b7986bee3f61ad217d8a8ce24605809ab425baf349f97de758815edd2ef54" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:fbe2e149c5174ef90d29a5f84a554dfaf28e003cb4f61fa2c8c024c17ec7ca58" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:057efd30a6778d2ee5e2374cd63a63f63311aa6f33321e627c655df60abdd390" },
 ]
 
 [[package]]
 name = "torch"
-version = "2.7.1+cpu"
+version = "2.8.0+cpu"
 source = { registry = "https://download.pytorch.org/whl/cpu" }
 resolution-markers = [
     "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@@ -4351,21 +4351,24 @@ dependencies = [
     { name = "typing-extensions", marker = "sys_platform != 'darwin'" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3bf2db5adf77b433844f080887ade049c4705ddf9fe1a32023ff84ff735aa5ad" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:8f8b3cfc53010a4b4a3c7ecb88c212e9decc4f5eeb6af75c3c803937d2d60947" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:0bc887068772233f532b51a3e8c8cfc682ae62bef74bf4e0c53526c8b9e4138f" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:a2618775f32eb4126c5b2050686da52001a08cffa331637d9cf51c8250931e00" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:eb17646792ac4374ffc87e42369f45d21eff17c790868963b90483ef0b6db4ef" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:84ea1f6a1d15663037d01b121d6e33bb9da3c90af8e069e5072c30f413455a57" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:b66f77f6f67317344ee083aa7ac4751a14395fcb38060d564bf513978d267153" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:56136a2aca6707df3c8811e46ea2d379eaafd18e656e2fd51e8e4d0ca995651b" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:355614185a2aea7155f9c88a20bfd49de5f3063866f3cf9b2f21b6e9e59e31e0" },
-    { url = "https://download.pytorch.org/whl/cpu/torch-2.7.1%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:464bca1bc9452f2ccd676514688896e66b9488f2a0268ecd3ac497cf09c5aac1" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:0e34e276722ab7dd0dffa9e12fe2135a9b34a0e300c456ed7ad6430229404eb5" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:610f600c102386e581327d5efc18c0d6edecb9820b4140d26163354a99cd800d" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:cb9a8ba8137ab24e36bf1742cb79a1294bd374db570f09fc15a5e1318160db4e" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:2be20b2c05a0cce10430cc25f32b689259640d273232b2de357c35729132256d" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:99fc421a5d234580e45957a7b02effbf3e1c884a5dd077afc85352c77bf41434" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:8b5882276633cf91fe3d2d7246c743b94d44a7e660b27f1308007fdb1bb89f7d" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a5064b5e23772c8d164068cc7c12e01a75faf7b948ecd95a0d4007d7487e5f25" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f81dedb4c6076ec325acc3b47525f9c550e5284a18eae1d9061c543f7b6e7de" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:e1ee1b2346ade3ea90306dfbec7e8ff17bc220d344109d189ae09078333b0856" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:64c187345509f2b1bb334feed4666e2c781ca381874bde589182f81247e61f88" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:af81283ac671f434b1b25c95ba295f270e72db1fad48831eb5e4748ff9840041" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:a9dbb6f64f63258bc811e2c0c99640a81e5af93c531ad96e95c5ec777ea46dab" },
+    { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:6d93a7165419bc4b2b907e859ccab0dea5deeab261448ae9a5ec5431f14c0e64" },
 ]
 
 [[package]]
 name = "torchvision"
-version = "0.22.1"
+version = "0.23.0"
 source = { registry = "https://download.pytorch.org/whl/cpu" }
 resolution-markers = [
     "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
@@ -4376,21 +4379,21 @@ resolution-markers = [
 dependencies = [
     { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
     { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" },
-    { name = "torch", version = "2.7.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.7.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    { name = "torch", version = "2.8.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+    { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:153f1790e505bd6da123e21eee6e83e2e155df05c0fe7d56347303067d8543c5" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:964414eef19459d55a10e886e2fca50677550e243586d1678f65e3f6f6bac47a" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c3ae3319624c43cc8127020f46c14aa878406781f0899bb6283ae474afeafbf" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:4a614a6a408d2ed74208d0ea6c28a2fbb68290e9a7df206c5fef3f0b6865d307" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:043d9e35ed69c2e586aff6eb9e2887382e7863707115668ac9d140da58f42cba" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:27142bcc8a984227a6dcf560985e83f52b82a7d3f5fe9051af586a2ccc46ef26" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e0e2c04a91403e8dd3af9756c6a024a1d9c0ed9c0d592a8314ded8f4fe30d440" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6dd7c4d329a0e03157803031bc856220c6155ef08c26d4f5bbac938acecf0948" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1c37e325e09a184b730c3ef51424f383ec5745378dc0eca244520aca29722600" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2f7fd6c15f3697e80627b77934f77705f3bc0e98278b989b2655de01f6903e1d" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:2df618e1143805a7673aaf82cb5720dd9112d4e771983156aaf2ffff692eebf9" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2a3299d2b1d5a7aed2d3b6ffb69c672ca8830671967eb1cee1497bacd82fe47b" },
 ]
 
 [[package]]
 name = "torchvision"
-version = "0.22.1+cpu"
+version = "0.23.0+cpu"
 source = { registry = "https://download.pytorch.org/whl/cpu" }
 resolution-markers = [
     "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@@ -4399,15 +4402,15 @@ resolution-markers = [
 dependencies = [
     { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
     { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
-    { name = "torch", version = "2.7.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b5fa7044bd82c6358e8229351c98070cf3a7bf4a6e89ea46352ae6c65745ef94" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:433cb4dbced7291f17064cea08ac1e5aebd02ec190e1c207d117ad62a8961f2b" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a93c21f18c33a819616b3dda7655aa4de40b219682c654175b6bbeb65ecc2e5f" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:34c914ad4728b81848ac802c5fc5eeb8de8ff4058cc59c1463a74ce4f4fbf0d8" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ab7ae82529887c704c1b5d1d5198f65dc777d04fc3858b374503a6deedb82b19" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.22.1%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:b2d1c4bdbfd8e6c779dc810a6171b56224f1332fc46986810d4081bed1633804" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ae459d4509d3b837b978dc6c66106601f916b6d2cda75c137e3f5f48324ce1da" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:a651ccc540cf4c87eb988730c59c2220c52b57adc276f044e7efb9830fa65a1d" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:dea90a67d60a5366b0358a0b8d6bf267805278697d6fd950cf0e31139e56d1be" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:82928788025170c62e7df1120dcdc0cd175bfc31c08374613ce6d1a040bc0cda" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:474d77adbbbed5166db3e5636b4b4ae3399c66ef5bfa12536e254b32259c90c0" },
+    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:8d6a47e23d7896f0ef9aa7ea7179eb6324e82438aa66d19884c2020d0646b104" },
 ]
 
 [[package]]