Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-15 14:08:00 +00:00)

Merge branch 'main' into content-extension
Commit 2fbddb4beb — 30 changed files with 669 additions and 92 deletions
README.md (18 lines changed)

@@ -1,5 +1,8 @@
 # Llama Stack
+
+<a href="https://trendshift.io/repositories/11824" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11824" alt="meta-llama%2Fllama-stack | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+
 -----
 [](https://pypi.org/project/llama_stack/)
 [](https://pypi.org/project/llama-stack/)
 [](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)

@@ -9,6 +12,7 @@
 [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

 ### ✨🎉 Llama 4 Support 🎉✨

 We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.

@@ -179,3 +183,17 @@ Please checkout our [Documentation](https://llama-stack.readthedocs.io/en/latest
 Check out our client SDKs for connecting to a Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.

 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.

+## 🌟 GitHub Star History
+## Star History
+
+[](https://www.star-history.com/#meta-llama/llama-stack&Date)
+
+## ✨ Contributors
+
+Thanks to all of our amazing contributors!
+
+<a href="https://github.com/meta-llama/llama-stack/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=meta-llama/llama-stack" />
+</a>
docs/_static/js/keyboard_shortcuts.js (new file, 14 lines, vendored)

document.addEventListener('keydown', function(event) {
  // command+K or ctrl+K
  if ((event.metaKey || event.ctrlKey) && event.key === 'k') {
    event.preventDefault();
    document.querySelector('.search-input, .search-field, input[name="q"]').focus();
  }

  // forward slash
  if (event.key === '/' &&
      !event.target.matches('input, textarea, select')) {
    event.preventDefault();
    document.querySelector('.search-input, .search-field, input[name="q"]').focus();
  }
});
@@ -131,6 +131,7 @@ html_static_path = ["../_static"]
 def setup(app):
     app.add_css_file("css/my_theme.css")
     app.add_js_file("js/detect_theme.js")
+    app.add_js_file("js/keyboard_shortcuts.js")


 def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
     url = f"https://hub.docker.com/r/llamastack/{text}"
@@ -2,14 +2,28 @@
 ```{include} ../../../CONTRIBUTING.md
 ```

-See the [Adding a New API Provider](new_api_provider.md) which describes how to add new API providers to the Stack.
+## Testing
+
+See the [Test Page](testing.md) which describes how to test your changes.
+
+```{toctree}
+:maxdepth: 1
+:hidden:
+:caption: Testing
+
+testing
+```
+
+## Adding a New Provider
+
+See the [Adding a New API Provider Page](new_api_provider.md) which describes how to add new API providers to the Stack.
+
+See the [Vector Database Page](new_vector_database.md) which describes how to add a new vector databases with Llama Stack.
+
+See the [External Provider Page](../providers/external/index.md) which describes how to add external providers to the Stack.
+
 ```{toctree}
 :maxdepth: 1
 :hidden:

 new_api_provider
-testing
+new_vector_database
 ```
docs/source/contributing/new_vector_database.md (new file, 75 lines)

# Adding a New Vector Database

This guide will walk you through the process of adding a new vector database to Llama Stack.

> **_NOTE:_** Here's an example Pull Request of the [Milvus Vector Database Provider](https://github.com/meta-llama/llama-stack/pull/1467).

Vector Database providers are used to store and retrieve vector embeddings. Vector databases are not limited to vector
search but can support keyword and hybrid search. Additionally, vector databases can also support operations like
filtering, sorting, and aggregating vectors.

## Steps to Add a New Vector Database Provider

1. **Choose the Database Type**: Determine if your vector database is a remote service, inline, or both.
   - Remote databases make requests to external services, while inline databases execute locally. Some providers support both.
2. **Implement the Provider**: Create a new provider class that inherits from `VectorDatabaseProvider` and implements the required methods (a minimal skeleton is sketched after this guide).
   - Implement methods for vector storage, retrieval, search, and any additional features your database supports.
   - You will need to implement the following methods for `YourVectorIndex`:
     - `YourVectorIndex.create()`
     - `YourVectorIndex.initialize()`
     - `YourVectorIndex.add_chunks()`
     - `YourVectorIndex.delete_chunk()`
     - `YourVectorIndex.query_vector()`
     - `YourVectorIndex.query_keyword()`
     - `YourVectorIndex.query_hybrid()`
   - You will need to implement the following methods for `YourVectorIOAdapter`:
     - `YourVectorIOAdapter.initialize()`
     - `YourVectorIOAdapter.shutdown()`
     - `YourVectorIOAdapter.list_vector_dbs()`
     - `YourVectorIOAdapter.register_vector_db()`
     - `YourVectorIOAdapter.unregister_vector_db()`
     - `YourVectorIOAdapter.insert_chunks()`
     - `YourVectorIOAdapter.query_chunks()`
     - `YourVectorIOAdapter.delete_chunks()`
3. **Add to Registry**: Register your provider in the appropriate registry file.
   - Update {repopath}`llama_stack/providers/registry/vector_io.py` to include your new provider.
```python
from llama_stack.providers.registry.specs import InlineProviderSpec
from llama_stack.providers.registry.api import Api

InlineProviderSpec(
    api=Api.vector_io,
    provider_type="inline::milvus",
    pip_packages=["pymilvus>=2.4.10"],
    module="llama_stack.providers.inline.vector_io.milvus",
    config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
    api_dependencies=[Api.inference],
    optional_api_dependencies=[Api.files],
    description="",
),
```
4. **Add Tests**: Create unit tests and integration tests for your provider in the `tests/` directory.
   - Unit Tests
     - By following the structure of the class methods, you will be able to easily run unit and integration tests for your database.
       1. You have to configure the tests for your provider in `/tests/unit/providers/vector_io/conftest.py`.
       2. Update the `vector_provider` fixture to include your provider if it is an inline provider.
       3. Create a `your_vectorprovider_index` fixture that initializes your vector index.
       4. Create a `your_vectorprovider_adapter` fixture that initializes your vector adapter.
       5. Add your provider to the `vector_io_providers` fixture dictionary.
     - Please follow the naming convention of `your_vectorprovider_index` and `your_vectorprovider_adapter` as the tests require this to execute properly.
   - Integration Tests
     - Integration tests are located in {repopath}`tests/integration`. These tests use the Python client-SDK APIs (from the `llama_stack_client` package) to test functionality.
     - The two sets of integration tests are:
       - `tests/integration/vector_io/test_vector_io.py`: This file tests registration, insertion, and retrieval.
       - `tests/integration/vector_io/test_openai_vector_stores.py`: These tests are for OpenAI-compatible vector stores and test the OpenAI API compatibility.
     - You will need to update `skip_if_provider_doesnt_support_openai_vector_stores` to include your provider as well as `skip_if_provider_doesnt_support_openai_vector_stores_search` to test the appropriate search functionality.
   - Running the tests in the GitHub CI
     - You will need to update the `.github/workflows/integration-vector-io-tests.yml` file to include your provider.
     - If your provider is a remote provider, you will also have to add a container to spin up and run it in the action.
   - Updating the pyproject.toml
     - If you are adding tests for the `inline` provider you will have to update the `unit` group.
       - `uv add new_pip_package --group unit`
     - If you are adding tests for the `remote` provider you will have to update the `test` group, which is used in the GitHub CI for integration tests.
       - `uv add new_pip_package --group test`
5. **Update Documentation**: Please update the documentation for end users.
   - Generate the provider documentation by running {repopath}`./scripts/provider_codegen.py`.
   - Update the autogenerated content in the registry/vector_io.py file with information about your provider. Please see other providers for examples.
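Below is a minimal, hypothetical skeleton of the two classes described in step 2. The class and method names follow the lists above, but the base classes, signatures, and argument types are simplified placeholders rather than the real Llama Stack interfaces; consult an existing provider (for example, the Milvus PR linked at the top) for the exact protocol.

```python
# Hypothetical skeleton only — signatures and types are simplified assumptions.
from typing import Any


class YourVectorIndex:
    @classmethod
    async def create(cls, *args: Any, **kwargs: Any) -> "YourVectorIndex": ...

    async def initialize(self) -> None: ...
    async def add_chunks(self, chunks: list[Any], embeddings: Any) -> None: ...
    async def delete_chunk(self, chunk_id: str) -> None: ...
    async def query_vector(self, embedding: Any, k: int, score_threshold: float) -> Any: ...
    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> Any: ...
    async def query_hybrid(
        self, embedding: Any, query_string: str, k: int, score_threshold: float
    ) -> Any: ...


class YourVectorIOAdapter:
    async def initialize(self) -> None: ...
    async def shutdown(self) -> None: ...
    async def list_vector_dbs(self) -> list[Any]: ...
    async def register_vector_db(self, vector_db: Any) -> None: ...
    async def unregister_vector_db(self, vector_db_id: str) -> None: ...
    async def insert_chunks(self, vector_db_id: str, chunks: list[Any]) -> None: ...
    async def query_chunks(self, vector_db_id: str, query: Any, params: dict | None = None) -> Any: ...
    async def delete_chunks(self, vector_db_id: str, chunk_ids: list[str]) -> None: ...
```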
@@ -1,6 +1,8 @@
-# Testing Llama Stack
-
-Tests are of three different kinds:
-- Unit tests
-- Provider focused integration tests
-- Client SDK tests
+```{include} ../../../tests/README.md
+```
+
+```{include} ../../../tests/unit/README.md
+```
+
+```{include} ../../../tests/integration/README.md
+```

@@ -29,6 +29,7 @@ remote_runpod
 remote_sambanova
 remote_tgi
 remote_together
+remote_vertexai
 remote_vllm
 remote_watsonx
 ```

docs/source/providers/inference/remote_vertexai.md (new file, 40 lines)

# remote::vertexai

## Description

Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:

• Enterprise-grade security: Uses Google Cloud's security controls and IAM
• Better integration: Seamless integration with other Google Cloud services
• Advanced features: Access to additional Vertex AI features like model tuning and monitoring
• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys

Configuration:
- Set VERTEX_AI_PROJECT environment variable (required)
- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
- Use Google Cloud Application Default Credentials or service account key

Authentication Setup:
Option 1 (Recommended): gcloud auth application-default login
Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path

Available Models:
- vertex_ai/gemini-2.0-flash
- vertex_ai/gemini-2.5-flash
- vertex_ai/gemini-2.5-pro

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI |
| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |

## Sample Configuration

```yaml
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
```
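As a hedged usage sketch (not part of this page): once a Llama Stack server is running with the `vertexai` provider enabled, the model IDs listed above can be called through the Python client. The base URL and the exact call shape below are assumptions rather than documented behavior.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

response = client.inference.chat_completion(
    model_id="vertex_ai/gemini-2.0-flash",
    messages=[{"role": "user", "content": "Say hello from Vertex AI."}],
)
print(response.completion_message.content)
```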
@@ -12,6 +12,18 @@ That means you'll get fast and efficient vector retrieval.
 - Lightweight and easy to use
 - Fully integrated with Llama Stack
 - GPU support
+- **Vector search** - FAISS supports pure vector similarity search using embeddings
+
+## Search Modes
+
+**Supported:**
+- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings
+
+**Not Supported:**
+- **Keyword Search** (`mode="keyword"`): Not supported by FAISS
+- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS
+
+> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality.
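To make the supported/unsupported split concrete, here is a hedged illustration using the same `client.vector_stores.search` call that appears in the Milvus additions further below; the server URL and store ID are placeholders, not values from this changeset.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Vector search is the only mode a FAISS-backed store accepts
results = client.vector_stores.search(
    vector_store_id="vs_faiss_demo",  # hypothetical store ID
    query="What is machine learning?",
    search_mode="vector",
    max_num_results=5,
)

# search_mode="keyword" or "hybrid" against FAISS is rejected by the provider
# (the faiss.py change later in this changeset raises NotImplementedError for these modes).
```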

 ## Usage

@@ -11,6 +11,7 @@ That means you're not limited to storing vectors in memory or in a separate serv
 - Easy to use
 - Fully integrated with Llama Stack
+- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations)

 ## Usage

@@ -101,6 +102,92 @@ vector_io:
 - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
 - **`client_key_path`**: Path to the **client private key** file (required for mTLS).

Added after the mTLS options (new "Search Modes" section on the Milvus page):

## Search Modes

Milvus supports three different search modes for both inline and remote configurations:

### Vector Search
Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.

```python
# Vector search example
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="What is machine learning?",
    search_mode="vector",
    max_num_results=5,
)
```

### Keyword Search
Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.

```python
# Keyword search example
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="Python programming language",
    search_mode="keyword",
    max_num_results=5,
)
```

### Hybrid Search
Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.

#### Basic Hybrid Search
```python
# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0)
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="neural networks in Python",
    search_mode="hybrid",
    max_num_results=5,
)
```

**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009).

#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker
RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results.

```python
# Hybrid search with custom RRF parameters
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="neural networks in Python",
    search_mode="hybrid",
    max_num_results=5,
    ranking_options={
        "ranker": {
            "type": "rrf",
            "impact_factor": 100.0,  # Higher values give more weight to top-ranked results
        }
    },
)
```

#### Hybrid Search with Weighted Ranker
Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods.

```python
# Hybrid search with weighted ranker
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="neural networks in Python",
    search_mode="hybrid",
    max_num_results=5,
    ranking_options={
        "ranker": {
            "type": "weighted",
            "alpha": 0.7,  # 70% vector search, 30% keyword search
        }
    },
)
```

For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md).

Unchanged context after the addition:

 ## Documentation
 See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
@@ -124,10 +124,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
         return toolgroup

     async def unregister_toolgroup(self, toolgroup_id: str) -> None:
-        tool_group = await self.get_tool_group(toolgroup_id)
-        if tool_group is None:
-            raise ToolGroupNotFoundError(toolgroup_id)
-        await self.unregister_object(tool_group)
+        await self.unregister_object(await self.get_tool_group(toolgroup_id))

     async def shutdown(self) -> None:
         pass
@ -14,6 +14,7 @@ distribution_spec:
|
||||||
- provider_type: remote::openai
|
- provider_type: remote::openai
|
||||||
- provider_type: remote::anthropic
|
- provider_type: remote::anthropic
|
||||||
- provider_type: remote::gemini
|
- provider_type: remote::gemini
|
||||||
|
- provider_type: remote::vertexai
|
||||||
- provider_type: remote::groq
|
- provider_type: remote::groq
|
||||||
- provider_type: remote::sambanova
|
- provider_type: remote::sambanova
|
||||||
- provider_type: inline::sentence-transformers
|
- provider_type: inline::sentence-transformers
|
||||||
|
|
|
@ -65,6 +65,11 @@ providers:
|
||||||
provider_type: remote::gemini
|
provider_type: remote::gemini
|
||||||
config:
|
config:
|
||||||
api_key: ${env.GEMINI_API_KEY:=}
|
api_key: ${env.GEMINI_API_KEY:=}
|
||||||
|
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
|
||||||
|
provider_type: remote::vertexai
|
||||||
|
config:
|
||||||
|
project: ${env.VERTEX_AI_PROJECT:=}
|
||||||
|
location: ${env.VERTEX_AI_LOCATION:=us-central1}
|
||||||
- provider_id: groq
|
- provider_id: groq
|
||||||
provider_type: remote::groq
|
provider_type: remote::groq
|
||||||
config:
|
config:
|
||||||
|
|
|
@ -14,6 +14,7 @@ distribution_spec:
|
||||||
- provider_type: remote::openai
|
- provider_type: remote::openai
|
||||||
- provider_type: remote::anthropic
|
- provider_type: remote::anthropic
|
||||||
- provider_type: remote::gemini
|
- provider_type: remote::gemini
|
||||||
|
- provider_type: remote::vertexai
|
||||||
- provider_type: remote::groq
|
- provider_type: remote::groq
|
||||||
- provider_type: remote::sambanova
|
- provider_type: remote::sambanova
|
||||||
- provider_type: inline::sentence-transformers
|
- provider_type: inline::sentence-transformers
|
||||||
|
|
|
@ -65,6 +65,11 @@ providers:
|
||||||
provider_type: remote::gemini
|
provider_type: remote::gemini
|
||||||
config:
|
config:
|
||||||
api_key: ${env.GEMINI_API_KEY:=}
|
api_key: ${env.GEMINI_API_KEY:=}
|
||||||
|
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
|
||||||
|
provider_type: remote::vertexai
|
||||||
|
config:
|
||||||
|
project: ${env.VERTEX_AI_PROJECT:=}
|
||||||
|
location: ${env.VERTEX_AI_LOCATION:=us-central1}
|
||||||
- provider_id: groq
|
- provider_id: groq
|
||||||
provider_type: remote::groq
|
provider_type: remote::groq
|
||||||
config:
|
config:
|
||||||
|
|
@@ -56,6 +56,7 @@ ENABLED_INFERENCE_PROVIDERS = [
     "fireworks",
     "together",
     "gemini",
+    "vertexai",
     "groq",
     "sambanova",
     "anthropic",

@@ -71,6 +72,7 @@ INFERENCE_PROVIDER_IDS = {
     "tgi": "${env.TGI_URL:+tgi}",
     "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
     "nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
+    "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
 }

@@ -246,6 +248,14 @@ def get_distribution_template() -> DistributionTemplate:
             "",
             "Gemini API Key",
         ),
+        "VERTEX_AI_PROJECT": (
+            "",
+            "Google Cloud Project ID for Vertex AI",
+        ),
+        "VERTEX_AI_LOCATION": (
+            "us-central1",
+            "Google Cloud Location for Vertex AI",
+        ),
         "SAMBANOVA_API_KEY": (
             "",
             "SambaNova API Key",
@@ -99,7 +99,8 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
         Dict[str, int]: A dictionary mapping categories to their log levels.
     """
     category_levels = {}
-    for pair in env_config.split(";"):
+    delimiter = ","
+    for pair in env_config.split(delimiter):
         if not pair.strip():
             continue
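The hunk above switches the pair separator from ";" to a `delimiter` variable set to ",". A small, self-contained sketch of the effect follows; the `category=level` pair format used here is an assumption for illustration only and is not taken from this diff.

```python
# Hypothetical re-implementation of the parsing loop, for illustration only.
def parse_pairs(env_config: str, delimiter: str = ",") -> dict[str, str]:
    category_levels: dict[str, str] = {}
    for pair in env_config.split(delimiter):
        if not pair.strip():
            continue
        category, _, level = pair.partition("=")  # assumed "category=level" format
        category_levels[category.strip()] = level.strip()
    return category_levels


print(parse_pairs("core=debug,server=info"))
# {'core': 'debug', 'server': 'info'}
```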
@@ -15,6 +15,7 @@ from llama_stack.apis.safety import (
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    ShieldStore,
     ViolationLevel,
 )
 from llama_stack.apis.shields import Shield

@@ -32,6 +33,8 @@ PROMPT_GUARD_MODEL = "Prompt-Guard-86M"


 class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
+    shield_store: ShieldStore
+
     def __init__(self, config: PromptGuardConfig, _deps) -> None:
         self.config = config

@@ -53,7 +56,7 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
         self,
         shield_id: str,
         messages: list[Message],
-        params: dict[str, Any] = None,
+        params: dict[str, Any],
     ) -> RunShieldResponse:
         shield = await self.shield_store.get_shield(shield_id)
         if not shield:

@@ -117,8 +120,10 @@ class PromptGuardShield:
         elif self.config.guard_type == PromptGuardType.jailbreak.value and score_malicious > self.threshold:
             violation = SafetyViolation(
                 violation_level=ViolationLevel.ERROR,
-                violation_type=f"prompt_injection:malicious={score_malicious}",
-                violation_return_message="Sorry, I cannot do this.",
+                user_message="Sorry, I cannot do this.",
+                metadata={
+                    "violation_type": f"prompt_injection:malicious={score_malicious}",
+                },
             )

         return RunShieldResponse(violation=violation)
@@ -174,7 +174,9 @@ class FaissIndex(EmbeddingIndex):
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Keyword search is not supported in FAISS")
+        raise NotImplementedError(
+            "Keyword search is not supported - underlying DB FAISS does not support this search mode"
+        )

     async def query_hybrid(
         self,

@@ -185,7 +187,9 @@ class FaissIndex(EmbeddingIndex):
         reranker_type: str,
         reranker_params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Hybrid search is not supported in FAISS")
+        raise NotImplementedError(
+            "Hybrid search is not supported - underlying DB FAISS does not support this search mode"
+        )


 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
@@ -213,6 +213,36 @@ def available_providers() -> list[ProviderSpec]:
             description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
         ),
     ),
+    remote_provider_spec(
+        api=Api.inference,
+        adapter=AdapterSpec(
+            adapter_type="vertexai",
+            pip_packages=["litellm", "google-cloud-aiplatform"],
+            module="llama_stack.providers.remote.inference.vertexai",
+            config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
+            provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
+            description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:
+
+• Enterprise-grade security: Uses Google Cloud's security controls and IAM
+• Better integration: Seamless integration with other Google Cloud services
+• Advanced features: Access to additional Vertex AI features like model tuning and monitoring
+• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys
+
+Configuration:
+- Set VERTEX_AI_PROJECT environment variable (required)
+- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
+- Use Google Cloud Application Default Credentials or service account key
+
+Authentication Setup:
+Option 1 (Recommended): gcloud auth application-default login
+Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path
+
+Available Models:
+- vertex_ai/gemini-2.0-flash
+- vertex_ai/gemini-2.5-flash
+- vertex_ai/gemini-2.5-pro""",
+        ),
+    ),
     remote_provider_spec(
         api=Api.inference,
         adapter=AdapterSpec(
Another documentation page (file name not shown in this view) receives the same additions reproduced above: the FAISS "Search Modes" block (hunk @@ -45,6 +45,18 @@), the Milvus "Supports all search modes" bullet (hunk @@ -535,6 +547,7 @@), and the full Milvus "Search Modes" section with the vector, keyword, and hybrid examples (hunk @@ -625,6 +638,92 @@).
llama_stack/providers/remote/inference/vertexai/__init__.py (new file, 15 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .config import VertexAIConfig


async def get_adapter_impl(config: VertexAIConfig, _deps):
    from .vertexai import VertexAIInferenceAdapter

    impl = VertexAIInferenceAdapter(config)
    await impl.initialize()
    return impl
llama_stack/providers/remote/inference/vertexai/config.py (new file, 45 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from pydantic import BaseModel, Field

from llama_stack.schema_utils import json_schema_type


class VertexAIProviderDataValidator(BaseModel):
    vertex_project: str | None = Field(
        default=None,
        description="Google Cloud project ID for Vertex AI",
    )
    vertex_location: str | None = Field(
        default=None,
        description="Google Cloud location for Vertex AI (e.g., us-central1)",
    )


@json_schema_type
class VertexAIConfig(BaseModel):
    project: str = Field(
        description="Google Cloud project ID for Vertex AI",
    )
    location: str = Field(
        default="us-central1",
        description="Google Cloud location for Vertex AI",
    )

    @classmethod
    def sample_run_config(
        cls,
        project: str = "${env.VERTEX_AI_PROJECT:=}",
        location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
        **kwargs,
    ) -> dict[str, Any]:
        return {
            "project": project,
            "location": location,
        }
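A short, hypothetical sketch of using `VertexAIConfig` directly (assumes a Llama Stack checkout that includes this provider; the project ID is a placeholder):

```python
from llama_stack.providers.remote.inference.vertexai.config import VertexAIConfig

config = VertexAIConfig(project="my-gcp-project")  # hypothetical project ID
print(config.location)  # "us-central1" (the default)

# sample_run_config() yields the env-templated values used in the run.yaml hunks above
print(VertexAIConfig.sample_run_config())
# {'project': '${env.VERTEX_AI_PROJECT:=}', 'location': '${env.VERTEX_AI_LOCATION:=us-central1}'}
```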
llama_stack/providers/remote/inference/vertexai/models.py (new file, 20 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.utils.inference.model_registry import (
    ProviderModelEntry,
)

# Vertex AI model IDs with vertex_ai/ prefix as required by litellm
LLM_MODEL_IDS = [
    "vertex_ai/gemini-2.0-flash",
    "vertex_ai/gemini-2.5-flash",
    "vertex_ai/gemini-2.5-pro",
]

SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]()

MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES
llama_stack/providers/remote/inference/vertexai/vertexai.py (new file, 52 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from llama_stack.apis.inference import ChatCompletionRequest
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
    LiteLLMOpenAIMixin,
)

from .config import VertexAIConfig
from .models import MODEL_ENTRIES


class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
    def __init__(self, config: VertexAIConfig) -> None:
        LiteLLMOpenAIMixin.__init__(
            self,
            MODEL_ENTRIES,
            litellm_provider_name="vertex_ai",
            api_key_from_config=None,  # Vertex AI uses ADC, not API keys
            provider_data_api_key_field="vertex_project",  # Use project for validation
        )
        self.config = config

    def get_api_key(self) -> str:
        # Vertex AI doesn't use API keys, it uses Application Default Credentials
        # Return empty string to let litellm handle authentication via ADC
        return ""

    async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
        # Get base parameters from parent
        params = await super()._get_params(request)

        # Add Vertex AI specific parameters
        provider_data = self.get_request_provider_data()
        if provider_data:
            if getattr(provider_data, "vertex_project", None):
                params["vertex_project"] = provider_data.vertex_project
            if getattr(provider_data, "vertex_location", None):
                params["vertex_location"] = provider_data.vertex_location
        else:
            params["vertex_project"] = self.config.project
            params["vertex_location"] = self.config.location

        # Remove api_key since Vertex AI uses ADC
        params.pop("api_key", None)

        return params
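As a hedged illustration of the per-request override path in `_get_params` above: provider data sent by the client can end up in `vertex_project` / `vertex_location` instead of the values from the provider config. The `provider_data` constructor argument and the values shown here are assumptions about the client, not part of this change.

```python
from llama_stack_client import LlamaStackClient

# Assumed client-side usage: provider data is forwarded with each request and,
# if present, overrides the project/location configured on the server side.
client = LlamaStackClient(
    base_url="http://localhost:8321",  # assumed local server
    provider_data={
        "vertex_project": "my-gcp-project",  # hypothetical project ID
        "vertex_location": "us-east1",       # hypothetical region override
    },
)
```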
@@ -175,7 +175,7 @@ const handleSubmitWithContent = async (content: string) => {
   return (
     <div className="flex flex-col h-full max-w-4xl mx-auto">
       <div className="mb-4 flex justify-between items-center">
-        <h1 className="text-2xl font-bold">Chat Playground</h1>
+        <h1 className="text-2xl font-bold">Chat Playground (Completions)</h1>
         <div className="flex gap-2">
           <Select value={selectedModel} onValueChange={setSelectedModel} disabled={isModelsLoading || isGenerating}>
             <SelectTrigger className="w-[180px]">
@ -6,6 +6,8 @@ import {
|
||||||
MoveUpRight,
|
MoveUpRight,
|
||||||
Database,
|
Database,
|
||||||
MessageCircle,
|
MessageCircle,
|
||||||
|
Settings2,
|
||||||
|
Compass,
|
||||||
} from "lucide-react";
|
} from "lucide-react";
|
||||||
import Link from "next/link";
|
import Link from "next/link";
|
||||||
import { usePathname } from "next/navigation";
|
import { usePathname } from "next/navigation";
|
||||||
|
@ -22,15 +24,16 @@ import {
|
||||||
SidebarMenuItem,
|
SidebarMenuItem,
|
||||||
SidebarHeader,
|
SidebarHeader,
|
||||||
} from "@/components/ui/sidebar";
|
} from "@/components/ui/sidebar";
|
||||||
// Extracted Chat Playground item
|
|
||||||
const chatPlaygroundItem = {
|
|
||||||
title: "Chat Playground",
|
|
||||||
url: "/chat-playground",
|
|
||||||
icon: MessageCircle,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Removed Chat Playground from log items
|
const createItems = [
|
||||||
const logItems = [
|
{
|
||||||
|
title: "Chat Playground",
|
||||||
|
url: "/chat-playground",
|
||||||
|
icon: MessageCircle,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const manageItems = [
|
||||||
{
|
{
|
||||||
title: "Chat Completions",
|
title: "Chat Completions",
|
||||||
url: "/logs/chat-completions",
|
url: "/logs/chat-completions",
|
||||||
|
@ -53,77 +56,96 @@ const logItems = [
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
|
const optimizeItems: { title: string; url: string; icon: React.ElementType }[] = [
|
||||||
|
{
|
||||||
|
title: "Evaluations",
|
||||||
|
url: "",
|
||||||
|
icon: Compass,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
title: "Fine-tuning",
|
||||||
|
url: "",
|
||||||
|
icon: Settings2,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
interface SidebarItem {
|
||||||
|
title: string;
|
||||||
|
url: string;
|
||||||
|
icon: React.ElementType;
|
||||||
|
}
|
||||||
|
|
||||||
export function AppSidebar() {
|
export function AppSidebar() {
|
||||||
const pathname = usePathname();
|
const pathname = usePathname();
|
||||||
|
|
||||||
return (
|
const renderSidebarItems = (items: SidebarItem[]) => {
|
||||||
<Sidebar>
|
return items.map((item) => {
|
||||||
<SidebarHeader>
|
const isActive = pathname.startsWith(item.url);
|
||||||
<Link href="/">Llama Stack</Link>
|
return (
|
||||||
</SidebarHeader>
|
<SidebarMenuItem key={item.title}>
|
||||||
<SidebarContent>
|
<SidebarMenuButton
|
||||||
{/* Chat Playground as its own section */}
|
asChild
|
||||||
<SidebarGroup>
|
className={cn(
|
||||||
<SidebarGroupContent>
|
"justify-start",
|
||||||
<SidebarMenu>
|
isActive &&
|
||||||
<SidebarMenuItem>
|
"bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
|
||||||
|
)}
|
||||||
|
>
|
||||||
|
<Link href={item.url}>
|
||||||
|
<item.icon
|
||||||
|
className={cn(
|
||||||
|
isActive && "text-gray-900 dark:text-gray-100",
|
||||||
|
"mr-2 h-4 w-4",
|
||||||
|
)}
|
||||||
|
/>
|
||||||
|
<span>{item.title}</span>
|
||||||
|
</Link>
|
||||||
|
</SidebarMenuButton>
|
||||||
|
</SidebarMenuItem>
|
||||||
|
);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Sidebar>
|
||||||
|
<SidebarHeader>
|
||||||
|
<Link href="/">Llama Stack</Link>
|
||||||
|
</SidebarHeader>
|
||||||
|
<SidebarContent>
|
||||||
|
<SidebarGroup>
|
||||||
|
<SidebarGroupLabel>Create</SidebarGroupLabel>
|
||||||
|
<SidebarGroupContent>
|
||||||
|
<SidebarMenu>{renderSidebarItems(createItems)}</SidebarMenu>
|
||||||
|
</SidebarGroupContent>
|
||||||
|
</SidebarGroup>
|
||||||
|
|
||||||
|
<SidebarGroup>
|
||||||
|
<SidebarGroupLabel>Manage</SidebarGroupLabel>
|
||||||
|
<SidebarGroupContent>
|
||||||
|
<SidebarMenu>{renderSidebarItems(manageItems)}</SidebarMenu>
|
||||||
|
</SidebarGroupContent>
|
||||||
|
</SidebarGroup>
|
||||||
|
|
||||||
|
<SidebarGroup>
|
||||||
|
<SidebarGroupLabel>Optimize</SidebarGroupLabel>
|
||||||
|
<SidebarGroupContent>
|
||||||
|
<SidebarMenu>
|
||||||
|
{optimizeItems.map((item) => (
|
||||||
|
<SidebarMenuItem key={item.title}>
|
||||||
<SidebarMenuButton
|
<SidebarMenuButton
|
||||||
asChild
|
disabled
|
||||||
className={cn(
|
className="justify-start opacity-60 cursor-not-allowed"
|
||||||
"justify-start",
|
|
||||||
pathname.startsWith(chatPlaygroundItem.url) &&
|
|
||||||
"bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
|
|
||||||
)}
|
|
||||||
>
|
>
|
||||||
<Link href={chatPlaygroundItem.url}>
|
<item.icon className="mr-2 h-4 w-4" />
|
||||||
<chatPlaygroundItem.icon
|
<span>{item.title}</span>
|
||||||
className={cn(
|
<span className="ml-2 text-xs text-gray-500">(Coming Soon)</span>
|
||||||
pathname.startsWith(chatPlaygroundItem.url) && "text-gray-900 dark:text-gray-100",
|
|
||||||
"mr-2 h-4 w-4",
|
|
||||||
)}
|
|
||||||
/>
|
|
||||||
<span>{chatPlaygroundItem.title}</span>
|
|
||||||
</Link>
|
|
||||||
</SidebarMenuButton>
|
</SidebarMenuButton>
|
||||||
</SidebarMenuItem>
|
</SidebarMenuItem>
|
||||||
</SidebarMenu>
|
))}
|
||||||
</SidebarGroupContent>
|
</SidebarMenu>
|
||||||
</SidebarGroup>
|
</SidebarGroupContent>
|
||||||
|
</SidebarGroup>
|
||||||
{/* Logs section */}
|
</SidebarContent>
|
||||||
<SidebarGroup>
|
</Sidebar>
|
||||||
<SidebarGroupLabel>Logs</SidebarGroupLabel>
|
|
||||||
<SidebarGroupContent>
|
|
||||||
<SidebarMenu>
|
|
||||||
{logItems.map((item) => {
|
|
||||||
const isActive = pathname.startsWith(item.url);
|
|
||||||
return (
|
|
||||||
<SidebarMenuItem key={item.title}>
|
|
||||||
<SidebarMenuButton
|
|
||||||
asChild
|
|
||||||
className={cn(
|
|
||||||
"justify-start",
|
|
||||||
isActive &&
|
|
||||||
"bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
|
|
||||||
)}
|
|
||||||
>
|
|
||||||
<Link href={item.url}>
|
|
||||||
<item.icon
|
|
||||||
className={cn(
|
|
||||||
isActive && "text-gray-900 dark:text-gray-100",
|
|
||||||
"mr-2 h-4 w-4",
|
|
||||||
)}
|
|
||||||
/>
|
|
||||||
<span>{item.title}</span>
|
|
||||||
</Link>
|
|
||||||
</SidebarMenuButton>
|
|
||||||
</SidebarMenuItem>
|
|
||||||
);
|
|
||||||
})}
|
|
||||||
</SidebarMenu>
|
|
||||||
</SidebarGroupContent>
|
|
||||||
</SidebarGroup>
|
|
||||||
</SidebarContent>
|
|
||||||
</Sidebar>
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@@ -266,7 +266,6 @@ exclude = [
     "^llama_stack/providers/inline/post_training/common/validator\\.py$",
     "^llama_stack/providers/inline/safety/code_scanner/",
     "^llama_stack/providers/inline/safety/llama_guard/",
-    "^llama_stack/providers/inline/safety/prompt_guard/",
     "^llama_stack/providers/inline/scoring/basic/",
     "^llama_stack/providers/inline/scoring/braintrust/",
     "^llama_stack/providers/inline/scoring/llm_as_judge/",
@ -34,6 +34,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
|
||||||
"remote::runpod",
|
"remote::runpod",
|
||||||
"remote::sambanova",
|
"remote::sambanova",
|
||||||
"remote::tgi",
|
"remote::tgi",
|
||||||
|
"remote::vertexai",
|
||||||
):
|
):
|
||||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
|
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
|
||||||
"remote::openai",
|
"remote::openai",
|
||||||
"remote::anthropic",
|
"remote::anthropic",
|
||||||
"remote::gemini",
|
"remote::gemini",
|
||||||
|
"remote::vertexai",
|
||||||
"remote::groq",
|
"remote::groq",
|
||||||
"remote::sambanova",
|
"remote::sambanova",
|
||||||
)
|
)
|
||||||
|
|