Merge branch 'main' into content-extension

Francisco Arceo authored 2025-08-11 13:45:25 -06:00, committed via GitHub
commit 2fbddb4beb
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
30 changed files with 669 additions and 92 deletions

View file

@@ -124,10 +124,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
         return toolgroup

     async def unregister_toolgroup(self, toolgroup_id: str) -> None:
-        tool_group = await self.get_tool_group(toolgroup_id)
-        if tool_group is None:
-            raise ToolGroupNotFoundError(toolgroup_id)
-        await self.unregister_object(tool_group)
+        await self.unregister_object(await self.get_tool_group(toolgroup_id))

     async def shutdown(self) -> None:
         pass

View file

@@ -14,6 +14,7 @@ distribution_spec:
     - provider_type: remote::openai
     - provider_type: remote::anthropic
     - provider_type: remote::gemini
+    - provider_type: remote::vertexai
     - provider_type: remote::groq
     - provider_type: remote::sambanova
     - provider_type: inline::sentence-transformers

View file

@@ -65,6 +65,11 @@ providers:
       provider_type: remote::gemini
       config:
         api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
   - provider_id: groq
     provider_type: remote::groq
     config:
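
The `${env.…}` expansions above follow shell-style semantics: `:=` substitutes a default when the variable is unset, and `:+` emits the alternate word only when the variable is set, so the vertexai entry gets a real `provider_id` only when `VERTEX_AI_PROJECT` is defined. A minimal sketch of those two forms, assuming this shell-like reading:

```python
# Hedged sketch of the two substitution forms used in the config above,
# assuming they mirror shell parameter expansion:
#   ${env.VAR:=default} -> value of VAR, or "default" when unset
#   ${env.VAR:+word}    -> "word" when VAR is set, else empty
import re


def expand(template: str, env: dict[str, str]) -> str:
    def repl(match: re.Match) -> str:
        var, op, word = match.group(1), match.group(2), match.group(3)
        if op == ":=":
            return env.get(var) or word
        return word if env.get(var) else ""  # the ":+" form

    return re.sub(r"\$\{env\.(\w+)(:=|:\+)([^}]*)\}", repl, template)


assert expand("${env.VERTEX_AI_LOCATION:=us-central1}", {}) == "us-central1"
assert expand("${env.VERTEX_AI_PROJECT:+vertexai}", {"VERTEX_AI_PROJECT": "p1"}) == "vertexai"
assert expand("${env.VERTEX_AI_PROJECT:+vertexai}", {}) == ""
```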

View file

@@ -14,6 +14,7 @@ distribution_spec:
     - provider_type: remote::openai
     - provider_type: remote::anthropic
    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
     - provider_type: remote::groq
     - provider_type: remote::sambanova
     - provider_type: inline::sentence-transformers

View file

@@ -65,6 +65,11 @@ providers:
       provider_type: remote::gemini
       config:
         api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
   - provider_id: groq
     provider_type: remote::groq
     config:

View file

@@ -56,6 +56,7 @@ ENABLED_INFERENCE_PROVIDERS = [
     "fireworks",
     "together",
     "gemini",
+    "vertexai",
     "groq",
     "sambanova",
     "anthropic",
@@ -71,6 +72,7 @@ INFERENCE_PROVIDER_IDS = {
     "tgi": "${env.TGI_URL:+tgi}",
     "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
     "nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
+    "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
 }
@@ -246,6 +248,14 @@ def get_distribution_template() -> DistributionTemplate:
             "",
             "Gemini API Key",
         ),
+        "VERTEX_AI_PROJECT": (
+            "",
+            "Google Cloud Project ID for Vertex AI",
+        ),
+        "VERTEX_AI_LOCATION": (
+            "us-central1",
+            "Google Cloud Location for Vertex AI",
+        ),
         "SAMBANOVA_API_KEY": (
             "",
             "SambaNova API Key",

View file

@@ -99,7 +99,8 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
         Dict[str, int]: A dictionary mapping categories to their log levels.
     """
     category_levels = {}
-    for pair in env_config.split(";"):
+    delimiter = ","
+    for pair in env_config.split(delimiter):
         if not pair.strip():
             continue
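
With this change, category overrides are comma-separated rather than semicolon-separated. A minimal sketch of the new parsing behavior, assuming `category=level` pairs (the real function goes on to validate categories and map level names to integers):

```python
# Minimal sketch of the comma-delimited parsing above; the full
# parse_environment_config also validates each category and level.
def parse_pairs(env_config: str) -> dict[str, str]:
    category_levels: dict[str, str] = {}
    delimiter = ","
    for pair in env_config.split(delimiter):
        if not pair.strip():
            continue  # tolerate empty segments, e.g. a trailing comma
        category, _, level = pair.partition("=")
        category_levels[category.strip().lower()] = level.strip().lower()
    return category_levels


assert parse_pairs("server=debug,core=info") == {"server": "debug", "core": "info"}
```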

View file

@@ -15,6 +15,7 @@ from llama_stack.apis.safety import (
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    ShieldStore,
     ViolationLevel,
 )
 from llama_stack.apis.shields import Shield
@@ -32,6 +33,8 @@ PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
 class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
+    shield_store: ShieldStore
+
     def __init__(self, config: PromptGuardConfig, _deps) -> None:
         self.config = config
@@ -53,7 +56,7 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
         self,
         shield_id: str,
         messages: list[Message],
-        params: dict[str, Any] = None,
+        params: dict[str, Any],
     ) -> RunShieldResponse:
         shield = await self.shield_store.get_shield(shield_id)
         if not shield:
@@ -117,8 +120,10 @@ class PromptGuardShield:
         elif self.config.guard_type == PromptGuardType.jailbreak.value and score_malicious > self.threshold:
             violation = SafetyViolation(
                 violation_level=ViolationLevel.ERROR,
-                violation_type=f"prompt_injection:malicious={score_malicious}",
-                violation_return_message="Sorry, I cannot do this.",
+                user_message="Sorry, I cannot do this.",
+                metadata={
+                    "violation_type": f"prompt_injection:malicious={score_malicious}",
+                },
             )
         return RunShieldResponse(violation=violation)
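
After this change the user-facing text lives in `user_message` and the machine-readable violation type moves into `metadata`. A minimal sketch of how a caller might read the response, assuming an initialized implementation and a registered shield (the `prompt-guard` id is a placeholder):

```python
# Hedged sketch: reading a RunShieldResponse with the renamed fields above.
# `safety` is assumed to be an initialized PromptGuardSafetyImpl.
async def report_violation(safety, messages) -> None:
    response = await safety.run_shield(
        shield_id="prompt-guard",  # placeholder shield id
        messages=messages,
        params={},
    )
    if response.violation is not None:
        print(response.violation.user_message)  # "Sorry, I cannot do this."
        # The specific violation type now lives in metadata:
        print(response.violation.metadata.get("violation_type"))
```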

View file

@@ -174,7 +174,9 @@ class FaissIndex(EmbeddingIndex):
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Keyword search is not supported in FAISS")
+        raise NotImplementedError(
+            "Keyword search is not supported - underlying DB FAISS does not support this search mode"
+        )

     async def query_hybrid(
         self,
@@ -185,7 +187,9 @@ class FaissIndex(EmbeddingIndex):
         reranker_type: str,
         reranker_params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Hybrid search is not supported in FAISS")
+        raise NotImplementedError(
+            "Hybrid search is not supported - underlying DB FAISS does not support this search mode"
+        )

 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):

View file

@@ -213,6 +213,36 @@ def available_providers() -> list[ProviderSpec]:
             description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
         ),
     ),
+    remote_provider_spec(
+        api=Api.inference,
+        adapter=AdapterSpec(
+            adapter_type="vertexai",
+            pip_packages=["litellm", "google-cloud-aiplatform"],
+            module="llama_stack.providers.remote.inference.vertexai",
+            config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
+            provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
+            description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:
+
+Enterprise-grade security: Uses Google Cloud's security controls and IAM
+Better integration: Seamless integration with other Google Cloud services
+Advanced features: Access to additional Vertex AI features like model tuning and monitoring
+Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys
+
+Configuration:
+- Set VERTEX_AI_PROJECT environment variable (required)
+- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
+- Use Google Cloud Application Default Credentials or service account key
+
+Authentication Setup:
+Option 1 (Recommended): gcloud auth application-default login
+Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path
+
+Available Models:
+- vertex_ai/gemini-2.0-flash
+- vertex_ai/gemini-2.5-flash
+- vertex_ai/gemini-2.5-pro""",
+        ),
+    ),
     remote_provider_spec(
         api=Api.inference,
         adapter=AdapterSpec(
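
Putting the description above into practice, a short end-to-end sketch: it assumes a stack including this provider is running on `localhost:8321`, `VERTEX_AI_PROJECT` was set when the server started, ADC was configured via `gcloud auth application-default login`, and the `llama-stack-client` Python SDK's `inference.chat_completion` method is available.

```python
# Hedged sketch: calling the Vertex AI provider through a running Llama Stack
# server with the llama-stack-client SDK. Endpoint and model id follow the
# defaults documented above; adjust for your deployment.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.inference.chat_completion(
    model_id="vertex_ai/gemini-2.0-flash",  # one of the models listed above
    messages=[{"role": "user", "content": "Say hello from Vertex AI."}],
)
print(response.completion_message.content)
```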

View file

@@ -45,6 +45,18 @@ That means you'll get fast and efficient vector retrieval.
 - Lightweight and easy to use
 - Fully integrated with Llama Stack
 - GPU support
+- **Vector search** - FAISS supports pure vector similarity search using embeddings
+
+## Search Modes
+
+**Supported:**
+- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings
+
+**Not Supported:**
+- **Keyword Search** (`mode="keyword"`): Not supported by FAISS
+- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS
+
+> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality.

 ## Usage
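
The same `client.vector_stores.search` API shown in the Milvus section below applies here, but only `search_mode="vector"` succeeds against FAISS. A sketch, assuming a running stack and an existing FAISS-backed vector store:

```python
# Hedged sketch: FAISS accepts vector search; keyword and hybrid modes
# surface the NotImplementedError raised by the provider.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

results = client.vector_stores.search(
    vector_store_id="vs_123",  # placeholder store id
    query="What is machine learning?",
    search_mode="vector",
    max_num_results=5,
)

try:
    client.vector_stores.search(
        vector_store_id="vs_123",
        query="machine learning",
        search_mode="keyword",  # not supported by FAISS
    )
except Exception as err:  # propagated as a server-side error
    print(err)
```
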
@@ -535,6 +547,7 @@ That means you're not limited to storing vectors in memory or in a separate serv
 - Easy to use
 - Fully integrated with Llama Stack
+- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations)

 ## Usage
@@ -625,6 +638,92 @@ vector_io:
 - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
 - **`client_key_path`**: Path to the **client private key** file (required for mTLS).

+## Search Modes
+
+Milvus supports three different search modes for both inline and remote configurations:
+
+### Vector Search
+
+Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.
+
+```python
+# Vector search example
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="What is machine learning?",
+    search_mode="vector",
+    max_num_results=5,
+)
+```
+
+### Keyword Search
+
+Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.
+
+```python
+# Keyword search example
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="Python programming language",
+    search_mode="keyword",
+    max_num_results=5,
+)
+```
+
+### Hybrid Search
+
+Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
+
+#### Basic Hybrid Search
+
+```python
+# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0)
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks in Python",
+    search_mode="hybrid",
+    max_num_results=5,
+)
+```
+
+**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009).
+
+#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker
+
+RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results.
+
+```python
+# Hybrid search with custom RRF parameters
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks in Python",
+    search_mode="hybrid",
+    max_num_results=5,
+    ranking_options={
+        "ranker": {
+            "type": "rrf",
+            "impact_factor": 100.0,  # Higher values give more weight to top-ranked results
+        }
+    },
+)
+```
+
+#### Hybrid Search with Weighted Ranker
+
+Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods.
+
+```python
+# Hybrid search with weighted ranker
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks in Python",
+    search_mode="hybrid",
+    max_num_results=5,
+    ranking_options={
+        "ranker": {
+            "type": "weighted",
+            "alpha": 0.7,  # 70% vector search, 30% keyword search
+        }
+    },
+)
+```
+
+For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md).
+
 ## Documentation
 See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.

View file

@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import VertexAIConfig
+
+
+async def get_adapter_impl(config: VertexAIConfig, _deps):
+    from .vertexai import VertexAIInferenceAdapter
+
+    impl = VertexAIInferenceAdapter(config)
+    await impl.initialize()
+    return impl

View file

@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class VertexAIProviderDataValidator(BaseModel):
+    vertex_project: str | None = Field(
+        default=None,
+        description="Google Cloud project ID for Vertex AI",
+    )
+    vertex_location: str | None = Field(
+        default=None,
+        description="Google Cloud location for Vertex AI (e.g., us-central1)",
+    )
+
+
+@json_schema_type
+class VertexAIConfig(BaseModel):
+    project: str = Field(
+        description="Google Cloud project ID for Vertex AI",
+    )
+    location: str = Field(
+        default="us-central1",
+        description="Google Cloud location for Vertex AI",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        project: str = "${env.VERTEX_AI_PROJECT:=}",
+        location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        return {
+            "project": project,
+            "location": location,
+        }
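
For illustration, the config can be instantiated directly; only `project` is required and `location` falls back to its default (the values below are placeholders):

```python
# Hedged sketch: instantiating the VertexAIConfig added above.
from llama_stack.providers.remote.inference.vertexai import VertexAIConfig

config = VertexAIConfig(project="my-gcp-project")  # placeholder project id
assert config.location == "us-central1"

# sample_run_config returns the env-templated form used in run.yaml:
print(VertexAIConfig.sample_run_config())
# {'project': '${env.VERTEX_AI_PROJECT:=}', 'location': '${env.VERTEX_AI_LOCATION:=us-central1}'}
```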

View file

@@ -0,0 +1,20 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.utils.inference.model_registry import (
+    ProviderModelEntry,
+)
+
+# Vertex AI model IDs with vertex_ai/ prefix as required by litellm
+LLM_MODEL_IDS = [
+    "vertex_ai/gemini-2.0-flash",
+    "vertex_ai/gemini-2.5-flash",
+    "vertex_ai/gemini-2.5-pro",
+]
+
+SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]()
+
+MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES

View file

@@ -0,0 +1,52 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from llama_stack.apis.inference import ChatCompletionRequest
+from llama_stack.providers.utils.inference.litellm_openai_mixin import (
+    LiteLLMOpenAIMixin,
+)
+
+from .config import VertexAIConfig
+from .models import MODEL_ENTRIES
+
+
+class VertexAIInferenceAdapter(LiteLLMOpenAIMixin):
+    def __init__(self, config: VertexAIConfig) -> None:
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            MODEL_ENTRIES,
+            litellm_provider_name="vertex_ai",
+            api_key_from_config=None,  # Vertex AI uses ADC, not API keys
+            provider_data_api_key_field="vertex_project",  # Use project for validation
+        )
+        self.config = config
+
+    def get_api_key(self) -> str:
+        # Vertex AI doesn't use API keys, it uses Application Default Credentials
+        # Return empty string to let litellm handle authentication via ADC
+        return ""
+
+    async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
+        # Get base parameters from parent
+        params = await super()._get_params(request)
+
+        # Add Vertex AI specific parameters
+        provider_data = self.get_request_provider_data()
+        if provider_data:
+            if getattr(provider_data, "vertex_project", None):
+                params["vertex_project"] = provider_data.vertex_project
+            if getattr(provider_data, "vertex_location", None):
+                params["vertex_location"] = provider_data.vertex_location
+        else:
+            params["vertex_project"] = self.config.project
+            params["vertex_location"] = self.config.location
+
+        # Remove api_key since Vertex AI uses ADC
+        params.pop("api_key", None)
+
+        return params
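
Because `_get_params` prefers request-scoped provider data over the static config, individual requests can target a different project or region. A sketch, assuming the `llama-stack-client` SDK's `provider_data` argument (which, as I understand it, is forwarded as the `X-LlamaStack-Provider-Data` header):

```python
# Hedged sketch: per-request override of vertex_project / vertex_location,
# matching the provider-data fields read in _get_params above.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(
    base_url="http://localhost:8321",
    provider_data={
        "vertex_project": "another-gcp-project",  # placeholder project id
        "vertex_location": "europe-west4",        # placeholder region
    },
)

response = client.inference.chat_completion(
    model_id="vertex_ai/gemini-2.5-flash",
    messages=[{"role": "user", "content": "Which project served this request?"}],
)
```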

View file

@@ -175,7 +175,7 @@ const handleSubmitWithContent = async (content: string) => {
   return (
     <div className="flex flex-col h-full max-w-4xl mx-auto">
       <div className="mb-4 flex justify-between items-center">
-        <h1 className="text-2xl font-bold">Chat Playground</h1>
+        <h1 className="text-2xl font-bold">Chat Playground (Completions)</h1>
         <div className="flex gap-2">
           <Select value={selectedModel} onValueChange={setSelectedModel} disabled={isModelsLoading || isGenerating}>
             <SelectTrigger className="w-[180px]">

View file

@@ -6,6 +6,8 @@ import {
   MoveUpRight,
   Database,
   MessageCircle,
+  Settings2,
+  Compass,
 } from "lucide-react";
 import Link from "next/link";
 import { usePathname } from "next/navigation";
@@ -22,15 +24,16 @@ import {
   SidebarMenuItem,
   SidebarHeader,
 } from "@/components/ui/sidebar";

-// Extracted Chat Playground item
-const chatPlaygroundItem = {
-  title: "Chat Playground",
-  url: "/chat-playground",
-  icon: MessageCircle,
-};
-// Removed Chat Playground from log items
-const logItems = [
+const createItems = [
+  {
+    title: "Chat Playground",
+    url: "/chat-playground",
+    icon: MessageCircle,
+  },
+];
+
+const manageItems = [
   {
     title: "Chat Completions",
     url: "/logs/chat-completions",
@@ -53,77 +56,96 @@
   },
 ];

+const optimizeItems: { title: string; url: string; icon: React.ElementType }[] = [
+  {
+    title: "Evaluations",
+    url: "",
+    icon: Compass,
+  },
+  {
+    title: "Fine-tuning",
+    url: "",
+    icon: Settings2,
+  },
+];
+
+interface SidebarItem {
+  title: string;
+  url: string;
+  icon: React.ElementType;
+}
+
 export function AppSidebar() {
   const pathname = usePathname();

-  return (
-    <Sidebar>
-      <SidebarHeader>
-        <Link href="/">Llama Stack</Link>
-      </SidebarHeader>
-      <SidebarContent>
-        {/* Chat Playground as its own section */}
-        <SidebarGroup>
-          <SidebarGroupContent>
-            <SidebarMenu>
-              <SidebarMenuItem>
+  const renderSidebarItems = (items: SidebarItem[]) => {
+    return items.map((item) => {
+      const isActive = pathname.startsWith(item.url);
+      return (
+        <SidebarMenuItem key={item.title}>
+          <SidebarMenuButton
+            asChild
+            className={cn(
+              "justify-start",
+              isActive &&
+                "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
+            )}
+          >
+            <Link href={item.url}>
+              <item.icon
+                className={cn(
+                  isActive && "text-gray-900 dark:text-gray-100",
+                  "mr-2 h-4 w-4",
+                )}
+              />
+              <span>{item.title}</span>
+            </Link>
+          </SidebarMenuButton>
+        </SidebarMenuItem>
+      );
+    });
+  };
+
+  return (
+    <Sidebar>
+      <SidebarHeader>
+        <Link href="/">Llama Stack</Link>
+      </SidebarHeader>
+      <SidebarContent>
+        <SidebarGroup>
+          <SidebarGroupLabel>Create</SidebarGroupLabel>
+          <SidebarGroupContent>
+            <SidebarMenu>{renderSidebarItems(createItems)}</SidebarMenu>
+          </SidebarGroupContent>
+        </SidebarGroup>
+        <SidebarGroup>
+          <SidebarGroupLabel>Manage</SidebarGroupLabel>
+          <SidebarGroupContent>
+            <SidebarMenu>{renderSidebarItems(manageItems)}</SidebarMenu>
+          </SidebarGroupContent>
+        </SidebarGroup>
+        <SidebarGroup>
+          <SidebarGroupLabel>Optimize</SidebarGroupLabel>
+          <SidebarGroupContent>
+            <SidebarMenu>
+              {optimizeItems.map((item) => (
+                <SidebarMenuItem key={item.title}>
                   <SidebarMenuButton
                     asChild
-                    className={cn(
-                      "justify-start",
-                      pathname.startsWith(chatPlaygroundItem.url) &&
-                        "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
-                    )}
+                    disabled
+                    className="justify-start opacity-60 cursor-not-allowed"
                   >
-                    <Link href={chatPlaygroundItem.url}>
-                      <chatPlaygroundItem.icon
-                        className={cn(
-                          pathname.startsWith(chatPlaygroundItem.url) && "text-gray-900 dark:text-gray-100",
-                          "mr-2 h-4 w-4",
-                        )}
-                      />
-                      <span>{chatPlaygroundItem.title}</span>
-                    </Link>
+                    <item.icon className="mr-2 h-4 w-4" />
+                    <span>{item.title}</span>
+                    <span className="ml-2 text-xs text-gray-500">(Coming Soon)</span>
                   </SidebarMenuButton>
                 </SidebarMenuItem>
-            </SidebarMenu>
-          </SidebarGroupContent>
-        </SidebarGroup>
-        {/* Logs section */}
-        <SidebarGroup>
-          <SidebarGroupLabel>Logs</SidebarGroupLabel>
-          <SidebarGroupContent>
-            <SidebarMenu>
-              {logItems.map((item) => {
-                const isActive = pathname.startsWith(item.url);
-                return (
-                  <SidebarMenuItem key={item.title}>
-                    <SidebarMenuButton
-                      asChild
-                      className={cn(
-                        "justify-start",
-                        isActive &&
-                          "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
-                      )}
-                    >
-                      <Link href={item.url}>
-                        <item.icon
-                          className={cn(
-                            isActive && "text-gray-900 dark:text-gray-100",
-                            "mr-2 h-4 w-4",
-                          )}
-                        />
-                        <span>{item.title}</span>
-                      </Link>
-                    </SidebarMenuButton>
-                  </SidebarMenuItem>
-                );
-              })}
-            </SidebarMenu>
-          </SidebarGroupContent>
-        </SidebarGroup>
-      </SidebarContent>
-    </Sidebar>
+              ))}
+            </SidebarMenu>
+          </SidebarGroupContent>
+        </SidebarGroup>
+      </SidebarContent>
+    </Sidebar>
   );
 }