Mirror of https://github.com/meta-llama/llama-stack.git
Synced 2025-07-18 02:42:31 +00:00
Merge branch 'main' into suffic

commit 2edb9eb7e0
37 changed files with 2105 additions and 63 deletions
@@ -81,6 +81,15 @@ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
     type: Literal["web_search_call"] = "web_search_call"


+@json_schema_type
+class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
+    id: str
+    queries: list[str]
+    status: str
+    type: Literal["file_search_call"] = "file_search_call"
+    results: list[dict[str, Any]] | None = None
+
+
 @json_schema_type
 class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
     call_id: str
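For illustration, a minimal sketch of constructing the new output-message model by hand; the field names come from the hunk above, but the values (and the result-dict shape) are hypothetical:

    # Illustrative only: the id, query, status, and results values are made up.
    call = OpenAIResponseOutputMessageFileSearchToolCall(
        id="fs_call_123",  # hypothetical identifier
        queries=["quarterly revenue"],
        status="completed",
        results=[{"text": "Revenue grew 12% QoQ", "score": 0.87}],  # shape is an assumption
    )
    assert call.type == "file_search_call"  # fixed by the Literal default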
@@ -119,6 +128,7 @@ class OpenAIResponseOutputMessageMCPListTools(BaseModel):
 OpenAIResponseOutput = Annotated[
     OpenAIResponseMessage
     | OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
     | OpenAIResponseOutputMessageFunctionToolCall
     | OpenAIResponseOutputMessageMCPCall
     | OpenAIResponseOutputMessageMCPListTools,
@@ -362,6 +372,7 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
 OpenAIResponseInput = Annotated[
     # Responses API allows output messages to be passed in as input
     OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
     | OpenAIResponseOutputMessageFunctionToolCall
     | OpenAIResponseInputFunctionToolCallOutput
@@ -397,9 +408,10 @@ class FileSearchRankingOptions(BaseModel):
 @json_schema_type
 class OpenAIResponseInputToolFileSearch(BaseModel):
     type: Literal["file_search"] = "file_search"
-    vector_store_id: list[str]
+    vector_store_ids: list[str]
+    filters: dict[str, Any] | None = None
+    max_num_results: int | None = Field(default=10, ge=1, le=50)
     ranking_options: FileSearchRankingOptions | None = None
-    # TODO: add filters


 class ApprovalFilter(BaseModel):
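A hedged sketch of configuring the renamed tool: note the plural vector_store_ids and the new filters and max_num_results knobs. The store ID and filter contents are hypothetical:

    tool = OpenAIResponseInputToolFileSearch(
        vector_store_ids=["vs_abc123"],   # hypothetical vector store ID
        filters={"author": "jdoe"},       # filter key/value shape is an assumption
        max_num_results=5,                # constrained to 1..50 by the Field above
    )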
@@ -15,6 +15,48 @@ from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


+@json_schema_type
+class RRFRanker(BaseModel):
+    """
+    Reciprocal Rank Fusion (RRF) ranker configuration.
+
+    :param type: The type of ranker, always "rrf"
+    :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
+        Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009).
+    """
+
+    type: Literal["rrf"] = "rrf"
+    impact_factor: float = Field(default=60.0, gt=0.0)  # default of 60 is from the RRF paper
+
+
+@json_schema_type
+class WeightedRanker(BaseModel):
+    """
+    Weighted ranker configuration that combines vector and keyword scores.
+
+    :param type: The type of ranker, always "weighted"
+    :param alpha: Weight factor between 0 and 1.
+        0 means only use keyword scores,
+        1 means only use vector scores,
+        values in between blend both scores.
+    """
+
+    type: Literal["weighted"] = "weighted"
+    alpha: float = Field(
+        default=0.5,
+        ge=0.0,
+        le=1.0,
+        description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.",
+    )
+
+
+Ranker = Annotated[
+    RRFRanker | WeightedRanker,
+    Field(discriminator="type"),
+]
+register_schema(Ranker, name="Ranker")
+
+
 @json_schema_type
 class RAGDocument(BaseModel):
     """
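The docstrings above pin down the scoring semantics. As a sketch, the standard RRF formula from Cormack et al. (2009) and a linear blend matching the WeightedRanker docstring would look like the helpers below; these functions are illustrative and not part of the PR:

    def rrf_score(ranks: list[int], impact_factor: float = 60.0) -> float:
        # Reciprocal Rank Fusion: sum 1 / (k + rank) over each ranked list the
        # document appears in; ranks are 1-based and impact_factor plays the role of k.
        return sum(1.0 / (impact_factor + r) for r in ranks)

    def weighted_score(vector_score: float, keyword_score: float, alpha: float = 0.5) -> float:
        # alpha=1.0 -> pure vector score, alpha=0.0 -> pure keyword score,
        # values in between blend the two, as the docstring above specifies.
        return alpha * vector_score + (1.0 - alpha) * keyword_score

Because Ranker is a discriminated union, a pydantic TypeAdapter can validate a plain dict into the right variant by its "type" field:

    from pydantic import TypeAdapter

    ranker = TypeAdapter(Ranker).validate_python({"type": "rrf", "impact_factor": 30.0})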
@@ -76,7 +118,8 @@ class RAGQueryConfig(BaseModel):
     :param chunk_template: Template for formatting each retrieved chunk in the context.
         Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
         Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
-    :param mode: Search mode for retrieval—either "vector" or "keyword". Default "vector".
+    :param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector".
+    :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.
     """

     # This config defines how a query is generated using the messages
@@ -86,6 +129,7 @@ class RAGQueryConfig(BaseModel):
     max_chunks: int = 5
     chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
     mode: str | None = None
+    ranker: Ranker | None = Field(default=None)  # Only used for hybrid mode

     @field_validator("chunk_template")
     def validate_chunk_template(cls, v: str) -> str:
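Putting the two RAGQueryConfig hunks together, a minimal sketch of enabling the new hybrid mode (RAGQueryConfig and RRFRanker are from the diff; the specific values are illustrative):

    config = RAGQueryConfig(
        max_chunks=5,
        mode="hybrid",                         # new third mode alongside "vector"/"keyword"
        ranker=RRFRanker(impact_factor=60.0),  # only consulted in hybrid mode
    )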
@@ -8,7 +8,7 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from typing import Any, Literal, Protocol, runtime_checkable
+from typing import Annotated, Any, Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, Field
@@ -16,6 +16,7 @@ from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack.strong_typing.schema import register_schema


 class Chunk(BaseModel):
@@ -133,6 +134,50 @@ class VectorStoreDeleteResponse(BaseModel):
     deleted: bool = True


+@json_schema_type
+class VectorStoreChunkingStrategyAuto(BaseModel):
+    type: Literal["auto"] = "auto"
+
+
+@json_schema_type
+class VectorStoreChunkingStrategyStaticConfig(BaseModel):
+    chunk_overlap_tokens: int = 400
+    max_chunk_size_tokens: int = Field(800, ge=100, le=4096)
+
+
+@json_schema_type
+class VectorStoreChunkingStrategyStatic(BaseModel):
+    type: Literal["static"] = "static"
+    static: VectorStoreChunkingStrategyStaticConfig
+
+
+VectorStoreChunkingStrategy = Annotated[
+    VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic, Field(discriminator="type")
+]
+register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")
+
+
+@json_schema_type
+class VectorStoreFileLastError(BaseModel):
+    code: Literal["server_error"] | Literal["rate_limit_exceeded"]
+    message: str
+
+
+@json_schema_type
+class VectorStoreFileObject(BaseModel):
+    """OpenAI Vector Store File object."""
+
+    id: str
+    object: str = "vector_store.file"
+    attributes: dict[str, Any] = Field(default_factory=dict)
+    chunking_strategy: VectorStoreChunkingStrategy
+    created_at: int
+    last_error: VectorStoreFileLastError | None = None
+    status: Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
+    usage_bytes: int = 0
+    vector_store_id: str
+
+
 class VectorDBStore(Protocol):
     def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
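For illustration, building a static chunking strategy within the Field bounds above, and round-tripping a dict through the discriminated union (the token counts are arbitrary examples):

    from pydantic import TypeAdapter

    strategy = VectorStoreChunkingStrategyStatic(
        static=VectorStoreChunkingStrategyStaticConfig(
            chunk_overlap_tokens=200,
            max_chunk_size_tokens=512,  # must satisfy ge=100, le=4096
        )
    )
    # The "type" field selects the union variant during validation.
    parsed = TypeAdapter(VectorStoreChunkingStrategy).validate_python({"type": "auto"})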
@@ -290,3 +335,21 @@ class VectorIO(Protocol):
     :returns: A VectorStoreSearchResponse containing the search results.
     """
     ...
+
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST")
+    async def openai_attach_file_to_vector_store(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any] | None = None,
+        chunking_strategy: VectorStoreChunkingStrategy | None = None,
+    ) -> VectorStoreFileObject:
+        """Attach a file to a vector store.
+
+        :param vector_store_id: The ID of the vector store to attach the file to.
+        :param file_id: The ID of the file to attach to the vector store.
+        :param attributes: The key-value attributes stored with the file, which can be used for filtering.
+        :param chunking_strategy: The chunking strategy to use for the file.
+        :returns: A VectorStoreFileObject representing the attached file.
+        """
+        ...
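Since the route and parameters are spelled out in the hunk above, a plain HTTP call sketches how a client might exercise the new endpoint. The base URL, the IDs, and the assumption that the JSON body keys mirror the method signature are all hypothetical, not confirmed by the PR:

    import httpx

    # Hypothetical call against a locally running stack server.
    resp = httpx.post(
        "http://localhost:8321/openai/v1/vector_stores/vs_abc123/files",
        json={
            "file_id": "file_xyz789",
            "attributes": {"project": "demo"},
            "chunking_strategy": {"type": "auto"},
        },
    )
    file_obj = resp.json()  # expected to deserialize into a VectorStoreFileObject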