Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-04 18:13:44 +00:00)

Merge branch 'main' into chroma

This commit is contained in commit 470adfc2df.
750 changed files with 243,399 additions and 28,283 deletions.
@@ -43,17 +43,17 @@ from .openai_responses import (
 @json_schema_type
-class ResponseShieldSpec(BaseModel):
-    """Specification for a shield to apply during response generation.
+class ResponseGuardrailSpec(BaseModel):
+    """Specification for a guardrail to apply during response generation.

-    :param type: The type/identifier of the shield.
+    :param type: The type/identifier of the guardrail.
     """

     type: str
-    # TODO: more fields to be added for shield configuration
+    # TODO: more fields to be added for guardrail configuration


-ResponseShield = str | ResponseShieldSpec
+ResponseGuardrail = str | ResponseGuardrailSpec


 class Attachment(BaseModel):

@@ -820,10 +820,10 @@ class Agents(Protocol):
         tools: list[OpenAIResponseInputTool] | None = None,
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
-        shields: Annotated[
-            list[ResponseShield] | None,
+        guardrails: Annotated[
+            list[ResponseGuardrail] | None,
             ExtraBodyField(
-                "List of shields to apply during response generation. Shields provide safety and content moderation."
+                "List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
             ),
         ] = None,
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:

@@ -834,7 +834,7 @@ class Agents(Protocol):
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
         :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
         :param include: (Optional) Additional fields to include in the response.
-        :param shields: (Optional) List of shields to apply during response generation. Can be shield IDs (strings) or shield specifications.
+        :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
        :returns: An OpenAIResponseObject.
         """
         ...
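A minimal usage sketch of the renamed parameter. The client object and wrapper method are assumptions for illustration; only the guardrails extra-body field itself comes from this diff.

# Hypothetical caller of the responses API; "llama-guard" is a placeholder guardrail ID.
response = client.responses.create(
    model="llama3.2-3b",
    input="Summarize this document",
    extra_body={
        # plain guardrail IDs (strings) or ResponseGuardrailSpec-style objects are both accepted
        "guardrails": ["llama-guard", {"type": "llama-guard"}],
    },
)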
@@ -131,8 +131,20 @@ class OpenAIResponseOutputMessageContentOutputText(BaseModel):
     annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)


+@json_schema_type
+class OpenAIResponseContentPartRefusal(BaseModel):
+    """Refusal content within a streamed response part.
+
+    :param type: Content part type identifier, always "refusal"
+    :param refusal: Refusal text supplied by the model
+    """
+
+    type: Literal["refusal"] = "refusal"
+    refusal: str
+
+
 OpenAIResponseOutputMessageContent = Annotated[
-    OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal,
     Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent")

@@ -533,6 +545,7 @@ class OpenAIResponseObject(BaseModel):
     :param tools: (Optional) An array of tools the model may call while generating a response.
     :param truncation: (Optional) Truncation strategy applied to the response
     :param usage: (Optional) Token usage information for the response
+    :param instructions: (Optional) System message inserted into the model's context
     """

     created_at: int

@@ -552,6 +565,7 @@ class OpenAIResponseObject(BaseModel):
     tools: list[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
+    instructions: str | None = None


 @json_schema_type

@@ -878,18 +892,6 @@ class OpenAIResponseContentPartOutputText(BaseModel):
     logprobs: list[dict[str, Any]] | None = None


-@json_schema_type
-class OpenAIResponseContentPartRefusal(BaseModel):
-    """Refusal content within a streamed response part.
-
-    :param type: Content part type identifier, always "refusal"
-    :param refusal: Refusal text supplied by the model
-    """
-
-    type: Literal["refusal"] = "refusal"
-    refusal: str
-
-
 @json_schema_type
 class OpenAIResponseContentPartReasoningText(BaseModel):
     """Reasoning text emitted as part of a streamed response.

@@ -1258,9 +1260,9 @@ OpenAIResponseInput = Annotated[
     | OpenAIResponseInputFunctionToolCallOutput
     | OpenAIResponseMCPApprovalRequest
     | OpenAIResponseMCPApprovalResponse
-    # Fallback to the generic message type as a last resort
-    OpenAIResponseMessage,
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools
+    | OpenAIResponseMessage,
     Field(union_mode="left_to_right"),
 ]
 register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
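A small, self-contained illustration (not part of this diff) of how the widened discriminated union behaves; the model names come from the hunk above, and the TypeAdapter pattern is standard pydantic v2.

from pydantic import TypeAdapter

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseContentPartRefusal,
    OpenAIResponseOutputMessageContent,
)

adapter = TypeAdapter(OpenAIResponseOutputMessageContent)
# The "type" field selects the concrete model, so refusal parts now flow through
# the same union as output_text parts.
part = adapter.validate_python({"type": "refusal", "refusal": "I can't help with that."})
assert isinstance(part, OpenAIResponseContentPartRefusal)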
@@ -12,6 +12,9 @@ from openai.types.responses.response_includable import ResponseIncludable
 from pydantic import BaseModel, Field

 from llama_stack.apis.agents.openai_responses import (
+    OpenAIResponseInputFunctionToolCallOutput,
+    OpenAIResponseMCPApprovalRequest,
+    OpenAIResponseMCPApprovalResponse,
     OpenAIResponseMessage,
     OpenAIResponseOutputMessageFileSearchToolCall,
     OpenAIResponseOutputMessageFunctionToolCall,

@@ -61,9 +64,14 @@ class ConversationMessage(BaseModel):

 ConversationItem = Annotated[
     OpenAIResponseMessage
-    | OpenAIResponseOutputMessageFunctionToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
     | OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
+    | OpenAIResponseMCPApprovalRequest
+    | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools,
     Field(discriminator="type"),
@@ -165,7 +173,9 @@ class ConversationItemDeletedResource(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Conversations(Protocol):
-    """Protocol for conversation management operations."""
+    """Conversations
+
+    Protocol for conversation management operations."""

     @webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
     async def create_conversation(

@@ -173,6 +183,8 @@ class Conversations(Protocol):
     ) -> Conversation:
         """Create a conversation.
+
+        Create a conversation.

         :param items: Initial items to include in the conversation context.
         :param metadata: Set of key-value pairs that can be attached to an object.
         :returns: The created conversation object.

@@ -181,7 +193,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_conversation(self, conversation_id: str) -> Conversation:
-        """Get a conversation with the given ID.
+        """Retrieve a conversation.
+
+        Get a conversation with the given ID.

         :param conversation_id: The conversation identifier.
         :returns: The conversation object.

@@ -190,7 +204,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
     async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
-        """Update a conversation's metadata with the given ID.
+        """Update a conversation.
+
+        Update a conversation's metadata with the given ID.

         :param conversation_id: The conversation identifier.
         :param metadata: Set of key-value pairs that can be attached to an object.

@@ -200,7 +216,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
-        """Delete a conversation with the given ID.
+        """Delete a conversation.
+
+        Delete a conversation with the given ID.

         :param conversation_id: The conversation identifier.
         :returns: The deleted conversation resource.

@@ -209,7 +227,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
     async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
-        """Create items in the conversation.
+        """Create items.
+
+        Create items in the conversation.

         :param conversation_id: The conversation identifier.
         :param items: Items to include in the conversation context.

@@ -219,7 +239,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
-        """Retrieve a conversation item.
+        """Retrieve an item.
+
+        Retrieve a conversation item.

         :param conversation_id: The conversation identifier.
         :param item_id: The item identifier.

@@ -236,7 +258,9 @@ class Conversations(Protocol):
         limit: int | NotGiven = NOT_GIVEN,
         order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
     ) -> ConversationItemList:
-        """List items in the conversation.
+        """List items.
+
+        List items in the conversation.

         :param conversation_id: The conversation identifier.
         :param after: An item ID to list items after, used in pagination.

@@ -251,7 +275,9 @@ class Conversations(Protocol):
     async def openai_delete_conversation_item(
         self, conversation_id: str, item_id: str
     ) -> ConversationItemDeletedResource:
-        """Delete a conversation item.
+        """Delete an item.
+
+        Delete a conversation item.

         :param conversation_id: The conversation identifier.
         :param item_id: The item identifier.
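Each docstring above now leads with a one-line summary, a blank line, then the longer description. A tiny sketch of why that shape is convenient; the splitting itself is an assumed consumer-side detail (e.g. for spec summaries), not code from this commit.

def split_docstring(doc: str) -> tuple[str, str]:
    # "Retrieve a conversation.\n\nGet a conversation with the given ID."
    # -> ("Retrieve a conversation.", "Get a conversation with the given ID.")
    summary, _, description = doc.strip().partition("\n")
    return summary.strip(), description.strip()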
@@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):

     models = "models"
     shields = "shields"
+    vector_stores = "vector_stores"  # only used for routing table
     datasets = "datasets"
     scoring_functions = "scoring_functions"
     benchmarks = "benchmarks"
@@ -82,7 +82,9 @@ class EvaluateResponse(BaseModel):


 class Eval(Protocol):
-    """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
+    """Evaluations
+
+    Llama Stack Evaluation API for running evaluations on model and agent candidates."""

     @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
@@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
 class ResourceType(StrEnum):
     model = "model"
     shield = "shield"
-    vector_db = "vector_db"
+    vector_store = "vector_store"
     dataset = "dataset"
     scoring_function = "scoring_function"
     benchmark = "benchmark"

@@ -34,4 +34,4 @@ class Resource(BaseModel):
     provider_id: str = Field(description="ID of the provider that owns this resource")

-    type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)")
+    type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")
@@ -16,15 +16,12 @@ from typing import (

 from pydantic import BaseModel, Field

-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.models.llama.datatypes import Primitive
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack.schema_utils import json_schema_type, register_schema

-# Add this constant near the top of the file, after the imports
 DEFAULT_TTL_DAYS = 7

-REQUIRED_SCOPE = "telemetry.read"


 @json_schema_type
 class SpanStatus(Enum):

@@ -413,7 +410,6 @@ class QueryMetricsResponse(BaseModel):

 @runtime_checkable
 class Telemetry(Protocol):
-    @webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
     async def log_event(
         self,
         event: Event,
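DEFAULT_TTL_DAYS above pairs with the ttl_seconds parameter that log_event keeps below; the conversion helper here is ours for illustration, not the repo's.

DEFAULT_TTL_DAYS = 7

def default_ttl_seconds(days: int = DEFAULT_TTL_DAYS) -> int:
    # 7 days -> 604800 seconds
    return days * 24 * 60 * 60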
@@ -425,174 +421,3 @@ class Telemetry(Protocol):
         :param ttl_seconds: The time to live of the event.
         """
         ...
-
-    @webmethod(
-        route="/telemetry/traces",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
-    async def query_traces(
-        self,
-        attribute_filters: list[QueryCondition] | None = None,
-        limit: int | None = 100,
-        offset: int | None = 0,
-        order_by: list[str] | None = None,
-    ) -> QueryTracesResponse:
-        """Query traces.
-
-        :param attribute_filters: The attribute filters to apply to the traces.
-        :param limit: The limit of traces to return.
-        :param offset: The offset of the traces to return.
-        :param order_by: The order by of the traces to return.
-        :returns: A QueryTracesResponse.
-        """
-        ...
-
-    @webmethod(
-        route="/telemetry/traces/{trace_id:path}",
-        method="GET",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(
-        route="/telemetry/traces/{trace_id:path}",
-        method="GET",
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1ALPHA,
-    )
-    async def get_trace(self, trace_id: str) -> Trace:
-        """Get a trace by its ID.
-
-        :param trace_id: The ID of the trace to get.
-        :returns: A Trace.
-        """
-        ...
-
-    @webmethod(
-        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
-        method="GET",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(
-        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
-        method="GET",
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1ALPHA,
-    )
-    async def get_span(self, trace_id: str, span_id: str) -> Span:
-        """Get a span by its ID.
-
-        :param trace_id: The ID of the trace to get the span from.
-        :param span_id: The ID of the span to get.
-        :returns: A Span.
-        """
-        ...
-
-    @webmethod(
-        route="/telemetry/spans/{span_id:path}/tree",
-        method="POST",
-        deprecated=True,
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(
-        route="/telemetry/spans/{span_id:path}/tree",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1ALPHA,
-    )
-    async def get_span_tree(
-        self,
-        span_id: str,
-        attributes_to_return: list[str] | None = None,
-        max_depth: int | None = None,
-    ) -> QuerySpanTreeResponse:
-        """Get a span tree by its ID.
-
-        :param span_id: The ID of the span to get the tree from.
-        :param attributes_to_return: The attributes to return in the tree.
-        :param max_depth: The maximum depth of the tree.
-        :returns: A QuerySpanTreeResponse.
-        """
-        ...
-
-    @webmethod(
-        route="/telemetry/spans",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
-    async def query_spans(
-        self,
-        attribute_filters: list[QueryCondition],
-        attributes_to_return: list[str],
-        max_depth: int | None = None,
-    ) -> QuerySpansResponse:
-        """Query spans.
-
-        :param attribute_filters: The attribute filters to apply to the spans.
-        :param attributes_to_return: The attributes to return in the spans.
-        :param max_depth: The maximum depth of the tree.
-        :returns: A QuerySpansResponse.
-        """
-        ...
-
-    @webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
-    @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA)
-    async def save_spans_to_dataset(
-        self,
-        attribute_filters: list[QueryCondition],
-        attributes_to_save: list[str],
-        dataset_id: str,
-        max_depth: int | None = None,
-    ) -> None:
-        """Save spans to a dataset.
-
-        :param attribute_filters: The attribute filters to apply to the spans.
-        :param attributes_to_save: The attributes to save to the dataset.
-        :param dataset_id: The ID of the dataset to save the spans to.
-        :param max_depth: The maximum depth of the tree.
-        """
-        ...
-
-    @webmethod(
-        route="/telemetry/metrics/{metric_name}",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(
-        route="/telemetry/metrics/{metric_name}",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1ALPHA,
-    )
-    async def query_metrics(
-        self,
-        metric_name: str,
-        start_time: int,
-        end_time: int | None = None,
-        granularity: str | None = None,
-        query_type: MetricQueryType = MetricQueryType.RANGE,
-        label_matchers: list[MetricLabelMatcher] | None = None,
-    ) -> QueryMetricsResponse:
-        """Query metrics.
-
-        :param metric_name: The name of the metric to query.
-        :param start_time: The start time of the metric to query.
-        :param end_time: The end time of the metric to query.
-        :param granularity: The granularity of the metric to query.
-        :param query_type: The type of query to perform.
-        :param label_matchers: The label matchers to apply to the metric.
-        :returns: A QueryMetricsResponse.
-        """
-        ...
@@ -15,7 +15,7 @@ from fastapi import Body
 from pydantic import BaseModel, Field

 from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.vector_dbs import VectorDB
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

@@ -93,6 +93,22 @@ class Chunk(BaseModel):

         return generate_chunk_id(str(uuid.uuid4()), str(self.content))

+    @property
+    def document_id(self) -> str | None:
+        """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
+        # Check metadata first (takes precedence)
+        doc_id = self.metadata.get("document_id")
+        if doc_id is not None:
+            if not isinstance(doc_id, str):
+                raise TypeError(f"metadata['document_id'] must be a string, got {type(doc_id).__name__}: {doc_id!r}")
+            return doc_id
+
+        # Fall back to chunk_metadata if available (Pydantic ensures type safety)
+        if self.chunk_metadata is not None:
+            return self.chunk_metadata.document_id
+
+        return None
+

 @json_schema_type
 class QueryChunksResponse(BaseModel):

@@ -124,6 +140,7 @@ class VectorStoreFileCounts(BaseModel):
     total: int


+# TODO: rename this as OpenAIVectorStore
 @json_schema_type
 class VectorStoreObject(BaseModel):
     """OpenAI Vector Store object.

@@ -501,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"):
     chunking_strategy: VectorStoreChunkingStrategy | None = None


-class VectorDBStore(Protocol):
-    def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
+class VectorStoreTable(Protocol):
+    def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...


 @runtime_checkable
 @trace_protocol
 class VectorIO(Protocol):
-    vector_db_store: VectorDBStore | None = None
+    vector_store_table: VectorStoreTable | None = None

     # this will just block now until chunks are inserted, but it should
     # probably return a Job instance which can be polled for completion
+    # TODO: rename vector_db_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert_chunks(
         self,

@@ -530,6 +548,7 @@ class VectorIO(Protocol):
         """
         ...

+    # TODO: rename vector_db_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query_chunks(
         self,
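A quick sketch of the new Chunk.document_id fallback order. The import path is assumed, the field values are invented, and any other required Chunk/ChunkMetadata fields are omitted for brevity.

from llama_stack.apis.vector_io import Chunk, ChunkMetadata

chunk = Chunk(
    content="hello world",
    metadata={"document_id": "doc-123"},                  # checked first
    chunk_metadata=ChunkMetadata(document_id="doc-456"),  # used only as a fallback
)
assert chunk.document_id == "doc-123"

# With neither source populated, the property returns None.
assert Chunk(content="hello world", metadata={}).document_id is None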
llama_stack/apis/vector_stores/__init__.py (new file, 7 lines)

@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .vector_stores import *
@@ -9,53 +9,43 @@ from typing import Literal
 from pydantic import BaseModel

 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.schema_utils import json_schema_type


-@json_schema_type
-class VectorDB(Resource):
+# Internal resource type for storing the vector store routing and other information
+class VectorStore(Resource):
     """Vector database resource for storing and querying vector embeddings.

-    :param type: Type of resource, always 'vector_db' for vector databases
+    :param type: Type of resource, always 'vector_store' for vector stores
     :param embedding_model: Name of the embedding model to use for vector generation
     :param embedding_dimension: Dimension of the embedding vectors
     """

-    type: Literal[ResourceType.vector_db] = ResourceType.vector_db
+    type: Literal[ResourceType.vector_store] = ResourceType.vector_store

     embedding_model: str
     embedding_dimension: int
-    vector_db_name: str | None = None
+    vector_store_name: str | None = None

     @property
-    def vector_db_id(self) -> str:
+    def vector_store_id(self) -> str:
         return self.identifier

     @property
-    def provider_vector_db_id(self) -> str | None:
+    def provider_vector_store_id(self) -> str | None:
         return self.provider_resource_id


-class VectorDBInput(BaseModel):
+class VectorStoreInput(BaseModel):
     """Input parameters for creating or configuring a vector database.

-    :param vector_db_id: Unique identifier for the vector database
+    :param vector_store_id: Unique identifier for the vector store
     :param embedding_model: Name of the embedding model to use for vector generation
     :param embedding_dimension: Dimension of the embedding vectors
-    :param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
+    :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
     """

-    vector_db_id: str
+    vector_store_id: str
     embedding_model: str
     embedding_dimension: int
     provider_id: str | None = None
-    provider_vector_db_id: str | None = None
-
-
-class ListVectorDBsResponse(BaseModel):
-    """Response from listing vector databases.
-
-    :param data: List of vector databases
-    """
-
-    data: list[VectorDB]
+    provider_vector_store_id: str | None = None
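For illustration only (identifier and model values are invented), the renamed input model is built like any other pydantic model:

vs_input = VectorStoreInput(
    vector_store_id="vs_demo",
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
)
print(vs_input.model_dump())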
@@ -6,6 +6,8 @@

 import argparse

+from llama_stack.log import setup_logging
+
 from .stack import StackParser
 from .stack.utils import print_subcommand_description


@@ -42,6 +44,9 @@ class LlamaCLIParser:


 def main():
+    # Initialize logging from environment variables before any other operations
+    setup_logging()
+
     parser = LlamaCLIParser()
     args = parser.parse_args()
     parser.run(args)
@ -1,490 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import importlib.resources
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import textwrap
|
||||
from functools import lru_cache
|
||||
from importlib.abc import Traversable
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from prompt_toolkit import prompt
|
||||
from prompt_toolkit.completion import WordCompleter
|
||||
from prompt_toolkit.validation import Validator
|
||||
from termcolor import colored, cprint
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.table import print_table
|
||||
from llama_stack.core.build import (
|
||||
SERVER_DEPENDENCIES,
|
||||
build_image,
|
||||
get_provider_dependencies,
|
||||
)
|
||||
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
BuildProvider,
|
||||
DistributionSpec,
|
||||
Provider,
|
||||
StackRunConfig,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.core.resolver import InvalidProviderError
|
||||
from llama_stack.core.stack import replace_env_vars
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.exec import formulate_run_args, run_command
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.providers.datatypes import Api
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
|
||||
|
||||
DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
|
||||
|
||||
|
||||
@lru_cache
|
||||
def available_distros_specs() -> dict[str, BuildConfig]:
|
||||
import yaml
|
||||
|
||||
distro_specs = {}
|
||||
for p in DISTRIBS_PATH.rglob("*build.yaml"):
|
||||
distro_name = p.parent.name
|
||||
with open(p) as f:
|
||||
build_config = BuildConfig(**yaml.safe_load(f))
|
||||
distro_specs[distro_name] = build_config
|
||||
return distro_specs
|
||||
|
||||
|
||||
def run_stack_build_command(args: argparse.Namespace) -> None:
|
||||
if args.list_distros:
|
||||
return _run_distro_list_cmd()
|
||||
|
||||
if args.image_type == ImageType.VENV.value:
|
||||
current_venv = os.environ.get("VIRTUAL_ENV")
|
||||
image_name = args.image_name or current_venv
|
||||
else:
|
||||
image_name = args.image_name
|
||||
|
||||
if args.template:
|
||||
cprint(
|
||||
"The --template argument is deprecated. Please use --distro instead.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
distro_name = args.template
|
||||
else:
|
||||
distro_name = args.distribution
|
||||
|
||||
if distro_name:
|
||||
available_distros = available_distros_specs()
|
||||
if distro_name not in available_distros:
|
||||
cprint(
|
||||
f"Could not find distribution {distro_name}. Please run `llama stack build --list-distros` to check out the available distributions",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
build_config = available_distros[distro_name]
|
||||
if args.image_type:
|
||||
build_config.image_type = args.image_type
|
||||
else:
|
||||
cprint(
|
||||
f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {distro_name}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
elif args.providers:
|
||||
provider_list: dict[str, list[BuildProvider]] = dict()
|
||||
for api_provider in args.providers.split(","):
|
||||
if "=" not in api_provider:
|
||||
cprint(
|
||||
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
api, provider_type = api_provider.split("=")
|
||||
providers_for_api = get_provider_registry().get(Api(api), None)
|
||||
if providers_for_api is None:
|
||||
cprint(
|
||||
f"{api} is not a valid API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if provider_type in providers_for_api:
|
||||
provider = BuildProvider(
|
||||
provider_type=provider_type,
|
||||
module=None,
|
||||
)
|
||||
provider_list.setdefault(api, []).append(provider)
|
||||
else:
|
||||
cprint(
|
||||
f"{provider} is not a valid provider for the {api} API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
distribution_spec = DistributionSpec(
|
||||
providers=provider_list,
|
||||
description=",".join(args.providers),
|
||||
)
|
||||
if not args.image_type:
|
||||
cprint(
|
||||
f"Please specify a image-type (container | venv) for {args.template}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec)
|
||||
elif not args.config and not distro_name:
|
||||
name = prompt(
|
||||
"> Enter a name for your Llama Stack (e.g. my-local-stack): ",
|
||||
validator=Validator.from_callable(
|
||||
lambda x: len(x) > 0,
|
||||
error_message="Name cannot be empty, please enter a name",
|
||||
),
|
||||
)
|
||||
|
||||
image_type = prompt(
|
||||
"> Enter the image type you want your Llama Stack to be built as (use <TAB> to see options): ",
|
||||
completer=WordCompleter([e.value for e in ImageType]),
|
||||
complete_while_typing=True,
|
||||
validator=Validator.from_callable(
|
||||
lambda x: x in [e.value for e in ImageType],
|
||||
error_message="Invalid image type. Use <TAB> to see options",
|
||||
),
|
||||
)
|
||||
|
||||
image_name = f"llamastack-{name}"
|
||||
|
||||
cprint(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
Llama Stack is composed of several APIs working together. Let's select
|
||||
the provider types (implementations) you want to use for these APIs.
|
||||
""",
|
||||
),
|
||||
color="green",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)
|
||||
|
||||
providers: dict[str, list[BuildProvider]] = dict()
|
||||
for api, providers_for_api in get_provider_registry().items():
|
||||
available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
|
||||
if not available_providers:
|
||||
continue
|
||||
api_provider = prompt(
|
||||
f"> Enter provider for API {api.value}: ",
|
||||
completer=WordCompleter(available_providers),
|
||||
complete_while_typing=True,
|
||||
validator=Validator.from_callable(
|
||||
lambda x: x in available_providers, # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847
|
||||
error_message="Invalid provider, use <TAB> to see options",
|
||||
),
|
||||
)
|
||||
|
||||
string_providers = api_provider.split(" ")
|
||||
|
||||
for provider in string_providers:
|
||||
providers.setdefault(api.value, []).append(BuildProvider(provider_type=provider))
|
||||
|
||||
description = prompt(
|
||||
"\n > (Optional) Enter a short description for your Llama Stack: ",
|
||||
default="",
|
||||
)
|
||||
|
||||
distribution_spec = DistributionSpec(
|
||||
providers=providers,
|
||||
description=description,
|
||||
)
|
||||
|
||||
build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec)
|
||||
else:
|
||||
with open(args.config) as f:
|
||||
try:
|
||||
contents = yaml.safe_load(f)
|
||||
contents = replace_env_vars(contents)
|
||||
build_config = BuildConfig(**contents)
|
||||
if args.image_type:
|
||||
build_config.image_type = args.image_type
|
||||
except Exception as e:
|
||||
cprint(
|
||||
f"Could not parse config file {args.config}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if args.print_deps_only:
|
||||
print(f"# Dependencies for {distro_name or args.config or image_name}")
|
||||
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
|
||||
normal_deps += SERVER_DEPENDENCIES
|
||||
print(f"uv pip install {' '.join(normal_deps)}")
|
||||
for special_dep in special_deps:
|
||||
print(f"uv pip install {special_dep}")
|
||||
for external_dep in external_provider_dependencies:
|
||||
print(f"uv pip install {external_dep}")
|
||||
return
|
||||
|
||||
try:
|
||||
run_config = _run_stack_build_command_from_build_config(
|
||||
build_config,
|
||||
image_name=image_name,
|
||||
config_path=args.config,
|
||||
distro_name=distro_name,
|
||||
)
|
||||
|
||||
except (Exception, RuntimeError) as exc:
|
||||
import traceback
|
||||
|
||||
cprint(
|
||||
f"Error building stack: {exc}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
cprint("Stack trace:", color="red", file=sys.stderr)
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
if run_config is None:
|
||||
cprint(
|
||||
"Run config path is empty",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if args.run:
|
||||
config_dict = yaml.safe_load(run_config.read_text())
|
||||
config = parse_and_maybe_upgrade_config(config_dict)
|
||||
if config.external_providers_dir and not config.external_providers_dir.exists():
|
||||
config.external_providers_dir.mkdir(exist_ok=True)
|
||||
run_args = formulate_run_args(args.image_type, image_name or config.image_name)
|
||||
run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", str(run_config)])
|
||||
run_command(run_args)
|
||||
|
||||
|
||||
def _generate_run_config(
|
||||
build_config: BuildConfig,
|
||||
build_dir: Path,
|
||||
image_name: str,
|
||||
) -> Path:
|
||||
"""
|
||||
Generate a run.yaml template file for user to edit from a build.yaml file
|
||||
"""
|
||||
apis = list(build_config.distribution_spec.providers.keys())
|
||||
run_config = StackRunConfig(
|
||||
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
|
||||
image_name=image_name,
|
||||
apis=apis,
|
||||
providers={},
|
||||
external_providers_dir=build_config.external_providers_dir
|
||||
if build_config.external_providers_dir
|
||||
else EXTERNAL_PROVIDERS_DIR,
|
||||
)
|
||||
if not run_config.inference_store:
|
||||
run_config.inference_store = SqliteSqlStoreConfig(
|
||||
**SqliteSqlStoreConfig.sample_run_config(
|
||||
__distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
|
||||
)
|
||||
)
|
||||
# build providers dict
|
||||
provider_registry = get_provider_registry(build_config)
|
||||
for api in apis:
|
||||
run_config.providers[api] = []
|
||||
providers = build_config.distribution_spec.providers[api]
|
||||
|
||||
for provider in providers:
|
||||
pid = provider.provider_type.split("::")[-1]
|
||||
|
||||
p = provider_registry[Api(api)][provider.provider_type]
|
||||
if p.deprecation_error:
|
||||
raise InvalidProviderError(p.deprecation_error)
|
||||
|
||||
try:
|
||||
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
|
||||
except (ModuleNotFoundError, ValueError) as exc:
|
||||
# HACK ALERT:
|
||||
# This code executes after building is done, the import cannot work since the
|
||||
# package is either available in the venv or container - not available on the host.
|
||||
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
|
||||
# external
|
||||
cprint(
|
||||
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
# Set config_type to None to avoid UnboundLocalError
|
||||
config_type = None
|
||||
|
||||
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
||||
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
|
||||
else:
|
||||
config = {}
|
||||
|
||||
p_spec = Provider(
|
||||
provider_id=pid,
|
||||
provider_type=provider.provider_type,
|
||||
config=config,
|
||||
module=provider.module,
|
||||
)
|
||||
run_config.providers[api].append(p_spec)
|
||||
|
||||
run_config_file = build_dir / f"{image_name}-run.yaml"
|
||||
|
||||
with open(run_config_file, "w") as f:
|
||||
to_write = json.loads(run_config.model_dump_json())
|
||||
f.write(yaml.dump(to_write, sort_keys=False))
|
||||
|
||||
# Only print this message for non-container builds since it will be displayed before the
|
||||
# container is built
|
||||
# For non-container builds, the run.yaml is generated at the very end of the build process so it
|
||||
# makes sense to display this message
|
||||
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
|
||||
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
|
||||
return run_config_file
|
||||
|
||||
|
||||
def _run_stack_build_command_from_build_config(
|
||||
build_config: BuildConfig,
|
||||
image_name: str | None = None,
|
||||
distro_name: str | None = None,
|
||||
config_path: str | None = None,
|
||||
) -> Path | Traversable:
|
||||
image_name = image_name or build_config.image_name
|
||||
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
|
||||
if distro_name:
|
||||
image_name = f"distribution-{distro_name}"
|
||||
else:
|
||||
if not image_name:
|
||||
raise ValueError("Please specify an image name when building a container image without a template")
|
||||
else:
|
||||
if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
|
||||
image_name = "__system__"
|
||||
if not image_name:
|
||||
raise ValueError("Please specify an image name when building a venv image")
|
||||
|
||||
# At this point, image_name should be guaranteed to be a string
|
||||
if image_name is None:
|
||||
raise ValueError("image_name should not be None after validation")
|
||||
|
||||
if distro_name:
|
||||
build_dir = DISTRIBS_BASE_DIR / distro_name
|
||||
build_file_path = build_dir / f"{distro_name}-build.yaml"
|
||||
else:
|
||||
if image_name is None:
|
||||
raise ValueError("image_name cannot be None")
|
||||
build_dir = DISTRIBS_BASE_DIR / image_name
|
||||
build_file_path = build_dir / f"{image_name}-build.yaml"
|
||||
|
||||
os.makedirs(build_dir, exist_ok=True)
|
||||
run_config_file = None
|
||||
# Generate the run.yaml so it can be included in the container image with the proper entrypoint
|
||||
# Only do this if we're building a container image and we're not using a template
|
||||
if build_config.image_type == LlamaStackImageType.CONTAINER.value and not distro_name and config_path:
|
||||
cprint("Generating run.yaml file", color="yellow", file=sys.stderr)
|
||||
run_config_file = _generate_run_config(build_config, build_dir, image_name)
|
||||
|
||||
with open(build_file_path, "w") as f:
|
||||
to_write = json.loads(build_config.model_dump_json(exclude_none=True))
|
||||
f.write(yaml.dump(to_write, sort_keys=False))
|
||||
|
||||
# We first install the external APIs so that the build process can use them and discover the
|
||||
# providers dependencies
|
||||
if build_config.external_apis_dir:
|
||||
cprint("Installing external APIs", color="yellow", file=sys.stderr)
|
||||
external_apis = load_external_apis(build_config)
|
||||
if external_apis:
|
||||
# install the external APIs
|
||||
packages = []
|
||||
for _, api_spec in external_apis.items():
|
||||
if api_spec.pip_packages:
|
||||
packages.extend(api_spec.pip_packages)
|
||||
cprint(
|
||||
f"Installing {api_spec.name} with pip packages {api_spec.pip_packages}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return_code = run_command(["uv", "pip", "install", *packages])
|
||||
if return_code != 0:
|
||||
packages_str = ", ".join(packages)
|
||||
raise RuntimeError(
|
||||
f"Failed to install external APIs packages: {packages_str} (return code: {return_code})"
|
||||
)
|
||||
|
||||
return_code = build_image(
|
||||
build_config,
|
||||
image_name,
|
||||
distro_or_config=distro_name or config_path or str(build_file_path),
|
||||
run_config=run_config_file.as_posix() if run_config_file else None,
|
||||
)
|
||||
if return_code != 0:
|
||||
raise RuntimeError(f"Failed to build image {image_name}")
|
||||
|
||||
if distro_name:
|
||||
# copy run.yaml from distribution to build_dir instead of generating it again
|
||||
distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro_name}/run.yaml"
|
||||
run_config_file = build_dir / f"{distro_name}-run.yaml"
|
||||
|
||||
with importlib.resources.as_file(distro_path) as path:
|
||||
shutil.copy(path, run_config_file)
|
||||
|
||||
cprint("Build Successful!", color="green", file=sys.stderr)
|
||||
cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
|
||||
if build_config.image_type == LlamaStackImageType.VENV:
|
||||
cprint(
|
||||
"You can run the new Llama Stack distro (after activating "
|
||||
+ colored(image_name, "cyan")
|
||||
+ ") via: "
|
||||
+ colored(f"llama stack run {run_config_file}", "blue"),
|
||||
color="green",
|
||||
file=sys.stderr,
|
||||
)
|
||||
elif build_config.image_type == LlamaStackImageType.CONTAINER:
|
||||
cprint(
|
||||
"You can run the container with: "
|
||||
+ colored(
|
||||
f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"
|
||||
),
|
||||
color="green",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return distro_path
|
||||
else:
|
||||
return _generate_run_config(build_config, build_dir, image_name)
|
||||
|
||||
|
||||
def _run_distro_list_cmd() -> None:
|
||||
headers = [
|
||||
"Distribution Name",
|
||||
# "Providers",
|
||||
"Description",
|
||||
]
|
||||
|
||||
rows = []
|
||||
for distro_name, spec in available_distros_specs().items():
|
||||
rows.append(
|
||||
[
|
||||
distro_name,
|
||||
# json.dumps(spec.distribution_spec.providers, indent=2),
|
||||
spec.distribution_spec.description,
|
||||
]
|
||||
)
|
||||
print_table(
|
||||
rows,
|
||||
headers,
|
||||
separate_rows=True,
|
||||
)
|
||||
llama_stack/cli/stack/_list_deps.py (new file, 182 lines)
@ -0,0 +1,182 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.core.build import get_provider_dependencies
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
BuildProvider,
|
||||
DistributionSpec,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.stack import replace_env_vars
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
|
||||
|
||||
logger = get_logger(name=__name__, category="cli")
|
||||
|
||||
|
||||
# These are the dependencies needed by the distribution server.
|
||||
# `llama-stack` is automatically installed by the installation script.
|
||||
SERVER_DEPENDENCIES = [
|
||||
"aiosqlite",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"uvicorn",
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
]
|
||||
|
||||
|
||||
def format_output_deps_only(
|
||||
normal_deps: list[str],
|
||||
special_deps: list[str],
|
||||
external_deps: list[str],
|
||||
uv: bool = False,
|
||||
) -> str:
|
||||
"""Format dependencies as a list."""
|
||||
lines = []
|
||||
|
||||
uv_str = ""
|
||||
if uv:
|
||||
uv_str = "uv pip install "
|
||||
|
||||
# Quote deps with commas
|
||||
quoted_normal_deps = [quote_if_needed(dep) for dep in normal_deps]
|
||||
lines.append(f"{uv_str}{' '.join(quoted_normal_deps)}")
|
||||
|
||||
for special_dep in special_deps:
|
||||
lines.append(f"{uv_str}{quote_special_dep(special_dep)}")
|
||||
|
||||
for external_dep in external_deps:
|
||||
lines.append(f"{uv_str}{quote_special_dep(external_dep)}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||
if args.config:
|
||||
try:
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
|
||||
config_file = resolve_config_or_distro(args.config, Mode.BUILD)
|
||||
except ValueError as e:
|
||||
cprint(
|
||||
f"Could not parse config file {args.config}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if config_file:
|
||||
with open(config_file) as f:
|
||||
try:
|
||||
contents = yaml.safe_load(f)
|
||||
contents = replace_env_vars(contents)
|
||||
build_config = BuildConfig(**contents)
|
||||
build_config.image_type = "venv"
|
||||
except Exception as e:
|
||||
cprint(
|
||||
f"Could not parse config file {config_file}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
elif args.providers:
|
||||
provider_list: dict[str, list[BuildProvider]] = dict()
|
||||
for api_provider in args.providers.split(","):
|
||||
if "=" not in api_provider:
|
||||
cprint(
|
||||
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
api, provider_type = api_provider.split("=")
|
||||
providers_for_api = get_provider_registry().get(Api(api), None)
|
||||
if providers_for_api is None:
|
||||
cprint(
|
||||
f"{api} is not a valid API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if provider_type in providers_for_api:
|
||||
provider = BuildProvider(
|
||||
provider_type=provider_type,
|
||||
module=None,
|
||||
)
|
||||
provider_list.setdefault(api, []).append(provider)
|
||||
else:
|
||||
cprint(
|
||||
f"{provider_type} is not a valid provider for the {api} API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
distribution_spec = DistributionSpec(
|
||||
providers=provider_list,
|
||||
description=",".join(args.providers),
|
||||
)
|
||||
build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)
|
||||
|
||||
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
|
||||
normal_deps += SERVER_DEPENDENCIES
|
||||
|
||||
# Add external API dependencies
|
||||
if build_config.external_apis_dir:
|
||||
from llama_stack.core.external import load_external_apis
|
||||
|
||||
external_apis = load_external_apis(build_config)
|
||||
if external_apis:
|
||||
for _, api_spec in external_apis.items():
|
||||
normal_deps.extend(api_spec.pip_packages)
|
||||
|
||||
# Format and output based on requested format
|
||||
output = format_output_deps_only(
|
||||
normal_deps=normal_deps,
|
||||
special_deps=special_deps,
|
||||
external_deps=external_provider_dependencies,
|
||||
uv=args.format == "uv",
|
||||
)
|
||||
|
||||
print(output)
|
||||
|
||||
|
||||
+def quote_if_needed(dep):
+    # Add quotes if the dependency contains special characters that need escaping in shell
+    # This includes: commas, comparison operators (<, >, <=, >=, ==, !=)
+    needs_quoting = any(char in dep for char in [",", "<", ">", "="])
+    return f"'{dep}'" if needs_quoting else dep
+
+
+def quote_special_dep(dep_string):
+    """
+    Quote individual packages in a special dependency string.
+    Special deps may contain multiple packages and flags like --extra-index-url.
+    We need to quote only the package specs that contain special characters.
+    """
+    parts = dep_string.split()
+    quoted_parts = []
+
+    for part in parts:
+        # Don't quote flags (they start with -)
+        if part.startswith("-"):
+            quoted_parts.append(part)
+        else:
+            # Quote package specs that need it
+            quoted_parts.append(quote_if_needed(part))
+
+    return " ".join(quoted_parts)
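A couple of illustrative calls against the helpers above; the package names are placeholders, and the expected outputs follow directly from the quoting rules shown.

assert quote_if_needed("fastapi") == "fastapi"
assert quote_if_needed("torch>=2.1,<3") == "'torch>=2.1,<3'"
assert quote_special_dep("--extra-index-url https://example.org torch>=2.1") == "--extra-index-url https://example.org 'torch>=2.1'"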
@ -1,100 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import argparse
|
||||
import textwrap
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
|
||||
class StackBuild(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"build",
|
||||
prog="llama stack build",
|
||||
description="Build a Llama stack container",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_stack_build_command)
|
||||
|
||||
def _add_arguments(self):
|
||||
self.parser.add_argument(
|
||||
"--config",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--template",
|
||||
type=str,
|
||||
default=None,
|
||||
help="""(deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--distro",
|
||||
"--distribution",
|
||||
dest="distribution",
|
||||
type=str,
|
||||
default=None,
|
||||
help="""Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--list-distros",
|
||||
"--list-distributions",
|
||||
action="store_true",
|
||||
dest="list_distros",
|
||||
default=False,
|
||||
help="Show the available distributions for building a Llama Stack distribution",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--image-type",
|
||||
type=str,
|
||||
help="Image Type to use for the build. If not specified, will use the image type from the template config.",
|
||||
choices=[e.value for e in ImageType],
|
||||
default=None, # no default so we can detect if a user specified --image-type and override image_type in the config
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--image-name",
|
||||
type=str,
|
||||
help=textwrap.dedent(
|
||||
f"""[for image-type={"|".join(e.value for e in ImageType)}] Name of the virtual environment to use for
|
||||
the build. If not specified, currently active environment will be used if found.
|
||||
"""
|
||||
),
|
||||
default=None,
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--print-deps-only",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Print the dependencies for the stack only, without building the stack",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--run",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Run the stack after building using the same image type, name, and other applicable arguments",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--providers",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
|
||||
)
|
||||
|
||||
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
||||
# always keep implementation completely silo-ed away from CLI so CLI
|
||||
# can be fast to load and reduces dependencies
|
||||
from ._build import run_stack_build_command
|
||||
|
||||
return run_stack_build_command(args)
|
||||
llama_stack/cli/stack/list_deps.py (new file, 51 lines)
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import argparse
+
+from llama_stack.cli.subcommand import Subcommand
+
+
+class StackListDeps(Subcommand):
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "list-deps",
+            prog="llama stack list-deps",
+            description="list the dependencies for a llama stack distribution",
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        )
+        self._add_arguments()
+        self.parser.set_defaults(func=self._run_stack_list_deps_command)
+
+    def _add_arguments(self):
+        self.parser.add_argument(
+            "config",
+            type=str,
+            nargs="?",  # Make it optional
+            metavar="config | distro",
+            help="Path to config file to use or name of known distro (llama stack list for a list).",
+        )
+
+        self.parser.add_argument(
+            "--providers",
+            type=str,
+            default=None,
+            help="sync dependencies for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
+        )
+        self.parser.add_argument(
+            "--format",
+            type=str,
+            choices=["uv", "deps-only"],
+            default="deps-only",
+            help="Output format: 'uv' shows shell commands, 'deps-only' shows just the list of dependencies without `uv` (default)",
+        )
+
+    def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
+        # always keep implementation completely silo-ed away from CLI so CLI
+        # can be fast to load and reduces dependencies
+        from ._list_deps import run_stack_list_deps_command
+
+        return run_stack_list_deps_command(args)
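A hedged sketch of how the new subcommand gets exercised, mirroring the StackParser wiring shown just below; the "starter" distro name is a placeholder.

import argparse

parser = argparse.ArgumentParser(prog="llama stack")
subparsers = parser.add_subparsers(title="stack_subcommands")
StackListDeps.create(subparsers)  # same call StackParser makes below

args = parser.parse_args(["list-deps", "starter", "--format", "uv"])
args.func(args)  # dispatches to run_stack_list_deps_command via set_defaults(func=...)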
@@ -11,8 +11,8 @@ from llama_stack.cli.stack.list_stacks import StackListBuilds
 from llama_stack.cli.stack.utils import print_subcommand_description
 from llama_stack.cli.subcommand import Subcommand

-from .build import StackBuild
 from .list_apis import StackListApis
+from .list_deps import StackListDeps
 from .list_providers import StackListProviders
 from .remove import StackRemove
 from .run import StackRun

@@ -39,7 +39,7 @@ class StackParser(Subcommand):
         subparsers = self.parser.add_subparsers(title="stack_subcommands")

         # Add sub-commands
-        StackBuild.create(subparsers)
+        StackListDeps.create(subparsers)
         StackListApis.create(subparsers)
         StackListProviders.create(subparsers)
         StackRun.create(subparsers)
@ -4,7 +4,37 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
import sys
|
||||
from enum import Enum
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
Provider,
|
||||
StackRunConfig,
|
||||
StorageConfig,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.resolver import InvalidProviderError
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
InferenceStoreReference,
|
||||
KVStoreReference,
|
||||
ServerStoresConfig,
|
||||
SqliteKVStoreConfig,
|
||||
SqliteSqlStoreConfig,
|
||||
SqlStoreReference,
|
||||
)
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
|
||||
|
||||
|
||||
class ImageType(Enum):
|
||||
|
|
@ -19,3 +49,103 @@ def print_subcommand_description(parser, subparsers):
|
|||
description = subcommand.description
|
||||
description_text += f" {name:<21} {description}\n"
|
||||
parser.epilog = description_text
|
||||
|
||||
|
||||
def generate_run_config(
|
||||
build_config: BuildConfig,
|
||||
build_dir: Path,
|
||||
image_name: str,
|
||||
) -> Path:
|
||||
"""
|
||||
Generate a run.yaml template file for user to edit from a build.yaml file
|
||||
"""
|
||||
apis = list(build_config.distribution_spec.providers.keys())
|
||||
distro_dir = DISTRIBS_BASE_DIR / image_name
|
||||
run_config = StackRunConfig(
|
||||
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
|
||||
image_name=image_name,
|
||||
apis=apis,
|
||||
providers={},
|
||||
storage=StorageConfig(
|
||||
backends={
|
||||
"kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
|
||||
"sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
|
||||
},
|
||||
stores=ServerStoresConfig(
|
||||
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
|
||||
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
|
||||
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
|
||||
),
|
||||
),
|
||||
external_providers_dir=build_config.external_providers_dir
|
||||
if build_config.external_providers_dir
|
||||
else EXTERNAL_PROVIDERS_DIR,
|
||||
)
|
||||
# build providers dict
|
||||
provider_registry = get_provider_registry(build_config)
|
||||
for api in apis:
|
||||
run_config.providers[api] = []
|
||||
providers = build_config.distribution_spec.providers[api]
|
||||
|
||||
for provider in providers:
|
||||
pid = provider.provider_type.split("::")[-1]
|
||||
|
||||
p = provider_registry[Api(api)][provider.provider_type]
|
||||
if p.deprecation_error:
|
||||
raise InvalidProviderError(p.deprecation_error)
|
||||
|
||||
try:
|
||||
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
|
||||
except (ModuleNotFoundError, ValueError) as exc:
|
||||
# HACK ALERT:
|
||||
# This code executes after building is done, the import cannot work since the
|
||||
# package is either available in the venv or container - not available on the host.
|
||||
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
|
||||
# external
|
||||
cprint(
|
||||
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
# Set config_type to None to avoid UnboundLocalError
|
||||
config_type = None
|
||||
|
||||
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
||||
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
|
||||
else:
|
||||
config = {}
|
||||
|
||||
p_spec = Provider(
|
||||
provider_id=pid,
|
||||
provider_type=provider.provider_type,
|
||||
config=config,
|
||||
module=provider.module,
|
||||
)
|
||||
run_config.providers[api].append(p_spec)
|
||||
|
||||
run_config_file = build_dir / f"{image_name}-run.yaml"
|
||||
|
||||
with open(run_config_file, "w") as f:
|
||||
to_write = json.loads(run_config.model_dump_json())
|
||||
f.write(yaml.dump(to_write, sort_keys=False))
|
||||
|
||||
# Only print this message for non-container builds since it will be displayed before the
|
||||
# container is built
|
||||
# For non-container builds, the run.yaml is generated at the very end of the build process so it
|
||||
# makes sense to display this message
|
||||
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
|
||||
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
|
||||
return run_config_file
|
||||
|
||||
|
||||
@lru_cache
|
||||
def available_templates_specs() -> dict[str, BuildConfig]:
|
||||
import yaml
|
||||
|
||||
template_specs = {}
|
||||
for p in TEMPLATES_PATH.rglob("*build.yaml"):
|
||||
template_name = p.parent.name
|
||||
with open(p) as f:
|
||||
build_config = BuildConfig(**yaml.safe_load(f))
|
||||
template_specs[template_name] = build_config
|
||||
return template_specs
|
||||
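For orientation, the storage block that `generate_run_config` writes into `<image_name>-run.yaml` looks roughly like the sketch below (non-authoritative; the exact paths come from `DISTRIBS_BASE_DIR / image_name`):

```yaml
storage:
  backends:
    kv_default:
      type: kv_sqlite
      db_path: ~/.llama/distributions/<image_name>/kvstore.db
    sql_default:
      type: sql_sqlite
      db_path: ~/.llama/distributions/<image_name>/sql_store.db
  stores:
    metadata:
      backend: kv_default
      namespace: registry
    inference:
      backend: sql_default
      table_name: inference_store
    conversations:
      backend: sql_default
      table_name: openai_conversations
```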
|
|
|
|||
|
|
@ -41,7 +41,7 @@ class AccessRule(BaseModel):
|
|||
A rule defines a list of actions either to permit or to forbid. It may specify a
|
||||
principal or a resource that must match for the rule to take effect. The resource
|
||||
to match should be specified in the form of a type qualified identifier, e.g.
|
||||
model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
|
||||
model::my-model or vector_store::some-db, or a wildcard for all resources of a type,
|
||||
e.g. model::*. If the principal or resource are not specified, they will match all
|
||||
requests.
|
||||
|
||||
|
|
@ -79,9 +79,9 @@ class AccessRule(BaseModel):
|
|||
description: any user has read access to any resource created by a member of their team
|
||||
- forbid:
|
||||
actions: [create, read, delete]
|
||||
resource: vector_db::*
|
||||
resource: vector_store::*
|
||||
unless: user with admin in roles
|
||||
description: only user with admin role can use vector_db resources
|
||||
description: only user with admin role can use vector_store resources
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -1,410 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
|
||||
LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
|
||||
|
||||
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
|
||||
PYPI_VERSION=${PYPI_VERSION:-}
|
||||
BUILD_PLATFORM=${BUILD_PLATFORM:-}
|
||||
# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
|
||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||
UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
|
||||
|
||||
# mounting is not supported by docker buildx, so we use COPY instead
|
||||
USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
|
||||
# Path to the run.yaml file in the container
|
||||
RUN_CONFIG_PATH=/app/run.yaml
|
||||
|
||||
BUILD_CONTEXT_DIR=$(pwd)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Define color codes
|
||||
RED='\033[0;31m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Usage function
|
||||
usage() {
|
||||
echo "Usage: $0 --image-name <image_name> --container-base <container_base> --normal-deps <pip_dependencies> [--run-config <run_config>] [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
|
||||
echo "Example: $0 --image-name llama-stack-img --container-base python:3.12-slim --normal-deps 'numpy pandas' --run-config ./run.yaml --external-provider-deps 'foo' --optional-deps 'bar'"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
image_name=""
|
||||
container_base=""
|
||||
normal_deps=""
|
||||
external_provider_deps=""
|
||||
optional_deps=""
|
||||
run_config=""
|
||||
distro_or_config=""
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
case "$key" in
|
||||
--image-name)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --image-name requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
image_name="$2"
|
||||
shift 2
|
||||
;;
|
||||
--container-base)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --container-base requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
container_base="$2"
|
||||
shift 2
|
||||
;;
|
||||
--normal-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --normal-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
normal_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--external-provider-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --external-provider-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
external_provider_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--optional-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --optional-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
optional_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--run-config)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --run-config requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
run_config="$2"
|
||||
shift 2
|
||||
;;
|
||||
--distro-or-config)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --distro-or-config requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
distro_or_config="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1" >&2
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Check required arguments
|
||||
if [[ -z "$image_name" || -z "$container_base" || -z "$normal_deps" ]]; then
|
||||
echo "Error: --image-name, --container-base, and --normal-deps are required." >&2
|
||||
usage
|
||||
fi
|
||||
|
||||
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
|
||||
CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain}
|
||||
TEMP_DIR=$(mktemp -d)
|
||||
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
|
||||
source "$SCRIPT_DIR/common.sh"
|
||||
|
||||
add_to_container() {
|
||||
output_file="$TEMP_DIR/Containerfile"
|
||||
if [ -t 0 ]; then
|
||||
printf '%s\n' "$1" >>"$output_file"
|
||||
else
|
||||
cat >>"$output_file"
|
||||
fi
|
||||
}
|
||||
|
||||
if ! is_command_available "$CONTAINER_BINARY"; then
|
||||
printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
|
||||
add_to_container << EOF
|
||||
FROM $container_base
|
||||
WORKDIR /app
|
||||
|
||||
# We install the Python 3.12 dev headers and build tools so that any
|
||||
# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully.
|
||||
|
||||
RUN dnf -y update && dnf install -y iputils git net-tools wget \
|
||||
vim-minimal python3.12 python3.12-pip python3.12-wheel \
|
||||
python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
|
||||
ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all
|
||||
|
||||
ENV UV_SYSTEM_PYTHON=1
|
||||
RUN pip install uv
|
||||
EOF
|
||||
else
|
||||
add_to_container << EOF
|
||||
FROM $container_base
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
iputils-ping net-tools iproute2 dnsutils telnet \
|
||||
curl wget telnet git\
|
||||
procps psmisc lsof \
|
||||
traceroute \
|
||||
bubblewrap \
|
||||
gcc g++ \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV UV_SYSTEM_PYTHON=1
|
||||
RUN pip install uv
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Add pip dependencies first since llama-stack is what will change most often
|
||||
# so we can reuse layers.
|
||||
if [ -n "$normal_deps" ]; then
|
||||
read -ra pip_args <<< "$normal_deps"
|
||||
quoted_deps=$(printf " %q" "${pip_args[@]}")
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache $quoted_deps
|
||||
EOF
|
||||
fi
|
||||
|
||||
if [ -n "$optional_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$optional_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
read -ra pip_args <<< "$part"
|
||||
quoted_deps=$(printf " %q" "${pip_args[@]}")
|
||||
add_to_container <<EOF
|
||||
RUN uv pip install --no-cache $quoted_deps
|
||||
EOF
|
||||
done
|
||||
fi
|
||||
|
||||
if [ -n "$external_provider_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$external_provider_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
read -ra pip_args <<< "$part"
|
||||
quoted_deps=$(printf " %q" "${pip_args[@]}")
|
||||
add_to_container <<EOF
|
||||
RUN uv pip install --no-cache $quoted_deps
|
||||
EOF
|
||||
add_to_container <<EOF
|
||||
RUN python3 - <<PYTHON | uv pip install --no-cache -r -
|
||||
import importlib
|
||||
import sys
|
||||
|
||||
try:
|
||||
package_name = '$part'.split('==')[0].split('>=')[0].split('<=')[0].split('!=')[0].split('<')[0].split('>')[0]
|
||||
module = importlib.import_module(f'{package_name}.provider')
|
||||
spec = module.get_provider_spec()
|
||||
if hasattr(spec, 'pip_packages') and spec.pip_packages:
|
||||
if isinstance(spec.pip_packages, (list, tuple)):
|
||||
print('\n'.join(spec.pip_packages))
|
||||
except Exception as e:
|
||||
print(f'Error getting provider spec for {package_name}: {e}', file=sys.stderr)
|
||||
PYTHON
|
||||
EOF
|
||||
done
|
||||
fi
|
||||
|
||||
get_python_cmd() {
|
||||
if is_command_available python; then
|
||||
echo "python"
|
||||
elif is_command_available python3; then
|
||||
echo "python3"
|
||||
else
|
||||
echo "Error: Neither python nor python3 is installed. Please install Python to continue." >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
if [ -n "$run_config" ]; then
|
||||
# Copy the run config to the build context since it's an absolute path
|
||||
cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
|
||||
|
||||
# Parse the run.yaml configuration to identify external provider directories
|
||||
# If external providers are specified, copy their directory to the container
|
||||
# and update the configuration to reference the new container path
|
||||
python_cmd=$(get_python_cmd)
|
||||
external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')")
|
||||
external_providers_dir=$(eval echo "$external_providers_dir")
|
||||
if [ -n "$external_providers_dir" ]; then
|
||||
if [ -d "$external_providers_dir" ]; then
|
||||
echo "Copying external providers directory: $external_providers_dir"
|
||||
cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d"
|
||||
add_to_container << EOF
|
||||
COPY providers.d /.llama/providers.d
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
|
||||
rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak"
|
||||
else
|
||||
sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Copy run config into docker image
|
||||
add_to_container << EOF
|
||||
COPY run.yaml $RUN_CONFIG_PATH
|
||||
EOF
|
||||
fi
|
||||
|
||||
stack_mount="/app/llama-stack-source"
|
||||
client_mount="/app/llama-stack-client-source"
|
||||
|
||||
install_local_package() {
|
||||
local dir="$1"
|
||||
local mount_point="$2"
|
||||
local name="$3"
|
||||
|
||||
if [ ! -d "$dir" ]; then
|
||||
echo "${RED}Warning: $name is set but directory does not exist: $dir${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$USE_COPY_NOT_MOUNT" = "true" ]; then
|
||||
add_to_container << EOF
|
||||
COPY $dir $mount_point
|
||||
EOF
|
||||
fi
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache -e $mount_point
|
||||
EOF
|
||||
}
|
||||
|
||||
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
|
||||
install_local_package "$LLAMA_STACK_CLIENT_DIR" "$client_mount" "LLAMA_STACK_CLIENT_DIR"
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
install_local_package "$LLAMA_STACK_DIR" "$stack_mount" "LLAMA_STACK_DIR"
|
||||
else
|
||||
if [ -n "$TEST_PYPI_VERSION" ]; then
|
||||
# these packages are damaged in test-pypi, so install them first
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache fastapi libcst
|
||||
EOF
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
|
||||
--index-strategy unsafe-best-match \
|
||||
llama-stack==$TEST_PYPI_VERSION
|
||||
|
||||
EOF
|
||||
else
|
||||
if [ -n "$PYPI_VERSION" ]; then
|
||||
SPEC_VERSION="llama-stack==${PYPI_VERSION}"
|
||||
else
|
||||
SPEC_VERSION="llama-stack"
|
||||
fi
|
||||
add_to_container << EOF
|
||||
RUN uv pip install --no-cache $SPEC_VERSION
|
||||
EOF
|
||||
fi
|
||||
fi
|
||||
|
||||
# remove uv after installation
|
||||
add_to_container << EOF
|
||||
RUN pip uninstall -y uv
|
||||
EOF
|
||||
|
||||
# If a run config is provided, we use the llama stack CLI
|
||||
if [[ -n "$run_config" ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
|
||||
EOF
|
||||
elif [[ "$distro_or_config" != *.yaml ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
|
||||
EOF
|
||||
fi
|
||||
|
||||
# Add other required setup commands generic to all containers
|
||||
add_to_container << EOF
|
||||
|
||||
RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
|
||||
EOF
|
||||
|
||||
printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
|
||||
cat "$TEMP_DIR"/Containerfile
|
||||
printf "\n"
|
||||
|
||||
# Start building the CLI arguments
|
||||
CLI_ARGS=()
|
||||
|
||||
# Read CONTAINER_OPTS and put it in an array
|
||||
read -ra CLI_ARGS <<< "$CONTAINER_OPTS"
|
||||
|
||||
if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount")
|
||||
fi
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
|
||||
CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount")
|
||||
fi
|
||||
fi
|
||||
|
||||
if is_command_available selinuxenabled && selinuxenabled; then
|
||||
# Disable SELinux labels -- we don't want to relabel the llama-stack source dir
|
||||
CLI_ARGS+=("--security-opt" "label=disable")
|
||||
fi
|
||||
|
||||
# Set version tag based on PyPI version
|
||||
if [ -n "$PYPI_VERSION" ]; then
|
||||
version_tag="$PYPI_VERSION"
|
||||
elif [ -n "$TEST_PYPI_VERSION" ]; then
|
||||
version_tag="test-$TEST_PYPI_VERSION"
|
||||
elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_STACK_CLIENT_DIR" ]]; then
|
||||
version_tag="dev"
|
||||
else
|
||||
URL="https://pypi.org/pypi/llama-stack/json"
|
||||
version_tag=$(curl -s $URL | jq -r '.info.version')
|
||||
fi
|
||||
|
||||
# Add version tag to image name
|
||||
image_tag="$image_name:$version_tag"
|
||||
|
||||
# Detect platform architecture
|
||||
ARCH=$(uname -m)
|
||||
if [ -n "$BUILD_PLATFORM" ]; then
|
||||
CLI_ARGS+=("--platform" "$BUILD_PLATFORM")
|
||||
elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
|
||||
CLI_ARGS+=("--platform" "linux/arm64")
|
||||
elif [ "$ARCH" = "x86_64" ]; then
|
||||
CLI_ARGS+=("--platform" "linux/amd64")
|
||||
else
|
||||
echo "Unsupported architecture: $ARCH"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "PWD: $(pwd)"
|
||||
echo "Containerfile: $TEMP_DIR/Containerfile"
|
||||
set -x
|
||||
|
||||
$CONTAINER_BINARY build \
|
||||
"${CLI_ARGS[@]}" \
|
||||
-t "$image_tag" \
|
||||
-f "$TEMP_DIR/Containerfile" \
|
||||
"$BUILD_CONTEXT_DIR"
|
||||
|
||||
# clean up tmp/configs
|
||||
rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR"
|
||||
set +x
|
||||
|
||||
echo "Success!"
|
||||
|
|
@ -1,220 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
|
||||
LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
|
||||
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
|
||||
# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
|
||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||
UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
|
||||
UV_SYSTEM_PYTHON=${UV_SYSTEM_PYTHON:-}
|
||||
VIRTUAL_ENV=${VIRTUAL_ENV:-}
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Define color codes
|
||||
RED='\033[0;31m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
|
||||
source "$SCRIPT_DIR/common.sh"
|
||||
|
||||
# Usage function
|
||||
usage() {
|
||||
echo "Usage: $0 --env-name <env_name> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
|
||||
echo "Example: $0 --env-name mybuild --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
env_name=""
|
||||
normal_deps=""
|
||||
external_provider_deps=""
|
||||
optional_deps=""
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
case "$key" in
|
||||
--env-name)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --env-name requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
env_name="$2"
|
||||
shift 2
|
||||
;;
|
||||
--normal-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --normal-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
normal_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--external-provider-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --external-provider-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
external_provider_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
--optional-deps)
|
||||
if [[ -z "$2" || "$2" == --* ]]; then
|
||||
echo "Error: --optional-deps requires a string value" >&2
|
||||
usage
|
||||
fi
|
||||
optional_deps="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1" >&2
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Check required arguments
|
||||
if [[ -z "$env_name" || -z "$normal_deps" ]]; then
|
||||
echo "Error: --env-name and --normal-deps are required." >&2
|
||||
usage
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
|
||||
fi
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
|
||||
echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR"
|
||||
fi
|
||||
|
||||
ENVNAME=""
|
||||
|
||||
# pre-run checks to make sure we can proceed with the installation
|
||||
pre_run_checks() {
|
||||
local env_name="$1"
|
||||
|
||||
if ! is_command_available uv; then
|
||||
echo "uv is not installed, trying to install it."
|
||||
if ! is_command_available pip; then
|
||||
echo "pip is not installed, cannot automatically install 'uv'."
|
||||
echo "Follow this link to install it:"
|
||||
echo "https://docs.astral.sh/uv/getting-started/installation/"
|
||||
exit 1
|
||||
else
|
||||
pip install uv
|
||||
fi
|
||||
fi
|
||||
|
||||
# checking if an environment with the same name already exists
|
||||
if [ -d "$env_name" ]; then
|
||||
echo "Environment '$env_name' already exists, re-using it."
|
||||
fi
|
||||
}
|
||||
|
||||
run() {
|
||||
# Use only global variables set by flag parser
|
||||
if [ -n "$UV_SYSTEM_PYTHON" ] || [ "$env_name" == "__system__" ]; then
|
||||
echo "Installing dependencies in system Python environment"
|
||||
export UV_SYSTEM_PYTHON=1
|
||||
elif [ "$VIRTUAL_ENV" == "$env_name" ]; then
|
||||
echo "Virtual environment $env_name is already active"
|
||||
else
|
||||
echo "Using virtual environment $env_name"
|
||||
uv venv "$env_name"
|
||||
source "$env_name/bin/activate"
|
||||
fi
|
||||
|
||||
if [ -n "$TEST_PYPI_VERSION" ]; then
|
||||
uv pip install fastapi libcst
|
||||
uv pip install --extra-index-url https://test.pypi.org/simple/ \
|
||||
--index-strategy unsafe-best-match \
|
||||
llama-stack=="$TEST_PYPI_VERSION" \
|
||||
$normal_deps
|
||||
if [ -n "$optional_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$optional_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "$part"
|
||||
uv pip install $part
|
||||
done
|
||||
fi
|
||||
if [ -n "$external_provider_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$external_provider_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "$part"
|
||||
uv pip install "$part"
|
||||
done
|
||||
fi
|
||||
else
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
# only warn if DIR does not start with "git+"
|
||||
if [ ! -d "$LLAMA_STACK_DIR" ] && [[ "$LLAMA_STACK_DIR" != git+* ]]; then
|
||||
printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
printf "Installing from LLAMA_STACK_DIR: %s\n" "$LLAMA_STACK_DIR"
|
||||
# editable only if LLAMA_STACK_DIR does not start with "git+"
|
||||
if [[ "$LLAMA_STACK_DIR" != git+* ]]; then
|
||||
EDITABLE="-e"
|
||||
else
|
||||
EDITABLE=""
|
||||
fi
|
||||
uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_DIR"
|
||||
else
|
||||
uv pip install --no-cache-dir llama-stack
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
|
||||
# only warn if DIR does not start with "git+"
|
||||
if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ] && [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
|
||||
printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR"
|
||||
# editable only if LLAMA_STACK_CLIENT_DIR does not start with "git+"
|
||||
if [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
|
||||
EDITABLE="-e"
|
||||
else
|
||||
EDITABLE=""
|
||||
fi
|
||||
uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_CLIENT_DIR"
|
||||
fi
|
||||
|
||||
printf "Installing pip dependencies\n"
|
||||
uv pip install $normal_deps
|
||||
if [ -n "$optional_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$optional_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "Installing special provider module: $part"
|
||||
uv pip install $part
|
||||
done
|
||||
fi
|
||||
if [ -n "$external_provider_deps" ]; then
|
||||
IFS='#' read -ra parts <<<"$external_provider_deps"
|
||||
for part in "${parts[@]}"; do
|
||||
echo "Installing external provider module: $part"
|
||||
uv pip install "$part"
|
||||
echo "Getting provider spec for module: $part and installing dependencies"
|
||||
package_name=$(echo "$part" | sed 's/[<>=!].*//')
|
||||
python3 -c "
|
||||
import importlib
|
||||
import sys
|
||||
try:
|
||||
module = importlib.import_module(f'$package_name.provider')
|
||||
spec = module.get_provider_spec()
|
||||
if hasattr(spec, 'pip_packages') and spec.pip_packages:
|
||||
print('\\n'.join(spec.pip_packages))
|
||||
except Exception as e:
|
||||
print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr)
|
||||
" | uv pip install -r -
|
||||
done
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
pre_run_checks "$env_name"
|
||||
run
|
||||
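Both install paths above resolve extra dependencies by importing `<package>.provider` and calling `get_provider_spec()`, then installing whatever `pip_packages` the returned spec lists. A minimal sketch of such a module, with purely illustrative names, might look like:

```python
# my_provider/provider.py -- hypothetical external provider package layout
from llama_stack.providers.datatypes import Api, InlineProviderSpec


def get_provider_spec() -> InlineProviderSpec:
    # The install scripts only read `pip_packages` from this object; the other
    # fields mirror what an in-tree provider spec carries.
    return InlineProviderSpec(
        api=Api.inference,                        # API surface this provider implements
        provider_type="inline::my-provider",      # illustrative identifier
        module="my_provider",
        config_class="my_provider.config.MyProviderConfig",
        pip_packages=["some-extra-dependency"],   # installed by the scripts above
        description="Example external provider (illustrative).",
    )
```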
|
|
@ -64,7 +64,7 @@ def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec
|
|||
if config.apis:
|
||||
apis_to_serve = config.apis
|
||||
else:
|
||||
apis_to_serve = [a.value for a in Api if a not in (Api.telemetry, Api.inspect, Api.providers)]
|
||||
apis_to_serve = [a.value for a in Api if a not in (Api.inspect, Api.providers)]
|
||||
|
||||
for api_str in apis_to_serve:
|
||||
api = Api(api_str)
|
||||
|
|
@ -159,6 +159,37 @@ def upgrade_from_routing_table(
|
|||
config_dict["apis"] = config_dict["apis_to_serve"]
|
||||
config_dict.pop("apis_to_serve", None)
|
||||
|
||||
# Add default storage config if not present
|
||||
if "storage" not in config_dict:
|
||||
config_dict["storage"] = {
|
||||
"backends": {
|
||||
"kv_default": {
|
||||
"type": "kv_sqlite",
|
||||
"db_path": "~/.llama/kvstore.db",
|
||||
},
|
||||
"sql_default": {
|
||||
"type": "sql_sqlite",
|
||||
"db_path": "~/.llama/sql_store.db",
|
||||
},
|
||||
},
|
||||
"stores": {
|
||||
"metadata": {
|
||||
"namespace": "registry",
|
||||
"backend": "kv_default",
|
||||
},
|
||||
"inference": {
|
||||
"table_name": "inference_store",
|
||||
"backend": "sql_default",
|
||||
"max_write_queue_size": 10000,
|
||||
"num_writers": 4,
|
||||
},
|
||||
"conversations": {
|
||||
"table_name": "openai_conversations",
|
||||
"backend": "sql_default",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return config_dict
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
import secrets
|
||||
import time
|
||||
from typing import Any
|
||||
|
|
@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import (
|
|||
Conversations,
|
||||
Metadata,
|
||||
)
|
||||
from llama_stack.core.datatypes import AccessRule
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
||||
from llama_stack.core.datatypes import AccessRule, StackRunConfig
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
|
||||
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import (
|
||||
SqliteSqlStoreConfig,
|
||||
SqlStoreConfig,
|
||||
sqlstore_impl,
|
||||
)
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
|
||||
|
||||
logger = get_logger(name=__name__, category="openai_conversations")
|
||||
|
||||
|
|
@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations")
|
|||
class ConversationServiceConfig(BaseModel):
|
||||
"""Configuration for the built-in conversation service.
|
||||
|
||||
:param conversations_store: SQL store configuration for conversations (defaults to SQLite)
|
||||
:param run_config: Stack run configuration for resolving persistence
|
||||
:param policy: Access control rules
|
||||
"""
|
||||
|
||||
conversations_store: SqlStoreConfig = SqliteSqlStoreConfig(
|
||||
db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
|
||||
)
|
||||
run_config: StackRunConfig
|
||||
policy: list[AccessRule] = []
|
||||
|
||||
|
||||
|
|
@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations):
|
|||
self.deps = deps
|
||||
self.policy = config.policy
|
||||
|
||||
base_sql_store = sqlstore_impl(config.conversations_store)
|
||||
# Use conversations store reference from run config
|
||||
conversations_ref = config.run_config.storage.stores.conversations
|
||||
if not conversations_ref:
|
||||
raise ValueError("storage.stores.conversations must be configured in run config")
|
||||
|
||||
base_sql_store = sqlstore_impl(conversations_ref)
|
||||
self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Initialize the store and create tables."""
|
||||
if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
|
||||
os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)
|
||||
|
||||
await self.sql_store.create_table(
|
||||
"openai_conversations",
|
||||
{
|
||||
|
|
@ -135,7 +129,7 @@ class ConversationServiceImpl(Conversations):
|
|||
object="conversation",
|
||||
)
|
||||
|
||||
logger.info(f"Created conversation {conversation_id}")
|
||||
logger.debug(f"Created conversation {conversation_id}")
|
||||
return conversation
|
||||
|
||||
async def get_conversation(self, conversation_id: str) -> Conversation:
|
||||
|
|
@ -161,7 +155,7 @@ class ConversationServiceImpl(Conversations):
|
|||
"""Delete a conversation with the given ID."""
|
||||
await self.sql_store.delete(table="openai_conversations", where={"id": conversation_id})
|
||||
|
||||
logger.info(f"Deleted conversation {conversation_id}")
|
||||
logger.debug(f"Deleted conversation {conversation_id}")
|
||||
return ConversationDeletedResource(id=conversation_id)
|
||||
|
||||
def _validate_conversation_id(self, conversation_id: str) -> None:
|
||||
|
|
@ -222,7 +216,7 @@ class ConversationServiceImpl(Conversations):
|
|||
|
||||
created_items.append(item_dict)
|
||||
|
||||
logger.info(f"Created {len(created_items)} items in conversation {conversation_id}")
|
||||
logger.debug(f"Created {len(created_items)} items in conversation {conversation_id}")
|
||||
|
||||
# Convert created items (dicts) to proper ConversationItem types
|
||||
adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
|
||||
|
|
@ -255,6 +249,12 @@ class ConversationServiceImpl(Conversations):
|
|||
|
||||
async def list(self, conversation_id: str, after=NOT_GIVEN, include=NOT_GIVEN, limit=NOT_GIVEN, order=NOT_GIVEN):
|
||||
"""List items in the conversation."""
|
||||
if not conversation_id:
|
||||
raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
|
||||
|
||||
# check if conversation exists
|
||||
await self.get_conversation(conversation_id)
|
||||
|
||||
result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id})
|
||||
records = result.data
|
||||
|
||||
|
|
@ -305,5 +305,5 @@ class ConversationServiceImpl(Conversations):
|
|||
table="conversation_items", where={"id": item_id, "conversation_id": conversation_id}
|
||||
)
|
||||
|
||||
logger.info(f"Deleted item {item_id} from conversation {conversation_id}")
|
||||
logger.debug(f"Deleted item {item_id} from conversation {conversation_id}")
|
||||
return ConversationItemDeletedResource(id=item_id)
|
||||
|
|
|
|||
|
|
@ -23,12 +23,15 @@ from llama_stack.apis.scoring import Scoring
|
|||
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
|
||||
from llama_stack.apis.shields import Shield, ShieldInput
|
||||
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
|
||||
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
|
||||
from llama_stack.core.access_control.datatypes import AccessRule
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
KVStoreReference,
|
||||
StorageBackendType,
|
||||
StorageConfig,
|
||||
)
|
||||
from llama_stack.providers.datatypes import Api, ProviderSpec
|
||||
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
|
||||
|
||||
LLAMA_STACK_BUILD_CONFIG_VERSION = 2
|
||||
LLAMA_STACK_RUN_CONFIG_VERSION = 2
|
||||
|
|
@ -68,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner):
|
|||
pass
|
||||
|
||||
|
||||
class VectorDBWithOwner(VectorDB, ResourceWithOwner):
|
||||
class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
|
||||
pass
|
||||
|
||||
|
||||
|
|
@ -88,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
|
|||
pass
|
||||
|
||||
|
||||
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup
|
||||
RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
|
||||
|
||||
RoutableObjectWithProvider = Annotated[
|
||||
ModelWithOwner
|
||||
| ShieldWithOwner
|
||||
| VectorDBWithOwner
|
||||
| VectorStoreWithOwner
|
||||
| DatasetWithOwner
|
||||
| ScoringFnWithOwner
|
||||
| BenchmarkWithOwner
|
||||
|
|
@ -176,6 +179,22 @@ class DistributionSpec(BaseModel):
|
|||
)
|
||||
|
||||
|
||||
class TelemetryConfig(BaseModel):
|
||||
"""
|
||||
Configuration for telemetry.
|
||||
|
||||
Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
|
||||
for env variables to configure the OpenTelemetry SDK.
|
||||
|
||||
Example:
|
||||
```bash
|
||||
OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
|
||||
```
|
||||
"""
|
||||
|
||||
enabled: bool = Field(default=False, description="enable or disable telemetry")
|
||||
|
||||
|
||||
class LoggingConfig(BaseModel):
|
||||
category_levels: dict[str, str] = Field(
|
||||
default_factory=dict,
|
||||
|
|
@ -335,12 +354,32 @@ class AuthenticationRequiredError(Exception):
|
|||
pass
|
||||
|
||||
|
||||
class QualifiedModel(BaseModel):
|
||||
"""A qualified model identifier, consisting of a provider ID and a model ID."""
|
||||
|
||||
provider_id: str
|
||||
model_id: str
|
||||
|
||||
|
||||
class VectorStoresConfig(BaseModel):
|
||||
"""Configuration for vector stores in the stack."""
|
||||
|
||||
default_provider_id: str | None = Field(
|
||||
default=None,
|
||||
description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
|
||||
)
|
||||
default_embedding_model: QualifiedModel | None = Field(
|
||||
default=None,
|
||||
description="Default embedding model configuration for vector stores.",
|
||||
)
|
||||
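In a run config, the corresponding section might be filled in like this (provider and model IDs are placeholders, not defaults shipped with the stack):

```yaml
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: sentence-transformers
    model_id: all-MiniLM-L6-v2
```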
|
||||
|
||||
class QuotaPeriod(StrEnum):
|
||||
DAY = "day"
|
||||
|
||||
|
||||
class QuotaConfig(BaseModel):
|
||||
kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
|
||||
kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
|
||||
anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
|
||||
authenticated_max_requests: int = Field(
|
||||
default=1000, description="Max requests for authenticated clients per period"
|
||||
|
|
@ -383,6 +422,18 @@ def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | N
|
|||
raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}")
|
||||
|
||||
|
||||
class RegisteredResources(BaseModel):
|
||||
"""Registry of resources available in the distribution."""
|
||||
|
||||
models: list[ModelInput] = Field(default_factory=list)
|
||||
shields: list[ShieldInput] = Field(default_factory=list)
|
||||
vector_stores: list[VectorStoreInput] = Field(default_factory=list)
|
||||
datasets: list[DatasetInput] = Field(default_factory=list)
|
||||
scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
|
||||
benchmarks: list[BenchmarkInput] = Field(default_factory=list)
|
||||
tool_groups: list[ToolGroupInput] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ServerConfig(BaseModel):
|
||||
port: int = Field(
|
||||
default=8321,
|
||||
|
|
@ -422,18 +473,6 @@ class ServerConfig(BaseModel):
|
|||
)
|
||||
|
||||
|
||||
class InferenceStoreConfig(BaseModel):
|
||||
sql_store_config: SqlStoreConfig
|
||||
max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
|
||||
num_writers: int = Field(default=4, description="Number of concurrent background writers")
|
||||
|
||||
|
||||
class ResponsesStoreConfig(BaseModel):
|
||||
sql_store_config: SqlStoreConfig
|
||||
max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store")
|
||||
num_writers: int = Field(default=4, description="Number of concurrent background writers")
|
||||
|
||||
|
||||
class StackRunConfig(BaseModel):
|
||||
version: int = LLAMA_STACK_RUN_CONFIG_VERSION
|
||||
|
||||
|
|
@ -460,39 +499,19 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re
|
|||
can be instantiated multiple times (with different configs) if necessary.
|
||||
""",
|
||||
)
|
||||
metadata_store: KVStoreConfig | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
Configuration for the persistence store used by the distribution registry. If not specified,
|
||||
a default SQLite store will be used.""",
|
||||
storage: StorageConfig = Field(
|
||||
description="Catalog of named storage backends and references available to the stack",
|
||||
)
|
||||
|
||||
inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
Configuration for the persistence store used by the inference API. Can be either a
|
||||
InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
|
||||
If not specified, a default SQLite store will be used.""",
|
||||
registered_resources: RegisteredResources = Field(
|
||||
default_factory=RegisteredResources,
|
||||
description="Registry of resources available in the distribution",
|
||||
)
|
||||
|
||||
conversations_store: SqlStoreConfig | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
Configuration for the persistence store used by the conversations API.
|
||||
If not specified, a default SQLite store will be used.""",
|
||||
)
|
||||
|
||||
# registry of "resources" in the distribution
|
||||
models: list[ModelInput] = Field(default_factory=list)
|
||||
shields: list[ShieldInput] = Field(default_factory=list)
|
||||
vector_dbs: list[VectorDBInput] = Field(default_factory=list)
|
||||
datasets: list[DatasetInput] = Field(default_factory=list)
|
||||
scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
|
||||
benchmarks: list[BenchmarkInput] = Field(default_factory=list)
|
||||
tool_groups: list[ToolGroupInput] = Field(default_factory=list)
|
||||
|
||||
logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
|
||||
|
||||
telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
|
||||
|
||||
server: ServerConfig = Field(
|
||||
default_factory=ServerConfig,
|
||||
description="Configuration for the HTTP(S) server",
|
||||
|
|
@ -508,6 +527,11 @@ If not specified, a default SQLite store will be used.""",
|
|||
description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
|
||||
)
|
||||
|
||||
vector_stores: VectorStoresConfig | None = Field(
|
||||
default=None,
|
||||
description="Configuration for vector stores, including default embedding model",
|
||||
)
|
||||
|
||||
@field_validator("external_providers_dir")
|
||||
@classmethod
|
||||
def validate_external_providers_dir(cls, v):
|
||||
|
|
@ -517,6 +541,49 @@ If not specified, a default SQLite store will be used.""",
|
|||
return Path(v)
|
||||
return v
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_server_stores(self) -> "StackRunConfig":
|
||||
backend_map = self.storage.backends
|
||||
stores = self.storage.stores
|
||||
kv_backends = {
|
||||
name
|
||||
for name, cfg in backend_map.items()
|
||||
if cfg.type
|
||||
in {
|
||||
StorageBackendType.KV_REDIS,
|
||||
StorageBackendType.KV_SQLITE,
|
||||
StorageBackendType.KV_POSTGRES,
|
||||
StorageBackendType.KV_MONGODB,
|
||||
}
|
||||
}
|
||||
sql_backends = {
|
||||
name
|
||||
for name, cfg in backend_map.items()
|
||||
if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
|
||||
}
|
||||
|
||||
def _ensure_backend(reference, expected_set, store_name: str) -> None:
|
||||
if reference is None:
|
||||
return
|
||||
backend_name = reference.backend
|
||||
if backend_name not in backend_map:
|
||||
raise ValueError(
|
||||
f"{store_name} references unknown backend '{backend_name}'. "
|
||||
f"Available backends: {sorted(backend_map)}"
|
||||
)
|
||||
if backend_name not in expected_set:
|
||||
raise ValueError(
|
||||
f"{store_name} references backend '{backend_name}' of type "
|
||||
f"'{backend_map[backend_name].type.value}', but a backend of type "
|
||||
f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
|
||||
)
|
||||
|
||||
_ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
|
||||
_ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
|
||||
_ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
|
||||
_ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
|
||||
return self
|
||||
|
||||
|
||||
class BuildConfig(BaseModel):
|
||||
version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ from llama_stack.providers.datatypes import (
|
|||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
||||
INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations}
|
||||
INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.telemetry}
|
||||
|
||||
|
||||
def stack_apis() -> list[Api]:
|
||||
|
|
@ -63,6 +63,10 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
|
|||
routing_table_api=Api.tool_groups,
|
||||
router_api=Api.tool_runtime,
|
||||
),
|
||||
AutoRoutedApiInfo(
|
||||
routing_table_api=Api.vector_stores,
|
||||
router_api=Api.vector_io,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -47,13 +47,8 @@ from llama_stack.core.stack import (
|
|||
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
||||
from llama_stack.core.utils.exec import in_notebook
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.telemetry.tracing import (
|
||||
CURRENT_TRACE_CONTEXT,
|
||||
end_trace,
|
||||
setup_logger,
|
||||
start_trace,
|
||||
)
|
||||
from llama_stack.log import get_logger, setup_logging
|
||||
from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
|
||||
from llama_stack.strong_typing.inspection import is_unwrapped_body_param
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
|
@ -205,10 +200,14 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
skip_logger_removal: bool = False,
|
||||
):
|
||||
super().__init__()
|
||||
# Initialize logging from environment variables first
|
||||
setup_logging()
|
||||
|
||||
# when using the library client, we should not log to console since many
|
||||
# of our logs are intended for server-side usage
|
||||
current_sinks = os.environ.get("TELEMETRY_SINKS", "sqlite").split(",")
|
||||
os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
|
||||
if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
|
||||
current_sinks = sinks_from_env.strip().lower().split(",")
|
||||
os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
|
||||
|
||||
if in_notebook():
|
||||
import nest_asyncio
|
||||
|
|
@ -282,7 +281,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
else:
|
||||
prefix = "!" if in_notebook() else ""
|
||||
cprint(
|
||||
f"Please run:\n\n{prefix}llama stack build --distro {self.config_path_or_distro_name} --image-type venv\n\n",
|
||||
f"Please run:\n\n{prefix}llama stack list-deps {self.config_path_or_distro_name} | xargs -L1 uv pip install\n\n",
|
||||
"yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
|
@ -496,12 +495,11 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
return await response.parse()
|
||||
|
||||
def _convert_body(self, func: Any, body: dict | None = None, exclude_params: set[str] | None = None) -> dict:
|
||||
if not body:
|
||||
return {}
|
||||
|
||||
body = body or {}
|
||||
exclude_params = exclude_params or set()
|
||||
sig = inspect.signature(func)
|
||||
params_list = [p for p in sig.parameters.values() if p.name != "self"]
|
||||
|
||||
# Flatten if there's a single unwrapped body parameter (BaseModel or Annotated[BaseModel, Body(embed=False)])
|
||||
if len(params_list) == 1:
|
||||
param = params_list[0]
|
||||
|
|
@ -530,11 +528,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
converted_body[param_name] = value
|
||||
else:
|
||||
converted_body[param_name] = convert_to_pydantic(param.annotation, value)
|
||||
elif unwrapped_body_param and param.name == unwrapped_body_param.name:
|
||||
# This is the unwrapped body param - construct it from remaining body keys
|
||||
base_type = get_args(param.annotation)[0]
|
||||
# Extract only the keys that aren't already used by other params
|
||||
remaining_keys = {k: v for k, v in body.items() if k not in converted_body}
|
||||
converted_body[param.name] = base_type(**remaining_keys)
|
||||
|
||||
# handle unwrapped body parameter after processing all named parameters
|
||||
if unwrapped_body_param:
|
||||
base_type = get_args(unwrapped_body_param.annotation)[0]
|
||||
# extract only keys not already used by other params
|
||||
remaining_keys = {k: v for k, v in body.items() if k not in converted_body}
|
||||
converted_body[unwrapped_body_param.name] = base_type(**remaining_keys)
|
||||
|
||||
return converted_body
|
||||
|
|
|
|||
|
|
@ -11,9 +11,8 @@ from pydantic import BaseModel
|
|||
|
||||
from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
|
||||
from llama_stack.core.datatypes import StackRunConfig
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
||||
from llama_stack.core.storage.datatypes import KVStoreReference
|
||||
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
|
||||
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
|
||||
|
||||
|
||||
class PromptServiceConfig(BaseModel):
|
||||
|
|
@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts):
|
|||
self.kvstore: KVStore
|
||||
|
||||
async def initialize(self) -> None:
|
||||
kvstore_config = SqliteKVStoreConfig(
|
||||
db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix()
|
||||
)
|
||||
self.kvstore = await kvstore_impl(kvstore_config)
|
||||
# Use metadata store backend with prompts-specific namespace
|
||||
metadata_ref = self.config.run_config.storage.stores.metadata
|
||||
if not metadata_ref:
|
||||
raise ValueError("storage.stores.metadata must be configured in run config")
|
||||
prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
|
||||
self.kvstore = await kvstore_impl(prompts_ref)
|
||||
|
||||
def _get_default_key(self, prompt_id: str) -> str:
|
||||
"""Get the KVStore key that stores the default version number."""
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import importlib
|
||||
import importlib.metadata
|
||||
import inspect
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -29,6 +30,7 @@ from llama_stack.apis.shields import Shields
|
|||
from llama_stack.apis.telemetry import Telemetry
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.core.client import get_client_impl
|
||||
from llama_stack.core.datatypes import (
|
||||
|
|
@ -47,6 +49,7 @@ from llama_stack.providers.datatypes import (
|
|||
Api,
|
||||
BenchmarksProtocolPrivate,
|
||||
DatasetsProtocolPrivate,
|
||||
InlineProviderSpec,
|
||||
ModelsProtocolPrivate,
|
||||
ProviderSpec,
|
||||
RemoteProviderConfig,
|
||||
|
|
@ -79,10 +82,10 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
|
|||
Api.inspect: Inspect,
|
||||
Api.batches: Batches,
|
||||
Api.vector_io: VectorIO,
|
||||
Api.vector_stores: VectorStore,
|
||||
Api.models: Models,
|
||||
Api.safety: Safety,
|
||||
Api.shields: Shields,
|
||||
Api.telemetry: Telemetry,
|
||||
Api.datasetio: DatasetIO,
|
||||
Api.datasets: Datasets,
|
||||
Api.scoring: Scoring,
|
||||
|
|
@ -95,6 +98,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
|
|||
Api.files: Files,
|
||||
Api.prompts: Prompts,
|
||||
Api.conversations: Conversations,
|
||||
Api.telemetry: Telemetry,
|
||||
}
|
||||
|
||||
if external_apis:
|
||||
|
|
@ -204,9 +208,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
|
|||
module="llama_stack.core.routers",
|
||||
routing_table_api=info.routing_table_api,
|
||||
api_dependencies=[info.routing_table_api],
|
||||
# Add telemetry as an optional dependency to all auto-routed providers
|
||||
optional_api_dependencies=[Api.telemetry],
|
||||
deps__=([info.routing_table_api.value, Api.telemetry.value]),
|
||||
deps__=([info.routing_table_api.value]),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
|
@ -239,6 +241,24 @@ def validate_and_prepare_providers(
|
|||
key = api_str if api not in router_apis else f"inner-{api_str}"
|
||||
providers_with_specs[key] = specs
|
||||
|
||||
# TODO: remove this logic, telemetry should not have providers.
|
||||
# if telemetry has been enabled in the config initialize our internal impl
|
||||
# telemetry is not an external API so it SHOULD NOT be auto-routed.
|
||||
if run_config.telemetry.enabled:
|
||||
specs = {}
|
||||
p = InlineProviderSpec(
|
||||
api=Api.telemetry,
|
||||
provider_type="inline::meta-reference",
|
||||
pip_packages=[],
|
||||
optional_api_dependencies=[Api.datasetio],
|
||||
module="llama_stack.providers.inline.telemetry.meta_reference",
|
||||
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
|
||||
description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
|
||||
)
|
||||
spec = ProviderWithSpec(spec=p, provider_type="inline::meta-reference", provider_id="meta-reference")
|
||||
specs["meta-reference"] = spec
|
||||
providers_with_specs["telemetry"] = specs
|
||||
|
||||
return providers_with_specs
|
||||
|
||||
|
||||
|
|
@ -389,6 +409,8 @@ async def instantiate_provider(
|
|||
args = [config, deps]
|
||||
if "policy" in inspect.signature(getattr(module, method)).parameters:
|
||||
args.append(policy)
|
||||
if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
|
||||
args.append(run_config.telemetry.enabled)
|
||||
|
||||
fn = getattr(module, method)
|
||||
impl = await fn(*args)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,10 @@

from typing import Any

from llama_stack.core.datatypes import AccessRule, RoutedProtocol
from llama_stack.core.datatypes import (
AccessRule,
RoutedProtocol,
)
from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable

@ -26,6 +29,7 @@ async def get_routing_table_impl(
from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
from ..routing_tables.shields import ShieldsRoutingTable
from ..routing_tables.toolgroups import ToolGroupsRoutingTable
from ..routing_tables.vector_stores import VectorStoresRoutingTable

api_to_tables = {
"models": ModelsRoutingTable,

@ -34,6 +38,7 @@ async def get_routing_table_impl(
"scoring_functions": ScoringFunctionsRoutingTable,
"benchmarks": BenchmarksRoutingTable,
"tool_groups": ToolGroupsRoutingTable,
"vector_stores": VectorStoresRoutingTable,
}

if api.value not in api_to_tables:

@ -63,26 +68,34 @@ async def get_auto_router_impl(
"eval": EvalRouter,
"tool_runtime": ToolRuntimeRouter,
}
api_to_deps = {
"inference": {"telemetry": Api.telemetry},
}
if api.value not in api_to_routers:
raise ValueError(f"API {api.value} not found in router map")

api_to_dep_impl = {}
for dep_name, dep_api in api_to_deps.get(api.value, {}).items():
if dep_api in deps:
api_to_dep_impl[dep_name] = deps[dep_api]
if run_config.telemetry.enabled:
api_to_deps = {
"inference": {"telemetry": Api.telemetry},
}
for dep_name, dep_api in api_to_deps.get(api.value, {}).items():
if dep_api in deps:
api_to_dep_impl[dep_name] = deps[dep_api]

# TODO: move pass configs to routers instead
if api == Api.inference and run_config.inference_store:
if api == Api.inference:
inference_ref = run_config.storage.stores.inference
if not inference_ref:
raise ValueError("storage.stores.inference must be configured in run config")

inference_store = InferenceStore(
config=run_config.inference_store,
reference=inference_ref,
policy=policy,
)
await inference_store.initialize()
api_to_dep_impl["store"] = inference_store

elif api == Api.vector_io:
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores

impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
await impl.initialize()
return impl
@ -37,24 +37,24 @@ class ToolRuntimeRouter(ToolRuntime):
async def query(
self,
content: InterleavedContent,
vector_db_ids: list[str],
vector_store_ids: list[str],
query_config: RAGQueryConfig | None = None,
) -> RAGQueryResult:
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}")
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}")
provider = await self.routing_table.get_provider_impl("knowledge_search")
return await provider.query(content, vector_db_ids, query_config)
return await provider.query(content, vector_store_ids, query_config)

async def insert(
self,
documents: list[RAGDocument],
vector_db_id: str,
vector_store_id: str,
chunk_size_in_tokens: int = 512,
) -> None:
logger.debug(
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
)
provider = await self.routing_table.get_provider_impl("insert_into_memory")
return await provider.insert(documents, vector_db_id, chunk_size_in_tokens)
return await provider.insert(documents, vector_store_id, chunk_size_in_tokens)

def __init__(
self,
@ -31,6 +31,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable

@ -43,9 +44,11 @@ class VectorIORouter(VectorIO):
def __init__(
self,
routing_table: RoutingTable,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
logger.debug("Initializing VectorIORouter")
self.routing_table = routing_table
self.vector_stores_config = vector_stores_config

async def initialize(self) -> None:
logger.debug("VectorIORouter.initialize")

@ -68,33 +71,16 @@ class VectorIORouter(VectorIO):

raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model")

async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> None:
logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
await self.routing_table.register_vector_db(
vector_db_id,
embedding_model,
embedding_dimension,
provider_id,
vector_db_name,
provider_vector_db_id,
)

async def insert_chunks(
self,
vector_db_id: str,
chunks: list[Chunk],
ttl_seconds: int | None = None,
) -> None:
doc_ids = [chunk.document_id for chunk in chunks[:3]]
logger.debug(
f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}",
f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, "
f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}"
)
provider = await self.routing_table.get_provider_impl(vector_db_id)
return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)

@ -120,13 +106,18 @@ class VectorIORouter(VectorIO):
embedding_dimension = extra.get("embedding_dimension")
provider_id = extra.get("provider_id")

logger.debug(f"VectorIORouter.openai_create_vector_store: name={params.name}, provider_id={provider_id}")
# Use default embedding model if not specified
if (
embedding_model is None
and self.vector_stores_config
and self.vector_stores_config.default_embedding_model is not None
):
# Construct the full model ID with provider prefix
embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id
model_id = self.vector_stores_config.default_embedding_model.model_id
embedding_model = f"{embedding_provider_id}/{model_id}"

# Require explicit embedding model specification
if embedding_model is None:
raise ValueError("embedding_model is required in extra_body when creating a vector store")

if embedding_dimension is None:
if embedding_model is not None and embedding_dimension is None:
embedding_dimension = await self._get_embedding_model_dimension(embedding_model)

# Auto-select provider if not specified

@ -136,30 +127,45 @@ class VectorIORouter(VectorIO):
raise ValueError("No vector_io providers available")
if num_providers > 1:
available_providers = list(self.routing_table.impls_by_provider_id.keys())
raise ValueError(
f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
f"Available providers: {available_providers}"
)
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
# Use default configured provider
if self.vector_stores_config and self.vector_stores_config.default_provider_id:
default_provider = self.vector_stores_config.default_provider_id
if default_provider in available_providers:
provider_id = default_provider
logger.debug(f"Using configured default vector store provider: {provider_id}")
else:
raise ValueError(
f"Configured default vector store provider '{default_provider}' not found. "
f"Available providers: {available_providers}"
)
else:
raise ValueError(
f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
f"Available providers: {available_providers}"
)
else:
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]

vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db(
vector_db_id=vector_db_id,
vector_store_id = f"vs_{uuid.uuid4()}"
registered_vector_store = await self.routing_table.register_vector_store(
vector_store_id=vector_store_id,
embedding_model=embedding_model,
embedding_dimension=embedding_dimension,
provider_id=provider_id,
provider_vector_db_id=vector_db_id,
vector_db_name=params.name,
provider_vector_store_id=vector_store_id,
vector_store_name=params.name,
)
provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier)
provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier)

# Update model_extra with registered values so provider uses the already-registered vector_db
# Update model_extra with registered values so provider uses the already-registered vector_store
if params.model_extra is None:
params.model_extra = {}
params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id
params.model_extra["provider_id"] = registered_vector_db.provider_id
params.model_extra["embedding_model"] = embedding_model
params.model_extra["embedding_dimension"] = embedding_dimension
params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id
params.model_extra["provider_id"] = registered_vector_store.provider_id
if embedding_model is not None:
params.model_extra["embedding_model"] = embedding_model
if embedding_dimension is not None:
params.model_extra["embedding_dimension"] = embedding_dimension

return await provider.openai_create_vector_store(params)

@ -173,15 +179,15 @@ class VectorIORouter(VectorIO):
logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}")
# Route to default provider for now - could aggregate from all providers in the future
# call retrieve on each vector dbs to get list of vector stores
vector_dbs = await self.routing_table.get_all_with_type("vector_db")
vector_stores = await self.routing_table.get_all_with_type("vector_store")
all_stores = []
for vector_db in vector_dbs:
for vector_store in vector_stores:
try:
provider = await self.routing_table.get_provider_impl(vector_db.identifier)
vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier)
provider = await self.routing_table.get_provider_impl(vector_store.identifier)
vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier)
all_stores.append(vector_store)
except Exception as e:
logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}")
logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
continue

# Sort by created_at

@ -245,8 +251,7 @@ class VectorIORouter(VectorIO):
vector_store_id: str,
) -> VectorStoreDeleteResponse:
logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}")
provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_delete_vector_store(vector_store_id)
return await self.routing_table.openai_delete_vector_store(vector_store_id)

async def openai_search_vector_store(
self,
@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
elif api == Api.safety:
return await p.register_shield(obj)
elif api == Api.vector_io:
return await p.register_vector_db(obj)
return await p.register_vector_store(obj)
elif api == Api.datasetio:
return await p.register_dataset(obj)
elif api == Api.scoring:

@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
api = get_impl_api(p)
if api == Api.vector_io:
return await p.unregister_vector_db(obj.identifier)
return await p.unregister_vector_store(obj.identifier)
elif api == Api.inference:
return await p.unregister_model(obj.identifier)
elif api == Api.safety:

@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable):
elif api == Api.safety:
p.shield_store = self
elif api == Api.vector_io:
p.vector_db_store = self
p.vector_store_store = self
elif api == Api.datasetio:
p.dataset_store = self
elif api == Api.scoring:

@ -134,12 +134,15 @@ class CommonRoutingTableImpl(RoutingTable):
from .scoring_functions import ScoringFunctionsRoutingTable
from .shields import ShieldsRoutingTable
from .toolgroups import ToolGroupsRoutingTable
from .vector_stores import VectorStoresRoutingTable

def apiname_object():
if isinstance(self, ModelsRoutingTable):
return ("Inference", "model")
elif isinstance(self, ShieldsRoutingTable):
return ("Safety", "shield")
elif isinstance(self, VectorStoresRoutingTable):
return ("VectorIO", "vector_store")
elif isinstance(self, DatasetsRoutingTable):
return ("DatasetIO", "dataset")
elif isinstance(self, ScoringFunctionsRoutingTable):

@ -245,25 +248,7 @@ class CommonRoutingTableImpl(RoutingTable):


async def lookup_model(routing_table: CommonRoutingTableImpl, model_id: str) -> Model:
# first try to get the model by identifier
# this works if model_id is an alias or is of the form provider_id/provider_model_id
model = await routing_table.get_object_by_identifier("model", model_id)
if model is not None:
return model

logger.warning(
f"WARNING: model identifier '{model_id}' not found in routing table. Falling back to "
"searching in all providers. This is only for backwards compatibility and will stop working "
"soon. Migrate your calls to use fully scoped `provider_id/model_id` names."
)
# if not found, this means model_id is an unscoped provider_model_id, we need
# to iterate (given a lack of an efficient index on the KVStore)
models = await routing_table.get_all_with_type("model")
matching_models = [m for m in models if m.provider_resource_id == model_id]
if len(matching_models) == 0:
if not model:
raise ModelNotFoundError(model_id)

if len(matching_models) > 1:
raise ValueError(f"Multiple providers found for '{model_id}': {[m.provider_id for m in matching_models]}")

return matching_models[0]
return model

@ -33,7 +33,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
try:
models = await provider.list_models()
except Exception as e:
logger.debug(f"Model refresh failed for provider {provider_id}: {e}")
logger.warning(f"Model refresh failed for provider {provider_id}: {e}")
continue

self.listed_providers.add(provider_id)

@ -104,15 +104,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
raise ValueError("Embedding model must have an embedding dimension in its metadata")

# an identifier different than provider_model_id implies it is an alias, so that
# becomes the globally unique identifier. otherwise provider_model_ids can conflict,
# so as a general rule we must use the provider_id to disambiguate.

if model_id != provider_model_id:
identifier = model_id
else:
identifier = f"{provider_id}/{provider_model_id}"

identifier = f"{provider_id}/{provider_model_id}"
model = ModelWithOwner(
identifier=identifier,
provider_resource_id=provider_model_id,
292  llama_stack/core/routing_tables/vector_stores.py  Normal file
@ -0,0 +1,292 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType

# Removed VectorStores import to avoid exposing public API
from llama_stack.apis.vector_io.vector_io import (
SearchRankingOptions,
VectorStoreChunkingStrategy,
VectorStoreDeleteResponse,
VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse,
VectorStoreFileObject,
VectorStoreFileStatus,
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import (
VectorStoreWithOwner,
)
from llama_stack.log import get_logger

from .common import CommonRoutingTableImpl, lookup_model

logger = get_logger(name=__name__, category="core::routing_tables")


class VectorStoresRoutingTable(CommonRoutingTableImpl):
"""Internal routing table for vector_store operations.

Does not inherit from VectorStores to avoid exposing public API endpoints.
Only provides internal routing functionality for VectorIORouter.
"""

# Internal methods only - no public API exposure

async def register_vector_store(
self,
vector_store_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
provider_vector_store_id: str | None = None,
vector_store_name: str | None = None,
) -> Any:
if provider_id is None:
if len(self.impls_by_provider_id) > 0:
provider_id = list(self.impls_by_provider_id.keys())[0]
if len(self.impls_by_provider_id) > 1:
logger.warning(
f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}."
)
else:
raise ValueError("No provider available. Please configure a vector_io provider.")
model = await lookup_model(self, embedding_model)
if model is None:
raise ModelNotFoundError(embedding_model)
if model.model_type != ModelType.embedding:
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)

vector_store = VectorStoreWithOwner(
identifier=vector_store_id,
type=ResourceType.vector_store.value,
provider_id=provider_id,
provider_resource_id=provider_vector_store_id,
embedding_model=embedding_model,
embedding_dimension=embedding_dimension,
vector_store_name=vector_store_name,
)
await self.register_object(vector_store)
return vector_store

async def openai_retrieve_vector_store(
self,
vector_store_id: str,
) -> VectorStoreObject:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store(vector_store_id)

async def openai_update_vector_store(
self,
vector_store_id: str,
name: str | None = None,
expires_after: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> VectorStoreObject:
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store(
vector_store_id=vector_store_id,
name=name,
expires_after=expires_after,
metadata=metadata,
)

async def openai_delete_vector_store(
self,
vector_store_id: str,
) -> VectorStoreDeleteResponse:
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
result = await provider.openai_delete_vector_store(vector_store_id)
await self.unregister_vector_store(vector_store_id)
return result

async def unregister_vector_store(self, vector_store_id: str) -> None:
"""Remove the vector store from the routing table registry."""
try:
vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id)
if vector_store_obj:
await self.unregister_object(vector_store_obj)
except Exception as e:
# Log the error but don't fail the operation
logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")

async def openai_search_vector_store(
self,
vector_store_id: str,
query: str | list[str],
filters: dict[str, Any] | None = None,
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_search_vector_store(
vector_store_id=vector_store_id,
query=query,
filters=filters,
max_num_results=max_num_results,
ranking_options=ranking_options,
rewrite_query=rewrite_query,
search_mode=search_mode,
)

async def openai_attach_file_to_vector_store(
self,
vector_store_id: str,
file_id: str,
attributes: dict[str, Any] | None = None,
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
chunking_strategy=chunking_strategy,
)

async def openai_list_files_in_vector_store(
self,
vector_store_id: str,
limit: int | None = 20,
order: str | None = "desc",
after: str | None = None,
before: str | None = None,
filter: VectorStoreFileStatus | None = None,
) -> list[VectorStoreFileObject]:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store(
vector_store_id=vector_store_id,
limit=limit,
order=order,
after=after,
before=before,
filter=filter,
)

async def openai_retrieve_vector_store_file(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileObject:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
)

async def openai_retrieve_vector_store_file_contents(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileContentsResponse:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents(
vector_store_id=vector_store_id,
file_id=file_id,
)

async def openai_update_vector_store_file(
self,
vector_store_id: str,
file_id: str,
attributes: dict[str, Any],
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
)

async def openai_delete_vector_store_file(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileDeleteResponse:
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_delete_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
)

async def openai_create_vector_store_file_batch(
self,
vector_store_id: str,
file_ids: list[str],
attributes: dict[str, Any] | None = None,
chunking_strategy: Any | None = None,
):
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_create_vector_store_file_batch(
vector_store_id=vector_store_id,
file_ids=file_ids,
attributes=attributes,
chunking_strategy=chunking_strategy,
)

async def openai_retrieve_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)

async def openai_list_files_in_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
after: str | None = None,
before: str | None = None,
filter: str | None = None,
limit: int | None = 20,
order: str | None = "desc",
):
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
after=after,
before=before,
filter=filter,
limit=limit,
order=order,
)

async def openai_cancel_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_cancel_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)
@ -72,13 +72,30 @@ class AuthProvider(ABC):
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
attributes: dict[str, list[str]] = {}
for claim_key, attribute_key in mapping.items():
if claim_key not in claims:
# First try dot notation for nested traversal (e.g., "resource_access.llamastack.roles")
# Then fall back to literal key with dots (e.g., "my.dotted.key")
claim: object = claims
keys = claim_key.split(".")
for key in keys:
if isinstance(claim, dict) and key in claim:
claim = claim[key]
else:
claim = None
break

if claim is None and claim_key in claims:
# Fall back to checking if claim_key exists as a literal key
claim = claims[claim_key]

if claim is None:
continue
claim = claims[claim_key]

if isinstance(claim, list):
values = claim
else:
elif isinstance(claim, str):
values = claim.split()
else:
continue

if attribute_key in attributes:
attributes[attribute_key].extend(values)
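For illustration only, a minimal sketch of how the new dot-notation claim mapping behaves, assuming the remainder of the function (not shown in this hunk) assigns `values` when the attribute key is new:

```python
# Hypothetical inputs; mirrors the traversal shown in the hunk above.
claims = {
    "resource_access": {"llamastack": {"roles": ["admin", "user"]}},
    "sub": "alice",
}
mapping = {
    "resource_access.llamastack.roles": "roles",  # nested traversal via dots
    "sub": "username",                            # plain claim, string split on whitespace
}

# Expected result (assumption): {"roles": ["admin", "user"], "username": ["alice"]}
attrs = get_attributes_from_claims(claims, mapping)
```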
@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta

from starlette.types import ASGIApp, Receive, Scope, Send

from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl

logger = get_logger(name=__name__, category="core::server")

@ -33,7 +33,7 @@ class QuotaMiddleware:
def __init__(
self,
app: ASGIApp,
kv_config: KVStoreConfig,
kv_config: KVStoreReference,
anonymous_max_requests: int,
authenticated_max_requests: int,
window_seconds: int = 86400,

@ -45,15 +45,15 @@ class QuotaMiddleware:
self.authenticated_max_requests = authenticated_max_requests
self.window_seconds = window_seconds

if isinstance(self.kv_config, SqliteKVStoreConfig):
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)

async def _get_kv(self) -> KVStore:
if self.kv is None:
self.kv = await kvstore_impl(self.kv_config)
backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend)
if backend_config and backend_config.type == StorageBackendType.KV_SQLITE:
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
return self.kv

async def __call__(self, scope: Scope, receive: Receive, send: Send):
@ -56,7 +56,7 @@ from llama_stack.core.stack import (
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.log import get_logger
from llama_stack.log import get_logger, setup_logging
from llama_stack.providers.datatypes import Api
from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (

@ -374,6 +374,9 @@ def create_app() -> StackApp:
Returns:
Configured StackApp instance.
"""
# Initialize logging from environment variables first
setup_logging()

config_file = os.getenv("LLAMA_STACK_CONFIG")
if config_file is None:
raise ValueError("LLAMA_STACK_CONFIG environment variable is required")

@ -445,7 +448,7 @@ def create_app() -> StackApp:
if cors_config:
app.add_middleware(CORSMiddleware, **cors_config.model_dump())

if Api.telemetry in impls:
if config.telemetry.enabled:
setup_logger(impls[Api.telemetry])
else:
setup_logger(TelemetryAdapter(TelemetryConfig(), {}))
@ -35,13 +35,23 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
from llama_stack.core.resolver import ProviderRegistry, resolve_impls
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageBackendConfig,
StorageConfig,
)
from llama_stack.core.store.registry import create_dist_registry
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger

@ -100,7 +110,7 @@ TEST_RECORDING_CONTEXT = None

async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
for rsrc, api, register_method, list_method in RESOURCES:
objects = getattr(run_config, rsrc)
objects = getattr(run_config.registered_resources, rsrc)
if api not in impls:
continue

@ -129,6 +139,42 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
)


async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]):
"""Validate vector stores configuration."""
if vector_stores_config is None:
return

default_embedding_model = vector_stores_config.default_embedding_model
if default_embedding_model is None:
return

provider_id = default_embedding_model.provider_id
model_id = default_embedding_model.model_id
default_model_id = f"{provider_id}/{model_id}"

if Api.models not in impls:
raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")

models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"}

default_model = models_list.get(default_model_id)
if default_model is None:
raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}")

embedding_dimension = default_model.metadata.get("embedding_dimension")
if embedding_dimension is None:
raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")

try:
int(embedding_dimension)
except ValueError as err:
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err

logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")


class EnvVarError(Exception):
def __init__(self, var_name: str, path: str = ""):
self.var_name = var_name

@ -303,6 +349,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
impls[Api.conversations] = conversations_impl


def _initialize_storage(run_config: StackRunConfig):
kv_backends: dict[str, StorageBackendConfig] = {}
sql_backends: dict[str, StorageBackendConfig] = {}
for backend_name, backend_config in run_config.storage.backends.items():
type = backend_config.type.value
if type.startswith("kv_"):
kv_backends[backend_name] = backend_config
elif type.startswith("sql_"):
sql_backends[backend_name] = backend_config
else:
raise ValueError(f"Unknown storage backend type: {type}")

from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends

register_kvstore_backends(kv_backends)
register_sqlstore_backends(sql_backends)


class Stack:
def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
self.run_config = run_config

@ -321,7 +386,11 @@ class Stack:
TEST_RECORDING_CONTEXT.__enter__()
logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")

dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name)
_initialize_storage(self.run_config)
stores = self.run_config.storage.stores
if not stores.metadata:
raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)
policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []

internal_impls = {}

@ -341,8 +410,8 @@ class Stack:
await impls[Api.conversations].initialize()

await register_resources(self.run_config, impls)

await refresh_registry_once(impls)
await validate_vector_stores_config(self.run_config.vector_stores, impls)
self.impls = impls

def create_registry_refresh_task(self):

@ -462,5 +531,16 @@ def run_config_from_adhoc_config_spec(
image_name="distro-test",
apis=list(provider_configs_by_api.keys()),
providers=provider_configs_by_api,
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"),
"sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
)
return config

@ -3,5 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .vector_dbs import *
283  llama_stack/core/storage/datatypes.py  Normal file
@ -0,0 +1,283 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import re
from abc import abstractmethod
from enum import StrEnum
from pathlib import Path
from typing import Annotated, Literal

from pydantic import BaseModel, Field, field_validator


class StorageBackendType(StrEnum):
    KV_REDIS = "kv_redis"
    KV_SQLITE = "kv_sqlite"
    KV_POSTGRES = "kv_postgres"
    KV_MONGODB = "kv_mongodb"
    SQL_SQLITE = "sql_sqlite"
    SQL_POSTGRES = "sql_postgres"


class CommonConfig(BaseModel):
namespace: str | None = Field(
default=None,
description="All keys will be prefixed with this namespace",
)


class RedisKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS
host: str = "localhost"
port: int = 6379

@property
def url(self) -> str:
return f"redis://{self.host}:{self.port}"

@classmethod
def pip_packages(cls) -> list[str]:
return ["redis"]

@classmethod
def sample_run_config(cls):
return {
"type": StorageBackendType.KV_REDIS.value,
"host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:=6379}",
}


class SqliteKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE
db_path: str = Field(
description="File path for the sqlite database",
)

@classmethod
def pip_packages(cls) -> list[str]:
return ["aiosqlite"]

@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
return {
"type": StorageBackendType.KV_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}


class PostgresKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
ssl_mode: str | None = None
ca_cert_path: str | None = None
table_name: str = "llamastack_kvstore"

@classmethod
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": StorageBackendType.KV_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
"table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
}

@classmethod
@field_validator("table_name")
def validate_table_name(cls, v: str) -> str:
# PostgreSQL identifiers rules:
# - Must start with a letter or underscore
# - Can contain letters, numbers, and underscores
# - Maximum length is 63 bytes
pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
if not re.match(pattern, v):
raise ValueError(
"Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores"
)
if len(v) > 63:
raise ValueError("Table name must be less than 63 characters")
return v

@classmethod
def pip_packages(cls) -> list[str]:
return ["psycopg2-binary"]


class MongoDBKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB
host: str = "localhost"
port: int = 27017
db: str = "llamastack"
user: str | None = None
password: str | None = None
collection_name: str = "llamastack_kvstore"

@classmethod
def pip_packages(cls) -> list[str]:
return ["pymongo"]

@classmethod
def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
return {
"type": StorageBackendType.KV_MONGODB.value,
"host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}",
"user": "${env.MONGODB_USER}",
"password": "${env.MONGODB_PASSWORD}",
"collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
}


class SqlAlchemySqlStoreConfig(BaseModel):
@property
@abstractmethod
def engine_str(self) -> str: ...

# TODO: move this when we have a better way to specify dependencies with internal APIs
@classmethod
def pip_packages(cls) -> list[str]:
return ["sqlalchemy[asyncio]"]


class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
db_path: str = Field(
description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
)

@property
def engine_str(self) -> str:
return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix()

@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return {
"type": StorageBackendType.SQL_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}

@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["aiosqlite"]


class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None

@property
def engine_str(self) -> str:
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"

@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["asyncpg"]

@classmethod
def sample_run_config(cls, **kwargs):
return {
"type": StorageBackendType.SQL_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
}


# reference = (backend_name, table_name)
class SqlStoreReference(BaseModel):
"""A reference to a 'SQL-like' persistent store. A table name must be provided."""

table_name: str = Field(
description="Name of the table to use for the SqlStore",
)

backend: str = Field(
description="Name of backend from storage.backends",
)


# reference = (backend_name, namespace)
class KVStoreReference(BaseModel):
"""A reference to a 'key-value' persistent store. A namespace must be provided."""

namespace: str = Field(
description="Key prefix for KVStore backends",
)

backend: str = Field(
description="Name of backend from storage.backends",
)


StorageBackendConfig = Annotated[
RedisKVStoreConfig
| SqliteKVStoreConfig
| PostgresKVStoreConfig
| MongoDBKVStoreConfig
| SqliteSqlStoreConfig
| PostgresSqlStoreConfig,
Field(discriminator="type"),
]


class InferenceStoreReference(SqlStoreReference):
"""Inference store configuration with queue tuning."""

max_write_queue_size: int = Field(
default=10000,
description="Max queued writes for inference store",
)
num_writers: int = Field(
default=4,
description="Number of concurrent background writers",
)


class ResponsesStoreReference(InferenceStoreReference):
"""Responses store configuration with queue tuning."""


class ServerStoresConfig(BaseModel):
metadata: KVStoreReference | None = Field(
default=None,
description="Metadata store configuration (uses KV backend)",
)
inference: InferenceStoreReference | None = Field(
default=None,
description="Inference store configuration (uses SQL backend)",
)
conversations: SqlStoreReference | None = Field(
default=None,
description="Conversations store configuration (uses SQL backend)",
)
responses: ResponsesStoreReference | None = Field(
default=None,
description="Responses store configuration (uses SQL backend)",
)


class StorageConfig(BaseModel):
backends: dict[str, StorageBackendConfig] = Field(
description="Named backend configurations (e.g., 'default', 'cache')",
)
stores: ServerStoresConfig = Field(
default_factory=lambda: ServerStoresConfig(),
description="Named references to storage backends used by the stack core",
)
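For reference, a minimal sketch of how these storage models compose, mirroring the run_config_from_adhoc_config_spec change earlier in this diff (the db paths are placeholders, not values from the PR):

```python
# Illustrative only: build a StorageConfig with named backends and store references.
from llama_stack.core.storage.datatypes import (
    KVStoreReference,
    ServerStoresConfig,
    SqliteKVStoreConfig,
    SqliteSqlStoreConfig,
    SqlStoreReference,
    StorageConfig,
)

storage = StorageConfig(
    backends={
        # backend names are referenced by stores below via `backend=`
        "kv_default": SqliteKVStoreConfig(db_path="/tmp/llama-stack/kvstore.db"),
        "sql_default": SqliteSqlStoreConfig(db_path="/tmp/llama-stack/sql_store.db"),
    },
    stores=ServerStoresConfig(
        metadata=KVStoreReference(backend="kv_default", namespace="registry"),
        conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
    ),
)
```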
@ -11,10 +11,9 @@ from typing import Protocol
import pydantic

from llama_stack.core.datatypes import RoutableObjectWithProvider
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig

logger = get_logger(__name__, category="core::registry")

@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):


async def create_dist_registry(
metadata_store: KVStoreConfig | None,
image_name: str,
metadata_store: KVStoreReference, image_name: str
) -> tuple[CachedDiskDistributionRegistry, KVStore]:
# instantiate kvstore for storing and retrieving distribution metadata
if metadata_store:
dist_kvstore = await kvstore_impl(metadata_store)
else:
dist_kvstore = await kvstore_impl(
SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix())
)
dist_kvstore = await kvstore_impl(metadata_store)
dist_registry = CachedDiskDistributionRegistry(dist_kvstore)
await dist_registry.initialize()
return dist_registry, dist_kvstore

@ -42,3 +42,8 @@ def sync_test_context_from_provider_data():
return TEST_CONTEXT.set(provider_data["__test_id"])

return None


def is_debug_mode() -> bool:
"""Check if test recording debug mode is enabled via LLAMA_STACK_TEST_DEBUG env var."""
return os.environ.get("LLAMA_STACK_TEST_DEBUG", "").lower() in ("1", "true", "yes")
@ -9,7 +9,7 @@
1. Start up Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.htmll).

```
llama stack build --distro together --image-type venv
llama stack list-deps together | xargs -L1 uv pip install

llama stack run together
```
@ -32,7 +32,7 @@ def tool_chat_page():
tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
selected_vector_dbs = []
selected_vector_stores = []

def reset_agent():
st.session_state.clear()

@ -55,13 +55,13 @@ def tool_chat_page():
)

if "builtin::rag" in toolgroup_selection:
vector_dbs = llama_stack_api.client.vector_dbs.list() or []
if not vector_dbs:
vector_stores = llama_stack_api.client.vector_stores.list() or []
if not vector_stores:
st.info("No vector databases available for selection.")
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
selected_vector_dbs = st.multiselect(
vector_stores = [vector_store.identifier for vector_store in vector_stores]
selected_vector_stores = st.multiselect(
label="Select Document Collections to use in RAG queries",
options=vector_dbs,
options=vector_stores,
on_change=reset_agent,
)

@ -119,7 +119,7 @@ def tool_chat_page():
tool_dict = dict(
name="builtin::rag",
args={
"vector_db_ids": list(selected_vector_dbs),
"vector_store_ids": list(selected_vector_stores),
},
)
toolgroup_selection[i] = tool_dict
@ -42,25 +42,25 @@ def resolve_config_or_distro(
# Strategy 1: Try as file path first
config_path = Path(config_or_distro)
if config_path.exists() and config_path.is_file():
logger.info(f"Using file path: {config_path}")
logger.debug(f"Using file path: {config_path}")
return config_path.resolve()

# Strategy 2: Try as distribution name (if no .yaml extension)
if not config_or_distro.endswith(".yaml"):
distro_config = _get_distro_config_path(config_or_distro, mode)
if distro_config.exists():
logger.info(f"Using distribution: {distro_config}")
logger.debug(f"Using distribution: {distro_config}")
return distro_config

# Strategy 3: Try as built distribution name
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists():
logger.info(f"Using built distribution: {distrib_config}")
logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config

distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists():
logger.info(f"Using built distribution: {distrib_config}")
logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config

# Strategy 4: Failed - provide helpful error
@ -25,6 +25,8 @@ distribution_spec:
- provider_type: inline::milvus
- provider_type: remote::chromadb
- provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files:
- provider_type: inline::localfs
safety:

@ -32,8 +34,6 @@ distribution_spec:
- provider_type: inline::code-scanner
agents:
- provider_type: inline::meta-reference
telemetry:
- provider_type: inline::meta-reference
post_training:
- provider_type: inline::torchtune-cpu
eval:
@ -10,7 +10,6 @@ apis:
|
|||
- post_training
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@ -94,30 +93,30 @@ providers:
|
|||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db
|
||||
persistence:
|
||||
namespace: vector_io::faiss
|
||||
backend: kv_default
|
||||
- provider_id: sqlite-vec
|
||||
provider_type: inline::sqlite-vec
|
||||
config:
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::sqlite_vec
|
||||
backend: kv_default
|
||||
- provider_id: ${env.MILVUS_URL:+milvus}
|
||||
provider_type: inline::milvus
|
||||
config:
|
||||
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::milvus
|
||||
backend: kv_default
|
||||
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::chroma_remote
|
||||
backend: kv_default
|
||||
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
||||
provider_type: remote::pgvector
|
||||
config:
|
||||
|
|
@ -126,17 +125,32 @@ providers:
|
|||
db: ${env.PGVECTOR_DB:=}
|
||||
user: ${env.PGVECTOR_USER:=}
|
||||
password: ${env.PGVECTOR_PASSWORD:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::pgvector
|
||||
backend: kv_default
|
||||
- provider_id: ${env.QDRANT_URL:+qdrant}
|
||||
provider_type: remote::qdrant
|
||||
config:
|
||||
api_key: ${env.QDRANT_API_KEY:=}
|
||||
persistence:
|
||||
namespace: vector_io::qdrant_remote
|
||||
backend: kv_default
|
||||
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
|
||||
provider_type: remote::weaviate
|
||||
config:
|
||||
weaviate_api_key: null
|
||||
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
|
||||
persistence:
|
||||
namespace: vector_io::weaviate
|
||||
backend: kv_default
|
||||
files:
|
||||
- provider_id: meta-reference-files
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db
|
||||
table_name: files_metadata
|
||||
backend: sql_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@@ -148,20 +162,15 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
post_training:
|
||||
- provider_id: torchtune-cpu
|
||||
provider_type: inline::torchtune-cpu
|
||||
|
|
@@ -172,21 +181,21 @@ providers:
|
|||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@@ -216,33 +225,52 @@ providers:
|
|||
provider_type: inline::reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/batches.db
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/conversations.db
|
||||
models: []
|
||||
shields:
|
||||
- shield_id: llama-guard
|
||||
provider_id: ${env.SAFETY_MODEL:+llama-guard}
|
||||
provider_shield_id: ${env.SAFETY_MODEL:=}
|
||||
- shield_id: code-scanner
|
||||
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
|
||||
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
namespace: batches
|
||||
backend: kv_default
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models: []
|
||||
shields:
|
||||
- shield_id: llama-guard
|
||||
provider_id: ${env.SAFETY_MODEL:+llama-guard}
|
||||
provider_shield_id: ${env.SAFETY_MODEL:=}
|
||||
- shield_id: code-scanner
|
||||
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
|
||||
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
provider_id: sentence-transformers
|
||||
model_id: nomic-ai/nomic-embed-text-v1.5
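A note on the substitution syntax used throughout these run configs: `${env.VAR:=default}` expands to the variable's value and falls back to the default when it is unset, while `${env.VAR:+value}` expands to the given value only when the variable is set, which is how optional providers such as `${env.CHROMADB_URL:+chromadb}` are switched on conditionally. This appears to mirror bash parameter expansion; the sketch below is a hypothetical re-implementation for illustration only, and `expand_env` is a made-up helper rather than the stack's actual code.

```python
# Illustrative re-implementation of the ${env.VAR:=default} / ${env.VAR:+value}
# substitution used in these run.yaml files. Assumption: semantics mirror bash
# parameter expansion; this is not the stack's actual resolver.
import os
import re

_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+):(?P<op>[=+])(?P<arg>[^}]*)\}")


def expand_env(value: str) -> str:
    def _sub(match: re.Match) -> str:
        current = os.environ.get(match.group("name"), "")
        if match.group("op") == "=":
            # := use the default when the variable is unset or empty
            return current or match.group("arg")
        # :+ emit the alternate value only when the variable is set
        return match.group("arg") if current else ""

    return _PATTERN.sub(_sub, value)


# Enables the provider id only when CHROMADB_URL is set in the environment.
print(expand_env("${env.CHROMADB_URL:+chromadb}"))
print(expand_env("${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db"))
```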
|
||||
|
|
|
|||
|
|
@@ -14,8 +14,6 @@ distribution_spec:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
telemetry:
- provider_type: inline::meta-reference
eval:
- provider_type: inline::meta-reference
datasetio:
@@ -32,7 +32,6 @@ def get_distribution_template() -> DistributionTemplate:
],
"safety": [BuildProvider(provider_type="inline::llama-guard")],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
"eval": [BuildProvider(provider_type="inline::meta-reference")],
"datasetio": [
BuildProvider(provider_type="remote::huggingface"),
@@ -157,7 +157,7 @@ docker run \
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.

```bash
llama stack build --distro {{ name }} --image-type conda
llama stack list-deps {{ name }} | xargs -L1 pip install
INFERENCE_MODEL=$INFERENCE_MODEL \
DEH_URL=$DEH_URL \
CHROMA_URL=$CHROMA_URL \
@@ -7,7 +7,6 @@ apis:
|
|||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@@ -27,9 +26,9 @@ providers:
|
|||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::chroma_remote
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@@ -39,40 +38,35 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@@ -95,39 +89,53 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: tgi0
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: tgi1
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: brave-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: tgi0
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: tgi1
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: brave-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@@ -7,7 +7,6 @@ apis:
|
|||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@@ -23,9 +22,9 @@ providers:
|
|||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::chroma_remote
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@@ -35,40 +34,35 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@@ -91,34 +85,48 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: tgi0
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: brave-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: tgi0
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: brave-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@@ -12,8 +12,6 @@ distribution_spec:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
telemetry:
- provider_type: inline::meta-reference
eval:
- provider_type: inline::meta-reference
datasetio:
@@ -70,10 +70,10 @@ docker run \

### Via venv

Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
Make sure you have the Llama Stack CLI available.

```bash
llama stack build --distro {{ name }} --image-type venv
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/{{ name }}/run.yaml \
--port 8321
@@ -34,7 +34,6 @@ def get_distribution_template() -> DistributionTemplate:
],
"safety": [BuildProvider(provider_type="inline::llama-guard")],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
"eval": [BuildProvider(provider_type="inline::meta-reference")],
"datasetio": [
BuildProvider(provider_type="remote::huggingface"),
@@ -7,7 +7,6 @@ apis:
|
|||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@@ -38,9 +37,9 @@ providers:
|
|||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
|
||||
persistence:
|
||||
namespace: vector_io::faiss
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@@ -50,40 +49,35 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@@ -108,39 +102,53 @@ providers:
|
|||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: meta-reference-safety
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: meta-reference-safety
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@@ -7,7 +7,6 @@ apis:
|
|||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@@ -28,9 +27,9 @@ providers:
|
|||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
|
||||
persistence:
|
||||
namespace: vector_io::faiss
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@@ -40,40 +39,35 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@@ -98,34 +92,48 @@ providers:
|
|||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@@ -10,8 +10,6 @@ distribution_spec:
- provider_type: remote::nvidia
agents:
- provider_type: inline::meta-reference
telemetry:
- provider_type: inline::meta-reference
eval:
- provider_type: remote::nvidia
post_training:
@@ -126,11 +126,11 @@ docker run \

### Via venv

If you've set up your local development environment, you can also build the image using your local virtual environment.
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack build --distro nvidia --image-type venv
llama stack list-deps nvidia | xargs -L1 uv pip install
NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \
@@ -21,7 +21,6 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
"vector_io": [BuildProvider(provider_type="inline::faiss")],
"safety": [BuildProvider(provider_type="remote::nvidia")],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
"eval": [BuildProvider(provider_type="remote::nvidia")],
"post_training": [BuildProvider(provider_type="remote::nvidia")],
"datasetio": [
@@ -9,7 +9,6 @@ apis:
|
|||
- post_training
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@@ -29,9 +28,9 @@ providers:
|
|||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
|
||||
persistence:
|
||||
namespace: vector_io::faiss
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
|
|
@@ -42,20 +41,15 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
eval:
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
|
|
@@ -74,8 +68,8 @@ providers:
|
|||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
|
|
@@ -95,35 +89,49 @@ providers:
|
|||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: nvidia
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: nvidia
|
||||
model_type: llm
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: nvidia
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
table_name: files_metadata
|
||||
backend: sql_default
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: nvidia
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: nvidia
|
||||
model_type: llm
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: nvidia
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@@ -9,7 +9,6 @@ apis:
|
|||
- post_training
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@@ -24,9 +23,9 @@ providers:
|
|||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
|
||||
persistence:
|
||||
namespace: vector_io::faiss
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
|
|
@@ -37,20 +36,15 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
eval:
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
|
|
@@ -84,25 +78,39 @@ providers:
|
|||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db
|
||||
models: []
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
table_name: files_metadata
|
||||
backend: sql_default
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models: []
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@@ -16,8 +16,6 @@ distribution_spec:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
telemetry:
- provider_type: inline::meta-reference
eval:
- provider_type: inline::meta-reference
datasetio:
@@ -105,7 +105,6 @@ def get_distribution_template() -> DistributionTemplate:
],
"safety": [BuildProvider(provider_type="inline::llama-guard")],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
"eval": [BuildProvider(provider_type="inline::meta-reference")],
"datasetio": [
BuildProvider(provider_type="remote::huggingface"),
@@ -7,7 +7,6 @@ apis:
|
|||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@@ -40,16 +39,16 @@ providers:
|
|||
provider_type: inline::sqlite-vec
|
||||
config:
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::sqlite_vec
|
||||
backend: kv_default
|
||||
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/chroma_remote_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::chroma_remote
|
||||
backend: kv_default
|
||||
- provider_id: ${env.ENABLE_PGVECTOR:+pgvector}
|
||||
provider_type: remote::pgvector
|
||||
config:
|
||||
|
|
@@ -58,9 +57,9 @@ providers:
|
|||
db: ${env.PGVECTOR_DB:=}
|
||||
user: ${env.PGVECTOR_USER:=}
|
||||
password: ${env.PGVECTOR_PASSWORD:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/pgvector_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::pgvector
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@@ -70,40 +69,35 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@@ -128,117 +122,131 @@ providers:
|
|||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/conversations.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: gpt-4o
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: claude-3-5-sonnet-latest
|
||||
provider_id: anthropic
|
||||
provider_model_id: claude-3-5-sonnet-latest
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gemini/gemini-1.5-flash
|
||||
provider_id: gemini
|
||||
provider_model_id: gemini/gemini-1.5-flash
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-3.3-70b-versatile
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-405B-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
|
||||
model_type: llm
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
datasets:
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/simpleqa?split=train
|
||||
metadata: {}
|
||||
dataset_id: simpleqa
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
|
||||
metadata: {}
|
||||
dataset_id: mmlu_cot
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
|
||||
metadata: {}
|
||||
dataset_id: gpqa_cot
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/math_500?split=test
|
||||
metadata: {}
|
||||
dataset_id: math_500
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/IfEval?split=train
|
||||
metadata: {}
|
||||
dataset_id: ifeval
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/docvqa?split=val
|
||||
metadata: {}
|
||||
dataset_id: docvqa
|
||||
scoring_fns: []
|
||||
benchmarks:
|
||||
- dataset_id: simpleqa
|
||||
scoring_functions:
|
||||
- llm-as-judge::405b-simpleqa
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-simpleqa
|
||||
- dataset_id: mmlu_cot
|
||||
scoring_functions:
|
||||
- basic::regex_parser_multiple_choice_answer
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-mmlu-cot
|
||||
- dataset_id: gpqa_cot
|
||||
scoring_functions:
|
||||
- basic::regex_parser_multiple_choice_answer
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-gpqa-cot
|
||||
- dataset_id: math_500
|
||||
scoring_functions:
|
||||
- basic::regex_parser_math_response
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-math-500
|
||||
- dataset_id: ifeval
|
||||
scoring_functions:
|
||||
- basic::ifeval
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-ifeval
|
||||
- dataset_id: docvqa
|
||||
scoring_functions:
|
||||
- basic::docvqa
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-docvqa
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: gpt-4o
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: claude-3-5-sonnet-latest
|
||||
provider_id: anthropic
|
||||
provider_model_id: claude-3-5-sonnet-latest
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gemini/gemini-1.5-flash
|
||||
provider_id: gemini
|
||||
provider_model_id: gemini/gemini-1.5-flash
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-3.3-70b-versatile
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-405B-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
|
||||
model_type: llm
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
datasets:
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/simpleqa?split=train
|
||||
metadata: {}
|
||||
dataset_id: simpleqa
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
|
||||
metadata: {}
|
||||
dataset_id: mmlu_cot
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
|
||||
metadata: {}
|
||||
dataset_id: gpqa_cot
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/math_500?split=test
|
||||
metadata: {}
|
||||
dataset_id: math_500
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/IfEval?split=train
|
||||
metadata: {}
|
||||
dataset_id: ifeval
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/docvqa?split=val
|
||||
metadata: {}
|
||||
dataset_id: docvqa
|
||||
scoring_fns: []
|
||||
benchmarks:
|
||||
- dataset_id: simpleqa
|
||||
scoring_functions:
|
||||
- llm-as-judge::405b-simpleqa
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-simpleqa
|
||||
- dataset_id: mmlu_cot
|
||||
scoring_functions:
|
||||
- basic::regex_parser_multiple_choice_answer
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-mmlu-cot
|
||||
- dataset_id: gpqa_cot
|
||||
scoring_functions:
|
||||
- basic::regex_parser_multiple_choice_answer
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-gpqa-cot
|
||||
- dataset_id: math_500
|
||||
scoring_functions:
|
||||
- basic::regex_parser_math_response
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-math-500
|
||||
- dataset_id: ifeval
|
||||
scoring_functions:
|
||||
- basic::ifeval
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-ifeval
|
||||
- dataset_id: docvqa
|
||||
scoring_functions:
|
||||
- basic::docvqa
|
||||
metadata: {}
|
||||
benchmark_id: meta-reference-docvqa
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
|
|
|
|||
|
|
@@ -11,8 +11,6 @@ distribution_spec:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
telemetry:
- provider_type: inline::meta-reference
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
@@ -42,7 +42,6 @@ def get_distribution_template() -> DistributionTemplate:
"vector_io": [BuildProvider(provider_type="remote::chromadb")],
"safety": [BuildProvider(provider_type="inline::llama-guard")],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
"tool_runtime": [
BuildProvider(provider_type="remote::brave-search"),
BuildProvider(provider_type="remote::tavily-search"),
@@ -92,7 +91,6 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 768,
},
)
postgres_config = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@@ -106,33 +104,16 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
"telemetry": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
service_name="${env.OTEL_SERVICE_NAME:=\u200b}",
sinks="${env.TELEMETRY_SINKS:=console,otel_trace}",
otel_exporter_otlp_endpoint="${env.OTEL_EXPORTER_OTLP_ENDPOINT:=http://localhost:4318/v1/traces}",
),
)
],
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
metadata_store=PostgresKVStoreConfig.sample_run_config(),
inference_store=postgres_config,
storage_backends={
"kv_default": PostgresKVStoreConfig.sample_run_config(
table_name="llamastack_kvstore",
),
"sql_default": PostgresSqlStoreConfig.sample_run_config(),
},
),
},
run_config_env_vars={
@@ -4,7 +4,6 @@ apis:
|
|||
- agents
|
||||
- inference
|
||||
- safety
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@@ -23,9 +22,9 @@ providers:
|
|||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::chroma_remote
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@@ -35,27 +34,15 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
responses_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,otel_trace}
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=http://localhost:4318/v1/traces}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
|
|
@@ -71,44 +58,58 @@ providers:
|
|||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
metadata_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
||||
inference_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/conversations.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: vllm-inference
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
||||
sql_default:
|
||||
type: sql_postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: vllm-inference
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
@ -26,6 +26,8 @@ distribution_spec:
|
|||
- provider_type: inline::milvus
|
||||
- provider_type: remote::chromadb
|
||||
- provider_type: remote::pgvector
|
||||
- provider_type: remote::qdrant
|
||||
- provider_type: remote::weaviate
|
||||
files:
|
||||
- provider_type: inline::localfs
|
||||
safety:
|
||||
|
|
@ -33,8 +35,6 @@ distribution_spec:
|
|||
- provider_type: inline::code-scanner
|
||||
agents:
|
||||
- provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- provider_type: inline::meta-reference
|
||||
post_training:
|
||||
- provider_type: inline::huggingface-gpu
|
||||
eval:
|
||||
@ -10,7 +10,6 @@ apis:
|
|||
- post_training
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@ -94,30 +93,30 @@ providers:
|
|||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db
|
||||
persistence:
|
||||
namespace: vector_io::faiss
|
||||
backend: kv_default
|
||||
- provider_id: sqlite-vec
|
||||
provider_type: inline::sqlite-vec
|
||||
config:
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::sqlite_vec
|
||||
backend: kv_default
|
||||
- provider_id: ${env.MILVUS_URL:+milvus}
|
||||
provider_type: inline::milvus
|
||||
config:
|
||||
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::milvus
|
||||
backend: kv_default
|
||||
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::chroma_remote
|
||||
backend: kv_default
|
||||
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
||||
provider_type: remote::pgvector
|
||||
config:
|
||||
|
|
@ -126,17 +125,32 @@ providers:
|
|||
db: ${env.PGVECTOR_DB:=}
|
||||
user: ${env.PGVECTOR_USER:=}
|
||||
password: ${env.PGVECTOR_PASSWORD:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::pgvector
|
||||
backend: kv_default
|
||||
- provider_id: ${env.QDRANT_URL:+qdrant}
|
||||
provider_type: remote::qdrant
|
||||
config:
|
||||
api_key: ${env.QDRANT_API_KEY:=}
|
||||
persistence:
|
||||
namespace: vector_io::qdrant_remote
|
||||
backend: kv_default
|
||||
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
|
||||
provider_type: remote::weaviate
|
||||
config:
|
||||
weaviate_api_key: null
|
||||
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
|
||||
persistence:
|
||||
namespace: vector_io::weaviate
|
||||
backend: kv_default
|
||||
files:
|
||||
- provider_id: meta-reference-files
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db
|
||||
table_name: files_metadata
|
||||
backend: sql_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@ -148,20 +162,15 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
post_training:
|
||||
- provider_id: huggingface-gpu
|
||||
provider_type: inline::huggingface-gpu
|
||||
|
|
@ -175,21 +184,21 @@ providers:
|
|||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@ -219,33 +228,52 @@ providers:
|
|||
provider_type: inline::reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/batches.db
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/conversations.db
|
||||
models: []
|
||||
shields:
|
||||
- shield_id: llama-guard
|
||||
provider_id: ${env.SAFETY_MODEL:+llama-guard}
|
||||
provider_shield_id: ${env.SAFETY_MODEL:=}
|
||||
- shield_id: code-scanner
|
||||
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
|
||||
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
namespace: batches
|
||||
backend: kv_default
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models: []
|
||||
shields:
|
||||
- shield_id: llama-guard
|
||||
provider_id: ${env.SAFETY_MODEL:+llama-guard}
|
||||
provider_shield_id: ${env.SAFETY_MODEL:=}
|
||||
- shield_id: code-scanner
|
||||
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
|
||||
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
provider_id: sentence-transformers
|
||||
model_id: nomic-ai/nomic-embed-text-v1.5
|
||||
@ -26,6 +26,8 @@ distribution_spec:
|
|||
- provider_type: inline::milvus
|
||||
- provider_type: remote::chromadb
|
||||
- provider_type: remote::pgvector
|
||||
- provider_type: remote::qdrant
|
||||
- provider_type: remote::weaviate
|
||||
files:
|
||||
- provider_type: inline::localfs
|
||||
safety:
|
||||
|
|
@ -33,8 +35,6 @@ distribution_spec:
|
|||
- provider_type: inline::code-scanner
|
||||
agents:
|
||||
- provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- provider_type: inline::meta-reference
|
||||
post_training:
|
||||
- provider_type: inline::torchtune-cpu
|
||||
eval:
|
||||
@ -10,7 +10,6 @@ apis:
|
|||
- post_training
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@ -94,30 +93,30 @@ providers:
|
|||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
|
||||
persistence:
|
||||
namespace: vector_io::faiss
|
||||
backend: kv_default
|
||||
- provider_id: sqlite-vec
|
||||
provider_type: inline::sqlite-vec
|
||||
config:
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::sqlite_vec
|
||||
backend: kv_default
|
||||
- provider_id: ${env.MILVUS_URL:+milvus}
|
||||
provider_type: inline::milvus
|
||||
config:
|
||||
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::milvus
|
||||
backend: kv_default
|
||||
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::chroma_remote
|
||||
backend: kv_default
|
||||
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
||||
provider_type: remote::pgvector
|
||||
config:
|
||||
|
|
@ -126,17 +125,32 @@ providers:
|
|||
db: ${env.PGVECTOR_DB:=}
|
||||
user: ${env.PGVECTOR_USER:=}
|
||||
password: ${env.PGVECTOR_PASSWORD:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
|
||||
persistence:
|
||||
namespace: vector_io::pgvector
|
||||
backend: kv_default
|
||||
- provider_id: ${env.QDRANT_URL:+qdrant}
|
||||
provider_type: remote::qdrant
|
||||
config:
|
||||
api_key: ${env.QDRANT_API_KEY:=}
|
||||
persistence:
|
||||
namespace: vector_io::qdrant_remote
|
||||
backend: kv_default
|
||||
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
|
||||
provider_type: remote::weaviate
|
||||
config:
|
||||
weaviate_api_key: null
|
||||
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
|
||||
persistence:
|
||||
namespace: vector_io::weaviate
|
||||
backend: kv_default
|
||||
files:
|
||||
- provider_id: meta-reference-files
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
||||
table_name: files_metadata
|
||||
backend: sql_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@ -148,20 +162,15 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
post_training:
|
||||
- provider_id: torchtune-cpu
|
||||
provider_type: inline::torchtune-cpu
|
||||
|
|
@ -172,21 +181,21 @@ providers:
|
|||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@ -216,33 +225,52 @@ providers:
|
|||
provider_type: inline::reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/batches.db
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/conversations.db
|
||||
models: []
|
||||
shields:
|
||||
- shield_id: llama-guard
|
||||
provider_id: ${env.SAFETY_MODEL:+llama-guard}
|
||||
provider_shield_id: ${env.SAFETY_MODEL:=}
|
||||
- shield_id: code-scanner
|
||||
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
|
||||
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
namespace: batches
|
||||
backend: kv_default
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models: []
|
||||
shields:
|
||||
- shield_id: llama-guard
|
||||
provider_id: ${env.SAFETY_MODEL:+llama-guard}
|
||||
provider_shield_id: ${env.SAFETY_MODEL:=}
|
||||
- shield_id: code-scanner
|
||||
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
|
||||
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
vector_stores:
|
||||
default_provider_id: faiss
|
||||
default_embedding_model:
|
||||
provider_id: sentence-transformers
|
||||
model_id: nomic-ai/nomic-embed-text-v1.5
|
||||
@ -11,8 +11,10 @@ from llama_stack.core.datatypes import (
|
|||
BuildProvider,
|
||||
Provider,
|
||||
ProviderSpec,
|
||||
QualifiedModel,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
VectorStoresConfig,
|
||||
)
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
||||
|
|
@ -31,6 +33,8 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC
|
|||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||
PGVectorVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
|
||||
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
||||
|
||||
|
||||
|
|
@ -113,6 +117,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|||
BuildProvider(provider_type="inline::milvus"),
|
||||
BuildProvider(provider_type="remote::chromadb"),
|
||||
BuildProvider(provider_type="remote::pgvector"),
|
||||
BuildProvider(provider_type="remote::qdrant"),
|
||||
BuildProvider(provider_type="remote::weaviate"),
|
||||
],
|
||||
"files": [BuildProvider(provider_type="inline::localfs")],
|
||||
"safety": [
|
||||
|
|
@ -120,7 +126,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|||
BuildProvider(provider_type="inline::code-scanner"),
|
||||
],
|
||||
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"post_training": [BuildProvider(provider_type="inline::torchtune-cpu")],
|
||||
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"datasetio": [
|
||||
|
|
@ -222,12 +227,35 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|||
password="${env.PGVECTOR_PASSWORD:=}",
|
||||
),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.QDRANT_URL:+qdrant}",
|
||||
provider_type="remote::qdrant",
|
||||
config=QdrantVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}",
|
||||
url="${env.QDRANT_URL:=}",
|
||||
),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
|
||||
provider_type="remote::weaviate",
|
||||
config=WeaviateVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}",
|
||||
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
|
||||
),
|
||||
),
|
||||
],
|
||||
"files": [files_provider],
|
||||
},
|
||||
default_models=[],
|
||||
default_tool_groups=default_tool_groups,
|
||||
default_shields=default_shields,
|
||||
vector_stores_config=VectorStoresConfig(
|
||||
default_provider_id="faiss",
|
||||
default_embedding_model=QualifiedModel(
|
||||
provider_id="sentence-transformers",
|
||||
model_id="nomic-ai/nomic-embed-text-v1.5",
|
||||
),
|
||||
),
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
@ -25,9 +25,17 @@ from llama_stack.core.datatypes import (
|
|||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
TelemetryConfig,
|
||||
ToolGroupInput,
|
||||
VectorStoresConfig,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
InferenceStoreReference,
|
||||
KVStoreReference,
|
||||
SqlStoreReference,
|
||||
StorageBackendType,
|
||||
)
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
|
||||
|
|
@ -179,9 +187,10 @@ class RunConfigSettings(BaseModel):
|
|||
default_tool_groups: list[ToolGroupInput] | None = None
|
||||
default_datasets: list[DatasetInput] | None = None
|
||||
default_benchmarks: list[BenchmarkInput] | None = None
|
||||
metadata_store: dict | None = None
|
||||
inference_store: dict | None = None
|
||||
conversations_store: dict | None = None
|
||||
vector_stores_config: VectorStoresConfig | None = None
|
||||
telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
|
||||
storage_backends: dict[str, Any] | None = None
|
||||
storage_stores: dict[str, Any] | None = None
|
||||
|
||||
def run_config(
|
||||
self,
|
||||
|
|
@ -224,40 +233,65 @@ class RunConfigSettings(BaseModel):
|
|||
# Get unique set of APIs from providers
|
||||
apis = sorted(providers.keys())
|
||||
|
||||
storage_backends = self.storage_backends or {
|
||||
"kv_default": SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=f"~/.llama/distributions/{name}",
|
||||
db_name="kvstore.db",
|
||||
),
|
||||
"sql_default": SqliteSqlStoreConfig.sample_run_config(
|
||||
__distro_dir__=f"~/.llama/distributions/{name}",
|
||||
db_name="sql_store.db",
|
||||
),
|
||||
}
|
||||
|
||||
storage_stores = self.storage_stores or {
|
||||
"metadata": KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="registry",
|
||||
).model_dump(exclude_none=True),
|
||||
"inference": InferenceStoreReference(
|
||||
backend="sql_default",
|
||||
table_name="inference_store",
|
||||
).model_dump(exclude_none=True),
|
||||
"conversations": SqlStoreReference(
|
||||
backend="sql_default",
|
||||
table_name="openai_conversations",
|
||||
).model_dump(exclude_none=True),
|
||||
}
|
||||
|
||||
storage_config = dict(
|
||||
backends=storage_backends,
|
||||
stores=storage_stores,
|
||||
)
|
||||
|
||||
# Return a dict that matches StackRunConfig structure
|
||||
return {
|
||||
config = {
|
||||
"version": LLAMA_STACK_RUN_CONFIG_VERSION,
|
||||
"image_name": name,
|
||||
"container_image": container_image,
|
||||
"apis": apis,
|
||||
"providers": provider_configs,
|
||||
"metadata_store": self.metadata_store
|
||||
or SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=f"~/.llama/distributions/{name}",
|
||||
db_name="registry.db",
|
||||
),
|
||||
"inference_store": self.inference_store
|
||||
or SqliteSqlStoreConfig.sample_run_config(
|
||||
__distro_dir__=f"~/.llama/distributions/{name}",
|
||||
db_name="inference_store.db",
|
||||
),
|
||||
"conversations_store": self.conversations_store
|
||||
or SqliteSqlStoreConfig.sample_run_config(
|
||||
__distro_dir__=f"~/.llama/distributions/{name}",
|
||||
db_name="conversations.db",
|
||||
),
|
||||
"models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
|
||||
"shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
|
||||
"vector_dbs": [],
|
||||
"datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
|
||||
"scoring_fns": [],
|
||||
"benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
|
||||
"tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
|
||||
"storage": storage_config,
|
||||
"registered_resources": {
|
||||
"models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
|
||||
"shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
|
||||
"vector_dbs": [],
|
||||
"datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
|
||||
"scoring_fns": [],
|
||||
"benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
|
||||
"tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
|
||||
},
|
||||
"server": {
|
||||
"port": 8321,
|
||||
},
|
||||
"telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
|
||||
}
|
||||
|
||||
if self.vector_stores_config:
|
||||
config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
|
||||
|
||||
return config
|
||||
|
||||
|
||||
class DistributionTemplate(BaseModel):
|
||||
"""
|
||||
|
|
@ -294,11 +328,15 @@ class DistributionTemplate(BaseModel):
|
|||
# We should have a better way to do this by formalizing the concept of "internal" APIs
|
||||
# and providers, with a way to specify dependencies for them.
|
||||
|
||||
if run_config_.get("inference_store"):
|
||||
additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"]))
|
||||
|
||||
if run_config_.get("metadata_store"):
|
||||
additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"]))
|
||||
storage_cfg = run_config_.get("storage", {})
|
||||
for backend_cfg in storage_cfg.get("backends", {}).values():
|
||||
store_type = backend_cfg.get("type")
|
||||
if not store_type:
|
||||
continue
|
||||
if str(store_type).startswith("kv_"):
|
||||
additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
|
||||
elif str(store_type).startswith("sql_"):
|
||||
additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))
|
||||
|
||||
if self.additional_pip_packages:
|
||||
additional_pip_packages.extend(self.additional_pip_packages)
|
||||
|
|
@ -384,11 +422,13 @@ class DistributionTemplate(BaseModel):
|
|||
def enum_representer(dumper, data):
|
||||
return dumper.represent_scalar("tag:yaml.org,2002:str", data.value)
|
||||
|
||||
# Register YAML representer for ModelType
|
||||
# Register YAML representer for enums
|
||||
yaml.add_representer(ModelType, enum_representer)
|
||||
yaml.add_representer(DatasetPurpose, enum_representer)
|
||||
yaml.add_representer(StorageBackendType, enum_representer)
|
||||
yaml.SafeDumper.add_representer(ModelType, enum_representer)
|
||||
yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer)
|
||||
yaml.SafeDumper.add_representer(StorageBackendType, enum_representer)
|
||||
|
||||
for output_dir in [yaml_output_dir, doc_output_dir]:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
@ -11,8 +11,6 @@ distribution_spec:
|
|||
- provider_type: inline::llama-guard
|
||||
agents:
|
||||
- provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- provider_type: inline::meta-reference
|
||||
eval:
|
||||
- provider_type: inline::meta-reference
|
||||
datasetio:
|
||||
@ -8,7 +8,6 @@ apis:
|
|||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
|
|
@ -23,9 +22,9 @@ providers:
|
|||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
|
||||
persistence:
|
||||
namespace: vector_io::faiss
|
||||
backend: kv_default
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
|
|
@ -35,40 +34,35 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
persistence:
|
||||
agent_state:
|
||||
namespace: agents
|
||||
backend: kv_default
|
||||
responses:
|
||||
table_name: responses
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
|
||||
namespace: eval
|
||||
backend: kv_default
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
|
||||
namespace: datasetio::huggingface
|
||||
backend: kv_default
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
|
||||
namespace: datasetio::localfs
|
||||
backend: kv_default
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
|
|
@ -99,27 +93,41 @@ providers:
|
|||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/conversations.db
|
||||
models: []
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
table_name: files_metadata
|
||||
backend: sql_default
|
||||
storage:
|
||||
backends:
|
||||
kv_default:
|
||||
type: kv_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db
|
||||
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
|
||||
stores:
|
||||
metadata:
|
||||
namespace: registry
|
||||
backend: kv_default
|
||||
inference:
|
||||
table_name: inference_store
|
||||
backend: sql_default
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
registered_resources:
|
||||
models: []
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
telemetry:
|
||||
enabled: true
|
||||
@ -20,7 +20,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
|
|||
"vector_io": [BuildProvider(provider_type="inline::faiss")],
|
||||
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"datasetio": [
|
||||
BuildProvider(provider_type="remote::huggingface"),
|
||||
@ -166,14 +166,26 @@ class CustomFileHandler(logging.FileHandler):
        super().emit(record)


def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None:
def setup_logging(category_levels: dict[str, int] | None = None, log_file: str | None = None) -> None:
    """
    Configure logging based on the provided category log levels and an optional log file.
    If category_levels or log_file are not provided, they will be read from environment variables.

    Parameters:
        category_levels (Dict[str, int]): A dictionary mapping categories to their log levels.
        log_file (str): Path to a log file to additionally pipe the logs into
        category_levels (Dict[str, int] | None): A dictionary mapping categories to their log levels.
            If None, reads from LLAMA_STACK_LOGGING environment variable and uses defaults.
        log_file (str | None): Path to a log file to additionally pipe the logs into.
            If None, reads from LLAMA_STACK_LOG_FILE environment variable.
    """
    # Read from environment variables if not explicitly provided
    if category_levels is None:
        category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
        env_config = os.environ.get("LLAMA_STACK_LOGGING", "")
        if env_config:
            category_levels.update(parse_environment_config(env_config))

    if log_file is None:
        log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
    log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s"

    class CategoryFilter(logging.Filter):
|
|
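With the new defaults above, `setup_logging` can be invoked with no arguments and will resolve category levels from `LLAMA_STACK_LOGGING` and the optional file sink from `LLAMA_STACK_LOG_FILE`. A minimal usage sketch, assuming the function is exported from `llama_stack.log` as the surrounding hunks suggest and that the `core` category name is merely illustrative:

import logging
import os

from llama_stack.log import setup_logging  # assumed export location

# Explicit configuration keeps working as before.
setup_logging(category_levels={"core": logging.DEBUG}, log_file=None)

# With the new signature both arguments may be omitted; levels then come from
# LLAMA_STACK_LOGGING and the optional file sink from LLAMA_STACK_LOG_FILE.
os.environ.setdefault("LLAMA_STACK_LOG_FILE", "/tmp/llama_stack.log")
setup_logging()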
@ -224,12 +236,30 @@ def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None
|
|||
}
|
||||
},
|
||||
"loggers": {
|
||||
category: {
|
||||
"handlers": list(handlers.keys()), # Apply all handlers
|
||||
"level": category_levels.get(category, DEFAULT_LOG_LEVEL),
|
||||
"propagate": False, # Disable propagation to root logger
|
||||
}
|
||||
for category in CATEGORIES
|
||||
**{
|
||||
category: {
|
||||
"handlers": list(handlers.keys()), # Apply all handlers
|
||||
"level": category_levels.get(category, DEFAULT_LOG_LEVEL),
|
||||
"propagate": False, # Disable propagation to root logger
|
||||
}
|
||||
for category in CATEGORIES
|
||||
},
|
||||
# Explicitly configure uvicorn loggers to preserve their INFO level
|
||||
"uvicorn": {
|
||||
"handlers": list(handlers.keys()),
|
||||
"level": logging.INFO,
|
||||
"propagate": False,
|
||||
},
|
||||
"uvicorn.error": {
|
||||
"handlers": list(handlers.keys()),
|
||||
"level": logging.INFO,
|
||||
"propagate": False,
|
||||
},
|
||||
"uvicorn.access": {
|
||||
"handlers": list(handlers.keys()),
|
||||
"level": logging.INFO,
|
||||
"propagate": False,
|
||||
},
|
||||
},
|
||||
"root": {
|
||||
"handlers": list(handlers.keys()),
|
||||
|
|
@ -238,9 +268,13 @@ def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None
|
|||
}
|
||||
dictConfig(logging_config)
|
||||
|
||||
# Ensure third-party libraries follow the root log level
|
||||
for _, logger in logging.root.manager.loggerDict.items():
|
||||
# Ensure third-party libraries follow the root log level, but preserve
|
||||
# already-configured loggers (e.g., uvicorn) and our own llama_stack loggers
|
||||
for name, logger in logging.root.manager.loggerDict.items():
|
||||
if isinstance(logger, logging.Logger):
|
||||
# Skip infrastructure loggers (uvicorn, fastapi) and our own loggers
|
||||
if name.startswith(("uvicorn", "fastapi", "llama_stack")):
|
||||
continue
|
||||
logger.setLevel(root_level)
|
||||
|
||||
|
||||
|
|
@ -278,12 +312,3 @@ def get_logger(
|
|||
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
|
||||
logger.setLevel(log_level)
|
||||
return logging.LoggerAdapter(logger, {"category": category})
|
||||
|
||||
|
||||
env_config = os.environ.get("LLAMA_STACK_LOGGING", "")
|
||||
if env_config:
|
||||
_category_levels.update(parse_environment_config(env_config))
|
||||
|
||||
log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
|
||||
|
||||
setup_logging(_category_levels, log_file)
|
||||
@ -17,7 +17,7 @@ from llama_stack.apis.models import Model
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.apis.shields import Shield
from llama_stack.apis.tools import ToolGroup
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.schema_utils import json_schema_type


@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol):
    async def unregister_shield(self, identifier: str) -> None: ...


class VectorDBsProtocolPrivate(Protocol):
    async def register_vector_db(self, vector_db: VectorDB) -> None: ...
class VectorStoresProtocolPrivate(Protocol):
    async def register_vector_store(self, vector_store: VectorStore) -> None: ...

    async def unregister_vector_db(self, vector_db_id: str) -> None: ...
    async def unregister_vector_store(self, vector_store_id: str) -> None: ...


class DatasetsProtocolPrivate(Protocol):
@ -11,7 +11,12 @@ from llama_stack.core.datatypes import AccessRule, Api
from .config import MetaReferenceAgentsImplConfig


async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any], policy: list[AccessRule]):
async def get_provider_impl(
    config: MetaReferenceAgentsImplConfig,
    deps: dict[Api, Any],
    policy: list[AccessRule],
    telemetry_enabled: bool = False,
):
    from .agents import MetaReferenceAgentsImpl

    impl = MetaReferenceAgentsImpl(

@ -23,7 +28,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
        deps[Api.tool_groups],
        deps[Api.conversations],
        policy,
        Api.telemetry in deps,
        telemetry_enabled,
    )
    await impl.initialize()
    return impl
@ -28,6 +28,7 @@ from llama_stack.apis.agents import (
    Session,
    Turn,
)
from llama_stack.apis.agents.agents import ResponseGuardrail
from llama_stack.apis.agents.openai_responses import OpenAIResponseText
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.conversations import Conversations
|
|
@ -82,8 +83,8 @@ class MetaReferenceAgentsImpl(Agents):
        self.policy = policy

    async def initialize(self) -> None:
        self.persistence_store = await kvstore_impl(self.config.persistence_store)
        self.responses_store = ResponsesStore(self.config.responses_store, self.policy)
        self.persistence_store = await kvstore_impl(self.config.persistence.agent_state)
        self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy)
        await self.responses_store.initialize()
        self.openai_responses_impl = OpenAIResponsesImpl(
            inference_api=self.inference_api,
|
|
@ -91,6 +92,7 @@ class MetaReferenceAgentsImpl(Agents):
            tool_runtime_api=self.tool_runtime_api,
            responses_store=self.responses_store,
            vector_io_api=self.vector_io_api,
            safety_api=self.safety_api,
            conversations_api=self.conversations_api,
        )

|
|
@ -337,7 +339,7 @@ class MetaReferenceAgentsImpl(Agents):
        tools: list[OpenAIResponseInputTool] | None = None,
        include: list[str] | None = None,
        max_infer_iters: int | None = 10,
        shields: list | None = None,
        guardrails: list[ResponseGuardrail] | None = None,
    ) -> OpenAIResponseObject:
        return await self.openai_responses_impl.create_openai_response(
            input,

@ -352,7 +354,7 @@ class MetaReferenceAgentsImpl(Agents):
            tools,
            include,
            max_infer_iters,
            shields,
            guardrails,
        )

    async def list_openai_responses(
@ -8,24 +8,30 @@ from typing import Any

from pydantic import BaseModel

from llama_stack.providers.utils.kvstore import KVStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference


class AgentPersistenceConfig(BaseModel):
    """Nested persistence configuration for agents."""

    agent_state: KVStoreReference
    responses: ResponsesStoreReference


class MetaReferenceAgentsImplConfig(BaseModel):
    persistence_store: KVStoreConfig
    responses_store: SqlStoreConfig
    persistence: AgentPersistenceConfig

    @classmethod
    def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
        return {
            "persistence_store": SqliteKVStoreConfig.sample_run_config(
                __distro_dir__=__distro_dir__,
                db_name="agents_store.db",
            ),
            "responses_store": SqliteSqlStoreConfig.sample_run_config(
                __distro_dir__=__distro_dir__,
                db_name="responses_store.db",
            ),
            "persistence": {
                "agent_state": KVStoreReference(
                    backend="kv_default",
                    namespace="agents",
                ).model_dump(exclude_none=True),
                "responses": ResponsesStoreReference(
                    backend="sql_default",
                    table_name="responses",
                ).model_dump(exclude_none=True),
            }
        }
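For orientation, a sketch of how the new nested `persistence` block is validated by this config model; the module path is assumed from the provider layout, and the reference fields mirror `sample_run_config` above rather than any additional options the real model may accept:

from llama_stack.providers.inline.agents.meta_reference.config import (
    MetaReferenceAgentsImplConfig,
)

# Validate a run.yaml-style provider config against the new model (sketch).
cfg = MetaReferenceAgentsImplConfig.model_validate(
    {
        "persistence": {
            "agent_state": {"backend": "kv_default", "namespace": "agents"},
            "responses": {"backend": "sql_default", "table_name": "responses"},
        }
    }
)
assert cfg.persistence.agent_state.namespace == "agents"
assert cfg.persistence.responses.backend == "sql_default"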
@ -11,6 +11,7 @@ from collections.abc import AsyncIterator
|
|||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
from llama_stack.apis.agents import Order
|
||||
from llama_stack.apis.agents.agents import ResponseGuardrailSpec
|
||||
from llama_stack.apis.agents.openai_responses import (
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
|
|
@ -34,6 +35,7 @@ from llama_stack.apis.inference import (
|
|||
OpenAIMessageParam,
|
||||
OpenAISystemMessageParam,
|
||||
)
|
||||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.log import get_logger
|
||||
|
|
@ -48,6 +50,7 @@ from .types import ChatCompletionContext, ToolContext
|
|||
from .utils import (
|
||||
convert_response_input_to_chat_messages,
|
||||
convert_response_text_to_chat_response_format,
|
||||
extract_guardrail_ids,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="openai_responses")
|
||||
|
|
@ -66,6 +69,7 @@ class OpenAIResponsesImpl:
|
|||
tool_runtime_api: ToolRuntime,
|
||||
responses_store: ResponsesStore,
|
||||
vector_io_api: VectorIO, # VectorIO
|
||||
safety_api: Safety,
|
||||
conversations_api: Conversations,
|
||||
):
|
||||
self.inference_api = inference_api
|
||||
|
|
@ -73,6 +77,7 @@ class OpenAIResponsesImpl:
|
|||
self.tool_runtime_api = tool_runtime_api
|
||||
self.responses_store = responses_store
|
||||
self.vector_io_api = vector_io_api
|
||||
self.safety_api = safety_api
|
||||
self.conversations_api = conversations_api
|
||||
self.tool_executor = ToolExecutor(
|
||||
tool_groups_api=tool_groups_api,
|
||||
|
|
@ -100,6 +105,7 @@ class OpenAIResponsesImpl:
|
|||
input: str | list[OpenAIResponseInput],
|
||||
tools: list[OpenAIResponseInputTool] | None,
|
||||
previous_response_id: str | None,
|
||||
conversation: str | None,
|
||||
) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam]]:
|
||||
"""Process input with optional previous response context.
|
||||
|
||||
|
|
@ -124,16 +130,39 @@ class OpenAIResponsesImpl:
|
|||
messages = await convert_response_input_to_chat_messages(all_input)
|
||||
|
||||
tool_context.recover_tools_from_previous_response(previous_response)
|
||||
elif conversation is not None:
|
||||
conversation_items = await self.conversations_api.list(conversation, order="asc")
|
||||
|
||||
# Use stored messages as source of truth (like previous_response.messages)
|
||||
stored_messages = await self.responses_store.get_conversation_messages(conversation)
|
||||
|
||||
all_input = input
|
||||
if not conversation_items.data:
|
||||
# First turn - just convert the new input
|
||||
messages = await convert_response_input_to_chat_messages(input)
|
||||
else:
|
||||
if not stored_messages:
|
||||
all_input = conversation_items.data
|
||||
if isinstance(input, str):
|
||||
all_input.append(
|
||||
OpenAIResponseMessage(
|
||||
role="user", content=[OpenAIResponseInputMessageContentText(text=input)]
|
||||
)
|
||||
)
|
||||
else:
|
||||
all_input.extend(input)
|
||||
else:
|
||||
all_input = input
|
||||
|
||||
messages = stored_messages or []
|
||||
new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
|
||||
messages.extend(new_messages)
|
||||
else:
|
||||
all_input = input
|
||||
messages = await convert_response_input_to_chat_messages(input)
|
||||
messages = await convert_response_input_to_chat_messages(all_input)
|
||||
|
||||
return all_input, messages, tool_context
|
||||
|
||||
async def _prepend_instructions(self, messages, instructions):
|
||||
if instructions:
|
||||
messages.insert(0, OpenAISystemMessageParam(content=instructions))
|
||||
|
||||
async def get_openai_response(
|
||||
self,
|
||||
response_id: str,
|
||||
|
|
@ -220,41 +249,34 @@ class OpenAIResponsesImpl:
|
|||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
include: list[str] | None = None,
|
||||
max_infer_iters: int | None = 10,
|
||||
shields: list | None = None,
|
||||
guardrails: list[ResponseGuardrailSpec] | None = None,
|
||||
):
|
||||
stream = bool(stream)
|
||||
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
|
||||
|
||||
# Shields parameter received via extra_body - not yet implemented
|
||||
if shields is not None:
|
||||
raise NotImplementedError("Shields parameter is not yet implemented in the meta-reference provider")
|
||||
guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else []
|
||||
|
||||
if conversation is not None and previous_response_id is not None:
|
||||
raise ValueError(
|
||||
"Mutually exclusive parameters: 'previous_response_id' and 'conversation'. Ensure you are only providing one of these parameters."
|
||||
)
|
||||
|
||||
original_input = input # needed for syncing to Conversations
|
||||
if conversation is not None:
|
||||
if previous_response_id is not None:
|
||||
raise ValueError(
|
||||
"Mutually exclusive parameters: 'previous_response_id' and 'conversation'. Ensure you are only providing one of these parameters."
|
||||
)
|
||||
|
||||
if not conversation.startswith("conv_"):
|
||||
raise InvalidConversationIdError(conversation)
|
||||
|
||||
# Check conversation exists (raises ConversationNotFoundError if not)
|
||||
_ = await self.conversations_api.get_conversation(conversation)
|
||||
input = await self._load_conversation_context(conversation, input)
|
||||
|
||||
stream_gen = self._create_streaming_response(
|
||||
input=input,
|
||||
original_input=original_input,
|
||||
conversation=conversation,
|
||||
model=model,
|
||||
instructions=instructions,
|
||||
previous_response_id=previous_response_id,
|
||||
conversation=conversation,
|
||||
store=store,
|
||||
temperature=temperature,
|
||||
text=text,
|
||||
tools=tools,
|
||||
max_infer_iters=max_infer_iters,
|
||||
guardrail_ids=guardrail_ids,
|
||||
)
|
||||
|
||||
if stream:
|
||||
|
|
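`extract_guardrail_ids` itself is not shown in this diff. Since a `ResponseGuardrail` is either a plain identifier string or a `ResponseGuardrailSpec` whose `type` field names the guardrail, a plausible sketch of the normalization it performs (an assumption, not the repository's implementation) is:

def extract_guardrail_ids(guardrails) -> list[str]:
    """Hypothetical sketch: flatten str | ResponseGuardrailSpec entries into identifiers."""
    ids: list[str] = []
    for guardrail in guardrails or []:
        if isinstance(guardrail, str):
            ids.append(guardrail)
        else:
            # ResponseGuardrailSpec carries the identifier in its `type` field
            ids.append(guardrail.type)
    return ids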
@ -292,7 +314,6 @@ class OpenAIResponsesImpl:
|
|||
self,
|
||||
input: str | list[OpenAIResponseInput],
|
||||
model: str,
|
||||
original_input: str | list[OpenAIResponseInput] | None = None,
|
||||
instructions: str | None = None,
|
||||
previous_response_id: str | None = None,
|
||||
conversation: str | None = None,
|
||||
|
|
@ -301,12 +322,15 @@ class OpenAIResponsesImpl:
|
|||
text: OpenAIResponseText | None = None,
|
||||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
max_infer_iters: int | None = 10,
|
||||
guardrail_ids: list[str] | None = None,
|
||||
) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||
# Input preprocessing
|
||||
all_input, messages, tool_context = await self._process_input_with_previous_response(
|
||||
input, tools, previous_response_id
|
||||
input, tools, previous_response_id, conversation
|
||||
)
|
||||
await self._prepend_instructions(messages, instructions)
|
||||
|
||||
if instructions:
|
||||
messages.insert(0, OpenAISystemMessageParam(content=instructions))
|
||||
|
||||
# Structured outputs
|
||||
response_format = await convert_response_text_to_chat_response_format(text)
|
||||
|
|
@ -333,11 +357,16 @@ class OpenAIResponsesImpl:
|
|||
text=text,
|
||||
max_infer_iters=max_infer_iters,
|
||||
tool_executor=self.tool_executor,
|
||||
safety_api=self.safety_api,
|
||||
guardrail_ids=guardrail_ids,
|
||||
instructions=instructions,
|
||||
)
|
||||
|
||||
# Stream the response
|
||||
final_response = None
|
||||
failed_response = None
|
||||
|
||||
output_items = []
|
||||
async for stream_chunk in orchestrator.create_response():
|
||||
if stream_chunk.type in {"response.completed", "response.incomplete"}:
|
||||
final_response = stream_chunk.response
|
||||
|
|
@ -345,102 +374,50 @@ class OpenAIResponsesImpl:
|
|||
failed_response = stream_chunk.response
|
||||
yield stream_chunk
|
||||
|
||||
if stream_chunk.type == "response.output_item.done":
|
||||
item = stream_chunk.item
|
||||
output_items.append(item)
|
||||
|
||||
# Store and sync immediately after yielding terminal events
|
||||
# This ensures the storage/syncing happens even if the consumer breaks early
|
||||
if (
|
||||
stream_chunk.type in {"response.completed", "response.incomplete"}
|
||||
and store
|
||||
and final_response
|
||||
and failed_response is None
|
||||
):
|
||||
await self._store_response(
|
||||
response=final_response,
|
||||
input=all_input,
|
||||
messages=orchestrator.final_messages,
|
||||
messages_to_store = list(
|
||||
filter(lambda x: not isinstance(x, OpenAISystemMessageParam), orchestrator.final_messages)
|
||||
)
|
||||
if store:
|
||||
# TODO: we really should work off of output_items instead of "final_messages"
|
||||
await self._store_response(
|
||||
response=final_response,
|
||||
input=all_input,
|
||||
messages=messages_to_store,
|
||||
)
|
||||
|
||||
if stream_chunk.type in {"response.completed", "response.incomplete"} and conversation and final_response:
|
||||
# for Conversations, we need to use the original_input if it's available, otherwise use input
|
||||
sync_input = original_input if original_input is not None else input
|
||||
await self._sync_response_to_conversation(conversation, sync_input, final_response)
|
||||
if conversation:
|
||||
await self._sync_response_to_conversation(conversation, input, output_items)
|
||||
await self.responses_store.store_conversation_messages(conversation, messages_to_store)
|
||||
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
return await self.responses_store.delete_response_object(response_id)
|
||||
|
||||
async def _load_conversation_context(
|
||||
self, conversation_id: str, content: str | list[OpenAIResponseInput]
|
||||
) -> list[OpenAIResponseInput]:
|
||||
"""Load conversation history and merge with provided content."""
|
||||
conversation_items = await self.conversations_api.list(conversation_id, order="asc")
|
||||
|
||||
context_messages = []
|
||||
for item in conversation_items.data:
|
||||
if isinstance(item, OpenAIResponseMessage):
|
||||
if item.role == "user":
|
||||
context_messages.append(
|
||||
OpenAIResponseMessage(
|
||||
role="user", content=item.content, id=item.id if hasattr(item, "id") else None
|
||||
)
|
||||
)
|
||||
elif item.role == "assistant":
|
||||
context_messages.append(
|
||||
OpenAIResponseMessage(
|
||||
role="assistant", content=item.content, id=item.id if hasattr(item, "id") else None
|
||||
)
|
||||
)
|
||||
|
||||
# add new content to context
|
||||
if isinstance(content, str):
|
||||
context_messages.append(OpenAIResponseMessage(role="user", content=content))
|
||||
elif isinstance(content, list):
|
||||
context_messages.extend(content)
|
||||
|
||||
return context_messages
|
||||
|
||||
async def _sync_response_to_conversation(
|
||||
self, conversation_id: str, content: str | list[OpenAIResponseInput], response: OpenAIResponseObject
|
||||
self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
|
||||
) -> None:
|
||||
"""Sync content and response messages to the conversation."""
|
||||
conversation_items = []
|
||||
|
||||
# add user content message(s)
|
||||
if isinstance(content, str):
|
||||
if isinstance(input, str):
|
||||
conversation_items.append(
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": content}]}
|
||||
OpenAIResponseMessage(role="user", content=[OpenAIResponseInputMessageContentText(text=input)])
|
||||
)
|
||||
elif isinstance(content, list):
|
||||
for item in content:
|
||||
if not isinstance(item, OpenAIResponseMessage):
|
||||
raise NotImplementedError(f"Unsupported input item type: {type(item)}")
|
||||
elif isinstance(input, list):
|
||||
conversation_items.extend(input)
|
||||
|
||||
if item.role == "user":
|
||||
if isinstance(item.content, str):
|
||||
conversation_items.append(
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{"type": "input_text", "text": item.content}],
|
||||
}
|
||||
)
|
||||
elif isinstance(item.content, list):
|
||||
conversation_items.append({"type": "message", "role": "user", "content": item.content})
|
||||
else:
|
||||
raise NotImplementedError(f"Unsupported user message content type: {type(item.content)}")
|
||||
elif item.role == "assistant":
|
||||
if isinstance(item.content, list):
|
||||
conversation_items.append({"type": "message", "role": "assistant", "content": item.content})
|
||||
else:
|
||||
raise NotImplementedError(f"Unsupported assistant message content type: {type(item.content)}")
|
||||
else:
|
||||
raise NotImplementedError(f"Unsupported message role: {item.role}")
|
||||
conversation_items.extend(output_items)
|
||||
|
||||
# add assistant response message
|
||||
for output_item in response.output:
|
||||
if isinstance(output_item, OpenAIResponseMessage) and output_item.role == "assistant":
|
||||
if hasattr(output_item, "content") and isinstance(output_item.content, list):
|
||||
conversation_items.append({"type": "message", "role": "assistant", "content": output_item.content})
|
||||
|
||||
if conversation_items:
|
||||
adapter = TypeAdapter(list[ConversationItem])
|
||||
validated_items = adapter.validate_python(conversation_items)
|
||||
await self.conversations_api.add_items(conversation_id, validated_items)
|
||||
adapter = TypeAdapter(list[ConversationItem])
|
||||
validated_items = adapter.validate_python(conversation_items)
|
||||
await self.conversations_api.add_items(conversation_id, validated_items)
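For orientation, the TypeAdapter call above is the standard pydantic v2 pattern for validating a list against a discriminated union before persisting it. A minimal, self-contained sketch; the Message/ToolCall models below are illustrative stand-ins, not part of the API:

from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter


class Message(BaseModel):
    type: Literal["message"] = "message"
    role: str
    content: str


class ToolCall(BaseModel):
    type: Literal["tool_call"] = "tool_call"
    name: str


# Discriminated union, playing the role of ConversationItem in the code above.
Item = Annotated[Message | ToolCall, Field(discriminator="type")]

adapter = TypeAdapter(list[Item])
items = adapter.validate_python([{"type": "message", "role": "user", "content": "hi"}])
assert isinstance(items[0], Message)  # raw dicts are coerced into the matching model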
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseInputTool,
|
||||
OpenAIResponseInputToolMCP,
|
||||
OpenAIResponseMCPApprovalRequest,
|
||||
OpenAIResponseMessage,
|
||||
OpenAIResponseObject,
|
||||
OpenAIResponseObjectStream,
|
||||
OpenAIResponseObjectStreamResponseCompleted,
|
||||
|
|
@ -42,8 +43,12 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseObjectStreamResponseRefusalDelta,
|
||||
OpenAIResponseObjectStreamResponseRefusalDone,
|
||||
OpenAIResponseOutput,
|
||||
OpenAIResponseOutputMessageContentOutputText,
|
||||
OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
OpenAIResponseOutputMessageFunctionToolCall,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageMCPListTools,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
OpenAIResponseText,
|
||||
OpenAIResponseUsage,
|
||||
OpenAIResponseUsageInputTokensDetails,
|
||||
|
|
@ -61,10 +66,15 @@ from llama_stack.apis.inference import (
|
|||
OpenAIMessageParam,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
||||
from llama_stack.providers.utils.telemetry import tracing
|
||||
|
||||
from .types import ChatCompletionContext, ChatCompletionResult
|
||||
from .utils import convert_chat_choice_to_response_message, is_function_tool_call
|
||||
from .utils import (
|
||||
convert_chat_choice_to_response_message,
|
||||
is_function_tool_call,
|
||||
run_guardrails,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="agents::meta_reference")
|
||||
|
||||
|
|
@ -100,6 +110,9 @@ class StreamingResponseOrchestrator:
|
|||
text: OpenAIResponseText,
|
||||
max_infer_iters: int,
|
||||
tool_executor, # Will be the tool execution logic from the main class
|
||||
instructions: str,
|
||||
safety_api,
|
||||
guardrail_ids: list[str] | None = None,
|
||||
):
|
||||
self.inference_api = inference_api
|
||||
self.ctx = ctx
|
||||
|
|
@ -108,6 +121,8 @@ class StreamingResponseOrchestrator:
|
|||
self.text = text
|
||||
self.max_infer_iters = max_infer_iters
|
||||
self.tool_executor = tool_executor
|
||||
self.safety_api = safety_api
|
||||
self.guardrail_ids = guardrail_ids or []
|
||||
self.sequence_number = 0
|
||||
# Store MCP tool mapping that gets built during tool processing
|
||||
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ctx.tool_context.previous_tools or {}
|
||||
|
|
@ -117,6 +132,25 @@ class StreamingResponseOrchestrator:
|
|||
self.citation_files: dict[str, str] = {}
|
||||
# Track accumulated usage across all inference calls
|
||||
self.accumulated_usage: OpenAIResponseUsage | None = None
|
||||
# Track if we've sent a refusal response
|
||||
self.violation_detected = False
|
||||
# system message that is inserted into the model's context
|
||||
self.instructions = instructions
|
||||
|
||||
async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
|
||||
"""Create a refusal response to replace streaming content."""
|
||||
refusal_content = OpenAIResponseContentPartRefusal(refusal=violation_message)
|
||||
|
||||
# Create a completed refusal response
|
||||
refusal_response = OpenAIResponseObject(
|
||||
id=self.response_id,
|
||||
created_at=self.created_at,
|
||||
model=self.ctx.model,
|
||||
status="completed",
|
||||
output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
|
||||
)
|
||||
|
||||
return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
|
||||
|
||||
def _clone_outputs(self, outputs: list[OpenAIResponseOutput]) -> list[OpenAIResponseOutput]:
|
||||
cloned: list[OpenAIResponseOutput] = []
|
||||
|
|
@ -145,6 +179,7 @@ class StreamingResponseOrchestrator:
|
|||
tools=self.ctx.available_tools(),
|
||||
error=error,
|
||||
usage=self.accumulated_usage,
|
||||
instructions=self.instructions,
|
||||
)
|
||||
|
||||
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||
|
|
@ -161,6 +196,15 @@ class StreamingResponseOrchestrator:
|
|||
sequence_number=self.sequence_number,
|
||||
)
|
||||
|
||||
# Input safety validation - check messages before processing
|
||||
if self.guardrail_ids:
|
||||
combined_text = interleaved_content_as_str([msg.content for msg in self.ctx.messages])
|
||||
input_violation_message = await run_guardrails(self.safety_api, combined_text, self.guardrail_ids)
|
||||
if input_violation_message:
|
||||
logger.info(f"Input guardrail violation: {input_violation_message}")
|
||||
yield await self._create_refusal_response(input_violation_message)
|
||||
return
|
||||
|
||||
async for stream_event in self._process_tools(output_messages):
|
||||
yield stream_event
|
||||
|
||||
|
|
@ -175,6 +219,7 @@ class StreamingResponseOrchestrator:
|
|||
# (some providers don't support non-empty response_format when tools are present)
|
||||
response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
|
||||
logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
|
||||
|
||||
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||
model=self.ctx.model,
|
||||
messages=messages,
|
||||
|
|
@ -195,6 +240,11 @@ class StreamingResponseOrchestrator:
|
|||
completion_result_data = stream_event_or_result
|
||||
else:
|
||||
yield stream_event_or_result
|
||||
|
||||
# If violation detected, skip the rest of processing since we already sent refusal
|
||||
if self.violation_detected:
|
||||
return
|
||||
|
||||
if not completion_result_data:
|
||||
raise ValueError("Streaming chunk processor failed to return completion data")
|
||||
last_completion_result = completion_result_data
|
||||
|
|
@ -500,6 +550,7 @@ class StreamingResponseOrchestrator:
|
|||
# Track tool call items for streaming events
|
||||
tool_call_item_ids: dict[int, str] = {}
|
||||
# Track content parts for streaming events
|
||||
message_item_added_emitted = False
|
||||
content_part_emitted = False
|
||||
reasoning_part_emitted = False
|
||||
refusal_part_emitted = False
|
||||
|
|
@ -518,9 +569,29 @@ class StreamingResponseOrchestrator:
|
|||
# Accumulate usage from chunks (typically in final chunk with stream_options)
|
||||
self._accumulate_chunk_usage(chunk)
|
||||
|
||||
# Track deltas for this specific chunk for guardrail validation
|
||||
chunk_events: list[OpenAIResponseObjectStream] = []
|
||||
|
||||
for chunk_choice in chunk.choices:
|
||||
# Emit incremental text content as delta events
|
||||
if chunk_choice.delta.content:
|
||||
# Emit output_item.added for the message on first content
|
||||
if not message_item_added_emitted:
|
||||
message_item_added_emitted = True
|
||||
self.sequence_number += 1
|
||||
message_item = OpenAIResponseMessage(
|
||||
id=message_item_id,
|
||||
content=[],
|
||||
role="assistant",
|
||||
status="in_progress",
|
||||
)
|
||||
yield OpenAIResponseObjectStreamResponseOutputItemAdded(
|
||||
response_id=self.response_id,
|
||||
item=message_item,
|
||||
output_index=message_output_index,
|
||||
sequence_number=self.sequence_number,
|
||||
)
|
||||
|
||||
# Emit content_part.added event for first text chunk
|
||||
if not content_part_emitted:
|
||||
content_part_emitted = True
|
||||
|
|
@ -536,13 +607,19 @@ class StreamingResponseOrchestrator:
|
|||
sequence_number=self.sequence_number,
|
||||
)
|
||||
self.sequence_number += 1
|
||||
yield OpenAIResponseObjectStreamResponseOutputTextDelta(
|
||||
|
||||
text_delta_event = OpenAIResponseObjectStreamResponseOutputTextDelta(
|
||||
content_index=content_index,
|
||||
delta=chunk_choice.delta.content,
|
||||
item_id=message_item_id,
|
||||
output_index=message_output_index,
|
||||
sequence_number=self.sequence_number,
|
||||
)
|
||||
# Buffer text delta events for guardrail check
|
||||
if self.guardrail_ids:
|
||||
chunk_events.append(text_delta_event)
|
||||
else:
|
||||
yield text_delta_event
|
||||
|
||||
# Collect content for final response
|
||||
chat_response_content.append(chunk_choice.delta.content or "")
|
||||
|
|
@ -558,7 +635,11 @@ class StreamingResponseOrchestrator:
|
|||
message_item_id=message_item_id,
|
||||
message_output_index=message_output_index,
|
||||
):
|
||||
yield event
|
||||
# Buffer reasoning events for guardrail check
|
||||
if self.guardrail_ids:
|
||||
chunk_events.append(event)
|
||||
else:
|
||||
yield event
|
||||
reasoning_part_emitted = True
|
||||
reasoning_text_accumulated.append(chunk_choice.delta.reasoning_content)
|
||||
|
||||
|
|
@ -593,19 +674,22 @@ class StreamingResponseOrchestrator:
|
|||
|
||||
# Emit output_item.added event for the new function call
|
||||
self.sequence_number += 1
|
||||
function_call_item = OpenAIResponseOutputMessageFunctionToolCall(
|
||||
arguments="", # Will be filled incrementally via delta events
|
||||
call_id=tool_call.id or "",
|
||||
name=tool_call.function.name if tool_call.function else "",
|
||||
id=tool_call_item_id,
|
||||
status="in_progress",
|
||||
)
|
||||
yield OpenAIResponseObjectStreamResponseOutputItemAdded(
|
||||
response_id=self.response_id,
|
||||
item=function_call_item,
|
||||
output_index=len(output_messages),
|
||||
sequence_number=self.sequence_number,
|
||||
)
|
||||
is_mcp_tool = tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server
|
||||
if not is_mcp_tool and tool_call.function.name not in ["web_search", "knowledge_search"]:
|
||||
# for MCP tools (and even other non-function tools) we emit an output message item later
|
||||
function_call_item = OpenAIResponseOutputMessageFunctionToolCall(
|
||||
arguments="", # Will be filled incrementally via delta events
|
||||
call_id=tool_call.id or "",
|
||||
name=tool_call.function.name if tool_call.function else "",
|
||||
id=tool_call_item_id,
|
||||
status="in_progress",
|
||||
)
|
||||
yield OpenAIResponseObjectStreamResponseOutputItemAdded(
|
||||
response_id=self.response_id,
|
||||
item=function_call_item,
|
||||
output_index=len(output_messages),
|
||||
sequence_number=self.sequence_number,
|
||||
)
|
||||
|
||||
# Stream tool call arguments as they arrive (differentiate between MCP and function calls)
|
||||
if tool_call.function and tool_call.function.arguments:
|
||||
|
|
@ -637,6 +721,22 @@ class StreamingResponseOrchestrator:
|
|||
response_tool_call.function.arguments or ""
|
||||
) + tool_call.function.arguments
|
||||
|
||||
# Output Safety Validation for this chunk
|
||||
if self.guardrail_ids:
|
||||
# Check guardrails on accumulated text so far
|
||||
accumulated_text = "".join(chat_response_content)
|
||||
violation_message = await run_guardrails(self.safety_api, accumulated_text, self.guardrail_ids)
|
||||
if violation_message:
|
||||
logger.info(f"Output guardrail violation: {violation_message}")
|
||||
chunk_events.clear()
|
||||
yield await self._create_refusal_response(violation_message)
|
||||
self.violation_detected = True
|
||||
return
|
||||
else:
|
||||
# No violation detected, emit all content events for this chunk
|
||||
for event in chunk_events:
|
||||
yield event
|
||||
|
||||
# Emit arguments.done events for completed tool calls (differentiate between MCP and function calls)
|
||||
for tool_call_index in sorted(chat_response_tool_calls.keys()):
|
||||
tool_call = chat_response_tool_calls[tool_call_index]
|
||||
|
|
@ -700,6 +800,32 @@ class StreamingResponseOrchestrator:
|
|||
if chat_response_tool_calls:
|
||||
chat_response_content = []
|
||||
|
||||
# Emit output_item.done for message when we have content and no tool calls
|
||||
if message_item_added_emitted and not chat_response_tool_calls:
|
||||
content_parts = []
|
||||
if content_part_emitted:
|
||||
final_text = "".join(chat_response_content)
|
||||
content_parts.append(
|
||||
OpenAIResponseOutputMessageContentOutputText(
|
||||
text=final_text,
|
||||
annotations=[],
|
||||
)
|
||||
)
|
||||
|
||||
self.sequence_number += 1
|
||||
message_item = OpenAIResponseMessage(
|
||||
id=message_item_id,
|
||||
content=content_parts,
|
||||
role="assistant",
|
||||
status="completed",
|
||||
)
|
||||
yield OpenAIResponseObjectStreamResponseOutputItemDone(
|
||||
response_id=self.response_id,
|
||||
item=message_item,
|
||||
output_index=message_output_index,
|
||||
sequence_number=self.sequence_number,
|
||||
)
|
||||
|
||||
yield ChatCompletionResult(
|
||||
response_id=chat_response_id,
|
||||
content=chat_response_content,
|
||||
|
|
@ -760,6 +886,36 @@ class StreamingResponseOrchestrator:
|
|||
if not matching_item_id:
|
||||
matching_item_id = f"tc_{uuid.uuid4()}"
|
||||
|
||||
self.sequence_number += 1
|
||||
if tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server:
|
||||
item = OpenAIResponseOutputMessageMCPCall(
|
||||
arguments="",
|
||||
name=tool_call.function.name,
|
||||
id=matching_item_id,
|
||||
server_label=self.mcp_tool_to_server[tool_call.function.name].server_label,
|
||||
status="in_progress",
|
||||
)
|
||||
elif tool_call.function.name == "web_search":
|
||||
item = OpenAIResponseOutputMessageWebSearchToolCall(
|
||||
id=matching_item_id,
|
||||
status="in_progress",
|
||||
)
|
||||
elif tool_call.function.name == "knowledge_search":
|
||||
item = OpenAIResponseOutputMessageFileSearchToolCall(
|
||||
id=matching_item_id,
|
||||
status="in_progress",
|
||||
queries=[tool_call.function.arguments or ""],
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported tool call: {tool_call.function.name}")
|
||||
|
||||
yield OpenAIResponseObjectStreamResponseOutputItemAdded(
|
||||
response_id=self.response_id,
|
||||
item=item,
|
||||
output_index=len(output_messages),
|
||||
sequence_number=self.sequence_number,
|
||||
)
|
||||
|
||||
# Execute tool call with streaming
|
||||
tool_call_log = None
|
||||
tool_response_message = None
|
||||
|
|
@ -1018,7 +1174,11 @@ class StreamingResponseOrchestrator:
|
|||
self.sequence_number += 1
|
||||
yield OpenAIResponseObjectStreamResponseOutputItemAdded(
|
||||
response_id=self.response_id,
|
||||
item=mcp_list_message,
|
||||
item=OpenAIResponseOutputMessageMCPListTools(
|
||||
id=mcp_list_message.id,
|
||||
server_label=mcp_list_message.server_label,
|
||||
tools=[],
|
||||
),
|
||||
output_index=len(output_messages) - 1,
|
||||
sequence_number=self.sequence_number,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ class ToolExecutor:
|
|||
|
||||
# Build result messages from tool execution
|
||||
output_message, input_message = await self._build_result_messages(
|
||||
function, tool_call_id, tool_kwargs, ctx, error_exc, result, has_error, mcp_tool_to_server
|
||||
function, tool_call_id, item_id, tool_kwargs, ctx, error_exc, result, has_error, mcp_tool_to_server
|
||||
)
|
||||
|
||||
# Yield the final result
|
||||
|
|
@ -356,6 +356,7 @@ class ToolExecutor:
|
|||
self,
|
||||
function,
|
||||
tool_call_id: str,
|
||||
item_id: str,
|
||||
tool_kwargs: dict,
|
||||
ctx: ChatCompletionContext,
|
||||
error_exc: Exception | None,
|
||||
|
|
@ -375,7 +376,7 @@ class ToolExecutor:
|
|||
)
|
||||
|
||||
message = OpenAIResponseOutputMessageMCPCall(
|
||||
id=tool_call_id,
|
||||
id=item_id,
|
||||
arguments=function.arguments,
|
||||
name=function.name,
|
||||
server_label=mcp_tool_to_server[function.name].server_label,
|
||||
|
|
@ -389,14 +390,14 @@ class ToolExecutor:
|
|||
else:
|
||||
if function.name == "web_search":
|
||||
message = OpenAIResponseOutputMessageWebSearchToolCall(
|
||||
id=tool_call_id,
|
||||
id=item_id,
|
||||
status="completed",
|
||||
)
|
||||
if has_error:
|
||||
message.status = "failed"
|
||||
elif function.name == "knowledge_search":
|
||||
message = OpenAIResponseOutputMessageFileSearchToolCall(
|
||||
id=tool_call_id,
|
||||
id=item_id,
|
||||
queries=[tool_kwargs.get("query", "")],
|
||||
status="completed",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -4,9 +4,11 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from llama_stack.apis.agents.agents import ResponseGuardrailSpec
|
||||
from llama_stack.apis.agents.openai_responses import (
|
||||
OpenAIResponseAnnotationFileCitation,
|
||||
OpenAIResponseInput,
|
||||
|
|
@ -45,6 +47,7 @@ from llama_stack.apis.inference import (
|
|||
OpenAIToolMessageParam,
|
||||
OpenAIUserMessageParam,
|
||||
)
|
||||
from llama_stack.apis.safety import Safety
|
||||
|
||||
|
||||
async def convert_chat_choice_to_response_message(
|
||||
|
|
@ -240,7 +243,8 @@ async def convert_response_text_to_chat_response_format(
|
|||
raise ValueError(f"Unsupported text format: {text.format}")
|
||||
|
||||
|
||||
async def get_message_type_by_role(role: str):
|
||||
async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None:
|
||||
"""Get the appropriate OpenAI message parameter type for a given role."""
|
||||
role_to_type = {
|
||||
"user": OpenAIUserMessageParam,
|
||||
"system": OpenAISystemMessageParam,
|
||||
|
|
@ -307,3 +311,55 @@ def is_function_tool_call(
|
|||
if t.type == "function" and t.name == tool_call.function.name:
|
||||
return True
|
||||
return False


async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None:
    """Run guardrails against messages and return violation message if blocked."""
    if not messages:
        return None

    # Look up shields to get their provider_resource_id (actual model ID)
    model_ids = []
    shields_list = await safety_api.routing_table.list_shields()

    for guardrail_id in guardrail_ids:
        matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id]
        if matching_shields:
            model_id = matching_shields[0].provider_resource_id
            model_ids.append(model_id)
        else:
            raise ValueError(f"No shield found with identifier '{guardrail_id}'")

    guardrail_tasks = [safety_api.run_moderation(messages, model=model_id) for model_id in model_ids]
    responses = await asyncio.gather(*guardrail_tasks)

    for response in responses:
        for result in response.results:
            if result.flagged:
                message = result.user_message or "Content blocked by safety guardrails"
                flagged_categories = [cat for cat, flagged in result.categories.items() if flagged]
                violation_type = result.metadata.get("violation_type", []) if result.metadata else []

                if flagged_categories:
                    message += f" (flagged for: {', '.join(flagged_categories)})"
                if violation_type:
                    message += f" (violation type: {', '.join(violation_type)})"

                return message


def extract_guardrail_ids(guardrails: list | None) -> list[str]:
    """Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects."""
    if not guardrails:
        return []

    guardrail_ids = []
    for guardrail in guardrails:
        if isinstance(guardrail, str):
            guardrail_ids.append(guardrail)
        elif isinstance(guardrail, ResponseGuardrailSpec):
            guardrail_ids.append(guardrail.type)
        else:
            raise ValueError(f"Unknown guardrail format: {guardrail}, expected str or ResponseGuardrailSpec")

    return guardrail_ids
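A minimal usage sketch of the two helpers above; safety_api and the guardrail identifiers are placeholders for whatever shields are registered in a given stack:

import asyncio


async def screen_user_input(safety_api, text: str) -> None:
    # Hypothetical guardrail references: a plain shield identifier plus a spec object.
    guardrail_ids = extract_guardrail_ids(["llama-guard", ResponseGuardrailSpec(type="content-filter")])

    violation = await run_guardrails(safety_api, text, guardrail_ids)
    if violation:
        print(f"Blocked: {violation}")
    else:
        print("Input passed all guardrails")


# asyncio.run(screen_user_input(safety_api, "user supplied text"))  # safety_api comes from the running stack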
|
||||
|
|
|
|||
|
|
@ -6,13 +6,13 @@
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
|
||||
from llama_stack.core.storage.datatypes import KVStoreReference
|
||||
|
||||
|
||||
class ReferenceBatchesImplConfig(BaseModel):
|
||||
"""Configuration for the Reference Batches implementation."""
|
||||
|
||||
kvstore: KVStoreConfig = Field(
|
||||
kvstore: KVStoreReference = Field(
|
||||
description="Configuration for the key-value store backend.",
|
||||
)
|
||||
|
||||
|
|
@ -33,8 +33,8 @@ class ReferenceBatchesImplConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str) -> dict:
|
||||
return {
|
||||
"kvstore": SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name="batches.db",
|
||||
),
|
||||
"kvstore": KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="batches",
|
||||
).model_dump(exclude_none=True),
|
||||
}
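As a rough illustration of the change above, the new-style reference serializes to a small dict naming a shared backend rather than embedding a full SQLite configuration (exact defaulted fields depend on how KVStoreReference is defined):

from llama_stack.core.storage.datatypes import KVStoreReference  # import path as used above

ref = KVStoreReference(backend="kv_default", namespace="batches")
# Roughly {"backend": "kv_default", "namespace": "batches"}; the named backend is
# resolved to a concrete key-value store elsewhere in the stack configuration.
print(ref.model_dump(exclude_none=True))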
|
||||
|
|
|
|||
|
|
@ -7,20 +7,17 @@ from typing import Any
|
|||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.providers.utils.kvstore.config import (
|
||||
KVStoreConfig,
|
||||
SqliteKVStoreConfig,
|
||||
)
|
||||
from llama_stack.core.storage.datatypes import KVStoreReference
|
||||
|
||||
|
||||
class LocalFSDatasetIOConfig(BaseModel):
|
||||
kvstore: KVStoreConfig
|
||||
kvstore: KVStoreReference
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"kvstore": SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name="localfs_datasetio.db",
|
||||
)
|
||||
"kvstore": KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="datasetio::localfs",
|
||||
).model_dump(exclude_none=True)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,20 +7,17 @@ from typing import Any
|
|||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.providers.utils.kvstore.config import (
|
||||
KVStoreConfig,
|
||||
SqliteKVStoreConfig,
|
||||
)
|
||||
from llama_stack.core.storage.datatypes import KVStoreReference
|
||||
|
||||
|
||||
class MetaReferenceEvalConfig(BaseModel):
|
||||
kvstore: KVStoreConfig
|
||||
kvstore: KVStoreReference
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"kvstore": SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name="meta_reference_eval.db",
|
||||
)
|
||||
"kvstore": KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="eval",
|
||||
).model_dump(exclude_none=True)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,14 +8,14 @@ from typing import Any
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
|
||||
from llama_stack.core.storage.datatypes import SqlStoreReference
|
||||
|
||||
|
||||
class LocalfsFilesImplConfig(BaseModel):
|
||||
storage_dir: str = Field(
|
||||
description="Directory to store uploaded files",
|
||||
)
|
||||
metadata_store: SqlStoreConfig = Field(
|
||||
metadata_store: SqlStoreReference = Field(
|
||||
description="SQL store configuration for file metadata",
|
||||
)
|
||||
ttl_secs: int = 365 * 24 * 60 * 60 # 1 year
|
||||
|
|
@ -24,8 +24,8 @@ class LocalfsFilesImplConfig(BaseModel):
|
|||
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
|
||||
return {
|
||||
"storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}",
|
||||
"metadata_store": SqliteSqlStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name="files_metadata.db",
|
||||
),
|
||||
"metadata_store": SqlStoreReference(
|
||||
backend="sql_default",
|
||||
table_name="files_metadata",
|
||||
).model_dump(exclude_none=True),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,13 +9,10 @@ from typing import Any
|
|||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR
|
||||
|
||||
|
||||
class TelemetrySink(StrEnum):
|
||||
OTEL_TRACE = "otel_trace"
|
||||
OTEL_METRIC = "otel_metric"
|
||||
SQLITE = "sqlite"
|
||||
CONSOLE = "console"
|
||||
|
||||
|
||||
|
|
@ -30,12 +27,8 @@ class TelemetryConfig(BaseModel):
|
|||
description="The service name to use for telemetry",
|
||||
)
|
||||
sinks: list[TelemetrySink] = Field(
|
||||
default=[TelemetrySink.SQLITE],
|
||||
description="List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console)",
|
||||
)
|
||||
sqlite_db_path: str = Field(
|
||||
default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
|
||||
description="The path to the SQLite database to use for storing traces",
|
||||
default_factory=list,
|
||||
description="List of telemetry sinks to enable (possible values: otel_trace, otel_metric, console)",
|
||||
)
|
||||
|
||||
@field_validator("sinks", mode="before")
|
||||
|
|
@ -43,13 +36,12 @@ class TelemetryConfig(BaseModel):
|
|||
def validate_sinks(cls, v):
|
||||
if isinstance(v, str):
|
||||
return [TelemetrySink(sink.strip()) for sink in v.split(",")]
|
||||
return v
|
||||
return v or []
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
|
||||
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
|
||||
return {
|
||||
"service_name": "${env.OTEL_SERVICE_NAME:=\u200b}",
|
||||
"sinks": "${env.TELEMETRY_SINKS:=sqlite}",
|
||||
"sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
|
||||
"sinks": "${env.TELEMETRY_SINKS:=}",
|
||||
"otel_exporter_otlp_endpoint": "${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}",
|
||||
}
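A sketch of constructing the slimmed-down telemetry config directly, assuming otel_exporter_otlp_endpoint is the field referenced later in the adapter; the endpoint value is a placeholder:

# Uses TelemetryConfig / TelemetrySink from the config module above.
cfg = TelemetryConfig(
    service_name="my-service",
    sinks="otel_trace,otel_metric",  # validate_sinks() splits the comma-separated string
    otel_exporter_otlp_endpoint="http://localhost:4318",  # placeholder OTLP endpoint
)
print(cfg.sinks)  # [TelemetrySink.OTEL_TRACE, TelemetrySink.OTEL_METRIC]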
|
||||
|
|
|
|||
|
|
@ -1,75 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from opentelemetry.sdk.trace import ReadableSpan
|
||||
from opentelemetry.sdk.trace.export import SpanProcessor
|
||||
from opentelemetry.trace.status import StatusCode
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(name="console_span_processor", category="telemetry")
|
||||
|
||||
|
||||
class ConsoleSpanProcessor(SpanProcessor):
|
||||
def __init__(self, print_attributes: bool = False):
|
||||
self.print_attributes = print_attributes
|
||||
|
||||
def on_start(self, span: ReadableSpan, parent_context=None) -> None:
|
||||
if span.attributes and span.attributes.get("__autotraced__"):
|
||||
return
|
||||
|
||||
timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
|
||||
logger.info(f"[dim]{timestamp}[/dim] [bold magenta][START][/bold magenta] [dim]{span.name}[/dim]")
|
||||
|
||||
def on_end(self, span: ReadableSpan) -> None:
|
||||
timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
|
||||
span_context = f"[dim]{timestamp}[/dim] [bold magenta][END][/bold magenta] [dim]{span.name}[/dim]"
|
||||
if span.status.status_code == StatusCode.ERROR:
|
||||
span_context += " [bold red][ERROR][/bold red]"
|
||||
elif span.status.status_code != StatusCode.UNSET:
|
||||
span_context += f" [{span.status.status_code}]"
|
||||
duration_ms = (span.end_time - span.start_time) / 1e6
|
||||
span_context += f" ({duration_ms:.2f}ms)"
|
||||
logger.info(span_context)
|
||||
|
||||
if self.print_attributes and span.attributes:
|
||||
for key, value in span.attributes.items():
|
||||
if key.startswith("__"):
|
||||
continue
|
||||
str_value = str(value)
|
||||
if len(str_value) > 1000:
|
||||
str_value = str_value[:997] + "..."
|
||||
logger.info(f" [dim]{key}[/dim]: {str_value}")
|
||||
|
||||
for event in span.events:
|
||||
event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
|
||||
severity = event.attributes.get("severity", "info")
|
||||
message = event.attributes.get("message", event.name)
|
||||
if isinstance(message, dict) or isinstance(message, list):
|
||||
message = json.dumps(message, indent=2)
|
||||
severity_color = {
|
||||
"error": "red",
|
||||
"warn": "yellow",
|
||||
"info": "white",
|
||||
"debug": "dim",
|
||||
}.get(severity, "white")
|
||||
logger.info(f" {event_time} [bold {severity_color}][{severity.upper()}][/bold {severity_color}] {message}")
|
||||
if event.attributes:
|
||||
for key, value in event.attributes.items():
|
||||
if key.startswith("__") or key in ["message", "severity"]:
|
||||
continue
|
||||
logger.info(f"[dim]{key}[/dim]: {value}")
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""Shutdown the processor."""
|
||||
pass
|
||||
|
||||
def force_flush(self, timeout_millis: float | None = None) -> bool:
|
||||
"""Force flush any pending spans."""
|
||||
return True
|
||||
|
|
@ -1,190 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
import threading
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from opentelemetry.sdk.trace import SpanProcessor
|
||||
from opentelemetry.trace import Span
|
||||
from opentelemetry.trace.span import format_span_id, format_trace_id
|
||||
|
||||
from llama_stack.providers.utils.telemetry.tracing import LOCAL_ROOT_SPAN_MARKER
|
||||
|
||||
|
||||
class SQLiteSpanProcessor(SpanProcessor):
|
||||
def __init__(self, conn_string):
|
||||
"""Initialize the SQLite span processor with a connection string."""
|
||||
self.conn_string = conn_string
|
||||
self._local = threading.local() # Thread-local storage for connections
|
||||
self.setup_database()
|
||||
|
||||
def _get_connection(self):
|
||||
"""Get a thread-local database connection."""
|
||||
if not hasattr(self._local, "conn"):
|
||||
try:
|
||||
self._local.conn = sqlite3.connect(self.conn_string)
|
||||
except Exception as e:
|
||||
print(f"Error connecting to SQLite database: {e}")
|
||||
raise
|
||||
return self._local.conn
|
||||
|
||||
def setup_database(self):
|
||||
"""Create the necessary tables if they don't exist."""
|
||||
# Create directory if it doesn't exist
|
||||
os.makedirs(os.path.dirname(self.conn_string), exist_ok=True)
|
||||
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS traces (
|
||||
trace_id TEXT PRIMARY KEY,
|
||||
service_name TEXT,
|
||||
root_span_id TEXT,
|
||||
start_time TIMESTAMP,
|
||||
end_time TIMESTAMP,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS spans (
|
||||
span_id TEXT PRIMARY KEY,
|
||||
trace_id TEXT REFERENCES traces(trace_id),
|
||||
parent_span_id TEXT,
|
||||
name TEXT,
|
||||
start_time TIMESTAMP,
|
||||
end_time TIMESTAMP,
|
||||
attributes TEXT,
|
||||
status TEXT,
|
||||
kind TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS span_events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
span_id TEXT REFERENCES spans(span_id),
|
||||
name TEXT,
|
||||
timestamp TIMESTAMP,
|
||||
attributes TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_traces_created_at
|
||||
ON traces(created_at)
|
||||
"""
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
|
||||
def on_start(self, span: Span, parent_context=None):
|
||||
"""Called when a span starts."""
|
||||
pass
|
||||
|
||||
def on_end(self, span: Span):
|
||||
"""Called when a span ends. Export the span data to SQLite."""
|
||||
try:
|
||||
conn = self._get_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
trace_id = format_trace_id(span.get_span_context().trace_id)
|
||||
span_id = format_span_id(span.get_span_context().span_id)
|
||||
service_name = span.resource.attributes.get("service.name", "unknown")
|
||||
|
||||
parent_span_id = None
|
||||
parent_context = span.parent
|
||||
if parent_context:
|
||||
parent_span_id = format_span_id(parent_context.span_id)
|
||||
|
||||
# Insert into traces
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO traces (
|
||||
trace_id, service_name, root_span_id, start_time, end_time
|
||||
) VALUES (?, ?, ?, ?, ?)
|
||||
ON CONFLICT(trace_id) DO UPDATE SET
|
||||
root_span_id = COALESCE(root_span_id, excluded.root_span_id),
|
||||
start_time = MIN(excluded.start_time, start_time),
|
||||
end_time = MAX(excluded.end_time, end_time)
|
||||
""",
|
||||
(
|
||||
trace_id,
|
||||
service_name,
|
||||
(span_id if span.attributes.get(LOCAL_ROOT_SPAN_MARKER) else None),
|
||||
datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(),
|
||||
datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(),
|
||||
),
|
||||
)
|
||||
|
||||
# Insert into spans
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO spans (
|
||||
span_id, trace_id, parent_span_id, name,
|
||||
start_time, end_time, attributes, status,
|
||||
kind
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
span_id,
|
||||
trace_id,
|
||||
parent_span_id,
|
||||
span.name,
|
||||
datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(),
|
||||
datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(),
|
||||
json.dumps(dict(span.attributes)),
|
||||
span.status.status_code.name,
|
||||
span.kind.name,
|
||||
),
|
||||
)
|
||||
|
||||
for event in span.events:
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO span_events (
|
||||
span_id, name, timestamp, attributes
|
||||
) VALUES (?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
span_id,
|
||||
event.name,
|
||||
datetime.fromtimestamp(event.timestamp / 1e9, UTC).isoformat(),
|
||||
json.dumps(dict(event.attributes)),
|
||||
),
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
except Exception as e:
|
||||
print(f"Error exporting span to SQLite: {e}")
|
||||
|
||||
def shutdown(self):
|
||||
"""Cleanup any resources."""
|
||||
# We can't access other threads' connections, so we just close our own
|
||||
if hasattr(self._local, "conn"):
|
||||
try:
|
||||
self._local.conn.close()
|
||||
except Exception as e:
|
||||
print(f"Error closing SQLite connection: {e}")
|
||||
finally:
|
||||
del self._local.conn
|
||||
|
||||
def force_flush(self, timeout_millis=30000):
|
||||
"""Force export of spans."""
|
||||
pass
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -13,43 +13,25 @@ from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExp
|
|||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
||||
from opentelemetry.semconv.resource import ResourceAttributes
|
||||
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
|
||||
|
||||
from llama_stack.apis.telemetry import (
|
||||
Event,
|
||||
MetricEvent,
|
||||
MetricLabelMatcher,
|
||||
MetricQueryType,
|
||||
QueryCondition,
|
||||
QueryMetricsResponse,
|
||||
QuerySpanTreeResponse,
|
||||
QueryTracesResponse,
|
||||
Span,
|
||||
SpanEndPayload,
|
||||
SpanStartPayload,
|
||||
SpanStatus,
|
||||
StructuredLogEvent,
|
||||
Telemetry,
|
||||
Trace,
|
||||
UnstructuredLogEvent,
|
||||
)
|
||||
from llama_stack.core.datatypes import Api
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
|
||||
ConsoleSpanProcessor,
|
||||
)
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor import (
|
||||
SQLiteSpanProcessor,
|
||||
)
|
||||
from llama_stack.providers.utils.telemetry.dataset_mixin import TelemetryDatasetMixin
|
||||
from llama_stack.providers.utils.telemetry.sqlite_trace_store import SQLiteTraceStore
|
||||
from llama_stack.providers.utils.telemetry.tracing import ROOT_SPAN_MARKERS
|
||||
|
||||
from .config import TelemetryConfig, TelemetrySink
|
||||
from .config import TelemetryConfig
|
||||
|
||||
_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
|
||||
"active_spans": {},
|
||||
|
|
@ -68,66 +50,49 @@ def is_tracing_enabled(tracer):
|
|||
return span.is_recording()
|
||||
|
||||
|
||||
class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
|
||||
def __init__(self, config: TelemetryConfig, deps: dict[Api, Any]) -> None:
|
||||
self.config = config
|
||||
class TelemetryAdapter(Telemetry):
|
||||
def __init__(self, _config: TelemetryConfig, deps: dict[Api, Any]) -> None:
|
||||
self.datasetio_api = deps.get(Api.datasetio)
|
||||
self.meter = None
|
||||
|
||||
resource = Resource.create(
|
||||
{
|
||||
ResourceAttributes.SERVICE_NAME: self.config.service_name,
|
||||
}
|
||||
)
|
||||
|
||||
global _TRACER_PROVIDER
|
||||
# Initialize the correct span processor based on the provider state.
|
||||
# This is needed since once the span processor is set, it cannot be unset.
|
||||
# Recreating the telemetry adapter multiple times will result in duplicate span processors.
|
||||
# Since the library client can be recreated multiple times in a notebook,
|
||||
# the kernel will hold on to the span processor and cause duplicate spans to be written.
|
||||
if _TRACER_PROVIDER is None:
|
||||
provider = TracerProvider(resource=resource)
|
||||
trace.set_tracer_provider(provider)
|
||||
_TRACER_PROVIDER = provider
|
||||
if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
|
||||
if _TRACER_PROVIDER is None:
|
||||
provider = TracerProvider()
|
||||
trace.set_tracer_provider(provider)
|
||||
_TRACER_PROVIDER = provider
|
||||
|
||||
# Use single OTLP endpoint for all telemetry signals
|
||||
if TelemetrySink.OTEL_TRACE in self.config.sinks or TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
if self.config.otel_exporter_otlp_endpoint is None:
|
||||
raise ValueError(
|
||||
"otel_exporter_otlp_endpoint is required when OTEL_TRACE or OTEL_METRIC is enabled"
|
||||
)
|
||||
# Use single OTLP endpoint for all telemetry signals
|
||||
|
||||
# Let OpenTelemetry SDK handle endpoint construction automatically
|
||||
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
|
||||
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
|
||||
if TelemetrySink.OTEL_TRACE in self.config.sinks:
|
||||
span_exporter = OTLPSpanExporter()
|
||||
span_processor = BatchSpanProcessor(span_exporter)
|
||||
trace.get_tracer_provider().add_span_processor(span_processor)
|
||||
span_exporter = OTLPSpanExporter()
|
||||
span_processor = BatchSpanProcessor(span_exporter)
|
||||
trace.get_tracer_provider().add_span_processor(span_processor)
|
||||
|
||||
if TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
|
||||
metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(metric_provider)
|
||||
|
||||
if TelemetrySink.SQLITE in self.config.sinks:
|
||||
trace.get_tracer_provider().add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path))
|
||||
if TelemetrySink.CONSOLE in self.config.sinks:
|
||||
trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor(print_attributes=True))
|
||||
|
||||
if TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
self.meter = metrics.get_meter(__name__)
|
||||
if TelemetrySink.SQLITE in self.config.sinks:
|
||||
self.trace_store = SQLiteTraceStore(self.config.sqlite_db_path)
|
||||
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
|
||||
metric_provider = MeterProvider(metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(metric_provider)
|
||||
self.is_otel_endpoint_set = True
|
||||
else:
|
||||
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
|
||||
self.is_otel_endpoint_set = False
|
||||
|
||||
self.meter = metrics.get_meter(__name__)
|
||||
self._lock = _global_lock
|
||||
|
||||
async def initialize(self) -> None:
|
||||
pass
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
trace.get_tracer_provider().force_flush()
|
||||
if self.is_otel_endpoint_set:
|
||||
trace.get_tracer_provider().force_flush()
|
||||
|
||||
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
|
||||
if isinstance(event, UnstructuredLogEvent):
|
||||
|
|
@ -139,47 +104,6 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
|
|||
else:
|
||||
raise ValueError(f"Unknown event type: {event}")
|
||||
|
||||
async def query_metrics(
|
||||
self,
|
||||
metric_name: str,
|
||||
start_time: int,
|
||||
end_time: int | None = None,
|
||||
granularity: str | None = None,
|
||||
query_type: MetricQueryType = MetricQueryType.RANGE,
|
||||
label_matchers: list[MetricLabelMatcher] | None = None,
|
||||
) -> QueryMetricsResponse:
|
||||
"""Query metrics from the telemetry store.
|
||||
|
||||
Args:
|
||||
metric_name: The name of the metric to query (e.g., "prompt_tokens")
|
||||
start_time: Start time as Unix timestamp
|
||||
end_time: End time as Unix timestamp (defaults to now if None)
|
||||
granularity: Time granularity for aggregation
|
||||
query_type: Type of query (RANGE or INSTANT)
|
||||
label_matchers: Label filters to apply
|
||||
|
||||
Returns:
|
||||
QueryMetricsResponse with metric time series data
|
||||
"""
|
||||
# Convert timestamps to datetime objects
|
||||
start_dt = datetime.datetime.fromtimestamp(start_time, datetime.UTC)
|
||||
end_dt = datetime.datetime.fromtimestamp(end_time, datetime.UTC) if end_time else None
|
||||
|
||||
# Use SQLite trace store if available
|
||||
if hasattr(self, "trace_store") and self.trace_store:
|
||||
return await self.trace_store.query_metrics(
|
||||
metric_name=metric_name,
|
||||
start_time=start_dt,
|
||||
end_time=end_dt,
|
||||
granularity=granularity,
|
||||
query_type=query_type,
|
||||
label_matchers=label_matchers,
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"In order to query_metrics, you must have {TelemetrySink.SQLITE} set in your telemetry sinks"
|
||||
)
|
||||
|
||||
def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
|
||||
with self._lock:
|
||||
# Use global storage instead of instance storage
|
||||
|
|
@ -326,39 +250,3 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
|
|||
_GLOBAL_STORAGE["active_spans"].pop(span_id, None)
|
||||
else:
|
||||
raise ValueError(f"Unknown structured log event: {event}")
|
||||
|
||||
async def query_traces(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition] | None = None,
|
||||
limit: int | None = 100,
|
||||
offset: int | None = 0,
|
||||
order_by: list[str] | None = None,
|
||||
) -> QueryTracesResponse:
|
||||
return QueryTracesResponse(
|
||||
data=await self.trace_store.query_traces(
|
||||
attribute_filters=attribute_filters,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
order_by=order_by,
|
||||
)
|
||||
)
|
||||
|
||||
async def get_trace(self, trace_id: str) -> Trace:
|
||||
return await self.trace_store.get_trace(trace_id)
|
||||
|
||||
async def get_span(self, trace_id: str, span_id: str) -> Span:
|
||||
return await self.trace_store.get_span(trace_id, span_id)
|
||||
|
||||
async def get_span_tree(
|
||||
self,
|
||||
span_id: str,
|
||||
attributes_to_return: list[str] | None = None,
|
||||
max_depth: int | None = None,
|
||||
) -> QuerySpanTreeResponse:
|
||||
return QuerySpanTreeResponse(
|
||||
data=await self.trace_store.get_span_tree(
|
||||
span_id=span_id,
|
||||
attributes_to_return=attributes_to_return,
|
||||
max_depth=max_depth,
|
||||
)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -272,7 +272,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|||
return RAGQueryResult(
|
||||
content=picked,
|
||||
metadata={
|
||||
"document_ids": [c.metadata["document_id"] for c in chunks[: len(picked)]],
|
||||
"document_ids": [c.document_id for c in chunks[: len(picked)]],
|
||||
"chunks": [c.content for c in chunks[: len(picked)]],
|
||||
"scores": scores[: len(picked)],
|
||||
"vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
|
||||
|
|
|
|||
|
|
@ -12,9 +12,7 @@ from .config import ChromaVectorIOConfig
|
|||
|
||||
|
||||
async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]):
|
||||
from llama_stack.providers.remote.vector_io.chroma.chroma import (
|
||||
ChromaVectorIOAdapter,
|
||||
)
|
||||
from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter
|
||||
|
||||
impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
|
||||
await impl.initialize()
|
||||
|
|
|
|||
|
|
@ -8,14 +8,14 @@ from typing import Any
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
|
||||
from llama_stack.core.storage.datatypes import KVStoreReference
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ChromaVectorIOConfig(BaseModel):
|
||||
db_path: str
|
||||
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
|
||||
persistence: KVStoreReference = Field(description="Config for KV store backend")
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(
|
||||
|
|
@ -23,8 +23,8 @@ class ChromaVectorIOConfig(BaseModel):
|
|||
) -> dict[str, Any]:
|
||||
return {
|
||||
"db_path": db_path,
|
||||
"kvstore": SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name="chroma_inline_registry.db",
|
||||
),
|
||||
"persistence": KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="vector_io::chroma",
|
||||
).model_dump(exclude_none=True),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
|
|||
|
||||
assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"
|
||||
|
||||
impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None))
|
||||
impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -8,22 +8,19 @@ from typing import Any
|
|||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.providers.utils.kvstore.config import (
|
||||
KVStoreConfig,
|
||||
SqliteKVStoreConfig,
|
||||
)
|
||||
from llama_stack.core.storage.datatypes import KVStoreReference
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class FaissVectorIOConfig(BaseModel):
|
||||
kvstore: KVStoreConfig
|
||||
persistence: KVStoreReference
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"kvstore": SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name="faiss_store.db",
|
||||
)
|
||||
"persistence": KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="vector_io::faiss",
|
||||
).model_dump(exclude_none=True)
|
||||
}
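Assuming the fields shown above, constructing the migrated Faiss provider config directly is just:

# persistence replaces the old kvstore field; values mirror sample_run_config above.
cfg = FaissVectorIOConfig(
    persistence=KVStoreReference(backend="kv_default", namespace="vector_io::faiss"),
)
# The adapter later resolves this reference via kvstore_impl(self.config.persistence).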
|
||||
|
|
|
|||
|
|
@ -17,33 +17,21 @@ from numpy.typing import NDArray
|
|||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference, InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import (
|
||||
Chunk,
|
||||
QueryChunksResponse,
|
||||
VectorIO,
|
||||
)
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import (
|
||||
HealthResponse,
|
||||
HealthStatus,
|
||||
VectorDBsProtocolPrivate,
|
||||
)
|
||||
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from llama_stack.providers.utils.kvstore.api import KVStore
|
||||
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
ChunkForDeletion,
|
||||
EmbeddingIndex,
|
||||
VectorDBWithIndex,
|
||||
)
|
||||
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
||||
|
||||
from .config import FaissVectorIOConfig
|
||||
|
||||
logger = get_logger(name=__name__, category="vector_io")
|
||||
|
||||
VERSION = "v3"
|
||||
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
|
||||
VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
|
||||
FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
|
||||
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
|
||||
|
|
@ -154,12 +142,7 @@ class FaissIndex(EmbeddingIndex):
|
|||
|
||||
await self._save_index()
|
||||
|
||||
async def query_vector(
|
||||
self,
|
||||
embedding: NDArray,
|
||||
k: int,
|
||||
score_threshold: float,
|
||||
) -> QueryChunksResponse:
|
||||
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
|
||||
distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
|
||||
chunks = []
|
||||
scores = []
|
||||
|
|
@ -174,12 +157,7 @@ class FaissIndex(EmbeddingIndex):
|
|||
|
||||
return QueryChunksResponse(chunks=chunks, scores=scores)
|
||||
|
||||
async def query_keyword(
|
||||
self,
|
||||
query_string: str,
|
||||
k: int,
|
||||
score_threshold: float,
|
||||
) -> QueryChunksResponse:
|
||||
async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
|
||||
raise NotImplementedError(
|
||||
"Keyword search is not supported - underlying DB FAISS does not support this search mode"
|
||||
)
|
||||
|
|
@ -198,28 +176,28 @@ class FaissIndex(EmbeddingIndex):
|
|||
)
|
||||
|
||||
|
||||
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
|
||||
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
|
||||
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
|
||||
super().__init__(files_api=files_api, kvstore=None)
|
||||
self.config = config
|
||||
self.inference_api = inference_api
|
||||
self.cache: dict[str, VectorDBWithIndex] = {}
|
||||
self.cache: dict[str, VectorStoreWithIndex] = {}
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.kvstore = await kvstore_impl(self.config.kvstore)
|
||||
self.kvstore = await kvstore_impl(self.config.persistence)
|
||||
# Load existing banks from kvstore
|
||||
start_key = VECTOR_DBS_PREFIX
|
||||
end_key = f"{VECTOR_DBS_PREFIX}\xff"
|
||||
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
|
||||
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
|
||||
|
||||
for vector_db_data in stored_vector_dbs:
|
||||
vector_db = VectorDB.model_validate_json(vector_db_data)
|
||||
index = VectorDBWithIndex(
|
||||
vector_db,
|
||||
await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
|
||||
for vector_store_data in stored_vector_stores:
|
||||
vector_store = VectorStore.model_validate_json(vector_store_data)
|
||||
index = VectorStoreWithIndex(
|
||||
vector_store,
|
||||
await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
|
||||
self.inference_api,
|
||||
)
|
||||
self.cache[vector_db.identifier] = index
|
||||
self.cache[vector_store.identifier] = index
|
||||
|
||||
# Load existing OpenAI vector stores into the in-memory cache
|
||||
await self.initialize_openai_vector_stores()
|
||||
|
|
@ -244,45 +222,33 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
|
|||
except Exception as e:
|
||||
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
|
||||
|
||||
async def register_vector_db(
|
||||
self,
|
||||
vector_db: VectorDB,
|
||||
) -> None:
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
||||
assert self.kvstore is not None
|
||||
|
||||
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
|
||||
await self.kvstore.set(
|
||||
key=key,
|
||||
value=vector_db.model_dump_json(),
|
||||
)
|
||||
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
||||
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
||||
|
||||
# Store in cache
|
||||
self.cache[vector_db.identifier] = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
|
||||
self.cache[vector_store.identifier] = VectorStoreWithIndex(
|
||||
vector_store=vector_store,
|
||||
index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
|
||||
async def list_vector_dbs(self) -> list[VectorDB]:
|
||||
return [i.vector_db for i in self.cache.values()]
|
||||
async def list_vector_stores(self) -> list[VectorStore]:
|
||||
return [i.vector_store for i in self.cache.values()]
|
||||
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
||||
assert self.kvstore is not None
|
||||
|
||||
if vector_db_id not in self.cache:
|
||||
logger.warning(f"Vector DB {vector_db_id} not found")
|
||||
if vector_store_id not in self.cache:
|
||||
return
|
||||
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
|
||||
await self.cache[vector_store_id].index.delete()
|
||||
del self.cache[vector_store_id]
|
||||
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
||||
|
||||
async def insert_chunks(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
chunks: list[Chunk],
|
||||
ttl_seconds: int | None = None,
|
||||
) -> None:
|
||||
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
||||
index = self.cache.get(vector_db_id)
|
||||
if index is None:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
|
||||
|
|
@ -290,10 +256,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
|
|||
await index.insert_chunks(chunks)
|
||||
|
||||
async def query_chunks(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
query: InterleavedContent,
|
||||
params: dict[str, Any] | None = None,
|
||||
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
||||
) -> QueryChunksResponse:
|
||||
index = self.cache.get(vector_db_id)
|
||||
if index is None:
|
||||
|
|
|
|||
|
|
@ -14,6 +14,6 @@ from .config import MilvusVectorIOConfig
|
|||
async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]):
|
||||
from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter
|
||||
|
||||
impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None))
|
||||
impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||