Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-04 02:03:44 +00:00)

Commit 893d49c59e: Merge branch 'main' into feat/gunicorn-production-server
2086 changed files with 133277 additions and 643859 deletions

@@ -3,8 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.core.library_client import (  # noqa: F401
    AsyncLlamaStackAsLibraryClient,
    LlamaStackAsLibraryClient,
)
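For orientation, a minimal usage sketch for the client referenced in the hunk above; the distro name is a placeholder, and the initialize() call is an assumption based on the client's documented usage rather than anything shown in this diff:

    from llama_stack.core.library_client import LlamaStackAsLibraryClient

    client = LlamaStackAsLibraryClient("starter")  # distro name is a placeholder
    client.initialize()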
@@ -1,814 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections.abc import AsyncIterator
from datetime import datetime
from enum import StrEnum
from typing import Annotated, Any, Literal, Protocol, runtime_checkable

from pydantic import BaseModel, ConfigDict, Field

from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent
from llama_stack.apis.common.responses import Order, PaginatedResponse
from llama_stack.apis.inference import (
    CompletionMessage,
    ResponseFormat,
    SamplingParams,
    ToolCall,
    ToolChoice,
    ToolConfig,
    ToolPromptFormat,
    ToolResponse,
    ToolResponseMessage,
    UserMessage,
)
from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod

from .openai_responses import (
    ListOpenAIResponseInputItem,
    ListOpenAIResponseObject,
    OpenAIDeleteResponseObject,
    OpenAIResponseInput,
    OpenAIResponseInputTool,
    OpenAIResponseObject,
    OpenAIResponseObjectStream,
    OpenAIResponsePrompt,
    OpenAIResponseText,
)


@json_schema_type
class ResponseGuardrailSpec(BaseModel):
    """Specification for a guardrail to apply during response generation.

    :param type: The type/identifier of the guardrail.
    """

    type: str
    # TODO: more fields to be added for guardrail configuration


ResponseGuardrail = str | ResponseGuardrailSpec


class Attachment(BaseModel):
    """An attachment to an agent turn.

    :param content: The content of the attachment.
    :param mime_type: The MIME type of the attachment.
    """

    content: InterleavedContent | URL
    mime_type: str


class Document(BaseModel):
    """A document to be used by an agent.

    :param content: The content of the document.
    :param mime_type: The MIME type of the document.
    """

    content: InterleavedContent | URL
    mime_type: str


class StepCommon(BaseModel):
    """A common step in an agent turn.

    :param turn_id: The ID of the turn.
    :param step_id: The ID of the step.
    :param started_at: The time the step started.
    :param completed_at: The time the step completed.
    """

    turn_id: str
    step_id: str
    started_at: datetime | None = None
    completed_at: datetime | None = None


class StepType(StrEnum):
    """Type of the step in an agent turn.

    :cvar inference: The step is an inference step that calls an LLM.
    :cvar tool_execution: The step is a tool execution step that executes a tool call.
    :cvar shield_call: The step is a shield call step that checks for safety violations.
    :cvar memory_retrieval: The step is a memory retrieval step that retrieves context from vector DBs.
    """

    inference = "inference"
    tool_execution = "tool_execution"
    shield_call = "shield_call"
    memory_retrieval = "memory_retrieval"


@json_schema_type
class InferenceStep(StepCommon):
    """An inference step in an agent turn.

    :param model_response: The response from the LLM.
    """

    model_config = ConfigDict(protected_namespaces=())

    step_type: Literal[StepType.inference] = StepType.inference
    model_response: CompletionMessage


@json_schema_type
class ToolExecutionStep(StepCommon):
    """A tool execution step in an agent turn.

    :param tool_calls: The tool calls to execute.
    :param tool_responses: The tool responses from the tool calls.
    """

    step_type: Literal[StepType.tool_execution] = StepType.tool_execution
    tool_calls: list[ToolCall]
    tool_responses: list[ToolResponse]


@json_schema_type
class ShieldCallStep(StepCommon):
    """A shield call step in an agent turn.

    :param violation: The violation from the shield call.
    """

    step_type: Literal[StepType.shield_call] = StepType.shield_call
    violation: SafetyViolation | None


@json_schema_type
class MemoryRetrievalStep(StepCommon):
    """A memory retrieval step in an agent turn.

    :param vector_store_ids: The IDs of the vector databases to retrieve context from.
    :param inserted_context: The context retrieved from the vector databases.
    """

    step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
    # TODO: should this be List[str]?
    vector_store_ids: str
    inserted_context: InterleavedContent


Step = Annotated[
    InferenceStep | ToolExecutionStep | ShieldCallStep | MemoryRetrievalStep,
    Field(discriminator="step_type"),
]
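
# --- Illustrative sketch, not part of the original file: pydantic selects the
# concrete Step class from the "step_type" discriminator during validation.
from pydantic import TypeAdapter

_raw_step = {"turn_id": "t-1", "step_id": "s-1", "step_type": "shield_call", "violation": None}
assert isinstance(TypeAdapter(Step).validate_python(_raw_step), ShieldCallStep)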


@json_schema_type
class Turn(BaseModel):
    """A single turn in an interaction with an Agentic System.

    :param turn_id: Unique identifier for the turn within a session
    :param session_id: Unique identifier for the conversation session
    :param input_messages: List of messages that initiated this turn
    :param steps: Ordered list of processing steps executed during this turn
    :param output_message: The model's generated response containing content and metadata
    :param output_attachments: (Optional) Files or media attached to the agent's response
    :param started_at: Timestamp when the turn began
    :param completed_at: (Optional) Timestamp when the turn finished, if completed
    """

    turn_id: str
    session_id: str
    input_messages: list[UserMessage | ToolResponseMessage]
    steps: list[Step]
    output_message: CompletionMessage
    output_attachments: list[Attachment] | None = Field(default_factory=lambda: [])

    started_at: datetime
    completed_at: datetime | None = None


@json_schema_type
class Session(BaseModel):
    """A single session of an interaction with an Agentic System.

    :param session_id: Unique identifier for the conversation session
    :param session_name: Human-readable name for the session
    :param turns: List of all turns that have occurred in this session
    :param started_at: Timestamp when the session was created
    """

    session_id: str
    session_name: str
    turns: list[Turn]
    started_at: datetime


class AgentToolGroupWithArgs(BaseModel):
    name: str
    args: dict[str, Any]


AgentToolGroup = str | AgentToolGroupWithArgs
register_schema(AgentToolGroup, name="AgentTool")


class AgentConfigCommon(BaseModel):
    sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)

    input_shields: list[str] | None = Field(default_factory=lambda: [])
    output_shields: list[str] | None = Field(default_factory=lambda: [])
    toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
    client_tools: list[ToolDef] | None = Field(default_factory=lambda: [])
    tool_choice: ToolChoice | None = Field(default=None, deprecated="use tool_config instead")
    tool_prompt_format: ToolPromptFormat | None = Field(default=None, deprecated="use tool_config instead")
    tool_config: ToolConfig | None = Field(default=None)

    max_infer_iters: int | None = 10

    def model_post_init(self, __context):
        if self.tool_config:
            if self.tool_choice and self.tool_config.tool_choice != self.tool_choice:
                raise ValueError("tool_choice is deprecated. Use tool_choice in tool_config instead.")
            if self.tool_prompt_format and self.tool_config.tool_prompt_format != self.tool_prompt_format:
                raise ValueError("tool_prompt_format is deprecated. Use tool_prompt_format in tool_config instead.")
        else:
            params = {}
            if self.tool_choice:
                params["tool_choice"] = self.tool_choice
            if self.tool_prompt_format:
                params["tool_prompt_format"] = self.tool_prompt_format
            self.tool_config = ToolConfig(**params)
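
# --- Illustrative sketch, not part of the original file: a bare (deprecated)
# tool_choice is folded into tool_config by model_post_init above. Assumes
# ToolChoice has an "auto" member, per the inference API import.
_cfg = AgentConfigCommon(tool_choice=ToolChoice.auto)
assert _cfg.tool_config is not None and _cfg.tool_config.tool_choice == ToolChoice.auto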


@json_schema_type
class AgentConfig(AgentConfigCommon):
    """Configuration for an agent.

    :param model: The model identifier to use for the agent
    :param instructions: The system instructions for the agent
    :param name: Optional name for the agent, used in telemetry and identification
    :param enable_session_persistence: Optional flag indicating whether session data has to be persisted
    :param response_format: Optional response format configuration
    """

    model: str
    instructions: str
    name: str | None = None
    enable_session_persistence: bool | None = False
    response_format: ResponseFormat | None = None


@json_schema_type
class Agent(BaseModel):
    """An agent instance with configuration and metadata.

    :param agent_id: Unique identifier for the agent
    :param agent_config: Configuration settings for the agent
    :param created_at: Timestamp when the agent was created
    """

    agent_id: str
    agent_config: AgentConfig
    created_at: datetime


class AgentConfigOverridablePerTurn(AgentConfigCommon):
    instructions: str | None = None


class AgentTurnResponseEventType(StrEnum):
    step_start = "step_start"
    step_complete = "step_complete"
    step_progress = "step_progress"

    turn_start = "turn_start"
    turn_complete = "turn_complete"
    turn_awaiting_input = "turn_awaiting_input"


@json_schema_type
class AgentTurnResponseStepStartPayload(BaseModel):
    """Payload for step start events in agent turn responses.

    :param event_type: Type of event being reported
    :param step_type: Type of step being executed
    :param step_id: Unique identifier for the step within a turn
    :param metadata: (Optional) Additional metadata for the step
    """

    event_type: Literal[AgentTurnResponseEventType.step_start] = AgentTurnResponseEventType.step_start
    step_type: StepType
    step_id: str
    metadata: dict[str, Any] | None = Field(default_factory=lambda: {})


@json_schema_type
class AgentTurnResponseStepCompletePayload(BaseModel):
    """Payload for step completion events in agent turn responses.

    :param event_type: Type of event being reported
    :param step_type: Type of step being executed
    :param step_id: Unique identifier for the step within a turn
    :param step_details: Complete details of the executed step
    """

    event_type: Literal[AgentTurnResponseEventType.step_complete] = AgentTurnResponseEventType.step_complete
    step_type: StepType
    step_id: str
    step_details: Step


@json_schema_type
class AgentTurnResponseStepProgressPayload(BaseModel):
    """Payload for step progress events in agent turn responses.

    :param event_type: Type of event being reported
    :param step_type: Type of step being executed
    :param step_id: Unique identifier for the step within a turn
    :param delta: Incremental content changes during step execution
    """

    model_config = ConfigDict(protected_namespaces=())

    event_type: Literal[AgentTurnResponseEventType.step_progress] = AgentTurnResponseEventType.step_progress
    step_type: StepType
    step_id: str

    delta: ContentDelta


@json_schema_type
class AgentTurnResponseTurnStartPayload(BaseModel):
    """Payload for turn start events in agent turn responses.

    :param event_type: Type of event being reported
    :param turn_id: Unique identifier for the turn within a session
    """

    event_type: Literal[AgentTurnResponseEventType.turn_start] = AgentTurnResponseEventType.turn_start
    turn_id: str


@json_schema_type
class AgentTurnResponseTurnCompletePayload(BaseModel):
    """Payload for turn completion events in agent turn responses.

    :param event_type: Type of event being reported
    :param turn: Complete turn data including all steps and results
    """

    event_type: Literal[AgentTurnResponseEventType.turn_complete] = AgentTurnResponseEventType.turn_complete
    turn: Turn


@json_schema_type
class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
    """Payload for turn awaiting input events in agent turn responses.

    :param event_type: Type of event being reported
    :param turn: Turn data when waiting for external tool responses
    """

    event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input] = AgentTurnResponseEventType.turn_awaiting_input
    turn: Turn


AgentTurnResponseEventPayload = Annotated[
    AgentTurnResponseStepStartPayload
    | AgentTurnResponseStepProgressPayload
    | AgentTurnResponseStepCompletePayload
    | AgentTurnResponseTurnStartPayload
    | AgentTurnResponseTurnCompletePayload
    | AgentTurnResponseTurnAwaitingInputPayload,
    Field(discriminator="event_type"),
]
register_schema(AgentTurnResponseEventPayload, name="AgentTurnResponseEventPayload")
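
# --- Illustrative sketch, not part of the original file: consumers narrow the
# payload union by matching on the event_type discriminator.
def _handle(payload: AgentTurnResponseEventPayload) -> None:
    if payload.event_type == AgentTurnResponseEventType.step_progress:
        print(payload.delta)  # incremental content for the step
    elif payload.event_type == AgentTurnResponseEventType.turn_complete:
        print(payload.turn.output_message.content)  # the finished Turn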


@json_schema_type
class AgentTurnResponseEvent(BaseModel):
    """An event in an agent turn response stream.

    :param payload: Event-specific payload containing event data
    """

    payload: AgentTurnResponseEventPayload


@json_schema_type
class AgentCreateResponse(BaseModel):
    """Response returned when creating a new agent.

    :param agent_id: Unique identifier for the created agent
    """

    agent_id: str


@json_schema_type
class AgentSessionCreateResponse(BaseModel):
    """Response returned when creating a new agent session.

    :param session_id: Unique identifier for the created session
    """

    session_id: str


@json_schema_type
class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
    """Request to create a new turn for an agent.

    :param agent_id: Unique identifier for the agent
    :param session_id: Unique identifier for the conversation session
    :param messages: List of messages to start the turn with
    :param documents: (Optional) List of documents to provide to the agent
    :param toolgroups: (Optional) List of tool groups to make available for this turn
    :param stream: (Optional) Whether to stream the response
    :param tool_config: (Optional) Tool configuration to override agent defaults
    """

    agent_id: str
    session_id: str

    # TODO: figure out how we can simplify this and make clearer why
    # ToolResponseMessage needs to be here (it is function call
    # execution from outside the system)
    messages: list[UserMessage | ToolResponseMessage]

    documents: list[Document] | None = None
    toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])

    stream: bool | None = False
    tool_config: ToolConfig | None = None


@json_schema_type
class AgentTurnResumeRequest(BaseModel):
    """Request to resume an agent turn with tool responses.

    :param agent_id: Unique identifier for the agent
    :param session_id: Unique identifier for the conversation session
    :param turn_id: Unique identifier for the turn within a session
    :param tool_responses: List of tool responses to submit to continue the turn
    :param stream: (Optional) Whether to stream the response
    """

    agent_id: str
    session_id: str
    turn_id: str
    tool_responses: list[ToolResponse]
    stream: bool | None = False


@json_schema_type
class AgentTurnResponseStreamChunk(BaseModel):
    """Streamed agent turn completion response.

    :param event: Individual event in the agent turn response stream
    """

    event: AgentTurnResponseEvent


@json_schema_type
class AgentStepResponse(BaseModel):
    """Response containing details of a specific agent step.

    :param step: The complete step data and execution details
    """

    step: Step


@runtime_checkable
class Agents(Protocol):
    """Agents

    APIs for creating and interacting with agentic systems."""

    @webmethod(
        route="/agents",
        method="POST",
        descriptive_name="create_agent",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def create_agent(
        self,
        agent_config: AgentConfig,
    ) -> AgentCreateResponse:
        """Create an agent with the given configuration.

        :param agent_config: The configuration for the agent.
        :returns: An AgentCreateResponse with the agent ID.
        """
        ...

    @webmethod(
        route="/agents/{agent_id}/session/{session_id}/turn",
        method="POST",
        descriptive_name="create_agent_turn",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def create_agent_turn(
        self,
        agent_id: str,
        session_id: str,
        messages: list[UserMessage | ToolResponseMessage],
        stream: bool | None = False,
        documents: list[Document] | None = None,
        toolgroups: list[AgentToolGroup] | None = None,
        tool_config: ToolConfig | None = None,
    ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
        """Create a new turn for an agent.

        :param agent_id: The ID of the agent to create the turn for.
        :param session_id: The ID of the session to create the turn for.
        :param messages: List of messages to start the turn with.
        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
        :param documents: (Optional) List of documents to create the turn with.
        :param toolgroups: (Optional) List of toolgroups to create the turn with; these are used in addition to the agent's config toolgroups for the request.
        :param tool_config: (Optional) The tool configuration to create the turn with; this overrides the agent's tool_config.
        :returns: If stream=False, returns a Turn object.
                  If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.
        """
        ...

    @webmethod(
        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
        method="POST",
        descriptive_name="resume_agent_turn",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def resume_agent_turn(
        self,
        agent_id: str,
        session_id: str,
        turn_id: str,
        tool_responses: list[ToolResponse],
        stream: bool | None = False,
    ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
        """Resume an agent turn with executed tool call responses.

        When a Turn has the status `awaiting_input` due to pending input from client-side tool calls, this endpoint can be used to submit the outputs from the tool calls once they are ready.

        :param agent_id: The ID of the agent to resume.
        :param session_id: The ID of the session to resume.
        :param turn_id: The ID of the turn to resume.
        :param tool_responses: The tool call responses to resume the turn with.
        :param stream: Whether to stream the response.
        :returns: A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk objects.
        """
        ...

    @webmethod(
        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
        method="GET",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def get_agents_turn(
        self,
        agent_id: str,
        session_id: str,
        turn_id: str,
    ) -> Turn:
        """Retrieve an agent turn by its ID.

        :param agent_id: The ID of the agent to get the turn for.
        :param session_id: The ID of the session to get the turn for.
        :param turn_id: The ID of the turn to get.
        :returns: A Turn.
        """
        ...

    @webmethod(
        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
        method="GET",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def get_agents_step(
        self,
        agent_id: str,
        session_id: str,
        turn_id: str,
        step_id: str,
    ) -> AgentStepResponse:
        """Retrieve an agent step by its ID.

        :param agent_id: The ID of the agent to get the step for.
        :param session_id: The ID of the session to get the step for.
        :param turn_id: The ID of the turn to get the step for.
        :param step_id: The ID of the step to get.
        :returns: An AgentStepResponse.
        """
        ...

    @webmethod(
        route="/agents/{agent_id}/session",
        method="POST",
        descriptive_name="create_agent_session",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def create_agent_session(
        self,
        agent_id: str,
        session_name: str,
    ) -> AgentSessionCreateResponse:
        """Create a new session for an agent.

        :param agent_id: The ID of the agent to create the session for.
        :param session_name: The name of the session to create.
        :returns: An AgentSessionCreateResponse.
        """
        ...

    @webmethod(
        route="/agents/{agent_id}/session/{session_id}",
        method="GET",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def get_agents_session(
        self,
        session_id: str,
        agent_id: str,
        turn_ids: list[str] | None = None,
    ) -> Session:
        """Retrieve an agent session by its ID.

        :param session_id: The ID of the session to get.
        :param agent_id: The ID of the agent to get the session for.
        :param turn_ids: (Optional) List of turn IDs to filter the session by.
        :returns: A Session.
        """
        ...

    @webmethod(
        route="/agents/{agent_id}/session/{session_id}",
        method="DELETE",
        level=LLAMA_STACK_API_V1ALPHA,
    )
    async def delete_agents_session(
        self,
        session_id: str,
        agent_id: str,
    ) -> None:
        """Delete an agent session by its ID and its associated turns.

        :param session_id: The ID of the session to delete.
        :param agent_id: The ID of the agent to delete the session for.
        """
        ...

    @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
    async def delete_agent(
        self,
        agent_id: str,
    ) -> None:
        """Delete an agent by its ID and its associated sessions and turns.

        :param agent_id: The ID of the agent to delete.
        """
        ...

    @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
        """List all agents.

        :param start_index: The index to start the pagination from.
        :param limit: The number of agents to return.
        :returns: A PaginatedResponse.
        """
        ...

    @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def get_agent(self, agent_id: str) -> Agent:
        """Describe an agent by its ID.

        :param agent_id: ID of the agent.
        :returns: An Agent describing the agent.
        """
        ...

    @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def list_agent_sessions(
        self,
        agent_id: str,
        start_index: int | None = None,
        limit: int | None = None,
    ) -> PaginatedResponse:
        """List all session(s) of a given agent.

        :param agent_id: The ID of the agent to list sessions for.
        :param start_index: The index to start the pagination from.
        :param limit: The number of sessions to return.
        :returns: A PaginatedResponse.
        """
        ...

    # We situate the OpenAI Responses API in the Agents API just like we did things
    # for Inference. The Responses API, in its intent, serves the same purpose as
    # the Agents API above -- it is essentially a lightweight "agentic loop" with
    # integrated tool calling.
    #
    # Both of these APIs are inherently stateful.

    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_openai_response(
        self,
        response_id: str,
    ) -> OpenAIResponseObject:
        """Get a model response.

        :param response_id: The ID of the OpenAI response to retrieve.
        :returns: An OpenAIResponseObject.
        """
        ...

    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
    async def create_openai_response(
        self,
        input: str | list[OpenAIResponseInput],
        model: str,
        prompt: OpenAIResponsePrompt | None = None,
        instructions: str | None = None,
        previous_response_id: str | None = None,
        conversation: str | None = None,
        store: bool | None = True,
        stream: bool | None = False,
        temperature: float | None = None,
        text: OpenAIResponseText | None = None,
        tools: list[OpenAIResponseInputTool] | None = None,
        include: list[str] | None = None,
        max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
        guardrails: Annotated[
            list[ResponseGuardrail] | None,
            ExtraBodyField(
                "List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
            ),
        ] = None,
    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
        """Create a model response.

        :param input: Input message(s) to create the response.
        :param model: The underlying LLM used for completions.
        :param prompt: (Optional) Prompt object with ID, version, and variables.
        :param previous_response_id: (Optional) If specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
        :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
        :param include: (Optional) Additional fields to include in the response.
        :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
        :returns: An OpenAIResponseObject.
        """
        ...

    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_responses(
        self,
        after: str | None = None,
        limit: int | None = 50,
        model: str | None = None,
        order: Order | None = Order.desc,
    ) -> ListOpenAIResponseObject:
        """List all responses.

        :param after: The ID of the last response to return.
        :param limit: The number of responses to return.
        :param model: The model to filter responses by.
        :param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
        :returns: A ListOpenAIResponseObject.
        """
        ...

    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_response_input_items(
        self,
        response_id: str,
        after: str | None = None,
        before: str | None = None,
        include: list[str] | None = None,
        limit: int | None = 20,
        order: Order | None = Order.desc,
    ) -> ListOpenAIResponseInputItem:
        """List input items.

        :param response_id: The ID of the response to retrieve input items for.
        :param after: An item ID to list items after, used for pagination.
        :param before: An item ID to list items before, used for pagination.
        :param include: Additional fields to include in the response.
        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
        :param order: The order to return the input items in. Default is desc.
        :returns: A ListOpenAIResponseInputItem.
        """
        ...

    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
        """Delete a response.

        :param response_id: The ID of the OpenAI response to delete.
        :returns: An OpenAIDeleteResponseObject.
        """
        ...
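Since the deleted file above defines the whole agentic-loop surface, a compact usage sketch may help orient the reader. This is hypothetical: impl stands for any Agents implementation, the model name is a placeholder, and the ToolResponse field names are assumed from the inference API imports above.

    async def demo(impl: Agents) -> None:
        # Create an agent, open a session, and run one non-streaming turn.
        agent = await impl.create_agent(AgentConfig(model="example-model", instructions="Be terse."))
        session = await impl.create_agent_session(agent.agent_id, session_name="demo")
        turn = await impl.create_agent_turn(
            agent_id=agent.agent_id,
            session_id=session.session_id,
            messages=[UserMessage(content="Hello!")],
            stream=False,
        )
        # If the model requested client-side tool calls, resume the turn with results.
        if turn.output_message.tool_calls:
            call = turn.output_message.tool_calls[0]
            turn = await impl.resume_agent_turn(
                agent_id=agent.agent_id,
                session_id=session.session_id,
                turn_id=turn.turn_id,
                tool_responses=[ToolResponse(call_id=call.call_id, tool_name=call.tool_name, content="42")],
                stream=False,
            )
        print(turn.output_message.content)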

@@ -1,9 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .batches import Batches, BatchObject, ListBatchesResponse

__all__ = ["Batches", "BatchObject", "ListBatchesResponse"]

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .benchmarks import *

@@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

@@ -1,36 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import Enum
from typing import Any

from pydantic import BaseModel

from llama_stack.schema_utils import json_schema_type


class Order(Enum):
    """Sort order for paginated responses.

    :cvar asc: Ascending order
    :cvar desc: Descending order
    """

    asc = "asc"
    desc = "desc"


@json_schema_type
class PaginatedResponse(BaseModel):
    """A generic paginated response that follows a simple format.

    :param data: The list of items for the current page
    :param has_more: Whether there are more items available after this set
    :param url: The URL for accessing this list
    """

    data: list[dict[str, Any]]
    has_more: bool
    url: str | None = None
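For reference, one page of this shape (all values illustrative):

    page = PaginatedResponse(
        data=[{"agent_id": "a-1"}, {"agent_id": "a-2"}],
        has_more=True,
        url="/v1alpha/agents",  # illustrative path
    )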

@@ -1,31 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .conversations import (
    Conversation,
    ConversationCreateRequest,
    ConversationDeletedResource,
    ConversationItem,
    ConversationItemCreateRequest,
    ConversationItemDeletedResource,
    ConversationItemList,
    Conversations,
    ConversationUpdateRequest,
    Metadata,
)

__all__ = [
    "Conversation",
    "ConversationCreateRequest",
    "ConversationDeletedResource",
    "ConversationItem",
    "ConversationItemCreateRequest",
    "ConversationItemDeletedResource",
    "ConversationItemList",
    "Conversations",
    "ConversationUpdateRequest",
    "Metadata",
]

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .datasetio import *

@@ -1,158 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import Enum, EnumMeta

from pydantic import BaseModel, Field

from llama_stack.schema_utils import json_schema_type


class DynamicApiMeta(EnumMeta):
    def __new__(cls, name, bases, namespace):
        # Store the original enum values
        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}

        # Create the enum class
        cls = super().__new__(cls, name, bases, namespace)

        # Store the original values for reference
        cls._original_values = original_values
        # Initialize _dynamic_values
        cls._dynamic_values = {}

        return cls

    def __call__(cls, value):
        try:
            return super().__call__(value)
        except ValueError as e:
            # If this value was already dynamically added, return it
            if value in cls._dynamic_values:
                return cls._dynamic_values[value]

            # The value doesn't exist; derive the member name it would have used
            member_name = value.lower().replace("-", "_")

            # If this member name already exists in the enum, return the existing member
            if member_name in cls._member_map_:
                return cls._member_map_[member_name]

            # Instead of creating a new member, raise ValueError to force users to use Api.add() to
            # register new APIs explicitly
            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e

    def __iter__(cls):
        # Allow iteration over both static and dynamic members
        yield from super().__iter__()
        if hasattr(cls, "_dynamic_values"):
            yield from cls._dynamic_values.values()

    def add(cls, value):
        """
        Add a new API to the enum.
        Used to register external APIs.
        """
        member_name = value.lower().replace("-", "_")

        # If this member name already exists in the enum, return it
        if member_name in cls._member_map_:
            return cls._member_map_[member_name]

        # Create a new enum member
        member = object.__new__(cls)
        member._name_ = member_name
        member._value_ = value

        # Add it to the enum class
        cls._member_map_[member_name] = member
        cls._member_names_.append(member_name)
        cls._member_type_ = str

        # Store it in our dynamic values
        cls._dynamic_values[value] = member

        return member


@json_schema_type
class Api(Enum, metaclass=DynamicApiMeta):
    """Enumeration of all available APIs in the Llama Stack system.

    :cvar providers: Provider management and configuration
    :cvar inference: Text generation, chat completions, and embeddings
    :cvar safety: Content moderation and safety shields
    :cvar agents: Agent orchestration and execution
    :cvar batches: Batch processing for asynchronous API requests
    :cvar vector_io: Vector database operations and queries
    :cvar datasetio: Dataset input/output operations
    :cvar scoring: Model output evaluation and scoring
    :cvar eval: Model evaluation and benchmarking framework
    :cvar post_training: Fine-tuning and model training
    :cvar tool_runtime: Tool execution and management
    :cvar telemetry: Observability and system monitoring
    :cvar models: Model metadata and management
    :cvar shields: Safety shield implementations
    :cvar datasets: Dataset creation and management
    :cvar scoring_functions: Scoring function definitions
    :cvar benchmarks: Benchmark suite management
    :cvar tool_groups: Tool group organization
    :cvar files: File storage and management
    :cvar prompts: Prompt versions and management
    :cvar inspect: Built-in system inspection and introspection
    """

    providers = "providers"
    inference = "inference"
    safety = "safety"
    agents = "agents"
    batches = "batches"
    vector_io = "vector_io"
    datasetio = "datasetio"
    scoring = "scoring"
    eval = "eval"
    post_training = "post_training"
    tool_runtime = "tool_runtime"

    models = "models"
    shields = "shields"
    vector_stores = "vector_stores"  # only used for routing table
    datasets = "datasets"
    scoring_functions = "scoring_functions"
    benchmarks = "benchmarks"
    tool_groups = "tool_groups"
    files = "files"
    prompts = "prompts"
    conversations = "conversations"

    # built-in API
    inspect = "inspect"
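
# --- Illustrative sketch, not part of the original file: out-of-tree APIs are
# registered with Api.add(); plain lookup of an unregistered value raises.
_weather = Api.add("weather")
assert Api("weather") is _weather and _weather.value == "weather"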


@json_schema_type
class Error(BaseModel):
    """
    Error response from the API. Roughly follows RFC 7807.

    :param status: HTTP status code
    :param title: Error title, a short summary of the error which is invariant for an error type
    :param detail: Error detail, a longer human-readable description of the error
    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
    """

    status: int
    title: str
    detail: str
    instance: str | None = None
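
# --- Illustrative sketch, not part of the original file: a typical
# RFC 7807-style payload expressed with this model.
_err = Error(status=404, title="Not Found", detail="Agent 'agent-123' does not exist.")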


class ExternalApiSpec(BaseModel):
    """Specification for an external API implementation."""

    module: str = Field(..., description="Python module containing the API implementation")
    name: str = Field(..., description="Name of the API")
    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
    protocol: str = Field(..., description="Name of the protocol class for the API")
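
# --- Illustrative sketch, not part of the original file: a spec for an
# out-of-tree API (module, package, and protocol names are placeholders).
_spec = ExternalApiSpec(
    module="my_pkg.weather",
    name="weather",
    pip_packages=["my-weather-api"],
    protocol="Weather",
)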

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .eval import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .files import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .inference import *

@@ -1,43 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from termcolor import cprint

from llama_stack.apis.inference import (
    ChatCompletionResponseEventType,
    ChatCompletionResponseStreamChunk,
)


class LogEvent:
    def __init__(
        self,
        content: str = "",
        end: str = "\n",
        color="white",
    ):
        self.content = content
        self.color = color
        self.end = "\n" if end is None else end

    def print(self, flush=True):
        cprint(f"{self.content}", color=self.color, end=self.end, flush=flush)


class EventLogger:
    async def log(self, event_generator):
        async for chunk in event_generator:
            if isinstance(chunk, ChatCompletionResponseStreamChunk):
                event = chunk.event
                if event.event_type == ChatCompletionResponseEventType.start:
                    yield LogEvent("Assistant> ", color="cyan", end="")
                elif event.event_type == ChatCompletionResponseEventType.progress:
                    yield LogEvent(event.delta, color="yellow", end="")
                elif event.event_type == ChatCompletionResponseEventType.complete:
                    yield LogEvent("")
            else:
                yield LogEvent("Assistant> ", color="cyan", end="")
                yield LogEvent(chunk.completion_message.content, color="yellow")
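A minimal sketch of driving this logger, assuming chunks is the async stream returned by a streaming chat completion call:

    async def print_stream(chunks) -> None:
        async for log_event in EventLogger().log(chunks):
            log_event.print()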

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .inspect import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .models import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .post_training import *

@@ -1,9 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .prompts import ListPromptsResponse, Prompt, Prompts

__all__ = ["Prompt", "Prompts", "ListPromptsResponse"]

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .providers import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .safety import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .scoring import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .scoring_functions import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .shields import *

@@ -1,8 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .rag_tool import *
from .tools import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .vector_io import *

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .vector_stores import *
@@ -21,7 +21,7 @@ from llama_stack.core.datatypes import (
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.stack import replace_env_vars
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api

TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"

@@ -46,6 +46,10 @@ class StackListDeps(Subcommand):
    def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
        # always keep implementation completely silo-ed away from CLI so CLI
        # can be fast to load and reduces dependencies
        if not args.config and not args.providers:
            self.parser.print_help()
            self.parser.exit()

        from ._list_deps import run_stack_list_deps_command

        return run_stack_list_deps_command(args)
@@ -9,48 +9,69 @@ from pathlib import Path
from llama_stack.cli.subcommand import Subcommand
from llama_stack.cli.table import print_table
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR


class StackListBuilds(Subcommand):
    """List built stacks in .llama/distributions directory"""
    """List available distributions (both built-in and custom)"""

    def __init__(self, subparsers: argparse._SubParsersAction):
        super().__init__()
        self.parser = subparsers.add_parser(
            "list",
            prog="llama stack list",
            description="list the build stacks",
            description="list available distributions",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )
        self._add_arguments()
        self.parser.set_defaults(func=self._list_stack_command)

    def _get_distribution_dirs(self) -> dict[str, Path]:
        """Return a dictionary of distribution names and their paths"""
        distributions = {}
        dist_dir = Path.home() / ".llama" / "distributions"
    def _get_distribution_dirs(self) -> dict[str, tuple[Path, str]]:
        """Return a dictionary of distribution names and their paths with source type

        Returns:
            dict mapping distro name to (path, source_type) where source_type is 'built-in' or 'custom'
        """
        distributions = {}

        # Get built-in distributions from source code
        distro_dir = Path(__file__).parent.parent.parent / "distributions"
        if distro_dir.exists():
            for stack_dir in distro_dir.iterdir():
                if stack_dir.is_dir() and not stack_dir.name.startswith(".") and not stack_dir.name.startswith("__"):
                    distributions[stack_dir.name] = (stack_dir, "built-in")

        # Get custom/run distributions from ~/.llama/distributions
        # These override built-in ones if they have the same name
        if DISTRIBS_BASE_DIR.exists():
            for stack_dir in DISTRIBS_BASE_DIR.iterdir():
                if stack_dir.is_dir() and not stack_dir.name.startswith("."):
                    # Clean up the name (remove llamastack- prefix if present)
                    name = stack_dir.name.replace("llamastack-", "")
                    distributions[name] = (stack_dir, "custom")

        if dist_dir.exists():
            for stack_dir in dist_dir.iterdir():
                if stack_dir.is_dir():
                    distributions[stack_dir.name] = stack_dir
        return distributions

    def _list_stack_command(self, args: argparse.Namespace) -> None:
        distributions = self._get_distribution_dirs()

        if not distributions:
            print("No stacks found in ~/.llama/distributions")
            print("No distributions found")
            return

        headers = ["Stack Name", "Path"]
        headers.extend(["Build Config", "Run Config"])
        headers = ["Stack Name", "Source", "Path", "Build Config", "Run Config"]
        rows = []
        for name, path in distributions.items():
            row = [name, str(path)]
        for name, (path, source_type) in sorted(distributions.items()):
            row = [name, source_type, str(path)]
            # Check for build and run config files
            build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
            run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
            # For built-in distributions, configs are named build.yaml and run.yaml
            # For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml
            if source_type == "built-in":
                build_config = "Yes" if (path / "build.yaml").exists() else "No"
                run_config = "Yes" if (path / "run.yaml").exists() else "No"
            else:
                build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
                run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
            row.extend([build_config, run_config])
            rows.append(row)
        print_table(rows, headers, separate_rows=True)
@@ -393,7 +393,7 @@ class StackRun(Subcommand):
            )
            return

        ui_dir = REPO_ROOT / "llama_stack" / "ui"
        ui_dir = REPO_ROOT / "llama_stack_ui"
        logs_dir = Path("~/.llama/ui/logs").expanduser()
        try:
            # Create logs directory if it doesn't exist

@@ -32,7 +32,7 @@ from llama_stack.core.storage.datatypes import (
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api

TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"

@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import importlib.resources
import sys

from pydantic import BaseModel

@@ -12,12 +11,9 @@ from termcolor import cprint
from llama_stack.core.datatypes import BuildConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis
from llama_stack.core.utils.exec import run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.distributions.template import DistributionTemplate
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api

log = get_logger(name=__name__, category="core")
@@ -101,64 +97,3 @@ def print_pip_install_help(config: BuildConfig):
    for special_dep in special_deps:
        cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr)
    print()


def build_image(
    build_config: BuildConfig,
    image_name: str,
    distro_or_config: str,
    run_config: str | None = None,
):
    container_base = build_config.distribution_spec.container_image or "python:3.12-slim"

    normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config)
    normal_deps += SERVER_DEPENDENCIES
    if build_config.external_apis_dir:
        external_apis = load_external_apis(build_config)
        if external_apis:
            for _, api_spec in external_apis.items():
                normal_deps.extend(api_spec.pip_packages)

    if build_config.image_type == LlamaStackImageType.CONTAINER.value:
        script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
        args = [
            script,
            "--distro-or-config",
            distro_or_config,
            "--image-name",
            image_name,
            "--container-base",
            container_base,
            "--normal-deps",
            " ".join(normal_deps),
        ]
        # When building from a config file (not a template), include the run config path in the
        # build arguments
        if run_config is not None:
            args.extend(["--run-config", run_config])
    else:
        script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
        args = [
            script,
            "--env-name",
            str(image_name),
            "--normal-deps",
            " ".join(normal_deps),
        ]

    # Always pass both arguments, even if empty, to maintain consistent positional arguments
    if special_deps:
        args.extend(["--optional-deps", "#".join(special_deps)])
    if external_provider_deps:
        args.extend(
            ["--external-provider-deps", "#".join(external_provider_deps)]
        )  # the script will install the external provider module, get its deps, and install those too

    return_code = run_command(args)

    if return_code != 0:
        log.error(
            f"Failed to build target {image_name} with return code {return_code}",
        )

    return return_code
@ -15,7 +15,7 @@ import httpx
|
|||
from pydantic import BaseModel, parse_obj_as
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.providers.datatypes import RemoteProviderConfig
|
||||
from llama_stack_api import RemoteProviderConfig
|
||||
|
||||
_CLIENT_CLASSES = {}
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
|||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.prompt_for_config import prompt_for_config
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api, ProviderSpec
|
||||
from llama_stack_api import Api, ProviderSpec
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
|
|
|||
|
|
@@ -10,7 +10,11 @@ from typing import Any, Literal

from pydantic import BaseModel, TypeAdapter

from llama_stack.apis.conversations.conversations import (
from llama_stack.core.datatypes import AccessRule, StackRunConfig
from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
from llama_stack.log import get_logger
from llama_stack_api import (
    Conversation,
    ConversationDeletedResource,
    ConversationItem,

@@ -20,11 +24,7 @@ from llama_stack.apis.conversations.conversations import (
    Conversations,
    Metadata,
)
from llama_stack.core.datatypes import AccessRule, StackRunConfig
from llama_stack.log import get_logger
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType

logger = get_logger(name=__name__, category="openai_conversations")

@@ -203,16 +203,11 @@ class ConversationServiceImpl(Conversations):
            "item_data": item_dict,
        }

        # TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update
        try:
            await self.sql_store.insert(table="conversation_items", data=item_record)
        except Exception:
            # If insert fails due to ID conflict, update existing record
            await self.sql_store.update(
                table="conversation_items",
                data={"created_at": created_at, "item_data": item_dict},
                where={"id": item_id},
            )
        await self.sql_store.upsert(
            table="conversation_items",
            data=item_record,
            conflict_columns=["id"],
        )

        created_items.append(item_dict)

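A note on the conversations hunk above: the try/insert-except/update sequence is replaced by a single atomic upsert. A minimal standalone sketch of the equivalent semantics, assuming a SQLite-style backend (the table and column names follow the diff; everything else here is illustrative):

    import sqlite3

    # One statement either inserts a new row or updates the conflicting one,
    # unlike the removed try/insert-except/update sequence, which could race
    # between the two steps.
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE conversation_items (id TEXT PRIMARY KEY, created_at INTEGER, item_data TEXT)")

    def upsert_item(item_id: str, created_at: int, item_data: str) -> None:
        conn.execute(
            "INSERT INTO conversation_items (id, created_at, item_data) VALUES (?, ?, ?) "
            "ON CONFLICT(id) DO UPDATE SET created_at = excluded.created_at, item_data = excluded.item_data",
            (item_id, created_at, item_data),
        )

    upsert_item("item-1", 1, '{"role": "user"}')
    upsert_item("item-1", 2, '{"role": "user", "edited": true}')  # updates in place
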
@@ -11,20 +11,6 @@ from urllib.parse import urlparse

from pydantic import BaseModel, Field, field_validator, model_validator

from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Dataset, DatasetInput
from llama_stack.apis.eval import Eval
from llama_stack.apis.inference import Inference
from llama_stack.apis.models import Model, ModelInput
from llama_stack.apis.resource import Resource
from llama_stack.apis.safety import Safety
from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.storage.datatypes import (
    KVStoreReference,

@@ -32,7 +18,32 @@ from llama_stack.core.storage.datatypes import (
    StorageConfig,
)
from llama_stack.log import LoggingConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack_api import (
    Api,
    Benchmark,
    BenchmarkInput,
    Dataset,
    DatasetInput,
    DatasetIO,
    Eval,
    Inference,
    Model,
    ModelInput,
    ProviderSpec,
    Resource,
    Safety,
    Scoring,
    ScoringFn,
    ScoringFnInput,
    Shield,
    ShieldInput,
    ToolGroup,
    ToolGroupInput,
    ToolRuntime,
    VectorIO,
    VectorStore,
    VectorStoreInput,
)

LLAMA_STACK_BUILD_CONFIG_VERSION = 2
LLAMA_STACK_RUN_CONFIG_VERSION = 2

@@ -15,7 +15,7 @@ from pydantic import BaseModel
from llama_stack.core.datatypes import BuildConfig, DistributionSpec
from llama_stack.core.external import load_external_apis
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
from llama_stack_api import (
    Api,
    InlineProviderSpec,
    ProviderSpec,

@@ -7,9 +7,9 @@

import yaml

from llama_stack.apis.datatypes import Api, ExternalApiSpec
from llama_stack.core.datatypes import BuildConfig, StackRunConfig
from llama_stack.log import get_logger
from llama_stack_api import Api, ExternalApiSpec

logger = get_logger(name=__name__, category="core")

@@ -8,18 +8,17 @@ from importlib.metadata import version

from pydantic import BaseModel

from llama_stack.apis.inspect import (
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.external import load_external_apis
from llama_stack.core.server.routes import get_all_api_routes
from llama_stack_api import (
    HealthInfo,
    HealthStatus,
    Inspect,
    ListRoutesResponse,
    RouteInfo,
    VersionInfo,
)
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.external import load_external_apis
from llama_stack.core.server.routes import get_all_api_routes
from llama_stack.providers.datatypes import HealthStatus


class DistributionInspectConfig(BaseModel):

@@ -46,8 +45,8 @@ class DistributionInspectImpl(Inspect):
        # Helper function to determine if a route should be included based on api_filter
        def should_include_route(webmethod) -> bool:
            if api_filter is None:
                # Default: only non-deprecated v1 APIs
                return not webmethod.deprecated and webmethod.level == LLAMA_STACK_API_V1
                # Default: only non-deprecated APIs
                return not webmethod.deprecated
            elif api_filter == "deprecated":
                # Special filter: show deprecated routes regardless of their actual level
                return bool(webmethod.deprecated)

@@ -18,14 +18,23 @@ from typing import Any, TypeVar, Union, get_args, get_origin
import httpx
import yaml
from fastapi import Response as FastAPIResponse
from llama_stack_client import (
    NOT_GIVEN,
    APIResponse,
    AsyncAPIResponse,
    AsyncLlamaStackClient,
    AsyncStream,
    LlamaStackClient,
)

from llama_stack.core.utils.type_inspection import is_unwrapped_body_param

try:
    from llama_stack_client import (
        NOT_GIVEN,
        APIResponse,
        AsyncAPIResponse,
        AsyncLlamaStackClient,
        AsyncStream,
        LlamaStackClient,
    )
except ImportError as e:
    raise ImportError(
        "llama-stack-client is not installed. Please install it with `uv pip install llama-stack[client]`."
    ) from e

from pydantic import BaseModel, TypeAdapter
from rich.console import Console
from termcolor import cprint

@@ -33,24 +42,16 @@ from termcolor import cprint
from llama_stack.core.build import print_pip_install_help
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
from llama_stack.core.request_headers import (
    PROVIDER_DATA_VAR,
    request_provider_data_context,
)
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider_data_context
from llama_stack.core.resolver import ProviderRegistry
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
from llama_stack.core.stack import (
    Stack,
    get_stack_run_config_from_distro,
    replace_env_vars,
)
from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
from llama_stack.core.telemetry import Telemetry
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
from llama_stack.log import get_logger, setup_logging
from llama_stack.strong_typing.inspection import is_unwrapped_body_param

logger = get_logger(name=__name__, category="core")

@@ -382,6 +383,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
        matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
        body |= path_params

        # Pass through params that aren't already handled as path params
        if options.params:
            extra_query_params = {k: v for k, v in options.params.items() if k not in path_params}
            if extra_query_params:
                body["extra_query"] = extra_query_params

        body, field_names = self._handle_file_uploads(options, body)

        body = self._convert_body(matched_func, body, exclude_params=set(field_names))

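The last library_client hunk above forwards request params that were not consumed as path params under an "extra_query" key. A toy illustration of that filtering (the dict values here are made up):

    # Params already bound into the URL path are excluded; the remainder is
    # passed through as extra query parameters.
    path_params = {"model_id": "llama-3"}
    options_params = {"model_id": "llama-3", "limit": 10, "order": "desc"}

    extra_query_params = {k: v for k, v in options_params.items() if k not in path_params}
    body = {}
    if extra_query_params:
        body["extra_query"] = extra_query_params

    print(body)  # {'extra_query': {'limit': 10, 'order': 'desc'}}
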
@@ -9,9 +9,9 @@ from typing import Any

from pydantic import BaseModel

from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
from llama_stack_api import ListPromptsResponse, Prompt, Prompts


class PromptServiceConfig(BaseModel):

@@ -9,9 +9,8 @@ from typing import Any

from pydantic import BaseModel

from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus
from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers

from .datatypes import StackRunConfig
from .utils.config import redact_sensitive_fields

@@ -8,29 +8,6 @@ import importlib.metadata
import inspect
from typing import Any

from llama_stack.apis.agents import Agents
from llama_stack.apis.batches import Batches
from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.datatypes import ExternalApiSpec
from llama_stack.apis.eval import Eval
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InferenceProvider
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models
from llama_stack.apis.post_training import PostTraining
from llama_stack.apis.prompts import Prompts
from llama_stack.apis.providers import Providers as ProvidersAPI
from llama_stack.apis.safety import Safety
from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFunctions
from llama_stack.apis.shields import Shields
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.core.client import get_client_impl
from llama_stack.core.datatypes import (
    AccessRule,

@@ -44,17 +21,44 @@ from llama_stack.core.external import load_external_apis
from llama_stack.core.store import DistributionRegistry
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
from llama_stack_api import (
    LLAMA_STACK_API_V1ALPHA,
    Agents,
    Api,
    Batches,
    Benchmarks,
    BenchmarksProtocolPrivate,
    Conversations,
    DatasetIO,
    Datasets,
    DatasetsProtocolPrivate,
    Eval,
    ExternalApiSpec,
    Files,
    Inference,
    InferenceProvider,
    Inspect,
    Models,
    ModelsProtocolPrivate,
    PostTraining,
    Prompts,
    ProviderSpec,
    RemoteProviderConfig,
    RemoteProviderSpec,
    Safety,
    Scoring,
    ScoringFunctions,
    ScoringFunctionsProtocolPrivate,
    Shields,
    ShieldsProtocolPrivate,
    ToolGroups,
    ToolGroupsProtocolPrivate,
    ToolRuntime,
    VectorIO,
    VectorStore,
)
from llama_stack_api import (
    Providers as ProvidersAPI,
)

logger = get_logger(name=__name__, category="core")

@@ -397,6 +401,18 @@ async def instantiate_provider(
    impl.__provider_spec__ = provider_spec
    impl.__provider_config__ = config

    # Apply tracing if telemetry is enabled and any base class has __marked_for_tracing__ marker
    if run_config.telemetry.enabled:
        traced_classes = [
            base for base in reversed(impl.__class__.__mro__) if getattr(base, "__marked_for_tracing__", False)
        ]

        if traced_classes:
            from llama_stack.core.telemetry.trace_protocol import trace_protocol

            for cls in traced_classes:
                trace_protocol(cls)

    protocols = api_protocol_map_for_compliance_check(run_config)
    additional_protocols = additional_protocols_map()
    # TODO: check compliance for special tool groups

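The resolver hunk above discovers classes opted into tracing by scanning the implementation's MRO for a class-level marker attribute. A toy, self-contained version of the same scan (the marker name comes from the diff; the classes here are invented):

    # Collect base classes that opted into tracing, in base-to-derived order.
    class TracedBase:
        __marked_for_tracing__ = True

    class Mixin:
        pass

    class Impl(Mixin, TracedBase):
        pass

    impl = Impl()
    traced_classes = [
        base for base in reversed(impl.__class__.__mro__) if getattr(base, "__marked_for_tracing__", False)
    ]
    print(traced_classes)  # [TracedBase, Impl]; Impl inherits the marker from TracedBase
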
@@ -12,8 +12,8 @@ from llama_stack.core.datatypes import (
)
from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable
from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack_api import Api, RoutingTable


async def get_routing_table_impl(

@@ -45,6 +45,7 @@ async def get_routing_table_impl(
        raise ValueError(f"API {api.value} not found in router map")

    impl = api_to_tables[api.value](impls_by_provider_id, dist_registry, policy)

    await impl.initialize()
    return impl

@@ -92,5 +93,6 @@ async def get_auto_router_impl(
        api_to_dep_impl["safety_config"] = run_config.safety

    impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)

    await impl.initialize()
    return impl

@@ -6,11 +6,8 @@

from typing import Any

from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import DatasetPurpose, DataSource
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import RoutingTable
from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable

logger = get_logger(name=__name__, category="core::routers")

@@ -6,15 +6,18 @@

from typing import Any

from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
from llama_stack.apis.scoring import (
from llama_stack.log import get_logger
from llama_stack_api import (
    BenchmarkConfig,
    Eval,
    EvaluateResponse,
    Job,
    RoutingTable,
    ScoreBatchResponse,
    ScoreResponse,
    Scoring,
    ScoringFnParams,
)
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import RoutingTable

logger = get_logger(name=__name__, category="core::routers")

@@ -15,13 +15,25 @@ from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatC
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
from pydantic import TypeAdapter

from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.inference import (
from llama_stack.core.telemetry.telemetry import MetricEvent
from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.chat_format import ChatFormat
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack_api import (
    HealthResponse,
    HealthStatus,
    Inference,
    ListOpenAIChatCompletionResponse,
    ModelNotFoundError,
    ModelType,
    ModelTypeError,
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartTextParam,
    OpenAIChatCompletionRequestWithExtraBody,
    OpenAIChatCompletionToolCall,
    OpenAIChatCompletionToolCallFunction,

@@ -35,19 +47,8 @@ from llama_stack.apis.inference import (
    OpenAIMessageParam,
    Order,
    RerankResponse,
    RoutingTable,
)
from llama_stack.apis.inference.inference import (
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartTextParam,
)
from llama_stack.apis.models import ModelType
from llama_stack.core.telemetry.telemetry import MetricEvent
from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.chat_format import ChatFormat
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
from llama_stack.providers.utils.inference.inference_store import InferenceStore

logger = get_logger(name=__name__, category="core::routers")

@@ -190,7 +191,7 @@ class InferenceRouter(Inference):

        response = await provider.openai_completion(params)
        response.model = request_model_id
        if self.telemetry_enabled:
        if self.telemetry_enabled and response.usage is not None:
            metrics = self._construct_metrics(
                prompt_tokens=response.usage.prompt_tokens,
                completion_tokens=response.usage.completion_tokens,

@@ -253,7 +254,7 @@ class InferenceRouter(Inference):
        if self.store:
            asyncio.create_task(self.store.store_chat_completion(response, params.messages))

        if self.telemetry_enabled:
        if self.telemetry_enabled and response.usage is not None:
            metrics = self._construct_metrics(
                prompt_tokens=response.usage.prompt_tokens,
                completion_tokens=response.usage.completion_tokens,

@@ -416,7 +417,7 @@ class InferenceRouter(Inference):
                    prompt_tokens=chunk.usage.prompt_tokens,
                    completion_tokens=chunk.usage.completion_tokens,
                    total_tokens=chunk.usage.total_tokens,
                    model_id=fully_qualified_model_id,
                    fully_qualified_model_id=fully_qualified_model_id,
                    provider_id=provider_id,
                )
                for metric in metrics:

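The inference router hunks above tighten the telemetry condition so token metrics are only constructed when the provider actually returned a usage block. A toy reproduction of the guard (the dataclass and function here are illustrative):

    from dataclasses import dataclass

    @dataclass
    class Usage:
        prompt_tokens: int
        completion_tokens: int

    def maybe_emit_metrics(telemetry_enabled: bool, usage: Usage | None) -> list[str]:
        # The new, stricter condition: some providers omit usage entirely.
        if telemetry_enabled and usage is not None:
            return [f"prompt={usage.prompt_tokens}", f"completion={usage.completion_tokens}"]
        return []

    print(maybe_emit_metrics(True, Usage(10, 3)))  # ['prompt=10', 'completion=3']
    print(maybe_emit_metrics(True, None))          # [] instead of an AttributeError on usage
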
@@ -6,13 +6,9 @@

from typing import Any

from llama_stack.apis.inference import Message
from llama_stack.apis.safety import RunShieldResponse, Safety
from llama_stack.apis.safety.safety import ModerationObject
from llama_stack.apis.shields import Shield
from llama_stack.core.datatypes import SafetyConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import RoutingTable
from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield

logger = get_logger(name=__name__, category="core::routers")

@@ -52,7 +48,7 @@ class SafetyRouter(Safety):
    async def run_shield(
        self,
        shield_id: str,
        messages: list[Message],
        messages: list[OpenAIMessageParam],
        params: dict[str, Any] = None,
    ) -> RunShieldResponse:
        logger.debug(f"SafetyRouter.run_shield: {shield_id}")

@@ -6,19 +6,12 @@

from typing import Any

from llama_stack.apis.common.content_types import (
from llama_stack.log import get_logger
from llama_stack_api import (
    URL,
    InterleavedContent,
)
from llama_stack.apis.tools import (
    ListToolDefsResponse,
    RAGDocument,
    RAGQueryConfig,
    RAGQueryResult,
    RAGToolRuntime,
    ToolRuntime,
)
from llama_stack.log import get_logger

from ..routing_tables.toolgroups import ToolGroupsRoutingTable

@@ -26,36 +19,6 @@ logger = get_logger(name=__name__, category="core::routers")


class ToolRuntimeRouter(ToolRuntime):
    class RagToolImpl(RAGToolRuntime):
        def __init__(
            self,
            routing_table: ToolGroupsRoutingTable,
        ) -> None:
            logger.debug("Initializing ToolRuntimeRouter.RagToolImpl")
            self.routing_table = routing_table

        async def query(
            self,
            content: InterleavedContent,
            vector_store_ids: list[str],
            query_config: RAGQueryConfig | None = None,
        ) -> RAGQueryResult:
            logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}")
            provider = await self.routing_table.get_provider_impl("knowledge_search")
            return await provider.query(content, vector_store_ids, query_config)

        async def insert(
            self,
            documents: list[RAGDocument],
            vector_store_id: str,
            chunk_size_in_tokens: int = 512,
        ) -> None:
            logger.debug(
                f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
            )
            provider = await self.routing_table.get_provider_impl("insert_into_memory")
            return await provider.insert(documents, vector_store_id, chunk_size_in_tokens)

    def __init__(
        self,
        routing_table: ToolGroupsRoutingTable,

@@ -63,11 +26,6 @@ class ToolRuntimeRouter(ToolRuntime):
        logger.debug("Initializing ToolRuntimeRouter")
        self.routing_table = routing_table

        # HACK ALERT this should be in sync with "get_all_api_endpoints()"
        self.rag_tool = self.RagToolImpl(routing_table)
        for method in ("query", "insert"):
            setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))

    async def initialize(self) -> None:
        logger.debug("ToolRuntimeRouter.initialize")
        pass

@@ -76,16 +34,16 @@ class ToolRuntimeRouter(ToolRuntime):
        logger.debug("ToolRuntimeRouter.shutdown")
        pass

    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> Any:
    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None) -> Any:
        logger.debug(f"ToolRuntimeRouter.invoke_tool: {tool_name}")
        provider = await self.routing_table.get_provider_impl(tool_name)
        return await provider.invoke_tool(
            tool_name=tool_name,
            kwargs=kwargs,
            authorization=authorization,
        )

    async def list_runtime_tools(
        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None, authorization: str | None = None
    ) -> ListToolDefsResponse:
        logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}")
        return await self.routing_table.list_tools(tool_group_id)
        return await self.routing_table.list_tools(tool_group_id, authorization=authorization)

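The tool-runtime hunks above thread an optional authorization credential from the router down to the provider, so remote tool backends (such as MCP servers) can receive per-request credentials. A toy sketch of the pass-through pattern with made-up classes:

    import asyncio

    class FakeProvider:
        async def invoke_tool(self, tool_name: str, kwargs: dict, authorization: str | None = None):
            who = "authenticated" if authorization else "anonymous"
            return f"{tool_name} invoked as {who}"

    class FakeRouter:
        def __init__(self, provider: FakeProvider):
            self.provider = provider

        async def invoke_tool(self, tool_name: str, kwargs: dict, authorization: str | None = None):
            # The router adds no policy of its own; it only forwards the token.
            return await self.provider.invoke_tool(tool_name=tool_name, kwargs=kwargs, authorization=authorization)

    router = FakeRouter(FakeProvider())
    print(asyncio.run(router.invoke_tool("web_search", {}, authorization="Bearer abc")))
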
@@ -10,19 +10,28 @@ from typing import Annotated, Any

from fastapi import Body

from llama_stack.apis.common.content_types import InterleavedContent
from llama_stack.apis.models import ModelType
from llama_stack.apis.vector_io import (
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack_api import (
    Chunk,
    HealthResponse,
    HealthStatus,
    InterleavedContent,
    ModelNotFoundError,
    ModelType,
    ModelTypeError,
    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
    OpenAICreateVectorStoreRequestWithExtraBody,
    QueryChunksResponse,
    RoutingTable,
    SearchRankingOptions,
    VectorIO,
    VectorStoreChunkingStrategy,
    VectorStoreChunkingStrategyStatic,
    VectorStoreChunkingStrategyStaticConfig,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
    VectorStoreFileContentsResponse,
    VectorStoreFileContentResponse,
    VectorStoreFileDeleteResponse,
    VectorStoreFileObject,
    VectorStoreFilesListInBatchResponse,

@@ -31,9 +40,6 @@ from llama_stack.apis.vector_io import (
    VectorStoreObject,
    VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable

logger = get_logger(name=__name__, category="core::routers")

@@ -120,6 +126,14 @@ class VectorIORouter(VectorIO):
        if embedding_model is not None and embedding_dimension is None:
            embedding_dimension = await self._get_embedding_model_dimension(embedding_model)

        # Validate that embedding model exists and is of the correct type
        if embedding_model is not None:
            model = await self.routing_table.get_object_by_identifier("model", embedding_model)
            if model is None:
                raise ModelNotFoundError(embedding_model)
            if model.model_type != ModelType.embedding:
                raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)

        # Auto-select provider if not specified
        if provider_id is None:
            num_providers = len(self.routing_table.impls_by_provider_id)

@@ -167,6 +181,13 @@ class VectorIORouter(VectorIO):
        if embedding_dimension is not None:
            params.model_extra["embedding_dimension"] = embedding_dimension

        # Set chunking strategy explicitly if not provided
        if params.chunking_strategy is None or params.chunking_strategy.type == "auto":
            # actualize the chunking strategy to static
            params.chunking_strategy = VectorStoreChunkingStrategyStatic(
                static=VectorStoreChunkingStrategyStaticConfig()
            )

        return await provider.openai_create_vector_store(params)

    async def openai_list_vector_stores(

@@ -238,6 +259,13 @@ class VectorIORouter(VectorIO):
        metadata: dict[str, Any] | None = None,
    ) -> VectorStoreObject:
        logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}")

        # Check if provider_id is being changed (not supported)
        if metadata and "provider_id" in metadata:
            current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id)
            if current_store and current_store.provider_id != metadata["provider_id"]:
                raise ValueError("provider_id cannot be changed after vector store creation")

        provider = await self.routing_table.get_provider_impl(vector_store_id)
        return await provider.openai_update_vector_store(
            vector_store_id=vector_store_id,

@@ -283,6 +311,8 @@ class VectorIORouter(VectorIO):
        chunking_strategy: VectorStoreChunkingStrategy | None = None,
    ) -> VectorStoreFileObject:
        logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
        if chunking_strategy is None or chunking_strategy.type == "auto":
            chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
        provider = await self.routing_table.get_provider_impl(vector_store_id)
        return await provider.openai_attach_file_to_vector_store(
            vector_store_id=vector_store_id,

@@ -327,12 +357,19 @@ class VectorIORouter(VectorIO):
        self,
        vector_store_id: str,
        file_id: str,
    ) -> VectorStoreFileContentsResponse:
        logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
        provider = await self.routing_table.get_provider_impl(vector_store_id)
        return await provider.openai_retrieve_vector_store_file_contents(
        include_embeddings: bool | None = False,
        include_metadata: bool | None = False,
    ) -> VectorStoreFileContentResponse:
        logger.debug(
            f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
            f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"
        )

        return await self.routing_table.openai_retrieve_vector_store_file_contents(
            vector_store_id=vector_store_id,
            file_id=file_id,
            include_embeddings=include_embeddings,
            include_metadata=include_metadata,
        )

    async def openai_update_vector_store_file(

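The vector_io router hunks above add an existence-and-type check for the requested embedding model before a vector store is created. A minimal illustration with a stand-in registry instead of the real routing table (the built-in exceptions here stand in for ModelNotFoundError and ModelTypeError):

    from enum import Enum

    class ModelType(str, Enum):
        llm = "llm"
        embedding = "embedding"

    REGISTRY = {"all-minilm": ModelType.embedding, "llama-3": ModelType.llm}

    def validate_embedding_model(name: str) -> None:
        model_type = REGISTRY.get(name)
        if model_type is None:
            raise KeyError(f"model not found: {name}")
        if model_type != ModelType.embedding:
            raise TypeError(f"{name} is {model_type.value}, expected embedding")

    validate_embedding_model("all-minilm")  # ok
    try:
        validate_embedding_model("llama-3")
    except TypeError as e:
        print(e)  # llama-3 is llm, expected embedding
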
@@ -6,11 +6,11 @@

from typing import Any

from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
from llama_stack.core.datatypes import (
    BenchmarkWithOwner,
)
from llama_stack.log import get_logger
from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse

from .common import CommonRoutingTableImpl

@@ -6,9 +6,6 @@

from typing import Any

from llama_stack.apis.common.errors import ModelNotFoundError
from llama_stack.apis.models import Model
from llama_stack.apis.resource import ResourceType
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.core.access_control.datatypes import Action
from llama_stack.core.datatypes import (

@@ -21,7 +18,7 @@ from llama_stack.core.datatypes import (
from llama_stack.core.request_headers import get_authenticated_user
from llama_stack.core.store import DistributionRegistry
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, RoutingTable
from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable

logger = get_logger(name=__name__, category="core::routing_tables")

@@ -7,22 +7,22 @@
import uuid
from typing import Any

from llama_stack.apis.common.errors import DatasetNotFoundError
from llama_stack.apis.datasets import (
from llama_stack.core.datatypes import (
    DatasetWithOwner,
)
from llama_stack.log import get_logger
from llama_stack_api import (
    Dataset,
    DatasetNotFoundError,
    DatasetPurpose,
    Datasets,
    DatasetType,
    DataSource,
    ListDatasetsResponse,
    ResourceType,
    RowsDataSource,
    URIDataSource,
)
from llama_stack.apis.resource import ResourceType
from llama_stack.core.datatypes import (
    DatasetWithOwner,
)
from llama_stack.log import get_logger

from .common import CommonRoutingTableImpl

@@ -7,8 +7,6 @@
import time
from typing import Any

from llama_stack.apis.common.errors import ModelNotFoundError
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
from llama_stack.core.datatypes import (
    ModelWithOwner,
    RegistryEntrySource,

@@ -16,6 +14,15 @@ from llama_stack.core.datatypes import (
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
from llama_stack_api import (
    ListModelsResponse,
    Model,
    ModelNotFoundError,
    Models,
    ModelType,
    OpenAIListModelsResponse,
    OpenAIModel,
)

from .common import CommonRoutingTableImpl, lookup_model

@@ -4,18 +4,18 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import ParamType
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.scoring_functions import (
    ListScoringFunctionsResponse,
    ScoringFn,
    ScoringFnParams,
    ScoringFunctions,
)
from llama_stack.core.datatypes import (
    ScoringFnWithOwner,
)
from llama_stack.log import get_logger
from llama_stack_api import (
    ListScoringFunctionsResponse,
    ParamType,
    ResourceType,
    ScoringFn,
    ScoringFnParams,
    ScoringFunctions,
)

from .common import CommonRoutingTableImpl

@@ -6,12 +6,11 @@

from typing import Any

from llama_stack.apis.resource import ResourceType
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
from llama_stack.core.datatypes import (
    ShieldWithOwner,
)
from llama_stack.log import get_logger
from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields

from .common import CommonRoutingTableImpl

|
@ -6,11 +6,17 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.common.errors import ToolGroupNotFoundError
|
||||
from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
|
||||
from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack_api import (
|
||||
URL,
|
||||
ListToolDefsResponse,
|
||||
ListToolGroupsResponse,
|
||||
ToolDef,
|
||||
ToolGroup,
|
||||
ToolGroupNotFoundError,
|
||||
ToolGroups,
|
||||
)
|
||||
|
||||
from .common import CommonRoutingTableImpl
|
||||
|
||||
|
|
@ -43,7 +49,9 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
|
|||
routing_key = self.tool_to_toolgroup[routing_key]
|
||||
return await super().get_provider_impl(routing_key, provider_id)
|
||||
|
||||
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
|
||||
async def list_tools(
|
||||
self, toolgroup_id: str | None = None, authorization: str | None = None
|
||||
) -> ListToolDefsResponse:
|
||||
if toolgroup_id:
|
||||
if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id):
|
||||
toolgroup_id = group_id
|
||||
|
|
@ -55,7 +63,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
|
|||
for toolgroup in toolgroups:
|
||||
if toolgroup.identifier not in self.toolgroups_to_tools:
|
||||
try:
|
||||
await self._index_tools(toolgroup)
|
||||
await self._index_tools(toolgroup, authorization=authorization)
|
||||
except AuthenticationRequiredError:
|
||||
# Send authentication errors back to the client so it knows
|
||||
# that it needs to supply credentials for remote MCP servers.
|
||||
|
|
@ -70,9 +78,11 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
|
|||
|
||||
return ListToolDefsResponse(data=all_tools)
|
||||
|
||||
async def _index_tools(self, toolgroup: ToolGroup):
|
||||
async def _index_tools(self, toolgroup: ToolGroup, authorization: str | None = None):
|
||||
provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id)
|
||||
tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint)
|
||||
tooldefs_response = await provider_impl.list_runtime_tools(
|
||||
toolgroup.identifier, toolgroup.mcp_endpoint, authorization=authorization
|
||||
)
|
||||
|
||||
tooldefs = tooldefs_response.data
|
||||
for t in tooldefs:
|
||||
|
|
|
|||
|
|
@ -6,26 +6,27 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
from llama_stack.core.datatypes import (
|
||||
VectorStoreWithOwner,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
# Removed VectorStores import to avoid exposing public API
|
||||
from llama_stack.apis.vector_io.vector_io import (
|
||||
from llama_stack_api import (
|
||||
ModelNotFoundError,
|
||||
ModelType,
|
||||
ModelTypeError,
|
||||
ResourceType,
|
||||
SearchRankingOptions,
|
||||
VectorStoreChunkingStrategy,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreFileContentsResponse,
|
||||
VectorStoreFileContentResponse,
|
||||
VectorStoreFileDeleteResponse,
|
||||
VectorStoreFileObject,
|
||||
VectorStoreFileStatus,
|
||||
VectorStoreObject,
|
||||
VectorStoreSearchResponsePage,
|
||||
)
|
||||
from llama_stack.core.datatypes import (
|
||||
VectorStoreWithOwner,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from .common import CommonRoutingTableImpl, lookup_model
|
||||
|
||||
|
|
@ -195,12 +196,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
|
|||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileContentsResponse:
|
||||
include_embeddings: bool | None = False,
|
||||
include_metadata: bool | None = False,
|
||||
) -> VectorStoreFileContentResponse:
|
||||
await self.assert_action_allowed("read", "vector_store", vector_store_id)
|
||||
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_retrieve_vector_store_file_contents(
|
||||
vector_store_id=vector_store_id,
|
||||
file_id=file_id,
|
||||
include_embeddings=include_embeddings,
|
||||
include_metadata=include_metadata,
|
||||
)
|
||||
|
||||
async def openai_update_vector_store_file(
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ import httpx
|
|||
import jwt
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.errors import TokenValidationError
|
||||
from llama_stack.core.datatypes import (
|
||||
AuthenticationConfig,
|
||||
CustomAuthConfig,
|
||||
|
|
@ -23,6 +22,7 @@ from llama_stack.core.datatypes import (
|
|||
User,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack_api import TokenValidationError
|
||||
|
||||
logger = get_logger(name=__name__, category="core::auth")
|
||||
|
||||
|
|
|
|||
|
|
@@ -11,9 +11,9 @@ from datetime import UTC, datetime, timedelta
from starlette.types import ASGIApp, Receive, Scope, Send

from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
from llama_stack.core.storage.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
from llama_stack_api.internal.kvstore import KVStore

logger = get_logger(name=__name__, category="core::server")

@@ -12,10 +12,8 @@ from typing import Any
from aiohttp import hdrs
from starlette.routing import Route

from llama_stack.apis.datatypes import Api, ExternalApiSpec
from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
from llama_stack.core.resolver import api_protocol_map
from llama_stack.schema_utils import WebMethod
from llama_stack_api import Api, ExternalApiSpec, WebMethod

EndpointFunc = Callable[..., Any]
PathParams = dict[str, str]

@@ -25,33 +23,16 @@ RouteImpls = dict[str, PathImpl]
RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod]


def toolgroup_protocol_map():
    return {
        SpecialToolGroup.rag_tool: RAGToolRuntime,
    }


def get_all_api_routes(
    external_apis: dict[Api, ExternalApiSpec] | None = None,
) -> dict[Api, list[tuple[Route, WebMethod]]]:
    apis = {}

    protocols = api_protocol_map(external_apis)
    toolgroup_protocols = toolgroup_protocol_map()
    for api, protocol in protocols.items():
        routes = []
        protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction)

        # HACK ALERT
        if api == Api.tool_runtime:
            for tool_group in SpecialToolGroup:
                sub_protocol = toolgroup_protocols[tool_group]
                sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction)
                for name, method in sub_protocol_methods:
                    if not hasattr(method, "__webmethod__"):
                        continue
                    protocol_methods.append((f"{tool_group.value}.{name}", method))

        for name, method in protocol_methods:
            # Get all webmethods for this method (supports multiple decorators)
            webmethods = getattr(method, "__webmethods__", [])

@@ -31,8 +31,6 @@ from fastapi.responses import JSONResponse, StreamingResponse
from openai import BadRequestError
from pydantic import BaseModel, ValidationError

from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.core.access_control.access_control import AccessDeniedError
from llama_stack.core.datatypes import (
    AuthenticationRequiredError,

@@ -58,7 +56,7 @@ from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.log import LoggingConfig, get_logger, setup_logging
from llama_stack.providers.datatypes import Api
from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError

from .auth import AuthenticationMiddleware
from .quota import QuotaMiddleware

@@ -526,8 +524,8 @@ def extract_path_params(route: str) -> list[str]:

def remove_disabled_providers(obj):
    if isinstance(obj, dict):
        keys = ["provider_id", "shield_id", "provider_model_id", "model_id"]
        if any(k in obj and obj[k] in ("__disabled__", "", None) for k in keys):
        # Filter out items where provider_id is explicitly disabled or empty
        if "provider_id" in obj and obj["provider_id"] in ("__disabled__", "", None):
            return None
        return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None}
    elif isinstance(obj, list):

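The last server.py hunk narrows the disabled-entry filter from several ID keys to provider_id only. A self-contained version of the recursive filter as it reads after the change (the diff truncates at the list branch, so that branch below is an assumed completion, and the sample config is invented):

    def remove_disabled_providers(obj):
        # Prune dict subtrees whose provider_id is disabled or empty, recursing
        # through nested dicts and lists.
        if isinstance(obj, dict):
            if "provider_id" in obj and obj["provider_id"] in ("__disabled__", "", None):
                return None
            return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None}
        elif isinstance(obj, list):
            # Assumed completion: drop list items that pruned to None.
            return [v for v in (remove_disabled_providers(item) for item in obj) if v is not None]
        return obj

    config = {
        "models": [
            {"provider_id": "ollama", "model_id": "llama3"},
            {"provider_id": "__disabled__", "model_id": "gpt"},
        ]
    }
    print(remove_disabled_providers(config))
    # {'models': [{'provider_id': 'ollama', 'model_id': 'llama3'}]}
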
@@ -13,26 +13,6 @@ from typing import Any

import yaml

from llama_stack.apis.agents import Agents
from llama_stack.apis.batches import Batches
from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.eval import Eval
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models
from llama_stack.apis.post_training import PostTraining
from llama_stack.apis.prompts import Prompts
from llama_stack.apis.providers import Providers
from llama_stack.apis.safety import Safety
from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFunctions
from llama_stack.apis.shields import Shields
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
from llama_stack.core.distribution import get_provider_registry

@@ -54,7 +34,30 @@ from llama_stack.core.storage.datatypes import (
from llama_stack.core.store.registry import create_dist_registry
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
from llama_stack_api import (
    Agents,
    Api,
    Batches,
    Benchmarks,
    Conversations,
    DatasetIO,
    Datasets,
    Eval,
    Files,
    Inference,
    Inspect,
    Models,
    PostTraining,
    Prompts,
    Providers,
    Safety,
    Scoring,
    ScoringFunctions,
    Shields,
    ToolGroups,
    ToolRuntime,
    VectorIO,
)

logger = get_logger(name=__name__, category="core")

@@ -78,7 +81,6 @@ class LlamaStack(
    Inspect,
    ToolGroups,
    ToolRuntime,
    RAGToolRuntime,
    Files,
    Prompts,
    Conversations,

@@ -383,8 +385,8 @@ def _initialize_storage(run_config: StackRunConfig):
    else:
        raise ValueError(f"Unknown storage backend type: {type}")

    from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
    from llama_stack.core.storage.kvstore.kvstore import register_kvstore_backends
    from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends

    register_kvstore_backends(kv_backends)
    register_sqlstore_backends(sql_backends)

@@ -12,6 +12,8 @@ from typing import Annotated, Literal

from pydantic import BaseModel, Field, field_validator

from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR


class StorageBackendType(StrEnum):
    KV_REDIS = "kv_redis"

@@ -256,15 +258,24 @@ class ResponsesStoreReference(InferenceStoreReference):

class ServerStoresConfig(BaseModel):
    metadata: KVStoreReference | None = Field(
        default=None,
        default=KVStoreReference(
            backend="kv_default",
            namespace="registry",
        ),
        description="Metadata store configuration (uses KV backend)",
    )
    inference: InferenceStoreReference | None = Field(
        default=None,
        default=InferenceStoreReference(
            backend="sql_default",
            table_name="inference_store",
        ),
        description="Inference store configuration (uses SQL backend)",
    )
    conversations: SqlStoreReference | None = Field(
        default=None,
        default=SqlStoreReference(
            backend="sql_default",
            table_name="openai_conversations",
        ),
        description="Conversations store configuration (uses SQL backend)",
    )
    responses: ResponsesStoreReference | None = Field(

@@ -272,13 +283,21 @@ class ServerStoresConfig(BaseModel):
        description="Responses store configuration (uses SQL backend)",
    )
    prompts: KVStoreReference | None = Field(
        default=None,
        default=KVStoreReference(backend="kv_default", namespace="prompts"),
        description="Prompts store configuration (uses KV backend)",
    )


class StorageConfig(BaseModel):
    backends: dict[str, StorageBackendConfig] = Field(
        default={
            "kv_default": SqliteKVStoreConfig(
                db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/kvstore.db",
            ),
            "sql_default": SqliteSqlStoreConfig(
                db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/sql_store.db",
            ),
        },
        description="Named backend configurations (e.g., 'default', 'cache')",
    )
    stores: ServerStoresConfig = Field(

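The storage datatypes hunks above switch the store references from default=None to concrete default instances, so an empty storage section resolves to usable stores. A toy Pydantic model mirroring the idea (class and field names shortened from the diff):

    from pydantic import BaseModel, Field

    class KVStoreReference(BaseModel):
        backend: str
        namespace: str

    class ServerStoresConfig(BaseModel):
        # Pydantic v2 deep-copies the default instance per model instance.
        metadata: KVStoreReference | None = Field(
            default=KVStoreReference(backend="kv_default", namespace="registry"),
        )

    print(ServerStoresConfig().metadata)  # backend='kv_default' namespace='registry'
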
@@ -4,4 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack_api.internal.kvstore import KVStore as KVStore

from .kvstore import *  # noqa: F401, F403

@@ -11,10 +11,21 @@

from __future__ import annotations

from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType
import asyncio
from collections import defaultdict
from datetime import datetime
from typing import cast

from .api import KVStore
from .config import KVStoreConfig
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig
from llama_stack_api.internal.kvstore import KVStore

from .config import (
    KVStoreConfig,
    MongoDBKVStoreConfig,
    PostgresKVStoreConfig,
    RedisKVStoreConfig,
    SqliteKVStoreConfig,
)


def kvstore_dependencies():

@@ -30,7 +41,7 @@ def kvstore_dependencies():

class InmemoryKVStoreImpl(KVStore):
    def __init__(self):
        self._store = {}
        self._store: dict[str, str] = {}

    async def initialize(self) -> None:
        pass

@@ -38,7 +49,7 @@ class InmemoryKVStoreImpl(KVStore):
    async def get(self, key: str) -> str | None:
        return self._store.get(key)

    async def set(self, key: str, value: str) -> None:
    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
        self._store[key] = value

    async def values_in_range(self, start_key: str, end_key: str) -> list[str]:

@@ -53,45 +64,65 @@ class InmemoryKVStoreImpl(KVStore):


_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)


def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
    """Register the set of available KV store backends for reference resolution."""
    global _KVSTORE_BACKENDS
    global _KVSTORE_INSTANCES
    global _KVSTORE_LOCKS

    _KVSTORE_BACKENDS.clear()
    _KVSTORE_INSTANCES.clear()
    _KVSTORE_LOCKS.clear()
    for name, cfg in backends.items():
        _KVSTORE_BACKENDS[name] = cfg
        typed_cfg = cast(KVStoreConfig, cfg)
        _KVSTORE_BACKENDS[name] = typed_cfg


async def kvstore_impl(reference: KVStoreReference) -> KVStore:
    backend_name = reference.backend
    cache_key = (backend_name, reference.namespace)

    existing = _KVSTORE_INSTANCES.get(cache_key)
    if existing:
        return existing

    backend_config = _KVSTORE_BACKENDS.get(backend_name)
    if backend_config is None:
        raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}")

    config = backend_config.model_copy()
    config.namespace = reference.namespace
    lock = _KVSTORE_LOCKS[cache_key]
    async with lock:
        existing = _KVSTORE_INSTANCES.get(cache_key)
        if existing:
            return existing

    if config.type == StorageBackendType.KV_REDIS.value:
        from .redis import RedisKVStoreImpl
        config = backend_config.model_copy()
        config.namespace = reference.namespace

        impl = RedisKVStoreImpl(config)
    elif config.type == StorageBackendType.KV_SQLITE.value:
        from .sqlite import SqliteKVStoreImpl
        impl: KVStore
        if isinstance(config, RedisKVStoreConfig):
            from .redis import RedisKVStoreImpl

        impl = SqliteKVStoreImpl(config)
    elif config.type == StorageBackendType.KV_POSTGRES.value:
        from .postgres import PostgresKVStoreImpl
            impl = RedisKVStoreImpl(config)
        elif isinstance(config, SqliteKVStoreConfig):
            from .sqlite import SqliteKVStoreImpl

        impl = PostgresKVStoreImpl(config)
    elif config.type == StorageBackendType.KV_MONGODB.value:
        from .mongodb import MongoDBKVStoreImpl
            impl = SqliteKVStoreImpl(config)
        elif isinstance(config, PostgresKVStoreConfig):
            from .postgres import PostgresKVStoreImpl

        impl = MongoDBKVStoreImpl(config)
    else:
        raise ValueError(f"Unknown kvstore type {config.type}")
            impl = PostgresKVStoreImpl(config)
        elif isinstance(config, MongoDBKVStoreConfig):
            from .mongodb import MongoDBKVStoreImpl

    await impl.initialize()
    return impl
            impl = MongoDBKVStoreImpl(config)
        else:
            raise ValueError(f"Unknown kvstore type {config.type}")

        await impl.initialize()
        _KVSTORE_INSTANCES[cache_key] = impl
        return impl

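The reworked kvstore_impl above memoizes one client per (backend, namespace) pair and guards construction with a per-key asyncio.Lock, re-checking the cache after acquiring it. A stripped-down sketch of that double-checked pattern (names here are illustrative):

    import asyncio
    from collections import defaultdict

    _INSTANCES: dict[tuple[str, str], object] = {}
    _LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)

    async def get_instance(backend: str, namespace: str) -> object:
        key = (backend, namespace)
        if (existing := _INSTANCES.get(key)) is not None:
            return existing  # fast path, no lock

        async with _LOCKS[key]:
            # Re-check under the lock: another task may have built it while we waited.
            if (existing := _INSTANCES.get(key)) is not None:
                return existing
            instance = object()  # stands in for building and initializing the real store
            _INSTANCES[key] = instance
            return instance

    async def main():
        a, b = await asyncio.gather(
            get_instance("kv_default", "registry"),
            get_instance("kv_default", "registry"),
        )
        assert a is b  # both callers share one client

    asyncio.run(main())
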
@@ -9,8 +9,8 @@ from datetime import datetime
from pymongo import AsyncMongoClient
from pymongo.asynchronous.collection import AsyncCollection

from llama_stack.core.storage.kvstore import KVStore
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore

from ..config import MongoDBKVStoreConfig

@@ -6,12 +6,13 @@

 from datetime import datetime

-import psycopg2
-from psycopg2.extras import DictCursor
+import psycopg2  # type: ignore[import-not-found]
+from psycopg2.extensions import connection as PGConnection  # type: ignore[import-not-found]
+from psycopg2.extras import DictCursor  # type: ignore[import-not-found]

 from llama_stack.log import get_logger
+from llama_stack_api.internal.kvstore import KVStore

-from ..api import KVStore
 from ..config import PostgresKVStoreConfig

 log = get_logger(name=__name__, category="providers::utils")

@@ -20,12 +21,12 @@ log = get_logger(name=__name__, category="providers::utils")
 class PostgresKVStoreImpl(KVStore):
     def __init__(self, config: PostgresKVStoreConfig):
         self.config = config
-        self.conn = None
-        self.cursor = None
+        self._conn: PGConnection | None = None
+        self._cursor: DictCursor | None = None

     async def initialize(self) -> None:
         try:
-            self.conn = psycopg2.connect(
+            self._conn = psycopg2.connect(
                 host=self.config.host,
                 port=self.config.port,
                 database=self.config.db,

@@ -34,11 +35,11 @@ class PostgresKVStoreImpl(KVStore):
                 sslmode=self.config.ssl_mode,
                 sslrootcert=self.config.ca_cert_path,
             )
-            self.conn.autocommit = True
-            self.cursor = self.conn.cursor(cursor_factory=DictCursor)
+            self._conn.autocommit = True
+            self._cursor = self._conn.cursor(cursor_factory=DictCursor)

             # Create table if it doesn't exist
-            self.cursor.execute(
+            self._cursor.execute(
                 f"""
                 CREATE TABLE IF NOT EXISTS {self.config.table_name} (
                     key TEXT PRIMARY KEY,

@@ -51,6 +52,11 @@ class PostgresKVStoreImpl(KVStore):
             log.exception("Could not connect to PostgreSQL database server")
             raise RuntimeError("Could not connect to PostgreSQL database server") from e

+    def _cursor_or_raise(self) -> DictCursor:
+        if self._cursor is None:
+            raise RuntimeError("Postgres client not initialized")
+        return self._cursor
+
     def _namespaced_key(self, key: str) -> str:
         if not self.config.namespace:
             return key

@@ -58,7 +64,8 @@ class PostgresKVStoreImpl(KVStore):

     async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
         key = self._namespaced_key(key)
-        self.cursor.execute(
+        cursor = self._cursor_or_raise()
+        cursor.execute(
             f"""
             INSERT INTO {self.config.table_name} (key, value, expiration)
             VALUES (%s, %s, %s)

@@ -70,7 +77,8 @@ class PostgresKVStoreImpl(KVStore):

     async def get(self, key: str) -> str | None:
         key = self._namespaced_key(key)
-        self.cursor.execute(
+        cursor = self._cursor_or_raise()
+        cursor.execute(
             f"""
             SELECT value FROM {self.config.table_name}
             WHERE key = %s

@@ -78,12 +86,13 @@ class PostgresKVStoreImpl(KVStore):
             """,
             (key,),
         )
-        result = self.cursor.fetchone()
+        result = cursor.fetchone()
         return result[0] if result else None

     async def delete(self, key: str) -> None:
         key = self._namespaced_key(key)
-        self.cursor.execute(
+        cursor = self._cursor_or_raise()
+        cursor.execute(
             f"DELETE FROM {self.config.table_name} WHERE key = %s",
             (key,),
         )

@@ -92,7 +101,8 @@ class PostgresKVStoreImpl(KVStore):
         start_key = self._namespaced_key(start_key)
         end_key = self._namespaced_key(end_key)

-        self.cursor.execute(
+        cursor = self._cursor_or_raise()
+        cursor.execute(
             f"""
             SELECT value FROM {self.config.table_name}
             WHERE key >= %s AND key < %s

@@ -101,14 +111,15 @@ class PostgresKVStoreImpl(KVStore):
             """,
             (start_key, end_key),
         )
-        return [row[0] for row in self.cursor.fetchall()]
+        return [row[0] for row in cursor.fetchall()]

     async def keys_in_range(self, start_key: str, end_key: str) -> list[str]:
         start_key = self._namespaced_key(start_key)
         end_key = self._namespaced_key(end_key)

-        self.cursor.execute(
+        cursor = self._cursor_or_raise()
+        cursor.execute(
             f"SELECT key FROM {self.config.table_name} WHERE key >= %s AND key < %s",
             (start_key, end_key),
         )
-        return [row[0] for row in self.cursor.fetchall()]
+        return [row[0] for row in cursor.fetchall()]
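A hedged usage sketch of the reworked Postgres store, assuming a `PostgresKVStoreConfig` with the fields this diff shows (`host`, `port`, `db`, `table_name`, ...); the config constructor itself is not part of these hunks.

```python
# Hypothetical usage; config construction is an assumption.
async def demo(config) -> None:
    store = PostgresKVStoreImpl(config)
    await store.initialize()               # connects and creates the table if missing
    await store.set("alpha", "1")
    assert await store.get("alpha") == "1"
    # Half-open range [start, end), per the SQL `key >= %s AND key < %s` above.
    print(await store.keys_in_range("a", "b"))
```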
@@ -6,18 +6,25 @@

 from datetime import datetime

-from redis.asyncio import Redis
+from redis.asyncio import Redis  # type: ignore[import-not-found]

+from llama_stack_api.internal.kvstore import KVStore

-from ..api import KVStore
 from ..config import RedisKVStoreConfig


 class RedisKVStoreImpl(KVStore):
     def __init__(self, config: RedisKVStoreConfig):
         self.config = config
+        self._redis: Redis | None = None

     async def initialize(self) -> None:
-        self.redis = Redis.from_url(self.config.url)
+        self._redis = Redis.from_url(self.config.url)
+
+    def _client(self) -> Redis:
+        if self._redis is None:
+            raise RuntimeError("Redis client not initialized")
+        return self._redis

     def _namespaced_key(self, key: str) -> str:
         if not self.config.namespace:

@@ -26,30 +33,37 @@ class RedisKVStoreImpl(KVStore):

     async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
         key = self._namespaced_key(key)
-        await self.redis.set(key, value)
+        client = self._client()
+        await client.set(key, value)
         if expiration:
-            await self.redis.expireat(key, expiration)
+            await client.expireat(key, expiration)

     async def get(self, key: str) -> str | None:
         key = self._namespaced_key(key)
-        value = await self.redis.get(key)
+        client = self._client()
+        value = await client.get(key)
         if value is None:
             return None
-        await self.redis.ttl(key)
-        return value
+        await client.ttl(key)
+        if isinstance(value, bytes):
+            return value.decode("utf-8")
+        if isinstance(value, str):
+            return value
+        return str(value)

     async def delete(self, key: str) -> None:
         key = self._namespaced_key(key)
-        await self.redis.delete(key)
+        await self._client().delete(key)

     async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
         start_key = self._namespaced_key(start_key)
         end_key = self._namespaced_key(end_key)
+        client = self._client()
         cursor = 0
         pattern = start_key + "*"  # Match all keys starting with start_key prefix
-        matching_keys = []
+        matching_keys: list[str | bytes] = []
         while True:
-            cursor, keys = await self.redis.scan(cursor, match=pattern, count=1000)
+            cursor, keys = await client.scan(cursor, match=pattern, count=1000)

             for key in keys:
                 key_str = key.decode("utf-8") if isinstance(key, bytes) else key

@@ -61,7 +75,7 @@ class RedisKVStoreImpl(KVStore):

         # Then fetch all values in a single MGET call
         if matching_keys:
-            values = await self.redis.mget(matching_keys)
+            values = await client.mget(matching_keys)
             return [
                 value.decode("utf-8") if isinstance(value, bytes) else value for value in values if value is not None
             ]

@@ -70,7 +84,18 @@ class RedisKVStoreImpl(KVStore):

     async def keys_in_range(self, start_key: str, end_key: str) -> list[str]:
         """Get all keys in the given range."""
-        matching_keys = await self.redis.zrangebylex(self.namespace, f"[{start_key}", f"[{end_key}")
-        if not matching_keys:
-            return []
-        return [k.decode("utf-8") for k in matching_keys]
+        start_key = self._namespaced_key(start_key)
+        end_key = self._namespaced_key(end_key)
+        client = self._client()
+        cursor = 0
+        pattern = start_key + "*"
+        result: list[str] = []
+        while True:
+            cursor, keys = await client.scan(cursor, match=pattern, count=1000)
+            for key in keys:
+                key_str = key.decode("utf-8") if isinstance(key, bytes) else str(key)
+                if start_key <= key_str <= end_key:
+                    result.append(key_str)
+            if cursor == 0:
+                break
+        return result
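One behavioral note on the Redis changes: `get()` now normalizes the client's return type, so callers always see `str`. The decode rule in isolation (a standalone sketch mirroring the logic above, not the module's code):

```python
def normalize_redis_value(value) -> str:
    # Mirrors the logic added to get(): bytes are UTF-8 decoded,
    # str passes through, anything else is stringified.
    if isinstance(value, bytes):
        return value.decode("utf-8")
    if isinstance(value, str):
        return value
    return str(value)

assert normalize_redis_value(b"v") == "v"
assert normalize_redis_value("v") == "v"
assert normalize_redis_value(3) == "3"
```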
@@ -10,8 +10,8 @@ from datetime import datetime
 import aiosqlite

 from llama_stack.log import get_logger
+from llama_stack_api.internal.kvstore import KVStore

-from ..api import KVStore
 from ..config import SqliteKVStoreConfig

 logger = get_logger(name=__name__, category="providers::utils")
src/llama_stack/core/storage/sqlstore/__init__.py (new file, 17 lines)
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack_api.internal.sqlstore import (
+    ColumnDefinition as ColumnDefinition,
+)
+from llama_stack_api.internal.sqlstore import (
+    ColumnType as ColumnType,
+)
+from llama_stack_api.internal.sqlstore import (
+    SqlStore as SqlStore,
+)
+
+from .sqlstore import *  # noqa: F401,F403
@@ -14,8 +14,8 @@ from llama_stack.core.datatypes import User
 from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.core.storage.datatypes import StorageBackendType
 from llama_stack.log import get_logger

-from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore
+from llama_stack_api import PaginatedResponse
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType, SqlStore

 logger = get_logger(name=__name__, category="providers::utils")
@@ -45,8 +45,13 @@ def _enhance_item_with_access_control(item: Mapping[str, Any], current_user: User
         enhanced["owner_principal"] = current_user.principal
         enhanced["access_attributes"] = current_user.attributes
     else:
-        enhanced["owner_principal"] = None
-        enhanced["access_attributes"] = None
+        # IMPORTANT: Use empty string and null value (not None) to match public access filter
+        # The public access filter in _get_public_access_conditions() expects:
+        #   - owner_principal = '' (empty string)
+        #   - access_attributes = null (JSON null, which serializes to the string 'null')
+        # Setting them to None (SQL NULL) will cause rows to be filtered out on read.
+        enhanced["owner_principal"] = ""
+        enhanced["access_attributes"] = None  # Pydantic/JSON will serialize this as JSON null
     return enhanced
@@ -124,6 +129,23 @@ class AuthorizedSqlStore:
         enhanced_data = [_enhance_item_with_access_control(item, current_user) for item in data]
         await self.sql_store.insert(table, enhanced_data)

+    async def upsert(
+        self,
+        table: str,
+        data: Mapping[str, Any],
+        conflict_columns: list[str],
+        update_columns: list[str] | None = None,
+    ) -> None:
+        """Upsert a row with automatic access control attribute capture."""
+        current_user = get_authenticated_user()
+        enhanced_data = _enhance_item_with_access_control(data, current_user)
+        await self.sql_store.upsert(
+            table=table,
+            data=enhanced_data,
+            conflict_columns=conflict_columns,
+            update_columns=update_columns,
+        )
+
     async def fetch_all(
         self,
         table: str,
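A hypothetical call site for the new `AuthorizedSqlStore.upsert`; the table and column names below are illustrative only, while the parameter shapes come from the signature above.

```python
async def save_prompt(store) -> None:
    # store: an AuthorizedSqlStore. "prompts" and its columns are made up
    # for illustration; access-control attributes are captured automatically.
    await store.upsert(
        table="prompts",
        data={"prompt_id": "p1", "text": "hello"},
        conflict_columns=["prompt_id"],   # uniqueness key driving the upsert
        update_columns=["text"],          # refreshed when the row already exists
    )
```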
@@ -188,8 +210,9 @@ class AuthorizedSqlStore:
             enhanced_data["owner_principal"] = current_user.principal
             enhanced_data["access_attributes"] = current_user.attributes
         else:
-            enhanced_data["owner_principal"] = None
-            enhanced_data["access_attributes"] = None
+            # IMPORTANT: Use empty string for owner_principal to match public access filter
+            enhanced_data["owner_principal"] = ""
+            enhanced_data["access_attributes"] = None  # Will serialize as JSON null

         await self.sql_store.update(table, enhanced_data, where)
@@ -245,14 +268,24 @@ class AuthorizedSqlStore:
             raise ValueError(f"Unsupported database type: {self.database_type}")

     def _get_public_access_conditions(self) -> list[str]:
-        """Get the SQL conditions for public access."""
-        # Public records are records that have no owner_principal or access_attributes
+        """Get the SQL conditions for public access.
+
+        Public records are those with:
+        - owner_principal = '' (empty string)
+        - access_attributes is either SQL NULL or JSON null
+
+        Note: Different databases serialize None differently:
+        - SQLite: None → JSON null (text = 'null')
+        - Postgres: None → SQL NULL (IS NULL)
+        """
         conditions = ["owner_principal = ''"]
         if self.database_type == StorageBackendType.SQL_POSTGRES.value:
-            # Postgres stores JSON null as 'null'
-            conditions.append("access_attributes::text = 'null'")
+            # Accept both SQL NULL and JSON null for Postgres compatibility
+            # This handles both old rows (SQL NULL) and new rows (JSON null)
+            conditions.append("(access_attributes IS NULL OR access_attributes::text = 'null')")
         elif self.database_type == StorageBackendType.SQL_SQLITE.value:
-            conditions.append("access_attributes = 'null'")
+            # SQLite serializes None as JSON null
+            conditions.append("(access_attributes IS NULL OR access_attributes = 'null')")
         else:
             raise ValueError(f"Unsupported database type: {self.database_type}")
         return conditions
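Spelled out, these are the SQL fragments the method now returns per backend (a sketch; how the caller joins them into the final WHERE clause is outside this hunk):

```python
# What _get_public_access_conditions() yields after this change, per backend.
postgres_conditions = [
    "owner_principal = ''",
    "(access_attributes IS NULL OR access_attributes::text = 'null')",
]
sqlite_conditions = [
    "owner_principal = ''",
    "(access_attributes IS NULL OR access_attributes = 'null')",
]
# Presumably AND-ed together by the caller, e.g.:
public_filter = " AND ".join(postgres_conditions)
```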
@@ -26,11 +26,10 @@ from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
 from sqlalchemy.ext.asyncio.engine import AsyncEngine
 from sqlalchemy.sql.elements import ColumnElement

-from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig
 from llama_stack.log import get_logger

-from .api import ColumnDefinition, ColumnType, SqlStore
+from llama_stack_api import PaginatedResponse
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType, SqlStore

 logger = get_logger(name=__name__, category="providers::utils")
@@ -72,13 +71,14 @@ def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement:
 class SqlAlchemySqlStoreImpl(SqlStore):
     def __init__(self, config: SqlAlchemySqlStoreConfig):
         self.config = config
+        self._is_sqlite_backend = "sqlite" in self.config.engine_str
         self.async_session = async_sessionmaker(self.create_engine())
         self.metadata = MetaData()

     def create_engine(self) -> AsyncEngine:
         # Configure connection args for better concurrency support
         connect_args = {}
-        if "sqlite" in self.config.engine_str:
+        if self._is_sqlite_backend:
             # SQLite-specific optimizations for concurrent access
             # With WAL mode, most locks resolve in milliseconds, but allow up to 5s for edge cases
             connect_args["timeout"] = 5.0
@@ -91,7 +91,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
         )

         # Enable WAL mode for SQLite to support concurrent readers and writers
-        if "sqlite" in self.config.engine_str:
+        if self._is_sqlite_backend:

             @event.listens_for(engine.sync_engine, "connect")
             def set_sqlite_pragma(dbapi_conn, connection_record):
@@ -151,6 +151,29 @@ class SqlAlchemySqlStoreImpl(SqlStore):
             await session.execute(self.metadata.tables[table].insert(), data)
             await session.commit()

+    async def upsert(
+        self,
+        table: str,
+        data: Mapping[str, Any],
+        conflict_columns: list[str],
+        update_columns: list[str] | None = None,
+    ) -> None:
+        table_obj = self.metadata.tables[table]
+        dialect_insert = self._get_dialect_insert(table_obj)
+        insert_stmt = dialect_insert.values(**data)
+
+        if update_columns is None:
+            update_columns = [col for col in data.keys() if col not in conflict_columns]
+
+        update_mapping = {col: getattr(insert_stmt.excluded, col) for col in update_columns}
+        conflict_cols = [table_obj.c[col] for col in conflict_columns]
+
+        stmt = insert_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=update_mapping)
+
+        async with self.async_session() as session:
+            await session.execute(stmt)
+            await session.commit()
+
     async def fetch_all(
         self,
         table: str,
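For context, a self-contained sketch of the `on_conflict_do_update` shape the new method builds, here against an in-memory SQLite table; the table definition is illustrative, while the real code resolves tables from `self.metadata`.

```python
import asyncio

from sqlalchemy import Column, MetaData, String, Table
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
from sqlalchemy.ext.asyncio import create_async_engine

metadata = MetaData()
kv = Table("kv_demo", metadata, Column("key", String, primary_key=True), Column("value", String))

async def upsert_demo() -> None:
    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
    async with engine.begin() as conn:
        await conn.run_sync(metadata.create_all)
        stmt = sqlite_insert(kv).values(key="k", value="v2")
        # Same statement shape as upsert() above: on key conflict, take the
        # incoming ("excluded") value for the non-conflict column.
        stmt = stmt.on_conflict_do_update(
            index_elements=[kv.c.key], set_={"value": stmt.excluded.value}
        )
        await conn.execute(stmt)

asyncio.run(upsert_demo())
```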
@@ -333,9 +356,18 @@ class SqlAlchemySqlStoreImpl(SqlStore):
                         add_column_sql = text(f"ALTER TABLE {table} ADD COLUMN {column_name} {compiled_type}{nullable_clause}")

                         await conn.execute(add_column_sql)

                 except Exception as e:
                     # If any error occurs during migration, log it but don't fail
                     # The table creation will handle adding the column
                     logger.error(f"Error adding column {column_name} to table {table}: {e}")
                     pass

+    def _get_dialect_insert(self, table: Table):
+        if self._is_sqlite_backend:
+            from sqlalchemy.dialects.sqlite import insert as sqlite_insert
+
+            return sqlite_insert(table)
+        else:
+            from sqlalchemy.dialects.postgresql import insert as pg_insert
+
+            return pg_insert(table)
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from threading import Lock
 from typing import Annotated, cast

 from pydantic import Field

@@ -15,12 +16,13 @@ from llama_stack.core.storage.datatypes import (
     StorageBackendConfig,
     StorageBackendType,
 )

-from .api import SqlStore
+from llama_stack_api.internal.sqlstore import SqlStore

 sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]

 _SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {}
+_SQLSTORE_INSTANCES: dict[str, SqlStore] = {}
+_SQLSTORE_LOCKS: dict[str, Lock] = {}


 SqlStoreConfig = Annotated[
@@ -52,19 +54,34 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore:
             f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}"
         )

-    if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
-        from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
-
-        config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
-        return SqlAlchemySqlStoreImpl(config)
-    else:
-        raise ValueError(f"Unknown sqlstore type {backend_config.type}")
+    existing = _SQLSTORE_INSTANCES.get(backend_name)
+    if existing:
+        return existing
+
+    lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock())
+    with lock:
+        existing = _SQLSTORE_INSTANCES.get(backend_name)
+        if existing:
+            return existing
+
+        if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
+            from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
+
+            config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
+            instance = SqlAlchemySqlStoreImpl(config)
+            _SQLSTORE_INSTANCES[backend_name] = instance
+            return instance
+        else:
+            raise ValueError(f"Unknown sqlstore type {backend_config.type}")


 def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
     """Register the set of available SQL store backends for reference resolution."""
     global _SQLSTORE_BACKENDS
+    global _SQLSTORE_INSTANCES

     _SQLSTORE_BACKENDS.clear()
+    _SQLSTORE_INSTANCES.clear()
+    _SQLSTORE_LOCKS.clear()
     for name, cfg in backends.items():
         _SQLSTORE_BACKENDS[name] = cfg
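The rewritten `sqlstore_impl` is a per-backend double-checked locking singleton: a lock-free fast path, then a re-check under a per-backend `Lock` so two threads racing on first use cannot build two instances. The pattern in isolation (names here are generic stand-ins):

```python
from threading import Lock

_instances: dict[str, object] = {}
_locks: dict[str, Lock] = {}

def get_or_create(name: str, factory):
    existing = _instances.get(name)        # fast path, no lock taken
    if existing:
        return existing
    lock = _locks.setdefault(name, Lock())
    with lock:
        existing = _instances.get(name)    # re-check: another thread may have won
        if existing:
            return existing
        instance = factory()
        _instances[name] = instance
        return instance
```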
@@ -12,8 +12,8 @@ import pydantic

 from llama_stack.core.datatypes import RoutableObjectWithProvider
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl

 logger = get_logger(__name__, category="core::registry")
@@ -28,7 +28,7 @@ from pydantic import BaseModel, Field

 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import Primitive
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api import json_schema_type, register_schema

 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
@@ -163,47 +163,6 @@ class MetricEvent(EventCommon):
     unit: str


-@json_schema_type
-class MetricInResponse(BaseModel):
-    """A metric value included in API responses.
-    :param metric: The name of the metric
-    :param value: The numeric value of the metric
-    :param unit: (Optional) The unit of measurement for the metric value
-    """
-
-    metric: str
-    value: int | float
-    unit: str | None = None
-
-
-# This is a short term solution to allow inference API to return metrics
-# The ideal way to do this is to have a way for all response types to include metrics
-# and all metric events logged to the telemetry API to be included with the response
-# To do this, we will need to augment all response types with a metrics field.
-# We have hit a blocker from stainless SDK that prevents us from doing this.
-# The blocker is that if we were to augment the response types that have a data field
-# in them like so
-# class ListModelsResponse(BaseModel):
-#     metrics: Optional[List[MetricEvent]] = None
-#     data: List[Models]
-#     ...
-# The client SDK will need to access the data by using a .data field, which is not
-# ergonomic. Stainless SDK does support unwrapping the response type, but it
-# requires that the response type to only have a single field.
-
-# We will need a way in the client SDK to signal that the metrics are needed
-# and if they are needed, the client SDK has to return the full response type
-# without unwrapping it.
-
-
-class MetricResponseMixin(BaseModel):
-    """Mixin class for API responses that can include metrics.
-    :param metrics: (Optional) List of metrics associated with the API response
-    """
-
-    metrics: list[MetricInResponse] | None = None


 @json_schema_type
 class StructuredLogType(Enum):
     """The type of structured log event payload.
@@ -427,6 +386,7 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
     "counters": {},
     "gauges": {},
     "up_down_counters": {},
+    "histograms": {},
 }
 _global_lock = threading.Lock()
 _TRACER_PROVIDER = None
@@ -540,6 +500,16 @@ class Telemetry:
         )
         return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])

+    def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
+        assert self.meter is not None
+        if name not in _GLOBAL_STORAGE["histograms"]:
+            _GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
+                name=name,
+                unit=unit,
+                description=f"Histogram for {name}",
+            )
+        return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
+
     def _log_metric(self, event: MetricEvent) -> None:
         # Add metric as an event to the current span
         try:
@@ -571,7 +541,16 @@ class Telemetry:
         # Log to OpenTelemetry meter if available
         if self.meter is None:
             return
-        if isinstance(event.value, int):
+
+        # Use histograms for token-related metrics (per-request measurements)
+        # Use counters for other cumulative metrics
+        token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
+
+        if event.metric in token_metrics:
+            # Token metrics are per-request measurements, use histogram
+            histogram = self._get_or_create_histogram(event.metric, event.unit)
+            histogram.record(event.value, attributes=_clean_attributes(event.attributes))
+        elif isinstance(event.value, int):
             counter = self._get_or_create_counter(event.metric, event.unit)
             counter.add(event.value, attributes=_clean_attributes(event.attributes))
         elif isinstance(event.value, float):
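The effect of the routing change above, reduced to a standalone rule (a sketch; the real method also handles the float path shown truncated in the hunk):

```python
TOKEN_METRICS = {"prompt_tokens", "completion_tokens", "total_tokens"}

def choose_instrument(metric: str, value) -> str:
    # Token counts are per-request distributions, so they go to a histogram;
    # other integer metrics keep accumulating on counters.
    if metric in TOKEN_METRICS:
        return "histogram"
    if isinstance(value, int):
        return "counter"
    return "float-path"  # handled by the isinstance(..., float) branch above

assert choose_instrument("prompt_tokens", 128) == "histogram"
assert choose_instrument("request_count", 1) == "counter"
```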
@@ -129,6 +129,15 @@ def trace_protocol[T: type[Any]](cls: T) -> T:
         else:
             return sync_wrapper

+    # Wrap methods on the class itself (for classes applied at runtime)
+    # Skip if already wrapped (indicated by __wrapped__ attribute)
+    for name, method in vars(cls).items():
+        if inspect.isfunction(method) and not name.startswith("_"):
+            if not hasattr(method, "__wrapped__"):
+                wrapped = trace_method(method)
+                setattr(cls, name, wrapped)  # noqa: B010
+
     # Also set up __init_subclass__ for future subclasses
     original_init_subclass = cast(Callable[..., Any] | None, getattr(cls, "__init_subclass__", None))

     def __init_subclass__(cls_child: type[Any], **kwargs: Any) -> None:  # noqa: N807
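The loop added to `trace_protocol` makes the decorator work on classes whose methods are already defined, not just on subclasses created later via `__init_subclass__`. A rough standalone sketch of that wrapping step, with a stand-in tracer:

```python
import inspect
from functools import wraps

def trace_method(fn):
    # Stand-in for the real tracer; @wraps sets __wrapped__, which is the
    # marker the loop above checks to skip already-wrapped methods.
    @wraps(fn)
    def wrapper(*args, **kwargs):
        return fn(*args, **kwargs)
    return wrapper

def wrap_public_methods(cls):
    for name, method in vars(cls).items():
        if inspect.isfunction(method) and not name.startswith("_"):
            if not hasattr(method, "__wrapped__"):
                setattr(cls, name, trace_method(method))
    return cls
```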
@@ -1,11 +0,0 @@
-# More info on playground configuration can be found here:
-# https://llama-stack.readthedocs.io/en/latest/playground
-
-FROM python:3.12-slim
-WORKDIR /app
-COPY . /app/
-RUN /usr/local/bin/python -m pip install --upgrade pip && \
-    /usr/local/bin/pip3 install -r requirements.txt
-EXPOSE 8501
-
-ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
@@ -1,50 +0,0 @@
-# (Experimental) LLama Stack UI
-
-## Docker Setup
-
-:warning: This is a work in progress.
-
-## Developer Setup
-
-1. Start up Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.htmll).
-
-```
-llama stack list-deps together | xargs -L1 uv pip install
-
-llama stack run together
-```
-
-2. (Optional) Register datasets and eval tasks as resources. If you want to run pre-configured evaluation flows (e.g. Evaluations (Generation + Scoring) Page).
-
-```bash
-llama-stack-client datasets register \
---dataset-id "mmlu" \
---provider-id "huggingface" \
---url "https://huggingface.co/datasets/llamastack/evals" \
---metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
---schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
-```
-
-```bash
-llama-stack-client benchmarks register \
---eval-task-id meta-reference-mmlu \
---provider-id meta-reference \
---dataset-id mmlu \
---scoring-functions basic::regex_parser_multiple_choice_answer
-```
-
-3. Start Streamlit UI
-
-```bash
-uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
-```
-
-## Environment Variables
-
-| Environment Variable | Description | Default Value |
-|----------------------------|------------------------------------|---------------------------|
-| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 |
-| FIREWORKS_API_KEY | API key for Fireworks provider | (empty string) |
-| TOGETHER_API_KEY | API key for Together provider | (empty string) |
-| SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) |
-| OPENAI_API_KEY | API key for OpenAI provider | (empty string) |
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
@@ -1,55 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import streamlit as st
-
-
-def main():
-    # Evaluation pages
-    application_evaluation_page = st.Page(
-        "page/evaluations/app_eval.py",
-        title="Evaluations (Scoring)",
-        icon="📊",
-        default=False,
-    )
-    native_evaluation_page = st.Page(
-        "page/evaluations/native_eval.py",
-        title="Evaluations (Generation + Scoring)",
-        icon="📊",
-        default=False,
-    )
-
-    # Playground pages
-    chat_page = st.Page("page/playground/chat.py", title="Chat", icon="💬", default=True)
-    rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False)
-    tool_page = st.Page("page/playground/tools.py", title="Tools", icon="🛠", default=False)
-
-    # Distribution pages
-    resources_page = st.Page("page/distribution/resources.py", title="Resources", icon="🔍", default=False)
-    provider_page = st.Page(
-        "page/distribution/providers.py",
-        title="API Providers",
-        icon="🔍",
-        default=False,
-    )
-
-    pg = st.navigation(
-        {
-            "Playground": [
-                chat_page,
-                rag_page,
-                tool_page,
-                application_evaluation_page,
-                native_evaluation_page,
-            ],
-            "Inspect": [provider_page, resources_page],
-        },
-        expanded=False,
-    )
-    pg.run()
-
-
-if __name__ == "__main__":
-    main()
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
@@ -1,32 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-
-from llama_stack_client import LlamaStackClient
-
-
-class LlamaStackApi:
-    def __init__(self):
-        self.client = LlamaStackClient(
-            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
-            provider_data={
-                "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
-                "together_api_key": os.environ.get("TOGETHER_API_KEY", ""),
-                "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""),
-                "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
-                "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""),
-            },
-        )
-
-    def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
-        """Run scoring on a single row"""
-        if not scoring_params:
-            scoring_params = dict.fromkeys(scoring_function_ids)
-        return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)
-
-
-llama_stack_api = LlamaStackApi()
@@ -1,42 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import base64
-import os
-
-import pandas as pd
-import streamlit as st
-
-
-def process_dataset(file):
-    if file is None:
-        return "No file uploaded", None
-
-    try:
-        # Determine file type and read accordingly
-        file_ext = os.path.splitext(file.name)[1].lower()
-        if file_ext == ".csv":
-            df = pd.read_csv(file)
-        elif file_ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(file)
-        else:
-            return "Unsupported file format. Please upload a CSV or Excel file.", None
-
-        return df
-
-    except Exception as e:
-        st.error(f"Error processing file: {str(e)}")
-        return None
-
-
-def data_url_from_file(file) -> str:
-    file_content = file.getvalue()
-    base64_content = base64.b64encode(file_content).decode("utf-8")
-    mime_type = file.type
-
-    data_url = f"data:{mime_type};base64,{base64_content}"
-
-    return data_url
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def datasets():
-    st.header("Datasets")
-
-    datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()}
-    if len(datasets_info) > 0:
-        selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
-        st.json(datasets_info[selected_dataset], expanded=True)
@@ -1,20 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def benchmarks():
-    # Benchmarks Section
-    st.header("Benchmarks")
-
-    benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()}
-
-    if len(benchmarks_info) > 0:
-        selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect")
-        st.json(benchmarks_info[selected_benchmark], expanded=True)
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def models():
-    # Models Section
-    st.header("Models")
-    models_info = {m.id: m.model_dump() for m in llama_stack_api.client.models.list()}
-
-    selected_model = st.selectbox("Select a model", list(models_info.keys()))
-    st.json(models_info[selected_model])
@@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def providers():
-    st.header("🔍 API Providers")
-    apis_providers_lst = llama_stack_api.client.providers.list()
-    api_to_providers = {}
-    for api_provider in apis_providers_lst:
-        if api_provider.api in api_to_providers:
-            api_to_providers[api_provider.api].append(api_provider)
-        else:
-            api_to_providers[api_provider.api] = [api_provider]
-
-    for api in api_to_providers.keys():
-        st.markdown(f"###### {api}")
-        st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500)
-
-
-providers()
@@ -1,48 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from streamlit_option_menu import option_menu
-
-from llama_stack.core.ui.page.distribution.datasets import datasets
-from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks
-from llama_stack.core.ui.page.distribution.models import models
-from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions
-from llama_stack.core.ui.page.distribution.shields import shields
-
-
-def resources_page():
-    options = [
-        "Models",
-        "Shields",
-        "Scoring Functions",
-        "Datasets",
-        "Benchmarks",
-    ]
-    icons = ["magic", "shield", "file-bar-graph", "database", "list-task"]
-    selected_resource = option_menu(
-        None,
-        options,
-        icons=icons,
-        orientation="horizontal",
-        styles={
-            "nav-link": {
-                "font-size": "12px",
-            },
-        },
-    )
-    if selected_resource == "Benchmarks":
-        benchmarks()
-    elif selected_resource == "Datasets":
-        datasets()
-    elif selected_resource == "Models":
-        models()
-    elif selected_resource == "Scoring Functions":
-        scoring_functions()
-    elif selected_resource == "Shields":
-        shields()
-
-
-resources_page()
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def scoring_functions():
-    st.header("Scoring Functions")
-
-    scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()}
-
-    selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys()))
-    st.json(scoring_functions_info[selected_scoring_function], expanded=True)
@@ -1,19 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def shields():
-    # Shields Section
-    st.header("Shields")
-
-    shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()}
-
-    selected_shield = st.selectbox("Select a shield", list(shields_info.keys()))
-    st.json(shields_info[selected_shield])
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
@@ -1,143 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-
-import pandas as pd
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-from llama_stack.core.ui.modules.utils import process_dataset
-
-
-def application_evaluation_page():
-    st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙")
-    st.title("📊 Evaluations (Scoring)")
-
-    # File uploader
-    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
-
-    if uploaded_file is None:
-        st.error("No file uploaded")
-        return
-
-    # Process uploaded file
-    df = process_dataset(uploaded_file)
-    if df is None:
-        st.error("Error processing file")
-        return
-
-    # Display dataset information
-    st.success("Dataset loaded successfully!")
-
-    # Display dataframe preview
-    st.subheader("Dataset Preview")
-    st.dataframe(df)
-
-    # Select Scoring Functions to Run Evaluation On
-    st.subheader("Select Scoring Functions")
-    scoring_functions = llama_stack_api.client.scoring_functions.list()
-    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
-    scoring_functions_names = list(scoring_functions.keys())
-    selected_scoring_functions = st.multiselect(
-        "Choose one or more scoring functions",
-        options=scoring_functions_names,
-        help="Choose one or more scoring functions.",
-    )
-
-    available_models = llama_stack_api.client.models.list()
-    available_models = [m.identifier for m in available_models]
-
-    scoring_params = {}
-    if selected_scoring_functions:
-        st.write("Selected:")
-        for scoring_fn_id in selected_scoring_functions:
-            scoring_fn = scoring_functions[scoring_fn_id]
-            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
-            new_params = None
-            if scoring_fn.params:
-                new_params = {}
-                for param_name, param_value in scoring_fn.params.to_dict().items():
-                    if param_name == "type":
-                        new_params[param_name] = param_value
-                        continue
-
-                    if param_name == "judge_model":
-                        value = st.selectbox(
-                            f"Select **{param_name}** for {scoring_fn_id}",
-                            options=available_models,
-                            index=0,
-                            key=f"{scoring_fn_id}_{param_name}",
-                        )
-                        new_params[param_name] = value
-                    else:
-                        value = st.text_area(
-                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
-                            value=json.dumps(param_value, indent=2),
-                            height=80,
-                        )
-                        try:
-                            new_params[param_name] = json.loads(value)
-                        except json.JSONDecodeError:
-                            st.error(f"Invalid JSON for **{param_name}** in {scoring_fn_id}")
-
-                st.json(new_params)
-            scoring_params[scoring_fn_id] = new_params
-
-    # Add run evaluation button & slider
-    total_rows = len(df)
-    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
-
-    if st.button("Run Evaluation"):
-        progress_text = "Running evaluation..."
-        progress_bar = st.progress(0, text=progress_text)
-        rows = df.to_dict(orient="records")
-        if num_rows < total_rows:
-            rows = rows[:num_rows]
-
-        # Create separate containers for progress text and results
-        progress_text_container = st.empty()
-        results_container = st.empty()
-        output_res = {}
-        for i, r in enumerate(rows):
-            # Update progress
-            progress = i / len(rows)
-            progress_bar.progress(progress, text=progress_text)
-
-            # Run evaluation for current row
-            score_res = llama_stack_api.run_scoring(
-                r,
-                scoring_function_ids=selected_scoring_functions,
-                scoring_params=scoring_params,
-            )
-
-            for k in r.keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(r[k])
-
-            for fn_id in selected_scoring_functions:
-                if fn_id not in output_res:
-                    output_res[fn_id] = []
-                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
-
-            # Display current row results using separate containers
-            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
-            results_container.json(
-                score_res.to_json(),
-                expanded=2,
-            )
-
-        progress_bar.progress(1.0, text="Evaluation complete!")
-
-        # Display results in dataframe
-        if output_res:
-            output_df = pd.DataFrame(output_res)
-            st.subheader("Evaluation Results")
-            st.dataframe(output_df)
-
-
-application_evaluation_page()
@@ -1,253 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-
-import pandas as pd
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def select_benchmark_1():
-    # Select Benchmarks
-    st.subheader("1. Choose An Eval Task")
-    benchmarks = llama_stack_api.client.benchmarks.list()
-    benchmarks = {et.identifier: et for et in benchmarks}
-    benchmarks_names = list(benchmarks.keys())
-    selected_benchmark = st.selectbox(
-        "Choose an eval task.",
-        options=benchmarks_names,
-        help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.",
-    )
-    with st.expander("View Eval Task"):
-        st.json(benchmarks[selected_benchmark], expanded=True)
-
-    st.session_state["selected_benchmark"] = selected_benchmark
-    st.session_state["benchmarks"] = benchmarks
-    if st.button("Confirm", key="confirm_1"):
-        st.session_state["selected_benchmark_1_next"] = True
-
-
-def define_eval_candidate_2():
-    if not st.session_state.get("selected_benchmark_1_next", None):
-        return
-
-    st.subheader("2. Define Eval Candidate")
-    st.info(
-        """
-        Define the configurations for the evaluation candidate model or agent used for generation.
-        Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig.
-        """
-    )
-    with st.expander("Define Eval Candidate", expanded=True):
-        # Define Eval Candidate
-        candidate_type = st.radio("Candidate Type", ["model", "agent"])
-
-        available_models = llama_stack_api.client.models.list()
-        available_models = [model.identifier for model in available_models]
-        selected_model = st.selectbox(
-            "Choose a model",
-            available_models,
-            index=0,
-        )
-
-        # Sampling Parameters
-        st.markdown("##### Sampling Parameters")
-        temperature = st.slider(
-            "Temperature",
-            min_value=0.0,
-            max_value=1.0,
-            value=0.0,
-            step=0.1,
-            help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
-        )
-        top_p = st.slider(
-            "Top P",
-            min_value=0.0,
-            max_value=1.0,
-            value=0.95,
-            step=0.1,
-        )
-        max_tokens = st.slider(
-            "Max Tokens",
-            min_value=0,
-            max_value=4096,
-            value=512,
-            step=1,
-            help="The maximum number of tokens to generate",
-        )
-        repetition_penalty = st.slider(
-            "Repetition Penalty",
-            min_value=1.0,
-            max_value=2.0,
-            value=1.0,
-            step=0.1,
-            help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
-        )
-        if candidate_type == "model":
-            if temperature > 0.0:
-                strategy = {
-                    "type": "top_p",
-                    "temperature": temperature,
-                    "top_p": top_p,
-                }
-            else:
-                strategy = {"type": "greedy"}
-
-            eval_candidate = {
-                "type": "model",
-                "model": selected_model,
-                "sampling_params": {
-                    "strategy": strategy,
-                    "max_tokens": max_tokens,
-                    "repetition_penalty": repetition_penalty,
-                },
-            }
-        elif candidate_type == "agent":
-            system_prompt = st.text_area(
-                "System Prompt",
-                value="You are a helpful AI assistant.",
-                help="Initial instructions given to the AI to set its behavior and context",
-            )
-            tools_json = st.text_area(
-                "Tools Configuration (JSON)",
-                value=json.dumps(
-                    [
-                        {
-                            "type": "brave_search",
-                            "engine": "brave",
-                            "api_key": "ENTER_BRAVE_API_KEY_HERE",
-                        }
-                    ]
-                ),
-                help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.",
-                height=200,
-            )
-            try:
-                tools = json.loads(tools_json)
-            except json.JSONDecodeError:
-                st.error("Invalid JSON format for tools configuration")
-                tools = []
-            eval_candidate = {
-                "type": "agent",
-                "config": {
-                    "model": selected_model,
-                    "instructions": system_prompt,
-                    "tools": tools,
-                    "tool_choice": "auto",
-                    "tool_prompt_format": "json",
-                    "input_shields": [],
-                    "output_shields": [],
-                    "enable_session_persistence": False,
-                },
-            }
-        st.session_state["eval_candidate"] = eval_candidate
-
-    if st.button("Confirm", key="confirm_2"):
-        st.session_state["selected_eval_candidate_2_next"] = True
-
-
-def run_evaluation_3():
-    if not st.session_state.get("selected_eval_candidate_2_next", None):
-        return
-
-    st.subheader("3. Run Evaluation")
-    # Add info box to explain configurations being used
-    st.info(
-        """
-        Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button.
-        """
-    )
-    selected_benchmark = st.session_state["selected_benchmark"]
-    benchmarks = st.session_state["benchmarks"]
-    eval_candidate = st.session_state["eval_candidate"]
-
-    dataset_id = benchmarks[selected_benchmark].dataset_id
-    rows = llama_stack_api.client.datasets.iterrows(
-        dataset_id=dataset_id,
-    )
-    total_rows = len(rows.data)
-    # Add number of examples control
-    num_rows = st.number_input(
-        "Number of Examples to Evaluate",
-        min_value=1,
-        max_value=total_rows,
-        value=5,
-        help="Number of examples from the dataset to evaluate. ",
-    )
-
-    benchmark_config = {
-        "type": "benchmark",
-        "eval_candidate": eval_candidate,
-        "scoring_params": {},
-    }
-
-    with st.expander("View Evaluation Task", expanded=True):
-        st.json(benchmarks[selected_benchmark], expanded=True)
-    with st.expander("View Evaluation Task Configuration", expanded=True):
-        st.json(benchmark_config, expanded=True)
-
-    # Add run button and handle evaluation
-    if st.button("Run Evaluation"):
-        progress_text = "Running evaluation..."
-        progress_bar = st.progress(0, text=progress_text)
-        rows = rows.data
-        if num_rows < total_rows:
-            rows = rows[:num_rows]
-
-        # Create separate containers for progress text and results
-        progress_text_container = st.empty()
-        results_container = st.empty()
-        output_res = {}
-        for i, r in enumerate(rows):
-            # Update progress
-            progress = i / len(rows)
-            progress_bar.progress(progress, text=progress_text)
-            # Run evaluation for current row
-            eval_res = llama_stack_api.client.eval.evaluate_rows(
-                benchmark_id=selected_benchmark,
-                input_rows=[r],
-                scoring_functions=benchmarks[selected_benchmark].scoring_functions,
-                benchmark_config=benchmark_config,
-            )
-
-            for k in r.keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(r[k])
-
-            for k in eval_res.generations[0].keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(eval_res.generations[0][k])
-
-            for scoring_fn in benchmarks[selected_benchmark].scoring_functions:
-                if scoring_fn not in output_res:
-                    output_res[scoring_fn] = []
-                output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0])
-
-            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
-            results_container.json(eval_res, expanded=2)
-
-        progress_bar.progress(1.0, text="Evaluation complete!")
-        # Display results in dataframe
-        if output_res:
-            output_df = pd.DataFrame(output_res)
-            st.subheader("Evaluation Results")
-            st.dataframe(output_df)
-
-
-def native_evaluation_page():
-    st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
-    st.title("📊 Evaluations (Generation + Scoring)")
-
-    select_benchmark_1()
-    define_eval_candidate_2()
-    run_evaluation_3()
-
-
-native_evaluation_page()
Some files were not shown because too many files have changed in this diff.