mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
feat: split API and provider specs into separate llama-stack-api pkg (#3895)
# What does this PR do? Extract API definitions and provider specifications into a standalone llama-stack-api package that can be published to PyPI independently of the main llama-stack server. see: https://github.com/llamastack/llama-stack/pull/2978 and https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942 Motivation External providers currently import from llama-stack, which overrides the installed version and causes dependency conflicts. This separation allows external providers to: - Install only the type definitions they need without server dependencies - Avoid version conflicts with the installed llama-stack package - Be versioned and released independently This enables us to re-enable external provider module tests that were previously blocked by these import conflicts. Changes - Created llama-stack-api package with minimal dependencies (pydantic, jsonschema) - Moved APIs, providers datatypes, strong_typing, and schema_utils - Updated all imports from llama_stack.* to llama_stack_api.* - Configured local editable install for development workflow - Updated linting and type-checking configuration for both packages Next Steps - Publish llama-stack-api to PyPI - Update external provider dependencies - Re-enable external provider module tests Pre-cursor PRs to this one: - #4093 - #3954 - #4064 These PRs moved key pieces _out_ of the Api pkg, limiting the scope of change here. relates to #3237 ## Test Plan Package builds successfully and can be imported independently. All pre-commit hooks pass with expected exclusions maintained. --------- Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
ceb716b9a0
commit
840ad75fe9
358 changed files with 2337 additions and 1424 deletions
|
|
@ -1,5 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .agents import *
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import Annotated, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import ExtraBodyField, json_schema_type, webmethod
|
||||
|
||||
from .openai_responses import (
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
OpenAIDeleteResponseObject,
|
||||
OpenAIResponseInput,
|
||||
OpenAIResponseInputTool,
|
||||
OpenAIResponseObject,
|
||||
OpenAIResponseObjectStream,
|
||||
OpenAIResponsePrompt,
|
||||
OpenAIResponseText,
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ResponseGuardrailSpec(BaseModel):
|
||||
"""Specification for a guardrail to apply during response generation.
|
||||
|
||||
:param type: The type/identifier of the guardrail.
|
||||
"""
|
||||
|
||||
type: str
|
||||
# TODO: more fields to be added for guardrail configuration
|
||||
|
||||
|
||||
ResponseGuardrail = str | ResponseGuardrailSpec
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Agents(Protocol):
|
||||
"""Agents
|
||||
|
||||
APIs for creating and interacting with agentic systems."""
|
||||
|
||||
# We situate the OpenAI Responses API in the Agents API just like we did things
|
||||
# for Inference. The Responses API, in its intent, serves the same purpose as
|
||||
# the Agents API above -- it is essentially a lightweight "agentic loop" with
|
||||
# integrated tool calling.
|
||||
#
|
||||
# Both of these APIs are inherently stateful.
|
||||
|
||||
@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_openai_response(
|
||||
self,
|
||||
response_id: str,
|
||||
) -> OpenAIResponseObject:
|
||||
"""Get a model response.
|
||||
|
||||
:param response_id: The ID of the OpenAI response to retrieve.
|
||||
:returns: An OpenAIResponseObject.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_openai_response(
|
||||
self,
|
||||
input: str | list[OpenAIResponseInput],
|
||||
model: str,
|
||||
prompt: OpenAIResponsePrompt | None = None,
|
||||
instructions: str | None = None,
|
||||
previous_response_id: str | None = None,
|
||||
conversation: str | None = None,
|
||||
store: bool | None = True,
|
||||
stream: bool | None = False,
|
||||
temperature: float | None = None,
|
||||
text: OpenAIResponseText | None = None,
|
||||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
include: list[str] | None = None,
|
||||
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
|
||||
guardrails: Annotated[
|
||||
list[ResponseGuardrail] | None,
|
||||
ExtraBodyField(
|
||||
"List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
|
||||
),
|
||||
] = None,
|
||||
max_tool_calls: int | None = None,
|
||||
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
||||
"""Create a model response.
|
||||
|
||||
:param input: Input message(s) to create the response.
|
||||
:param model: The underlying LLM used for completions.
|
||||
:param prompt: (Optional) Prompt object with ID, version, and variables.
|
||||
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
|
||||
:param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
|
||||
:param include: (Optional) Additional fields to include in the response.
|
||||
:param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
|
||||
:param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
|
||||
:returns: An OpenAIResponseObject.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_responses(
|
||||
self,
|
||||
after: str | None = None,
|
||||
limit: int | None = 50,
|
||||
model: str | None = None,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIResponseObject:
|
||||
"""List all responses.
|
||||
|
||||
:param after: The ID of the last response to return.
|
||||
:param limit: The number of responses to return.
|
||||
:param model: The model to filter responses by.
|
||||
:param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
|
||||
:returns: A ListOpenAIResponseObject.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_response_input_items(
|
||||
self,
|
||||
response_id: str,
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
include: list[str] | None = None,
|
||||
limit: int | None = 20,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIResponseInputItem:
|
||||
"""List input items.
|
||||
|
||||
:param response_id: The ID of the response to retrieve input items for.
|
||||
:param after: An item ID to list items after, used for pagination.
|
||||
:param before: An item ID to list items before, used for pagination.
|
||||
:param include: Additional fields to include in the response.
|
||||
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: The order to return the input items in. Default is desc.
|
||||
:returns: An ListOpenAIResponseInputItem.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
"""Delete a response.
|
||||
|
||||
:param response_id: The ID of the OpenAI response to delete.
|
||||
:returns: An OpenAIDeleteResponseObject
|
||||
"""
|
||||
...
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,9 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .batches import Batches, BatchObject, ListBatchesResponse
|
||||
|
||||
__all__ = ["Batches", "BatchObject", "ListBatchesResponse"]
|
||||
|
|
@ -1,96 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
try:
|
||||
from openai.types import Batch as BatchObject
|
||||
except ImportError as e:
|
||||
raise ImportError("OpenAI package is required for batches API. Please install it with: pip install openai") from e
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBatchesResponse(BaseModel):
|
||||
"""Response containing a list of batch objects."""
|
||||
|
||||
object: Literal["list"] = "list"
|
||||
data: list[BatchObject] = Field(..., description="List of batch objects")
|
||||
first_id: str | None = Field(default=None, description="ID of the first batch in the list")
|
||||
last_id: str | None = Field(default=None, description="ID of the last batch in the list")
|
||||
has_more: bool = Field(default=False, description="Whether there are more batches available")
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Batches(Protocol):
|
||||
"""
|
||||
The Batches API enables efficient processing of multiple requests in a single operation,
|
||||
particularly useful for processing large datasets, batch evaluation workflows, and
|
||||
cost-effective inference at scale.
|
||||
|
||||
The API is designed to allow use of openai client libraries for seamless integration.
|
||||
|
||||
This API provides the following extensions:
|
||||
- idempotent batch creation
|
||||
|
||||
Note: This API is currently under active development and may undergo changes.
|
||||
"""
|
||||
|
||||
@webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_batch(
|
||||
self,
|
||||
input_file_id: str,
|
||||
endpoint: str,
|
||||
completion_window: Literal["24h"],
|
||||
metadata: dict[str, str] | None = None,
|
||||
idempotency_key: str | None = None,
|
||||
) -> BatchObject:
|
||||
"""Create a new batch for processing multiple API requests.
|
||||
|
||||
:param input_file_id: The ID of an uploaded file containing requests for the batch.
|
||||
:param endpoint: The endpoint to be used for all requests in the batch.
|
||||
:param completion_window: The time window within which the batch should be processed.
|
||||
:param metadata: Optional metadata for the batch.
|
||||
:param idempotency_key: Optional idempotency key. When provided, enables idempotent behavior.
|
||||
:returns: The created batch object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Retrieve information about a specific batch.
|
||||
|
||||
:param batch_id: The ID of the batch to retrieve.
|
||||
:returns: The batch object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def cancel_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Cancel a batch that is in progress.
|
||||
|
||||
:param batch_id: The ID of the batch to cancel.
|
||||
:returns: The updated batch object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_batches(
|
||||
self,
|
||||
after: str | None = None,
|
||||
limit: int = 20,
|
||||
) -> ListBatchesResponse:
|
||||
"""List all batches for the current user.
|
||||
|
||||
:param after: A cursor for pagination; returns batches after this batch ID.
|
||||
:param limit: Number of batches to return (default 20, max 100).
|
||||
:returns: A list of batch objects.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .benchmarks import *
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
class CommonBenchmarkFields(BaseModel):
|
||||
dataset_id: str
|
||||
scoring_functions: list[str]
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Metadata for this evaluation task",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Benchmark(CommonBenchmarkFields, Resource):
|
||||
"""A benchmark resource for evaluating model performance.
|
||||
|
||||
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
|
||||
:param scoring_functions: List of scoring function identifiers to apply during evaluation
|
||||
:param metadata: Metadata for this evaluation task
|
||||
:param type: The resource type, always benchmark
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.benchmark] = ResourceType.benchmark
|
||||
|
||||
@property
|
||||
def benchmark_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_benchmark_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class BenchmarkInput(CommonBenchmarkFields, BaseModel):
|
||||
benchmark_id: str
|
||||
provider_id: str | None = None
|
||||
provider_benchmark_id: str | None = None
|
||||
|
||||
|
||||
class ListBenchmarksResponse(BaseModel):
|
||||
data: list[Benchmark]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Benchmarks(Protocol):
|
||||
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
||||
"""List all benchmarks.
|
||||
|
||||
:returns: A ListBenchmarksResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
) -> Benchmark:
|
||||
"""Get a benchmark by its ID.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to get.
|
||||
:returns: A Benchmark.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
|
||||
async def register_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
dataset_id: str,
|
||||
scoring_functions: list[str],
|
||||
provider_benchmark_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Register a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to register.
|
||||
:param dataset_id: The ID of the dataset to use for the benchmark.
|
||||
:param scoring_functions: The scoring functions to use for the benchmark.
|
||||
:param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
|
||||
:param provider_id: The ID of the provider to use for the benchmark.
|
||||
:param metadata: The metadata to use for the benchmark.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
|
||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||
"""Unregister a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to unregister.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Annotated, Literal
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
from llama_stack.models.llama.datatypes import ToolCall
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class URL(BaseModel):
|
||||
"""A URL reference to external content.
|
||||
|
||||
:param uri: The URL string pointing to the resource
|
||||
"""
|
||||
|
||||
uri: str
|
||||
|
||||
|
||||
class _URLOrData(BaseModel):
|
||||
"""
|
||||
A URL or a base64 encoded string
|
||||
|
||||
:param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits.
|
||||
:param data: base64 encoded image data as string
|
||||
"""
|
||||
|
||||
url: URL | None = None
|
||||
# data is a base64 encoded string, hint with contentEncoding=base64
|
||||
data: str | None = Field(default=None, json_schema_extra={"contentEncoding": "base64"})
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def validator(cls, values):
|
||||
if isinstance(values, dict):
|
||||
return values
|
||||
return {"url": values}
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ImageContentItem(BaseModel):
|
||||
"""A image content item
|
||||
|
||||
:param type: Discriminator type of the content item. Always "image"
|
||||
:param image: Image as a base64 encoded string or an URL
|
||||
"""
|
||||
|
||||
type: Literal["image"] = "image"
|
||||
image: _URLOrData
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TextContentItem(BaseModel):
|
||||
"""A text content item
|
||||
|
||||
:param type: Discriminator type of the content item. Always "text"
|
||||
:param text: Text content
|
||||
"""
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
text: str
|
||||
|
||||
|
||||
# other modalities can be added here
|
||||
InterleavedContentItem = Annotated[
|
||||
ImageContentItem | TextContentItem,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(InterleavedContentItem, name="InterleavedContentItem")
|
||||
|
||||
# accept a single "str" as a special case since it is common
|
||||
InterleavedContent = str | InterleavedContentItem | list[InterleavedContentItem]
|
||||
register_schema(InterleavedContent, name="InterleavedContent")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TextDelta(BaseModel):
|
||||
"""A text content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "text"
|
||||
:param text: The incremental text content
|
||||
"""
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
text: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ImageDelta(BaseModel):
|
||||
"""An image content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "image"
|
||||
:param image: The incremental image data as bytes
|
||||
"""
|
||||
|
||||
type: Literal["image"] = "image"
|
||||
image: bytes
|
||||
|
||||
|
||||
class ToolCallParseStatus(Enum):
|
||||
"""Status of tool call parsing during streaming.
|
||||
:cvar started: Tool call parsing has begun
|
||||
:cvar in_progress: Tool call parsing is ongoing
|
||||
:cvar failed: Tool call parsing failed
|
||||
:cvar succeeded: Tool call parsing completed successfully
|
||||
"""
|
||||
|
||||
started = "started"
|
||||
in_progress = "in_progress"
|
||||
failed = "failed"
|
||||
succeeded = "succeeded"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolCallDelta(BaseModel):
|
||||
"""A tool call content delta for streaming responses.
|
||||
|
||||
:param type: Discriminator type of the delta. Always "tool_call"
|
||||
:param tool_call: Either an in-progress tool call string or the final parsed tool call
|
||||
:param parse_status: Current parsing status of the tool call
|
||||
"""
|
||||
|
||||
type: Literal["tool_call"] = "tool_call"
|
||||
|
||||
# you either send an in-progress tool call so the client can stream a long
|
||||
# code generation or you send the final parsed tool call at the end of the
|
||||
# stream
|
||||
tool_call: str | ToolCall
|
||||
parse_status: ToolCallParseStatus
|
||||
|
||||
|
||||
# streaming completions send a stream of ContentDeltas
|
||||
ContentDelta = Annotated[
|
||||
TextDelta | ImageDelta | ToolCallDelta,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ContentDelta, name="ContentDelta")
|
||||
|
|
@ -1,95 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
# Custom Llama Stack Exception classes should follow the following schema
|
||||
# 1. All classes should inherit from an existing Built-In Exception class: https://docs.python.org/3/library/exceptions.html
|
||||
# 2. All classes should have a custom error message with the goal of informing the Llama Stack user specifically
|
||||
# 3. All classes should propogate the inherited __init__ function otherwise via 'super().__init__(message)'
|
||||
|
||||
|
||||
class ResourceNotFoundError(ValueError):
|
||||
"""generic exception for a missing Llama Stack resource"""
|
||||
|
||||
def __init__(self, resource_name: str, resource_type: str, client_list: str) -> None:
|
||||
message = (
|
||||
f"{resource_type} '{resource_name}' not found. Use '{client_list}' to list available {resource_type}s."
|
||||
)
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class UnsupportedModelError(ValueError):
|
||||
"""raised when model is not present in the list of supported models"""
|
||||
|
||||
def __init__(self, model_name: str, supported_models_list: list[str]):
|
||||
message = f"'{model_name}' model is not supported. Supported models are: {', '.join(supported_models_list)}"
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class ModelNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced model"""
|
||||
|
||||
def __init__(self, model_name: str) -> None:
|
||||
super().__init__(model_name, "Model", "client.models.list()")
|
||||
|
||||
|
||||
class VectorStoreNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced vector store"""
|
||||
|
||||
def __init__(self, vector_store_name: str) -> None:
|
||||
super().__init__(vector_store_name, "Vector Store", "client.vector_dbs.list()")
|
||||
|
||||
|
||||
class DatasetNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced dataset"""
|
||||
|
||||
def __init__(self, dataset_name: str) -> None:
|
||||
super().__init__(dataset_name, "Dataset", "client.datasets.list()")
|
||||
|
||||
|
||||
class ToolGroupNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced tool group"""
|
||||
|
||||
def __init__(self, toolgroup_name: str) -> None:
|
||||
super().__init__(toolgroup_name, "Tool Group", "client.toolgroups.list()")
|
||||
|
||||
|
||||
class ModelTypeError(TypeError):
|
||||
"""raised when a model is present but not the correct type"""
|
||||
|
||||
def __init__(self, model_name: str, model_type: str, expected_model_type: str) -> None:
|
||||
message = (
|
||||
f"Model '{model_name}' is of type '{model_type}' rather than the expected type '{expected_model_type}'"
|
||||
)
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class ConflictError(ValueError):
|
||||
"""raised when an operation cannot be performed due to a conflict with the current state"""
|
||||
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class TokenValidationError(ValueError):
|
||||
"""raised when token validation fails during authentication"""
|
||||
|
||||
def __init__(self, message: str) -> None:
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class ConversationNotFoundError(ResourceNotFoundError):
|
||||
"""raised when Llama Stack cannot find a referenced conversation"""
|
||||
|
||||
def __init__(self, conversation_id: str) -> None:
|
||||
super().__init__(conversation_id, "Conversation", "client.conversations.list()")
|
||||
|
||||
|
||||
class InvalidConversationIdError(ValueError):
|
||||
"""raised when a conversation ID has an invalid format"""
|
||||
|
||||
def __init__(self, conversation_id: str) -> None:
|
||||
message = f"Invalid conversation ID '{conversation_id}'. Expected an ID that begins with 'conv_'."
|
||||
super().__init__(message)
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class JobStatus(Enum):
|
||||
"""Status of a job execution.
|
||||
:cvar completed: Job has finished successfully
|
||||
:cvar in_progress: Job is currently running
|
||||
:cvar failed: Job has failed during execution
|
||||
:cvar scheduled: Job is scheduled but not yet started
|
||||
:cvar cancelled: Job was cancelled before completion
|
||||
"""
|
||||
|
||||
completed = "completed"
|
||||
in_progress = "in_progress"
|
||||
failed = "failed"
|
||||
scheduled = "scheduled"
|
||||
cancelled = "cancelled"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Job(BaseModel):
|
||||
"""A job execution instance with status tracking.
|
||||
|
||||
:param job_id: Unique identifier for the job
|
||||
:param status: Current execution status of the job
|
||||
"""
|
||||
|
||||
job_id: str
|
||||
status: JobStatus
|
||||
|
|
@ -1,77 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class Order(Enum):
|
||||
"""Sort order for paginated responses.
|
||||
:cvar asc: Ascending order
|
||||
:cvar desc: Descending order
|
||||
"""
|
||||
|
||||
asc = "asc"
|
||||
desc = "desc"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PaginatedResponse(BaseModel):
|
||||
"""A generic paginated response that follows a simple format.
|
||||
|
||||
:param data: The list of items for the current page
|
||||
:param has_more: Whether there are more items available after this set
|
||||
:param url: The URL for accessing this list
|
||||
"""
|
||||
|
||||
data: list[dict[str, Any]]
|
||||
has_more: bool
|
||||
url: str | None = None
|
||||
|
||||
|
||||
# This is a short term solution to allow inference API to return metrics
|
||||
# The ideal way to do this is to have a way for all response types to include metrics
|
||||
# and all metric events logged to the telemetry API to be included with the response
|
||||
# To do this, we will need to augment all response types with a metrics field.
|
||||
# We have hit a blocker from stainless SDK that prevents us from doing this.
|
||||
# The blocker is that if we were to augment the response types that have a data field
|
||||
# in them like so
|
||||
# class ListModelsResponse(BaseModel):
|
||||
# metrics: Optional[List[MetricEvent]] = None
|
||||
# data: List[Models]
|
||||
# ...
|
||||
# The client SDK will need to access the data by using a .data field, which is not
|
||||
# ergonomic. Stainless SDK does support unwrapping the response type, but it
|
||||
# requires that the response type to only have a single field.
|
||||
|
||||
# We will need a way in the client SDK to signal that the metrics are needed
|
||||
# and if they are needed, the client SDK has to return the full response type
|
||||
# without unwrapping it.
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MetricInResponse(BaseModel):
|
||||
"""A metric value included in API responses.
|
||||
:param metric: The name of the metric
|
||||
:param value: The numeric value of the metric
|
||||
:param unit: (Optional) The unit of measurement for the metric value
|
||||
"""
|
||||
|
||||
metric: str
|
||||
value: int | float
|
||||
unit: str | None = None
|
||||
|
||||
|
||||
class MetricResponseMixin(BaseModel):
|
||||
"""Mixin class for API responses that can include metrics.
|
||||
:param metrics: (Optional) List of metrics associated with the API response
|
||||
"""
|
||||
|
||||
metrics: list[MetricInResponse] | None = None
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
def telemetry_traceable(cls):
|
||||
"""
|
||||
Mark a protocol for automatic tracing when telemetry is enabled.
|
||||
|
||||
This is a metadata-only decorator with no dependencies on core.
|
||||
Actual tracing is applied by core routers at runtime if telemetry is enabled.
|
||||
|
||||
Usage:
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class MyProtocol(Protocol):
|
||||
...
|
||||
"""
|
||||
cls.__marked_for_tracing__ = True
|
||||
return cls
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingMetric(BaseModel):
|
||||
"""Training metrics captured during post-training jobs.
|
||||
|
||||
:param epoch: Training epoch number
|
||||
:param train_loss: Loss value on the training dataset
|
||||
:param validation_loss: Loss value on the validation dataset
|
||||
:param perplexity: Perplexity metric indicating model confidence
|
||||
"""
|
||||
|
||||
epoch: int
|
||||
train_loss: float
|
||||
validation_loss: float
|
||||
perplexity: float
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Checkpoint(BaseModel):
|
||||
"""Checkpoint created during training runs.
|
||||
|
||||
:param identifier: Unique identifier for the checkpoint
|
||||
:param created_at: Timestamp when the checkpoint was created
|
||||
:param epoch: Training epoch when the checkpoint was saved
|
||||
:param post_training_job_id: Identifier of the training job that created this checkpoint
|
||||
:param path: File system path where the checkpoint is stored
|
||||
:param training_metrics: (Optional) Training metrics associated with this checkpoint
|
||||
"""
|
||||
|
||||
identifier: str
|
||||
created_at: datetime
|
||||
epoch: int
|
||||
post_training_job_id: str
|
||||
path: str
|
||||
training_metrics: PostTrainingMetric | None = None
|
||||
|
|
@ -1,146 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Annotated, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class StringType(BaseModel):
|
||||
"""Parameter type for string values.
|
||||
|
||||
:param type: Discriminator type. Always "string"
|
||||
"""
|
||||
|
||||
type: Literal["string"] = "string"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class NumberType(BaseModel):
|
||||
"""Parameter type for numeric values.
|
||||
|
||||
:param type: Discriminator type. Always "number"
|
||||
"""
|
||||
|
||||
type: Literal["number"] = "number"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BooleanType(BaseModel):
|
||||
"""Parameter type for boolean values.
|
||||
|
||||
:param type: Discriminator type. Always "boolean"
|
||||
"""
|
||||
|
||||
type: Literal["boolean"] = "boolean"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ArrayType(BaseModel):
|
||||
"""Parameter type for array values.
|
||||
|
||||
:param type: Discriminator type. Always "array"
|
||||
"""
|
||||
|
||||
type: Literal["array"] = "array"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ObjectType(BaseModel):
|
||||
"""Parameter type for object values.
|
||||
|
||||
:param type: Discriminator type. Always "object"
|
||||
"""
|
||||
|
||||
type: Literal["object"] = "object"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class JsonType(BaseModel):
|
||||
"""Parameter type for JSON values.
|
||||
|
||||
:param type: Discriminator type. Always "json"
|
||||
"""
|
||||
|
||||
type: Literal["json"] = "json"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class UnionType(BaseModel):
|
||||
"""Parameter type for union values.
|
||||
|
||||
:param type: Discriminator type. Always "union"
|
||||
"""
|
||||
|
||||
type: Literal["union"] = "union"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ChatCompletionInputType(BaseModel):
|
||||
"""Parameter type for chat completion input.
|
||||
|
||||
:param type: Discriminator type. Always "chat_completion_input"
|
||||
"""
|
||||
|
||||
# expects List[Message] for messages
|
||||
type: Literal["chat_completion_input"] = "chat_completion_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class CompletionInputType(BaseModel):
|
||||
"""Parameter type for completion input.
|
||||
|
||||
:param type: Discriminator type. Always "completion_input"
|
||||
"""
|
||||
|
||||
# expects InterleavedTextMedia for content
|
||||
type: Literal["completion_input"] = "completion_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DialogType(BaseModel):
|
||||
"""Parameter type for dialog data with semantic output labels.
|
||||
|
||||
:param type: Discriminator type. Always "dialog"
|
||||
"""
|
||||
|
||||
# expects List[Message] for messages
|
||||
# this type semantically contains the output label whereas ChatCompletionInputType does not
|
||||
type: Literal["dialog"] = "dialog"
|
||||
|
||||
|
||||
ParamType = Annotated[
|
||||
StringType
|
||||
| NumberType
|
||||
| BooleanType
|
||||
| ArrayType
|
||||
| ObjectType
|
||||
| JsonType
|
||||
| UnionType
|
||||
| ChatCompletionInputType
|
||||
| CompletionInputType,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ParamType, name="ParamType")
|
||||
|
||||
"""
|
||||
# TODO: recursive definition of ParamType in these containers
|
||||
# will cause infinite recursion in OpenAPI generation script
|
||||
# since we are going with ChatCompletionInputType and CompletionInputType
|
||||
# we don't need to worry about ArrayType/ObjectType/UnionType for now
|
||||
ArrayType.model_rebuild()
|
||||
ObjectType.model_rebuild()
|
||||
UnionType.model_rebuild()
|
||||
|
||||
|
||||
class CustomType(BaseModel):
|
||||
pylint: disable=syntax-error
|
||||
type: Literal["custom"] = "custom"
|
||||
validator_class: str
|
||||
"""
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .conversations import (
|
||||
Conversation,
|
||||
ConversationDeletedResource,
|
||||
ConversationItem,
|
||||
ConversationItemCreateRequest,
|
||||
ConversationItemDeletedResource,
|
||||
ConversationItemList,
|
||||
Conversations,
|
||||
Metadata,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Conversation",
|
||||
"ConversationDeletedResource",
|
||||
"ConversationItem",
|
||||
"ConversationItemCreateRequest",
|
||||
"ConversationItemDeletedResource",
|
||||
"ConversationItemList",
|
||||
"Conversations",
|
||||
"Metadata",
|
||||
]
|
||||
|
|
@ -1,272 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Annotated, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.agents.openai_responses import (
|
||||
OpenAIResponseInputFunctionToolCallOutput,
|
||||
OpenAIResponseMCPApprovalRequest,
|
||||
OpenAIResponseMCPApprovalResponse,
|
||||
OpenAIResponseMessage,
|
||||
OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
OpenAIResponseOutputMessageFunctionToolCall,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageMCPListTools,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
)
|
||||
from llama_stack.apis.common.tracing import telemetry_traceable
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
Metadata = dict[str, str]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Conversation(BaseModel):
|
||||
"""OpenAI-compatible conversation object."""
|
||||
|
||||
id: str = Field(..., description="The unique ID of the conversation.")
|
||||
object: Literal["conversation"] = Field(
|
||||
default="conversation", description="The object type, which is always conversation."
|
||||
)
|
||||
created_at: int = Field(
|
||||
..., description="The time at which the conversation was created, measured in seconds since the Unix epoch."
|
||||
)
|
||||
metadata: Metadata | None = Field(
|
||||
default=None,
|
||||
description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard.",
|
||||
)
|
||||
items: list[dict] | None = Field(
|
||||
default=None,
|
||||
description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationMessage(BaseModel):
|
||||
"""OpenAI-compatible message item for conversations."""
|
||||
|
||||
id: str = Field(..., description="unique identifier for this message")
|
||||
content: list[dict] = Field(..., description="message content")
|
||||
role: str = Field(..., description="message role")
|
||||
status: str = Field(..., description="message status")
|
||||
type: Literal["message"] = "message"
|
||||
object: Literal["message"] = "message"
|
||||
|
||||
|
||||
ConversationItem = Annotated[
|
||||
OpenAIResponseMessage
|
||||
| OpenAIResponseOutputMessageWebSearchToolCall
|
||||
| OpenAIResponseOutputMessageFileSearchToolCall
|
||||
| OpenAIResponseOutputMessageFunctionToolCall
|
||||
| OpenAIResponseInputFunctionToolCallOutput
|
||||
| OpenAIResponseMCPApprovalRequest
|
||||
| OpenAIResponseMCPApprovalResponse
|
||||
| OpenAIResponseOutputMessageMCPCall
|
||||
| OpenAIResponseOutputMessageMCPListTools
|
||||
| OpenAIResponseOutputMessageMCPCall
|
||||
| OpenAIResponseOutputMessageMCPListTools,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ConversationItem, name="ConversationItem")
|
||||
|
||||
# Using OpenAI types directly caused issues but some notes for reference:
|
||||
# Note that ConversationItem is a Annotated Union of the types below:
|
||||
# from openai.types.responses import *
|
||||
# from openai.types.responses.response_item import *
|
||||
# from openai.types.conversations import ConversationItem
|
||||
# f = [
|
||||
# ResponseFunctionToolCallItem,
|
||||
# ResponseFunctionToolCallOutputItem,
|
||||
# ResponseFileSearchToolCall,
|
||||
# ResponseFunctionWebSearch,
|
||||
# ImageGenerationCall,
|
||||
# ResponseComputerToolCall,
|
||||
# ResponseComputerToolCallOutputItem,
|
||||
# ResponseReasoningItem,
|
||||
# ResponseCodeInterpreterToolCall,
|
||||
# LocalShellCall,
|
||||
# LocalShellCallOutput,
|
||||
# McpListTools,
|
||||
# McpApprovalRequest,
|
||||
# McpApprovalResponse,
|
||||
# McpCall,
|
||||
# ResponseCustomToolCall,
|
||||
# ResponseCustomToolCallOutput
|
||||
# ]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationDeletedResource(BaseModel):
|
||||
"""Response for deleted conversation."""
|
||||
|
||||
id: str = Field(..., description="The deleted conversation identifier")
|
||||
object: str = Field(default="conversation.deleted", description="Object type")
|
||||
deleted: bool = Field(default=True, description="Whether the object was deleted")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationItemCreateRequest(BaseModel):
|
||||
"""Request body for creating conversation items."""
|
||||
|
||||
items: list[ConversationItem] = Field(
|
||||
...,
|
||||
description="Items to include in the conversation context. You may add up to 20 items at a time.",
|
||||
max_length=20,
|
||||
)
|
||||
|
||||
|
||||
class ConversationItemInclude(StrEnum):
|
||||
"""
|
||||
Specify additional output data to include in the model response.
|
||||
"""
|
||||
|
||||
web_search_call_action_sources = "web_search_call.action.sources"
|
||||
code_interpreter_call_outputs = "code_interpreter_call.outputs"
|
||||
computer_call_output_output_image_url = "computer_call_output.output.image_url"
|
||||
file_search_call_results = "file_search_call.results"
|
||||
message_input_image_image_url = "message.input_image.image_url"
|
||||
message_output_text_logprobs = "message.output_text.logprobs"
|
||||
reasoning_encrypted_content = "reasoning.encrypted_content"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationItemList(BaseModel):
|
||||
"""List of conversation items with pagination."""
|
||||
|
||||
object: str = Field(default="list", description="Object type")
|
||||
data: list[ConversationItem] = Field(..., description="List of conversation items")
|
||||
first_id: str | None = Field(default=None, description="The ID of the first item in the list")
|
||||
last_id: str | None = Field(default=None, description="The ID of the last item in the list")
|
||||
has_more: bool = Field(default=False, description="Whether there are more items available")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ConversationItemDeletedResource(BaseModel):
|
||||
"""Response for deleted conversation item."""
|
||||
|
||||
id: str = Field(..., description="The deleted item identifier")
|
||||
object: str = Field(default="conversation.item.deleted", description="Object type")
|
||||
deleted: bool = Field(default=True, description="Whether the object was deleted")
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Conversations(Protocol):
|
||||
"""Conversations
|
||||
|
||||
Protocol for conversation management operations."""
|
||||
|
||||
@webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_conversation(
|
||||
self, items: list[ConversationItem] | None = None, metadata: Metadata | None = None
|
||||
) -> Conversation:
|
||||
"""Create a conversation.
|
||||
|
||||
Create a conversation.
|
||||
|
||||
:param items: Initial items to include in the conversation context.
|
||||
:param metadata: Set of key-value pairs that can be attached to an object.
|
||||
:returns: The created conversation object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_conversation(self, conversation_id: str) -> Conversation:
|
||||
"""Retrieve a conversation.
|
||||
|
||||
Get a conversation with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:returns: The conversation object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
|
||||
"""Update a conversation.
|
||||
|
||||
Update a conversation's metadata with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param metadata: Set of key-value pairs that can be attached to an object.
|
||||
:returns: The updated conversation object.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
|
||||
"""Delete a conversation.
|
||||
|
||||
Delete a conversation with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:returns: The deleted conversation resource.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
|
||||
"""Create items.
|
||||
|
||||
Create items in the conversation.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param items: Items to include in the conversation context.
|
||||
:returns: List of created items.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
|
||||
"""Retrieve an item.
|
||||
|
||||
Retrieve a conversation item.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param item_id: The item identifier.
|
||||
:returns: The conversation item.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}/items", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_items(
|
||||
self,
|
||||
conversation_id: str,
|
||||
after: str | None = None,
|
||||
include: list[ConversationItemInclude] | None = None,
|
||||
limit: int | None = None,
|
||||
order: Literal["asc", "desc"] | None = None,
|
||||
) -> ConversationItemList:
|
||||
"""List items.
|
||||
|
||||
List items in the conversation.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param after: An item ID to list items after, used in pagination.
|
||||
:param include: Specify additional output data to include in the response.
|
||||
:param limit: A limit on the number of objects to be returned (1-100, default 20).
|
||||
:param order: The order to return items in (asc or desc, default desc).
|
||||
:returns: List of conversation items.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_conversation_item(
|
||||
self, conversation_id: str, item_id: str
|
||||
) -> ConversationItemDeletedResource:
|
||||
"""Delete an item.
|
||||
|
||||
Delete a conversation item.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param item_id: The item identifier.
|
||||
:returns: The deleted item resource.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .datasetio import *
|
||||
|
|
@ -1,55 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.apis.datasets import Dataset
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
|
||||
from llama_stack.schema_utils import webmethod
|
||||
|
||||
|
||||
class DatasetStore(Protocol):
|
||||
def get_dataset(self, dataset_id: str) -> Dataset: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class DatasetIO(Protocol):
|
||||
# keeping for aligning with inference/safety, but this is not used
|
||||
dataset_store: DatasetStore
|
||||
|
||||
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
|
||||
async def iterrows(
|
||||
self,
|
||||
dataset_id: str,
|
||||
start_index: int | None = None,
|
||||
limit: int | None = None,
|
||||
) -> PaginatedResponse:
|
||||
"""Get a paginated list of rows from a dataset.
|
||||
|
||||
Uses offset-based pagination where:
|
||||
- start_index: The starting index (0-based). If None, starts from beginning.
|
||||
- limit: Number of items to return. If None or -1, returns all items.
|
||||
|
||||
The response includes:
|
||||
- data: List of items for the current page.
|
||||
- has_more: Whether there are more items available after this set.
|
||||
|
||||
:param dataset_id: The ID of the dataset to get the rows from.
|
||||
:param start_index: Index into dataset for the first row to get. Get all rows if None.
|
||||
:param limit: The number of rows to get.
|
||||
:returns: A PaginatedResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA)
|
||||
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
|
||||
"""Append rows to a dataset.
|
||||
|
||||
:param dataset_id: The ID of the dataset to append the rows to.
|
||||
:param rows: The rows to append to the dataset.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .datasets import *
|
||||
|
|
@ -1,247 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum, StrEnum
|
||||
from typing import Annotated, Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
class DatasetPurpose(StrEnum):
|
||||
"""
|
||||
Purpose of the dataset. Each purpose has a required input data schema.
|
||||
|
||||
:cvar post-training/messages: The dataset contains messages used for post-training.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
:cvar eval/question-answer: The dataset contains a question column and an answer column.
|
||||
{
|
||||
"question": "What is the capital of France?",
|
||||
"answer": "Paris"
|
||||
}
|
||||
:cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, my name is John Doe."},
|
||||
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
|
||||
{"role": "user", "content": "What's my name?"},
|
||||
],
|
||||
"answer": "John Doe"
|
||||
}
|
||||
"""
|
||||
|
||||
post_training_messages = "post-training/messages"
|
||||
eval_question_answer = "eval/question-answer"
|
||||
eval_messages_answer = "eval/messages-answer"
|
||||
|
||||
# TODO: add more schemas here
|
||||
|
||||
|
||||
class DatasetType(Enum):
|
||||
"""
|
||||
Type of the dataset source.
|
||||
:cvar uri: The dataset can be obtained from a URI.
|
||||
:cvar rows: The dataset is stored in rows.
|
||||
"""
|
||||
|
||||
uri = "uri"
|
||||
rows = "rows"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class URIDataSource(BaseModel):
|
||||
"""A dataset that can be obtained from a URI.
|
||||
:param uri: The dataset can be obtained from a URI. E.g.
|
||||
- "https://mywebsite.com/mydata.jsonl"
|
||||
- "lsfs://mydata.jsonl"
|
||||
- "data:csv;base64,{base64_content}"
|
||||
"""
|
||||
|
||||
type: Literal["uri"] = "uri"
|
||||
uri: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RowsDataSource(BaseModel):
|
||||
"""A dataset stored in rows.
|
||||
:param rows: The dataset is stored in rows. E.g.
|
||||
- [
|
||||
{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
|
||||
]
|
||||
"""
|
||||
|
||||
type: Literal["rows"] = "rows"
|
||||
rows: list[dict[str, Any]]
|
||||
|
||||
|
||||
DataSource = Annotated[
|
||||
URIDataSource | RowsDataSource,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(DataSource, name="DataSource")
|
||||
|
||||
|
||||
class CommonDatasetFields(BaseModel):
|
||||
"""
|
||||
Common fields for a dataset.
|
||||
|
||||
:param purpose: Purpose of the dataset indicating its intended use
|
||||
:param source: Data source configuration for the dataset
|
||||
:param metadata: Additional metadata for the dataset
|
||||
"""
|
||||
|
||||
purpose: DatasetPurpose
|
||||
source: DataSource
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Any additional metadata for this dataset",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Dataset(CommonDatasetFields, Resource):
|
||||
"""Dataset resource for storing and accessing training or evaluation data.
|
||||
|
||||
:param type: Type of resource, always 'dataset' for datasets
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.dataset] = ResourceType.dataset
|
||||
|
||||
@property
|
||||
def dataset_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_dataset_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class DatasetInput(CommonDatasetFields, BaseModel):
|
||||
"""Input parameters for dataset operations.
|
||||
|
||||
:param dataset_id: Unique identifier for the dataset
|
||||
"""
|
||||
|
||||
dataset_id: str
|
||||
|
||||
|
||||
class ListDatasetsResponse(BaseModel):
|
||||
"""Response from listing datasets.
|
||||
|
||||
:param data: List of datasets
|
||||
"""
|
||||
|
||||
data: list[Dataset]
|
||||
|
||||
|
||||
class Datasets(Protocol):
|
||||
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA, deprecated=True)
|
||||
async def register_dataset(
|
||||
self,
|
||||
purpose: DatasetPurpose,
|
||||
source: DataSource,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
dataset_id: str | None = None,
|
||||
) -> Dataset:
|
||||
"""
|
||||
Register a new dataset.
|
||||
|
||||
:param purpose: The purpose of the dataset.
|
||||
One of:
|
||||
- "post-training/messages": The dataset contains a messages column with list of messages for post-training.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
- "eval/question-answer": The dataset contains a question column and an answer column for evaluation.
|
||||
{
|
||||
"question": "What is the capital of France?",
|
||||
"answer": "Paris"
|
||||
}
|
||||
- "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, my name is John Doe."},
|
||||
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
|
||||
{"role": "user", "content": "What's my name?"},
|
||||
],
|
||||
"answer": "John Doe"
|
||||
}
|
||||
:param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples:
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "https://mywebsite.com/mydata.jsonl"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "lsfs://mydata.jsonl"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "data:csv;base64,{base64_content}"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "huggingface://llamastack/simpleqa?split=train"
|
||||
}
|
||||
- {
|
||||
"type": "rows",
|
||||
"rows": [
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
:param metadata: The metadata for the dataset.
|
||||
- E.g. {"description": "My dataset"}.
|
||||
:param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
|
||||
:returns: A Dataset.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
|
||||
async def get_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
) -> Dataset:
|
||||
"""Get a dataset by its ID.
|
||||
|
||||
:param dataset_id: The ID of the dataset to get.
|
||||
:returns: A Dataset.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
|
||||
async def list_datasets(self) -> ListDatasetsResponse:
|
||||
"""List all datasets.
|
||||
|
||||
:returns: A ListDatasetsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA, deprecated=True)
|
||||
async def unregister_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
) -> None:
|
||||
"""Unregister a dataset by its ID.
|
||||
|
||||
:param dataset_id: The ID of the dataset to unregister.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum, EnumMeta
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class DynamicApiMeta(EnumMeta):
|
||||
def __new__(cls, name, bases, namespace):
|
||||
# Store the original enum values
|
||||
original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
|
||||
|
||||
# Create the enum class
|
||||
cls = super().__new__(cls, name, bases, namespace)
|
||||
|
||||
# Store the original values for reference
|
||||
cls._original_values = original_values
|
||||
# Initialize _dynamic_values
|
||||
cls._dynamic_values = {}
|
||||
|
||||
return cls
|
||||
|
||||
def __call__(cls, value):
|
||||
try:
|
||||
return super().__call__(value)
|
||||
except ValueError as e:
|
||||
# If this value was already dynamically added, return it
|
||||
if value in cls._dynamic_values:
|
||||
return cls._dynamic_values[value]
|
||||
|
||||
# If the value doesn't exist, create a new enum member
|
||||
# Create a new member name from the value
|
||||
member_name = value.lower().replace("-", "_")
|
||||
|
||||
# If this member name already exists in the enum, return the existing member
|
||||
if member_name in cls._member_map_:
|
||||
return cls._member_map_[member_name]
|
||||
|
||||
# Instead of creating a new member, raise ValueError to force users to use Api.add() to
|
||||
# register new APIs explicitly
|
||||
raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
|
||||
|
||||
def __iter__(cls):
|
||||
# Allow iteration over both static and dynamic members
|
||||
yield from super().__iter__()
|
||||
if hasattr(cls, "_dynamic_values"):
|
||||
yield from cls._dynamic_values.values()
|
||||
|
||||
def add(cls, value):
|
||||
"""
|
||||
Add a new API to the enum.
|
||||
Used to register external APIs.
|
||||
"""
|
||||
member_name = value.lower().replace("-", "_")
|
||||
|
||||
# If this member name already exists in the enum, return it
|
||||
if member_name in cls._member_map_:
|
||||
return cls._member_map_[member_name]
|
||||
|
||||
# Create a new enum member
|
||||
member = object.__new__(cls)
|
||||
member._name_ = member_name
|
||||
member._value_ = value
|
||||
|
||||
# Add it to the enum class
|
||||
cls._member_map_[member_name] = member
|
||||
cls._member_names_.append(member_name)
|
||||
cls._member_type_ = str
|
||||
|
||||
# Store it in our dynamic values
|
||||
cls._dynamic_values[value] = member
|
||||
|
||||
return member
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Api(Enum, metaclass=DynamicApiMeta):
|
||||
"""Enumeration of all available APIs in the Llama Stack system.
|
||||
:cvar providers: Provider management and configuration
|
||||
:cvar inference: Text generation, chat completions, and embeddings
|
||||
:cvar safety: Content moderation and safety shields
|
||||
:cvar agents: Agent orchestration and execution
|
||||
:cvar batches: Batch processing for asynchronous API requests
|
||||
:cvar vector_io: Vector database operations and queries
|
||||
:cvar datasetio: Dataset input/output operations
|
||||
:cvar scoring: Model output evaluation and scoring
|
||||
:cvar eval: Model evaluation and benchmarking framework
|
||||
:cvar post_training: Fine-tuning and model training
|
||||
:cvar tool_runtime: Tool execution and management
|
||||
:cvar telemetry: Observability and system monitoring
|
||||
:cvar models: Model metadata and management
|
||||
:cvar shields: Safety shield implementations
|
||||
:cvar datasets: Dataset creation and management
|
||||
:cvar scoring_functions: Scoring function definitions
|
||||
:cvar benchmarks: Benchmark suite management
|
||||
:cvar tool_groups: Tool group organization
|
||||
:cvar files: File storage and management
|
||||
:cvar prompts: Prompt versions and management
|
||||
:cvar inspect: Built-in system inspection and introspection
|
||||
"""
|
||||
|
||||
providers = "providers"
|
||||
inference = "inference"
|
||||
safety = "safety"
|
||||
agents = "agents"
|
||||
batches = "batches"
|
||||
vector_io = "vector_io"
|
||||
datasetio = "datasetio"
|
||||
scoring = "scoring"
|
||||
eval = "eval"
|
||||
post_training = "post_training"
|
||||
tool_runtime = "tool_runtime"
|
||||
|
||||
models = "models"
|
||||
shields = "shields"
|
||||
vector_stores = "vector_stores" # only used for routing table
|
||||
datasets = "datasets"
|
||||
scoring_functions = "scoring_functions"
|
||||
benchmarks = "benchmarks"
|
||||
tool_groups = "tool_groups"
|
||||
files = "files"
|
||||
prompts = "prompts"
|
||||
conversations = "conversations"
|
||||
|
||||
# built-in API
|
||||
inspect = "inspect"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Error(BaseModel):
|
||||
"""
|
||||
Error response from the API. Roughly follows RFC 7807.
|
||||
|
||||
:param status: HTTP status code
|
||||
:param title: Error title, a short summary of the error which is invariant for an error type
|
||||
:param detail: Error detail, a longer human-readable description of the error
|
||||
:param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
|
||||
"""
|
||||
|
||||
status: int
|
||||
title: str
|
||||
detail: str
|
||||
instance: str | None = None
|
||||
|
||||
|
||||
class ExternalApiSpec(BaseModel):
|
||||
"""Specification for an external API implementation."""
|
||||
|
||||
module: str = Field(..., description="Python module containing the API implementation")
|
||||
name: str = Field(..., description="Name of the API")
|
||||
pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
|
||||
protocol: str = Field(..., description="Name of the protocol class for the API")
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .eval import *
|
||||
|
|
@ -1,137 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.job_types import Job
|
||||
from llama_stack.apis.inference import SamplingParams, SystemMessage
|
||||
from llama_stack.apis.scoring import ScoringResult
|
||||
from llama_stack.apis.scoring_functions import ScoringFnParams
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ModelCandidate(BaseModel):
|
||||
"""A model candidate for evaluation.
|
||||
|
||||
:param model: The model ID to evaluate.
|
||||
:param sampling_params: The sampling parameters for the model.
|
||||
:param system_message: (Optional) The system message providing instructions or context to the model.
|
||||
"""
|
||||
|
||||
type: Literal["model"] = "model"
|
||||
model: str
|
||||
sampling_params: SamplingParams
|
||||
system_message: SystemMessage | None = None
|
||||
|
||||
|
||||
EvalCandidate = ModelCandidate
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BenchmarkConfig(BaseModel):
|
||||
"""A benchmark configuration for evaluation.
|
||||
|
||||
:param eval_candidate: The candidate to evaluate.
|
||||
:param scoring_params: Map between scoring function id and parameters for each scoring function you want to run
|
||||
:param num_examples: (Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated
|
||||
"""
|
||||
|
||||
eval_candidate: EvalCandidate
|
||||
scoring_params: dict[str, ScoringFnParams] = Field(
|
||||
description="Map between scoring function id and parameters for each scoring function you want to run",
|
||||
default_factory=dict,
|
||||
)
|
||||
num_examples: int | None = Field(
|
||||
description="Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated",
|
||||
default=None,
|
||||
)
|
||||
# we could optinally add any specific dataset config here
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class EvaluateResponse(BaseModel):
|
||||
"""The response from an evaluation.
|
||||
|
||||
:param generations: The generations from the evaluation.
|
||||
:param scores: The scores from the evaluation.
|
||||
"""
|
||||
|
||||
generations: list[dict[str, Any]]
|
||||
# each key in the dict is a scoring function name
|
||||
scores: dict[str, ScoringResult]
|
||||
|
||||
|
||||
class Eval(Protocol):
|
||||
"""Evaluations
|
||||
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def run_eval(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
benchmark_config: BenchmarkConfig,
|
||||
) -> Job:
|
||||
"""Run an evaluation on a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param benchmark_config: The configuration for the benchmark.
|
||||
:returns: The job that was created to run the evaluation.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def evaluate_rows(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
input_rows: list[dict[str, Any]],
|
||||
scoring_functions: list[str],
|
||||
benchmark_config: BenchmarkConfig,
|
||||
) -> EvaluateResponse:
|
||||
"""Evaluate a list of rows on a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param input_rows: The rows to evaluate.
|
||||
:param scoring_functions: The scoring functions to use for the evaluation.
|
||||
:param benchmark_config: The configuration for the benchmark.
|
||||
:returns: EvaluateResponse object containing generations and scores.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def job_status(self, benchmark_id: str, job_id: str) -> Job:
|
||||
"""Get the status of a job.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param job_id: The ID of the job to get the status of.
|
||||
:returns: The status of the evaluation job.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
|
||||
"""Cancel a job.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param job_id: The ID of the job to cancel.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
|
||||
)
|
||||
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
|
||||
"""Get the result of a job.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||
:param job_id: The ID of the job to get the result of.
|
||||
:returns: The result of the job.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .files import *
|
||||
|
|
@ -1,194 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
|
||||
|
||||
from fastapi import File, Form, Response, UploadFile
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.common.tracing import telemetry_traceable
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
# OpenAI Files API Models
|
||||
class OpenAIFilePurpose(StrEnum):
|
||||
"""
|
||||
Valid purpose values for OpenAI Files API.
|
||||
"""
|
||||
|
||||
ASSISTANTS = "assistants"
|
||||
BATCH = "batch"
|
||||
# TODO: Add other purposes as needed
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIFileObject(BaseModel):
|
||||
"""
|
||||
OpenAI File object as defined in the OpenAI Files API.
|
||||
|
||||
:param object: The object type, which is always "file"
|
||||
:param id: The file identifier, which can be referenced in the API endpoints
|
||||
:param bytes: The size of the file, in bytes
|
||||
:param created_at: The Unix timestamp (in seconds) for when the file was created
|
||||
:param expires_at: The Unix timestamp (in seconds) for when the file expires
|
||||
:param filename: The name of the file
|
||||
:param purpose: The intended purpose of the file
|
||||
"""
|
||||
|
||||
object: Literal["file"] = "file"
|
||||
id: str
|
||||
bytes: int
|
||||
created_at: int
|
||||
expires_at: int
|
||||
filename: str
|
||||
purpose: OpenAIFilePurpose
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ExpiresAfter(BaseModel):
|
||||
"""
|
||||
Control expiration of uploaded files.
|
||||
|
||||
Params:
|
||||
- anchor, must be "created_at"
|
||||
- seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
|
||||
"""
|
||||
|
||||
MIN: ClassVar[int] = 3600 # 1 hour
|
||||
MAX: ClassVar[int] = 2592000 # 30 days
|
||||
|
||||
anchor: Literal["created_at"]
|
||||
seconds: int = Field(..., ge=3600, le=2592000)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListOpenAIFileResponse(BaseModel):
|
||||
"""
|
||||
Response for listing files in OpenAI Files API.
|
||||
|
||||
:param data: List of file objects
|
||||
:param has_more: Whether there are more files available beyond this page
|
||||
:param first_id: ID of the first file in the list for pagination
|
||||
:param last_id: ID of the last file in the list for pagination
|
||||
:param object: The object type, which is always "list"
|
||||
"""
|
||||
|
||||
data: list[OpenAIFileObject]
|
||||
has_more: bool
|
||||
first_id: str
|
||||
last_id: str
|
||||
object: Literal["list"] = "list"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIFileDeleteResponse(BaseModel):
|
||||
"""
|
||||
Response for deleting a file in OpenAI Files API.
|
||||
|
||||
:param id: The file identifier that was deleted
|
||||
:param object: The object type, which is always "file"
|
||||
:param deleted: Whether the file was successfully deleted
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: Literal["file"] = "file"
|
||||
deleted: bool
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Files(Protocol):
|
||||
"""Files
|
||||
|
||||
This API is used to upload documents that can be used with other Llama Stack APIs.
|
||||
"""
|
||||
|
||||
# OpenAI Files API Endpoints
|
||||
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_upload_file(
|
||||
self,
|
||||
file: Annotated[UploadFile, File()],
|
||||
purpose: Annotated[OpenAIFilePurpose, Form()],
|
||||
expires_after: Annotated[ExpiresAfter | None, Form()] = None,
|
||||
) -> OpenAIFileObject:
|
||||
"""Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
- purpose: The intended purpose of the uploaded file.
|
||||
- expires_after: Optional form values describing expiration for the file.
|
||||
|
||||
:param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
|
||||
:param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
|
||||
:param expires_after: Optional form values describing expiration for the file.
|
||||
:returns: An OpenAIFileObject representing the uploaded file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_files(
|
||||
self,
|
||||
after: str | None = None,
|
||||
limit: int | None = 10000,
|
||||
order: Order | None = Order.desc,
|
||||
purpose: OpenAIFilePurpose | None = None,
|
||||
) -> ListOpenAIFileResponse:
|
||||
"""List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
|
||||
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
|
||||
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000.
|
||||
:param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:param purpose: Only return files with the given purpose.
|
||||
:returns: An ListOpenAIFileResponse containing the list of files.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file(
|
||||
self,
|
||||
file_id: str,
|
||||
) -> OpenAIFileObject:
|
||||
"""Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
:returns: An OpenAIFileObject containing file information.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_file(
|
||||
self,
|
||||
file_id: str,
|
||||
) -> OpenAIFileDeleteResponse:
|
||||
"""Delete file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
:returns: An OpenAIFileDeleteResponse indicating successful deletion.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file_content(
|
||||
self,
|
||||
file_id: str,
|
||||
) -> Response:
|
||||
"""Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
:returns: The raw file content as a binary response.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .inference import *
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .inspect import *
|
||||
|
|
@ -1,102 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.version import (
|
||||
LLAMA_STACK_API_V1,
|
||||
)
|
||||
from llama_stack.providers.datatypes import HealthStatus
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
# Valid values for the route filter parameter.
|
||||
# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
|
||||
# Special filter value: "deprecated" (shows deprecated routes regardless of level)
|
||||
ApiFilter = Literal["v1", "v1alpha", "v1beta", "deprecated"]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RouteInfo(BaseModel):
|
||||
"""Information about an API route including its path, method, and implementing providers.
|
||||
|
||||
:param route: The API endpoint path
|
||||
:param method: HTTP method for the route
|
||||
:param provider_types: List of provider types that implement this route
|
||||
"""
|
||||
|
||||
route: str
|
||||
method: str
|
||||
provider_types: list[str]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class HealthInfo(BaseModel):
|
||||
"""Health status information for the service.
|
||||
|
||||
:param status: Current health status of the service
|
||||
"""
|
||||
|
||||
status: HealthStatus
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VersionInfo(BaseModel):
|
||||
"""Version information for the service.
|
||||
|
||||
:param version: Version number of the service
|
||||
"""
|
||||
|
||||
version: str
|
||||
|
||||
|
||||
class ListRoutesResponse(BaseModel):
|
||||
"""Response containing a list of all available API routes.
|
||||
|
||||
:param data: List of available route information objects
|
||||
"""
|
||||
|
||||
data: list[RouteInfo]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Inspect(Protocol):
|
||||
"""Inspect
|
||||
|
||||
APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.
|
||||
"""
|
||||
|
||||
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse:
|
||||
"""List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
|
||||
:param api_filter: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns all non-deprecated routes.
|
||||
:returns: Response containing information about all available routes.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1, require_authentication=False)
|
||||
async def health(self) -> HealthInfo:
|
||||
"""Get health status.
|
||||
|
||||
Get the current health status of the service.
|
||||
|
||||
:returns: Health information indicating if the service is operational.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1, require_authentication=False)
|
||||
async def version(self) -> VersionInfo:
|
||||
"""Get version.
|
||||
|
||||
Get the version of the service.
|
||||
|
||||
:returns: Version information containing the service version number.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .models import *
|
||||
|
|
@ -1,172 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
from llama_stack.apis.common.tracing import telemetry_traceable
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
class CommonModelFields(BaseModel):
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Any additional metadata for this model",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ModelType(StrEnum):
|
||||
"""Enumeration of supported model types in Llama Stack.
|
||||
:cvar llm: Large language model for text generation and completion
|
||||
:cvar embedding: Embedding model for converting text to vector representations
|
||||
:cvar rerank: Reranking model for reordering documents based on their relevance to a query
|
||||
"""
|
||||
|
||||
llm = "llm"
|
||||
embedding = "embedding"
|
||||
rerank = "rerank"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Model(CommonModelFields, Resource):
|
||||
"""A model resource representing an AI model registered in Llama Stack.
|
||||
|
||||
:param type: The resource type, always 'model' for model resources
|
||||
:param model_type: The type of model (LLM or embedding model)
|
||||
:param metadata: Any additional metadata for this model
|
||||
:param identifier: Unique identifier for this resource in llama stack
|
||||
:param provider_resource_id: Unique identifier for this resource in the provider
|
||||
:param provider_id: ID of the provider that owns this resource
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.model] = ResourceType.model
|
||||
|
||||
@property
|
||||
def model_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_model_id(self) -> str:
|
||||
assert self.provider_resource_id is not None, "Provider resource ID must be set"
|
||||
return self.provider_resource_id
|
||||
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
model_type: ModelType = Field(default=ModelType.llm)
|
||||
|
||||
@field_validator("provider_resource_id")
|
||||
@classmethod
|
||||
def validate_provider_resource_id(cls, v):
|
||||
if v is None:
|
||||
raise ValueError("provider_resource_id cannot be None")
|
||||
return v
|
||||
|
||||
|
||||
class ModelInput(CommonModelFields):
|
||||
model_id: str
|
||||
provider_id: str | None = None
|
||||
provider_model_id: str | None = None
|
||||
model_type: ModelType | None = ModelType.llm
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
|
||||
class ListModelsResponse(BaseModel):
|
||||
data: list[Model]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIModel(BaseModel):
|
||||
"""A model from OpenAI.
|
||||
|
||||
:id: The ID of the model
|
||||
:object: The object type, which will be "model"
|
||||
:created: The Unix timestamp in seconds when the model was created
|
||||
:owned_by: The owner of the model
|
||||
:custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: Literal["model"] = "model"
|
||||
created: int
|
||||
owned_by: str
|
||||
custom_metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class OpenAIListModelsResponse(BaseModel):
|
||||
data: list[OpenAIModel]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Models(Protocol):
|
||||
async def list_models(self) -> ListModelsResponse:
|
||||
"""List all models.
|
||||
|
||||
:returns: A ListModelsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_models(self) -> OpenAIListModelsResponse:
|
||||
"""List models using the OpenAI API.
|
||||
|
||||
:returns: A OpenAIListModelsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_model(
|
||||
self,
|
||||
model_id: str,
|
||||
) -> Model:
|
||||
"""Get model.
|
||||
|
||||
Get a model by its identifier.
|
||||
|
||||
:param model_id: The identifier of the model to get.
|
||||
:returns: A Model.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def register_model(
|
||||
self,
|
||||
model_id: str,
|
||||
provider_model_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
model_type: ModelType | None = None,
|
||||
) -> Model:
|
||||
"""Register model.
|
||||
|
||||
Register a model.
|
||||
|
||||
:param model_id: The identifier of the model to register.
|
||||
:param provider_model_id: The identifier of the model in the provider.
|
||||
:param provider_id: The identifier of the provider.
|
||||
:param metadata: Any additional metadata for this model.
|
||||
:param model_type: The type of model to register.
|
||||
:returns: A Model.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def unregister_model(
|
||||
self,
|
||||
model_id: str,
|
||||
) -> None:
|
||||
"""Unregister model.
|
||||
|
||||
Unregister a model.
|
||||
|
||||
:param model_id: The identifier of the model to unregister.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .post_training import *
|
||||
|
|
@ -1,368 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Annotated, Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.common.job_types import JobStatus
|
||||
from llama_stack.apis.common.training_types import Checkpoint
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OptimizerType(Enum):
|
||||
"""Available optimizer algorithms for training.
|
||||
:cvar adam: Adaptive Moment Estimation optimizer
|
||||
:cvar adamw: AdamW optimizer with weight decay
|
||||
:cvar sgd: Stochastic Gradient Descent optimizer
|
||||
"""
|
||||
|
||||
adam = "adam"
|
||||
adamw = "adamw"
|
||||
sgd = "sgd"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DatasetFormat(Enum):
|
||||
"""Format of the training dataset.
|
||||
:cvar instruct: Instruction-following format with prompt and completion
|
||||
:cvar dialog: Multi-turn conversation format with messages
|
||||
"""
|
||||
|
||||
instruct = "instruct"
|
||||
dialog = "dialog"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DataConfig(BaseModel):
|
||||
"""Configuration for training data and data loading.
|
||||
|
||||
:param dataset_id: Unique identifier for the training dataset
|
||||
:param batch_size: Number of samples per training batch
|
||||
:param shuffle: Whether to shuffle the dataset during training
|
||||
:param data_format: Format of the dataset (instruct or dialog)
|
||||
:param validation_dataset_id: (Optional) Unique identifier for the validation dataset
|
||||
:param packed: (Optional) Whether to pack multiple samples into a single sequence for efficiency
|
||||
:param train_on_input: (Optional) Whether to compute loss on input tokens as well as output tokens
|
||||
"""
|
||||
|
||||
dataset_id: str
|
||||
batch_size: int
|
||||
shuffle: bool
|
||||
data_format: DatasetFormat
|
||||
validation_dataset_id: str | None = None
|
||||
packed: bool | None = False
|
||||
train_on_input: bool | None = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OptimizerConfig(BaseModel):
|
||||
"""Configuration parameters for the optimization algorithm.
|
||||
|
||||
:param optimizer_type: Type of optimizer to use (adam, adamw, or sgd)
|
||||
:param lr: Learning rate for the optimizer
|
||||
:param weight_decay: Weight decay coefficient for regularization
|
||||
:param num_warmup_steps: Number of steps for learning rate warmup
|
||||
"""
|
||||
|
||||
optimizer_type: OptimizerType
|
||||
lr: float
|
||||
weight_decay: float
|
||||
num_warmup_steps: int
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class EfficiencyConfig(BaseModel):
|
||||
"""Configuration for memory and compute efficiency optimizations.
|
||||
|
||||
:param enable_activation_checkpointing: (Optional) Whether to use activation checkpointing to reduce memory usage
|
||||
:param enable_activation_offloading: (Optional) Whether to offload activations to CPU to save GPU memory
|
||||
:param memory_efficient_fsdp_wrap: (Optional) Whether to use memory-efficient FSDP wrapping
|
||||
:param fsdp_cpu_offload: (Optional) Whether to offload FSDP parameters to CPU
|
||||
"""
|
||||
|
||||
enable_activation_checkpointing: bool | None = False
|
||||
enable_activation_offloading: bool | None = False
|
||||
memory_efficient_fsdp_wrap: bool | None = False
|
||||
fsdp_cpu_offload: bool | None = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TrainingConfig(BaseModel):
|
||||
"""Comprehensive configuration for the training process.
|
||||
|
||||
:param n_epochs: Number of training epochs to run
|
||||
:param max_steps_per_epoch: Maximum number of steps to run per epoch
|
||||
:param gradient_accumulation_steps: Number of steps to accumulate gradients before updating
|
||||
:param max_validation_steps: (Optional) Maximum number of validation steps per epoch
|
||||
:param data_config: (Optional) Configuration for data loading and formatting
|
||||
:param optimizer_config: (Optional) Configuration for the optimization algorithm
|
||||
:param efficiency_config: (Optional) Configuration for memory and compute optimizations
|
||||
:param dtype: (Optional) Data type for model parameters (bf16, fp16, fp32)
|
||||
"""
|
||||
|
||||
n_epochs: int
|
||||
max_steps_per_epoch: int = 1
|
||||
gradient_accumulation_steps: int = 1
|
||||
max_validation_steps: int | None = 1
|
||||
data_config: DataConfig | None = None
|
||||
optimizer_config: OptimizerConfig | None = None
|
||||
efficiency_config: EfficiencyConfig | None = None
|
||||
dtype: str | None = "bf16"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class LoraFinetuningConfig(BaseModel):
|
||||
"""Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
|
||||
|
||||
:param type: Algorithm type identifier, always "LoRA"
|
||||
:param lora_attn_modules: List of attention module names to apply LoRA to
|
||||
:param apply_lora_to_mlp: Whether to apply LoRA to MLP layers
|
||||
:param apply_lora_to_output: Whether to apply LoRA to output projection layers
|
||||
:param rank: Rank of the LoRA adaptation (lower rank = fewer parameters)
|
||||
:param alpha: LoRA scaling parameter that controls adaptation strength
|
||||
:param use_dora: (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
|
||||
:param quantize_base: (Optional) Whether to quantize the base model weights
|
||||
"""
|
||||
|
||||
type: Literal["LoRA"] = "LoRA"
|
||||
lora_attn_modules: list[str]
|
||||
apply_lora_to_mlp: bool
|
||||
apply_lora_to_output: bool
|
||||
rank: int
|
||||
alpha: int
|
||||
use_dora: bool | None = False
|
||||
quantize_base: bool | None = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class QATFinetuningConfig(BaseModel):
|
||||
"""Configuration for Quantization-Aware Training (QAT) fine-tuning.
|
||||
|
||||
:param type: Algorithm type identifier, always "QAT"
|
||||
:param quantizer_name: Name of the quantization algorithm to use
|
||||
:param group_size: Size of groups for grouped quantization
|
||||
"""
|
||||
|
||||
type: Literal["QAT"] = "QAT"
|
||||
quantizer_name: str
|
||||
group_size: int
|
||||
|
||||
|
||||
AlgorithmConfig = Annotated[LoraFinetuningConfig | QATFinetuningConfig, Field(discriminator="type")]
|
||||
register_schema(AlgorithmConfig, name="AlgorithmConfig")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingJobLogStream(BaseModel):
|
||||
"""Stream of logs from a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param log_lines: List of log message strings from the training process
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
log_lines: list[str]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RLHFAlgorithm(Enum):
|
||||
"""Available reinforcement learning from human feedback algorithms.
|
||||
:cvar dpo: Direct Preference Optimization algorithm
|
||||
"""
|
||||
|
||||
dpo = "dpo"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DPOLossType(Enum):
|
||||
sigmoid = "sigmoid"
|
||||
hinge = "hinge"
|
||||
ipo = "ipo"
|
||||
kto_pair = "kto_pair"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DPOAlignmentConfig(BaseModel):
|
||||
"""Configuration for Direct Preference Optimization (DPO) alignment.
|
||||
|
||||
:param beta: Temperature parameter for the DPO loss
|
||||
:param loss_type: The type of loss function to use for DPO
|
||||
"""
|
||||
|
||||
beta: float
|
||||
loss_type: DPOLossType = DPOLossType.sigmoid
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingRLHFRequest(BaseModel):
|
||||
"""Request to finetune a model using reinforcement learning from human feedback.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param finetuned_model: URL or path to the base model to fine-tune
|
||||
:param dataset_id: Unique identifier for the training dataset
|
||||
:param validation_dataset_id: Unique identifier for the validation dataset
|
||||
:param algorithm: RLHF algorithm to use for training
|
||||
:param algorithm_config: Configuration parameters for the RLHF algorithm
|
||||
:param optimizer_config: Configuration parameters for the optimization algorithm
|
||||
:param training_config: Configuration parameters for the training process
|
||||
:param hyperparam_search_config: Configuration for hyperparameter search
|
||||
:param logger_config: Configuration for training logging
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
|
||||
finetuned_model: URL
|
||||
|
||||
dataset_id: str
|
||||
validation_dataset_id: str
|
||||
|
||||
algorithm: RLHFAlgorithm
|
||||
algorithm_config: DPOAlignmentConfig
|
||||
|
||||
optimizer_config: OptimizerConfig
|
||||
training_config: TrainingConfig
|
||||
|
||||
# TODO: define these
|
||||
hyperparam_search_config: dict[str, Any]
|
||||
logger_config: dict[str, Any]
|
||||
|
||||
|
||||
class PostTrainingJob(BaseModel):
|
||||
job_uuid: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingJobStatusResponse(BaseModel):
|
||||
"""Status of a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param status: Current status of the training job
|
||||
:param scheduled_at: (Optional) Timestamp when the job was scheduled
|
||||
:param started_at: (Optional) Timestamp when the job execution began
|
||||
:param completed_at: (Optional) Timestamp when the job finished, if completed
|
||||
:param resources_allocated: (Optional) Information about computational resources allocated to the job
|
||||
:param checkpoints: List of model checkpoints created during training
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
status: JobStatus
|
||||
|
||||
scheduled_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
|
||||
resources_allocated: dict[str, Any] | None = None
|
||||
|
||||
checkpoints: list[Checkpoint] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ListPostTrainingJobsResponse(BaseModel):
|
||||
data: list[PostTrainingJob]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class PostTrainingJobArtifactsResponse(BaseModel):
|
||||
"""Artifacts of a finetuning job.
|
||||
|
||||
:param job_uuid: Unique identifier for the training job
|
||||
:param checkpoints: List of model checkpoints created during training
|
||||
"""
|
||||
|
||||
job_uuid: str
|
||||
checkpoints: list[Checkpoint] = Field(default_factory=list)
|
||||
|
||||
# TODO(ashwin): metrics, evals
|
||||
|
||||
|
||||
class PostTraining(Protocol):
|
||||
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def supervised_fine_tune(
|
||||
self,
|
||||
job_uuid: str,
|
||||
training_config: TrainingConfig,
|
||||
hyperparam_search_config: dict[str, Any],
|
||||
logger_config: dict[str, Any],
|
||||
model: str | None = Field(
|
||||
default=None,
|
||||
description="Model descriptor for training if not in provider config`",
|
||||
),
|
||||
checkpoint_dir: str | None = None,
|
||||
algorithm_config: AlgorithmConfig | None = None,
|
||||
) -> PostTrainingJob:
|
||||
"""Run supervised fine-tuning of a model.
|
||||
|
||||
:param job_uuid: The UUID of the job to create.
|
||||
:param training_config: The training configuration.
|
||||
:param hyperparam_search_config: The hyperparam search configuration.
|
||||
:param logger_config: The logger configuration.
|
||||
:param model: The model to fine-tune.
|
||||
:param checkpoint_dir: The directory to save checkpoint(s) to.
|
||||
:param algorithm_config: The algorithm configuration.
|
||||
:returns: A PostTrainingJob.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def preference_optimize(
|
||||
self,
|
||||
job_uuid: str,
|
||||
finetuned_model: str,
|
||||
algorithm_config: DPOAlignmentConfig,
|
||||
training_config: TrainingConfig,
|
||||
hyperparam_search_config: dict[str, Any],
|
||||
logger_config: dict[str, Any],
|
||||
) -> PostTrainingJob:
|
||||
"""Run preference optimization of a model.
|
||||
|
||||
:param job_uuid: The UUID of the job to create.
|
||||
:param finetuned_model: The model to fine-tune.
|
||||
:param algorithm_config: The algorithm configuration.
|
||||
:param training_config: The training configuration.
|
||||
:param hyperparam_search_config: The hyperparam search configuration.
|
||||
:param logger_config: The logger configuration.
|
||||
:returns: A PostTrainingJob.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
|
||||
"""Get all training jobs.
|
||||
|
||||
:returns: A ListPostTrainingJobsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
|
||||
"""Get the status of a training job.
|
||||
|
||||
:param job_uuid: The UUID of the job to get the status of.
|
||||
:returns: A PostTrainingJobStatusResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def cancel_training_job(self, job_uuid: str) -> None:
|
||||
"""Cancel a training job.
|
||||
|
||||
:param job_uuid: The UUID of the job to cancel.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
|
||||
"""Get the artifacts of a training job.
|
||||
|
||||
:param job_uuid: The UUID of the job to get the artifacts of.
|
||||
:returns: A PostTrainingJobArtifactsResponse.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .prompts import ListPromptsResponse, Prompt, Prompts
|
||||
|
||||
__all__ = ["Prompt", "Prompts", "ListPromptsResponse"]
|
||||
|
|
@ -1,204 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import re
|
||||
import secrets
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from llama_stack.apis.common.tracing import telemetry_traceable
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Prompt(BaseModel):
|
||||
"""A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack.
|
||||
|
||||
:param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API.
|
||||
:param version: Version (integer starting at 1, incremented on save)
|
||||
:param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>'
|
||||
:param variables: List of prompt variable names that can be used in the prompt template
|
||||
:param is_default: Boolean indicating whether this version is the default version for this prompt
|
||||
"""
|
||||
|
||||
prompt: str | None = Field(default=None, description="The system prompt with variable placeholders")
|
||||
version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1)
|
||||
prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'")
|
||||
variables: list[str] = Field(
|
||||
default_factory=list, description="List of variable names that can be used in the prompt template"
|
||||
)
|
||||
is_default: bool = Field(
|
||||
default=False, description="Boolean indicating whether this version is the default version"
|
||||
)
|
||||
|
||||
@field_validator("prompt_id")
|
||||
@classmethod
|
||||
def validate_prompt_id(cls, prompt_id: str) -> str:
|
||||
if not isinstance(prompt_id, str):
|
||||
raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'")
|
||||
|
||||
if not prompt_id.startswith("pmpt_"):
|
||||
raise ValueError("prompt_id must start with 'pmpt_' prefix")
|
||||
|
||||
hex_part = prompt_id[5:]
|
||||
if len(hex_part) != 48:
|
||||
raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)")
|
||||
|
||||
for char in hex_part:
|
||||
if char not in "0123456789abcdef":
|
||||
raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]")
|
||||
|
||||
return prompt_id
|
||||
|
||||
@field_validator("version")
|
||||
@classmethod
|
||||
def validate_version(cls, prompt_version: int) -> int:
|
||||
if prompt_version < 1:
|
||||
raise ValueError("version must be >= 1")
|
||||
return prompt_version
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_prompt_variables(self):
|
||||
"""Validate that all variables used in the prompt are declared in the variables list."""
|
||||
if not self.prompt:
|
||||
return self
|
||||
|
||||
prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt))
|
||||
declared_variables = set(self.variables)
|
||||
|
||||
undeclared = prompt_variables - declared_variables
|
||||
if undeclared:
|
||||
raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}")
|
||||
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
def generate_prompt_id(cls) -> str:
|
||||
# Generate 48 hex characters (24 bytes)
|
||||
random_bytes = secrets.token_bytes(24)
|
||||
hex_string = random_bytes.hex()
|
||||
return f"pmpt_{hex_string}"
|
||||
|
||||
|
||||
class ListPromptsResponse(BaseModel):
|
||||
"""Response model to list prompts."""
|
||||
|
||||
data: list[Prompt]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Prompts(Protocol):
|
||||
"""Prompts
|
||||
|
||||
Protocol for prompt management operations."""
|
||||
|
||||
@webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_prompts(self) -> ListPromptsResponse:
|
||||
"""List all prompts.
|
||||
|
||||
:returns: A ListPromptsResponse containing all prompts.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_prompt_versions(
|
||||
self,
|
||||
prompt_id: str,
|
||||
) -> ListPromptsResponse:
|
||||
"""List prompt versions.
|
||||
|
||||
List all versions of a specific prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to list versions for.
|
||||
:returns: A ListPromptsResponse containing all versions of the prompt.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
version: int | None = None,
|
||||
) -> Prompt:
|
||||
"""Get prompt.
|
||||
|
||||
Get a prompt by its identifier and optional version.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to get.
|
||||
:param version: The version of the prompt to get (defaults to latest).
|
||||
:returns: A Prompt resource.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_prompt(
|
||||
self,
|
||||
prompt: str,
|
||||
variables: list[str] | None = None,
|
||||
) -> Prompt:
|
||||
"""Create prompt.
|
||||
|
||||
Create a new prompt.
|
||||
|
||||
:param prompt: The prompt text content with variable placeholders.
|
||||
:param variables: List of variable names that can be used in the prompt template.
|
||||
:returns: The created Prompt resource.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1)
|
||||
async def update_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
prompt: str,
|
||||
version: int,
|
||||
variables: list[str] | None = None,
|
||||
set_as_default: bool = True,
|
||||
) -> Prompt:
|
||||
"""Update prompt.
|
||||
|
||||
Update an existing prompt (increments version).
|
||||
|
||||
:param prompt_id: The identifier of the prompt to update.
|
||||
:param prompt: The updated prompt text content.
|
||||
:param version: The current version of the prompt being updated.
|
||||
:param variables: Updated list of variable names that can be used in the prompt template.
|
||||
:param set_as_default: Set the new version as the default (default=True).
|
||||
:returns: The updated Prompt resource with incremented version.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
) -> None:
|
||||
"""Delete prompt.
|
||||
|
||||
Delete a prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to delete.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1)
|
||||
async def set_default_version(
|
||||
self,
|
||||
prompt_id: str,
|
||||
version: int,
|
||||
) -> Prompt:
|
||||
"""Set prompt version.
|
||||
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
|
||||
:param prompt_id: The identifier of the prompt.
|
||||
:param version: The version to set as default.
|
||||
:returns: The prompt with the specified version now set as default.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .providers import *
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.datatypes import HealthResponse
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ProviderInfo(BaseModel):
|
||||
"""Information about a registered provider including its configuration and health status.
|
||||
|
||||
:param api: The API name this provider implements
|
||||
:param provider_id: Unique identifier for the provider
|
||||
:param provider_type: The type of provider implementation
|
||||
:param config: Configuration parameters for the provider
|
||||
:param health: Current health status of the provider
|
||||
"""
|
||||
|
||||
api: str
|
||||
provider_id: str
|
||||
provider_type: str
|
||||
config: dict[str, Any]
|
||||
health: HealthResponse
|
||||
|
||||
|
||||
class ListProvidersResponse(BaseModel):
|
||||
"""Response containing a list of all available providers.
|
||||
|
||||
:param data: List of provider information objects
|
||||
"""
|
||||
|
||||
data: list[ProviderInfo]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Providers(Protocol):
|
||||
"""Providers
|
||||
|
||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
"""
|
||||
|
||||
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_providers(self) -> ListProvidersResponse:
|
||||
"""List providers.
|
||||
|
||||
List all available providers.
|
||||
|
||||
:returns: A ListProvidersResponse containing information about all providers.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
|
||||
"""Get provider.
|
||||
|
||||
Get detailed information about a specific provider.
|
||||
|
||||
:param provider_id: The ID of the provider to inspect.
|
||||
:returns: A ProviderInfo object containing the provider's details.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from enum import StrEnum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ResourceType(StrEnum):
|
||||
model = "model"
|
||||
shield = "shield"
|
||||
vector_store = "vector_store"
|
||||
dataset = "dataset"
|
||||
scoring_function = "scoring_function"
|
||||
benchmark = "benchmark"
|
||||
tool = "tool"
|
||||
tool_group = "tool_group"
|
||||
prompt = "prompt"
|
||||
|
||||
|
||||
class Resource(BaseModel):
|
||||
"""Base class for all Llama Stack resources"""
|
||||
|
||||
identifier: str = Field(description="Unique identifier for this resource in llama stack")
|
||||
|
||||
provider_resource_id: str | None = Field(
|
||||
default=None,
|
||||
description="Unique identifier for this resource in the provider",
|
||||
)
|
||||
|
||||
provider_id: str = Field(description="ID of the provider that owns this resource")
|
||||
|
||||
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .safety import *
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.tracing import telemetry_traceable
|
||||
from llama_stack.apis.inference import OpenAIMessageParam
|
||||
from llama_stack.apis.shields import Shield
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ModerationObjectResults(BaseModel):
|
||||
"""A moderation object.
|
||||
:param flagged: Whether any of the below categories are flagged.
|
||||
:param categories: A list of the categories, and whether they are flagged or not.
|
||||
:param category_applied_input_types: A list of the categories along with the input type(s) that the score applies to.
|
||||
:param category_scores: A list of the categories along with their scores as predicted by model.
|
||||
"""
|
||||
|
||||
flagged: bool
|
||||
categories: dict[str, bool] | None = None
|
||||
category_applied_input_types: dict[str, list[str]] | None = None
|
||||
category_scores: dict[str, float] | None = None
|
||||
user_message: str | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ModerationObject(BaseModel):
|
||||
"""A moderation object.
|
||||
:param id: The unique identifier for the moderation request.
|
||||
:param model: The model used to generate the moderation results.
|
||||
:param results: A list of moderation objects
|
||||
"""
|
||||
|
||||
id: str
|
||||
model: str
|
||||
results: list[ModerationObjectResults]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ViolationLevel(Enum):
|
||||
"""Severity level of a safety violation.
|
||||
|
||||
:cvar INFO: Informational level violation that does not require action
|
||||
:cvar WARN: Warning level violation that suggests caution but allows continuation
|
||||
:cvar ERROR: Error level violation that requires blocking or intervention
|
||||
"""
|
||||
|
||||
INFO = "info"
|
||||
WARN = "warn"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class SafetyViolation(BaseModel):
|
||||
"""Details of a safety violation detected by content moderation.
|
||||
|
||||
:param violation_level: Severity level of the violation
|
||||
:param user_message: (Optional) Message to convey to the user about the violation
|
||||
:param metadata: Additional metadata including specific violation codes for debugging and telemetry
|
||||
"""
|
||||
|
||||
violation_level: ViolationLevel
|
||||
|
||||
# what message should you convey to the user
|
||||
user_message: str | None = None
|
||||
|
||||
# additional metadata (including specific violation codes) more for
|
||||
# debugging, telemetry
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RunShieldResponse(BaseModel):
|
||||
"""Response from running a safety shield.
|
||||
|
||||
:param violation: (Optional) Safety violation detected by the shield, if any
|
||||
"""
|
||||
|
||||
violation: SafetyViolation | None = None
|
||||
|
||||
|
||||
class ShieldStore(Protocol):
|
||||
async def get_shield(self, identifier: str) -> Shield: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Safety(Protocol):
|
||||
"""Safety
|
||||
|
||||
OpenAI-compatible Moderations API.
|
||||
"""
|
||||
|
||||
shield_store: ShieldStore
|
||||
|
||||
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_shield(
|
||||
self,
|
||||
shield_id: str,
|
||||
messages: list[OpenAIMessageParam],
|
||||
params: dict[str, Any],
|
||||
) -> RunShieldResponse:
|
||||
"""Run shield.
|
||||
|
||||
Run a shield.
|
||||
|
||||
:param shield_id: The identifier of the shield to run.
|
||||
:param messages: The messages to run the shield on.
|
||||
:param params: The parameters of the shield.
|
||||
:returns: A RunShieldResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
|
||||
"""Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
:param input: Input (or inputs) to classify.
|
||||
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
|
||||
:param model: (Optional) The content moderation model you would like to use.
|
||||
:returns: A moderation object.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .scoring import *
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
# mapping of metric to value
|
||||
ScoringResultRow = dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoringResult(BaseModel):
|
||||
"""
|
||||
A scoring result for a single row.
|
||||
|
||||
:param score_rows: The scoring result for each row. Each row is a map of column name to value.
|
||||
:param aggregated_results: Map of metric name to aggregated value
|
||||
"""
|
||||
|
||||
score_rows: list[ScoringResultRow]
|
||||
# aggregated metrics to value
|
||||
aggregated_results: dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoreBatchResponse(BaseModel):
|
||||
"""Response from batch scoring operations on datasets.
|
||||
|
||||
:param dataset_id: (Optional) The identifier of the dataset that was scored
|
||||
:param results: A map of scoring function name to ScoringResult
|
||||
"""
|
||||
|
||||
dataset_id: str | None = None
|
||||
results: dict[str, ScoringResult]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoreResponse(BaseModel):
|
||||
"""
|
||||
The response from scoring.
|
||||
|
||||
:param results: A map of scoring function name to ScoringResult.
|
||||
"""
|
||||
|
||||
# each key in the dict is a scoring function name
|
||||
results: dict[str, ScoringResult]
|
||||
|
||||
|
||||
class ScoringFunctionStore(Protocol):
|
||||
def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Scoring(Protocol):
|
||||
scoring_function_store: ScoringFunctionStore
|
||||
|
||||
@webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def score_batch(
|
||||
self,
|
||||
dataset_id: str,
|
||||
scoring_functions: dict[str, ScoringFnParams | None],
|
||||
save_results_dataset: bool = False,
|
||||
) -> ScoreBatchResponse:
|
||||
"""Score a batch of rows.
|
||||
|
||||
:param dataset_id: The ID of the dataset to score.
|
||||
:param scoring_functions: The scoring functions to use for the scoring.
|
||||
:param save_results_dataset: Whether to save the results to a dataset.
|
||||
:returns: A ScoreBatchResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def score(
|
||||
self,
|
||||
input_rows: list[dict[str, Any]],
|
||||
scoring_functions: dict[str, ScoringFnParams | None],
|
||||
) -> ScoreResponse:
|
||||
"""Score a list of rows.
|
||||
|
||||
:param input_rows: The rows to score.
|
||||
:param scoring_functions: The scoring functions to use for the scoring.
|
||||
:returns: A ScoreResponse object containing rows and aggregated results.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .scoring_functions import *
|
||||
|
|
@ -1,210 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
# TODO: use enum.StrEnum when we drop support for python 3.10
|
||||
from enum import StrEnum
|
||||
from typing import (
|
||||
Annotated,
|
||||
Any,
|
||||
Literal,
|
||||
Protocol,
|
||||
runtime_checkable,
|
||||
)
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.type_system import ParamType
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
# Perhaps more structure can be imposed on these functions. Maybe they could be associated
|
||||
# with standard metrics so they can be rolled up?
|
||||
@json_schema_type
|
||||
class ScoringFnParamsType(StrEnum):
|
||||
"""Types of scoring function parameter configurations.
|
||||
:cvar llm_as_judge: Use an LLM model to evaluate and score responses
|
||||
:cvar regex_parser: Use regex patterns to extract and score specific parts of responses
|
||||
:cvar basic: Basic scoring with simple aggregation functions
|
||||
"""
|
||||
|
||||
llm_as_judge = "llm_as_judge"
|
||||
regex_parser = "regex_parser"
|
||||
basic = "basic"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AggregationFunctionType(StrEnum):
|
||||
"""Types of aggregation functions for scoring results.
|
||||
:cvar average: Calculate the arithmetic mean of scores
|
||||
:cvar weighted_average: Calculate a weighted average of scores
|
||||
:cvar median: Calculate the median value of scores
|
||||
:cvar categorical_count: Count occurrences of categorical values
|
||||
:cvar accuracy: Calculate accuracy as the proportion of correct answers
|
||||
"""
|
||||
|
||||
average = "average"
|
||||
weighted_average = "weighted_average"
|
||||
median = "median"
|
||||
categorical_count = "categorical_count"
|
||||
accuracy = "accuracy"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class LLMAsJudgeScoringFnParams(BaseModel):
|
||||
"""Parameters for LLM-as-judge scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always llm_as_judge
|
||||
:param judge_model: Identifier of the LLM model to use as a judge for scoring
|
||||
:param prompt_template: (Optional) Custom prompt template for the judge model
|
||||
:param judge_score_regexes: Regexes to extract the answer from generated response
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge
|
||||
judge_model: str
|
||||
prompt_template: str | None = None
|
||||
judge_score_regexes: list[str] = Field(
|
||||
description="Regexes to extract the answer from generated response",
|
||||
default_factory=lambda: [],
|
||||
)
|
||||
aggregation_functions: list[AggregationFunctionType] = Field(
|
||||
description="Aggregation functions to apply to the scores of each row",
|
||||
default_factory=lambda: [],
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RegexParserScoringFnParams(BaseModel):
|
||||
"""Parameters for regex parser scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always regex_parser
|
||||
:param parsing_regexes: Regex to extract the answer from generated response
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser
|
||||
parsing_regexes: list[str] = Field(
|
||||
description="Regex to extract the answer from generated response",
|
||||
default_factory=lambda: [],
|
||||
)
|
||||
aggregation_functions: list[AggregationFunctionType] = Field(
|
||||
description="Aggregation functions to apply to the scores of each row",
|
||||
default_factory=lambda: [],
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BasicScoringFnParams(BaseModel):
|
||||
"""Parameters for basic scoring function configuration.
|
||||
:param type: The type of scoring function parameters, always basic
|
||||
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||
"""
|
||||
|
||||
type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic
|
||||
aggregation_functions: list[AggregationFunctionType] = Field(
|
||||
description="Aggregation functions to apply to the scores of each row",
|
||||
default_factory=list,
|
||||
)
|
||||
|
||||
|
||||
ScoringFnParams = Annotated[
|
||||
LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(ScoringFnParams, name="ScoringFnParams")
|
||||
|
||||
|
||||
class CommonScoringFnFields(BaseModel):
|
||||
description: str | None = None
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Any additional metadata for this definition",
|
||||
)
|
||||
return_type: ParamType = Field(
|
||||
description="The return type of the deterministic function",
|
||||
)
|
||||
params: ScoringFnParams | None = Field(
|
||||
description="The parameters for the scoring function for benchmark eval, these can be overridden for app eval",
|
||||
default=None,
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoringFn(CommonScoringFnFields, Resource):
|
||||
"""A scoring function resource for evaluating model outputs.
|
||||
:param type: The resource type, always scoring_function
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function
|
||||
|
||||
@property
|
||||
def scoring_fn_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_scoring_fn_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class ScoringFnInput(CommonScoringFnFields, BaseModel):
|
||||
scoring_fn_id: str
|
||||
provider_id: str | None = None
|
||||
provider_scoring_fn_id: str | None = None
|
||||
|
||||
|
||||
class ListScoringFunctionsResponse(BaseModel):
|
||||
data: list[ScoringFn]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class ScoringFunctions(Protocol):
|
||||
@webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
|
||||
"""List all scoring functions.
|
||||
|
||||
:returns: A ListScoringFunctionsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn:
|
||||
"""Get a scoring function by its ID.
|
||||
|
||||
:param scoring_fn_id: The ID of the scoring function to get.
|
||||
:returns: A ScoringFn.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def register_scoring_function(
|
||||
self,
|
||||
scoring_fn_id: str,
|
||||
description: str,
|
||||
return_type: ParamType,
|
||||
provider_scoring_fn_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
params: ScoringFnParams | None = None,
|
||||
) -> None:
|
||||
"""Register a scoring function.
|
||||
|
||||
:param scoring_fn_id: The ID of the scoring function to register.
|
||||
:param description: The description of the scoring function.
|
||||
:param return_type: The return type of the scoring function.
|
||||
:param provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function.
|
||||
:param provider_id: The ID of the provider to use for the scoring function.
|
||||
:param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
|
||||
"""Unregister a scoring function.
|
||||
|
||||
:param scoring_fn_id: The ID of the scoring function to unregister.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .shields import *
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.common.tracing import telemetry_traceable
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
class CommonShieldFields(BaseModel):
|
||||
params: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Shield(CommonShieldFields, Resource):
|
||||
"""A safety shield resource that can be used to check content.
|
||||
|
||||
:param params: (Optional) Configuration parameters for the shield
|
||||
:param type: The resource type, always shield
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.shield] = ResourceType.shield
|
||||
|
||||
@property
|
||||
def shield_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_shield_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class ShieldInput(CommonShieldFields):
|
||||
shield_id: str
|
||||
provider_id: str | None = None
|
||||
provider_shield_id: str | None = None
|
||||
|
||||
|
||||
class ListShieldsResponse(BaseModel):
|
||||
data: list[Shield]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class Shields(Protocol):
|
||||
@webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_shields(self) -> ListShieldsResponse:
|
||||
"""List all shields.
|
||||
|
||||
:returns: A ListShieldsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_shield(self, identifier: str) -> Shield:
|
||||
"""Get a shield by its identifier.
|
||||
|
||||
:param identifier: The identifier of the shield to get.
|
||||
:returns: A Shield.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def register_shield(
|
||||
self,
|
||||
shield_id: str,
|
||||
provider_shield_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
params: dict[str, Any] | None = None,
|
||||
) -> Shield:
|
||||
"""Register a shield.
|
||||
|
||||
:param shield_id: The identifier of the shield to register.
|
||||
:param provider_shield_id: The identifier of the shield in the provider.
|
||||
:param provider_id: The identifier of the provider.
|
||||
:param params: The parameters of the shield.
|
||||
:returns: A Shield.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def unregister_shield(self, identifier: str) -> None:
|
||||
"""Unregister a shield.
|
||||
|
||||
:param identifier: The identifier of the shield to unregister.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .rag_tool import *
|
||||
from .tools import *
|
||||
|
|
@ -1,168 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum, StrEnum
|
||||
from typing import Annotated, Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
||||
|
||||
|
||||
class RRFRanker(BaseModel):
|
||||
"""
|
||||
Reciprocal Rank Fusion (RRF) ranker configuration.
|
||||
|
||||
:param type: The type of ranker, always "rrf"
|
||||
:param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
|
||||
Must be greater than 0
|
||||
"""
|
||||
|
||||
type: Literal["rrf"] = "rrf"
|
||||
impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance
|
||||
|
||||
|
||||
class WeightedRanker(BaseModel):
|
||||
"""
|
||||
Weighted ranker configuration that combines vector and keyword scores.
|
||||
|
||||
:param type: The type of ranker, always "weighted"
|
||||
:param alpha: Weight factor between 0 and 1.
|
||||
0 means only use keyword scores,
|
||||
1 means only use vector scores,
|
||||
values in between blend both scores.
|
||||
"""
|
||||
|
||||
type: Literal["weighted"] = "weighted"
|
||||
alpha: float = Field(
|
||||
default=0.5,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.",
|
||||
)
|
||||
|
||||
|
||||
Ranker = Annotated[
|
||||
RRFRanker | WeightedRanker,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
|
||||
|
||||
class RAGDocument(BaseModel):
|
||||
"""
|
||||
A document to be used for document ingestion in the RAG Tool.
|
||||
|
||||
:param document_id: The unique identifier for the document.
|
||||
:param content: The content of the document.
|
||||
:param mime_type: The MIME type of the document.
|
||||
:param metadata: Additional metadata for the document.
|
||||
"""
|
||||
|
||||
document_id: str
|
||||
content: InterleavedContent | URL
|
||||
mime_type: str | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class RAGQueryResult(BaseModel):
|
||||
"""Result of a RAG query containing retrieved content and metadata.
|
||||
|
||||
:param content: (Optional) The retrieved content from the query
|
||||
:param metadata: Additional metadata about the query result
|
||||
"""
|
||||
|
||||
content: InterleavedContent | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class RAGQueryGenerator(Enum):
|
||||
"""Types of query generators for RAG systems.
|
||||
|
||||
:cvar default: Default query generator using simple text processing
|
||||
:cvar llm: LLM-based query generator for enhanced query understanding
|
||||
:cvar custom: Custom query generator implementation
|
||||
"""
|
||||
|
||||
default = "default"
|
||||
llm = "llm"
|
||||
custom = "custom"
|
||||
|
||||
|
||||
class RAGSearchMode(StrEnum):
|
||||
"""
|
||||
Search modes for RAG query retrieval:
|
||||
- VECTOR: Uses vector similarity search for semantic matching
|
||||
- KEYWORD: Uses keyword-based search for exact matching
|
||||
- HYBRID: Combines both vector and keyword search for better results
|
||||
"""
|
||||
|
||||
VECTOR = "vector"
|
||||
KEYWORD = "keyword"
|
||||
HYBRID = "hybrid"
|
||||
|
||||
|
||||
class DefaultRAGQueryGeneratorConfig(BaseModel):
|
||||
"""Configuration for the default RAG query generator.
|
||||
|
||||
:param type: Type of query generator, always 'default'
|
||||
:param separator: String separator used to join query terms
|
||||
"""
|
||||
|
||||
type: Literal["default"] = "default"
|
||||
separator: str = " "
|
||||
|
||||
|
||||
class LLMRAGQueryGeneratorConfig(BaseModel):
|
||||
"""Configuration for the LLM-based RAG query generator.
|
||||
|
||||
:param type: Type of query generator, always 'llm'
|
||||
:param model: Name of the language model to use for query generation
|
||||
:param template: Template string for formatting the query generation prompt
|
||||
"""
|
||||
|
||||
type: Literal["llm"] = "llm"
|
||||
model: str
|
||||
template: str
|
||||
|
||||
|
||||
RAGQueryGeneratorConfig = Annotated[
|
||||
DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
|
||||
|
||||
class RAGQueryConfig(BaseModel):
|
||||
"""
|
||||
Configuration for the RAG query generation.
|
||||
|
||||
:param query_generator_config: Configuration for the query generator.
|
||||
:param max_tokens_in_context: Maximum number of tokens in the context.
|
||||
:param max_chunks: Maximum number of chunks to retrieve.
|
||||
:param chunk_template: Template for formatting each retrieved chunk in the context.
|
||||
Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
|
||||
Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
|
||||
:param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector".
|
||||
:param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.
|
||||
"""
|
||||
|
||||
# This config defines how a query is generated using the messages
|
||||
# for memory bank retrieval.
|
||||
query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig())
|
||||
max_tokens_in_context: int = 4096
|
||||
max_chunks: int = 5
|
||||
chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
|
||||
mode: RAGSearchMode | None = RAGSearchMode.VECTOR
|
||||
ranker: Ranker | None = Field(default=None) # Only used for hybrid mode
|
||||
|
||||
@field_validator("chunk_template")
|
||||
def validate_chunk_template(cls, v: str) -> str:
|
||||
if "{chunk.content}" not in v:
|
||||
raise ValueError("chunk_template must contain {chunk.content}")
|
||||
if "{index}" not in v:
|
||||
raise ValueError("chunk_template must contain {index}")
|
||||
if len(v) == 0:
|
||||
raise ValueError("chunk_template must not be empty")
|
||||
return v
|
||||
|
|
@ -1,217 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Literal, Protocol
|
||||
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
||||
from llama_stack.apis.common.tracing import telemetry_traceable
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolDef(BaseModel):
|
||||
"""Tool definition used in runtime contexts.
|
||||
|
||||
:param name: Name of the tool
|
||||
:param description: (Optional) Human-readable description of what the tool does
|
||||
:param input_schema: (Optional) JSON Schema for tool inputs (MCP inputSchema)
|
||||
:param output_schema: (Optional) JSON Schema for tool outputs (MCP outputSchema)
|
||||
:param metadata: (Optional) Additional metadata about the tool
|
||||
:param toolgroup_id: (Optional) ID of the tool group this tool belongs to
|
||||
"""
|
||||
|
||||
toolgroup_id: str | None = None
|
||||
name: str
|
||||
description: str | None = None
|
||||
input_schema: dict[str, Any] | None = None
|
||||
output_schema: dict[str, Any] | None = None
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolGroupInput(BaseModel):
|
||||
"""Input data for registering a tool group.
|
||||
|
||||
:param toolgroup_id: Unique identifier for the tool group
|
||||
:param provider_id: ID of the provider that will handle this tool group
|
||||
:param args: (Optional) Additional arguments to pass to the provider
|
||||
:param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools
|
||||
"""
|
||||
|
||||
toolgroup_id: str
|
||||
provider_id: str
|
||||
args: dict[str, Any] | None = None
|
||||
mcp_endpoint: URL | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolGroup(Resource):
|
||||
"""A group of related tools managed together.
|
||||
|
||||
:param type: Type of resource, always 'tool_group'
|
||||
:param mcp_endpoint: (Optional) Model Context Protocol endpoint for remote tools
|
||||
:param args: (Optional) Additional arguments for the tool group
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.tool_group] = ResourceType.tool_group
|
||||
mcp_endpoint: URL | None = None
|
||||
args: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolInvocationResult(BaseModel):
|
||||
"""Result of a tool invocation.
|
||||
|
||||
:param content: (Optional) The output content from the tool execution
|
||||
:param error_message: (Optional) Error message if the tool execution failed
|
||||
:param error_code: (Optional) Numeric error code if the tool execution failed
|
||||
:param metadata: (Optional) Additional metadata about the tool execution
|
||||
"""
|
||||
|
||||
content: InterleavedContent | None = None
|
||||
error_message: str | None = None
|
||||
error_code: int | None = None
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class ToolStore(Protocol):
|
||||
async def get_tool(self, tool_name: str) -> ToolDef: ...
|
||||
async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ...
|
||||
|
||||
|
||||
class ListToolGroupsResponse(BaseModel):
|
||||
"""Response containing a list of tool groups.
|
||||
|
||||
:param data: List of tool groups
|
||||
"""
|
||||
|
||||
data: list[ToolGroup]
|
||||
|
||||
|
||||
class ListToolDefsResponse(BaseModel):
|
||||
"""Response containing a list of tool definitions.
|
||||
|
||||
:param data: List of tool definitions
|
||||
"""
|
||||
|
||||
data: list[ToolDef]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class ToolGroups(Protocol):
|
||||
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def register_tool_group(
|
||||
self,
|
||||
toolgroup_id: str,
|
||||
provider_id: str,
|
||||
mcp_endpoint: URL | None = None,
|
||||
args: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Register a tool group.
|
||||
|
||||
:param toolgroup_id: The ID of the tool group to register.
|
||||
:param provider_id: The ID of the provider to use for the tool group.
|
||||
:param mcp_endpoint: The MCP endpoint to use for the tool group.
|
||||
:param args: A dictionary of arguments to pass to the tool group.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_tool_group(
|
||||
self,
|
||||
toolgroup_id: str,
|
||||
) -> ToolGroup:
|
||||
"""Get a tool group by its ID.
|
||||
|
||||
:param toolgroup_id: The ID of the tool group to get.
|
||||
:returns: A ToolGroup.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_tool_groups(self) -> ListToolGroupsResponse:
|
||||
"""List tool groups with optional provider.
|
||||
|
||||
:returns: A ListToolGroupsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
|
||||
"""List tools with optional tool group.
|
||||
|
||||
:param toolgroup_id: The ID of the tool group to list tools for.
|
||||
:returns: A ListToolDefsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_tool(
|
||||
self,
|
||||
tool_name: str,
|
||||
) -> ToolDef:
|
||||
"""Get a tool by its name.
|
||||
|
||||
:param tool_name: The name of the tool to get.
|
||||
:returns: A ToolDef.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
async def unregister_toolgroup(
|
||||
self,
|
||||
toolgroup_id: str,
|
||||
) -> None:
|
||||
"""Unregister a tool group.
|
||||
|
||||
:param toolgroup_id: The ID of the tool group to unregister.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
class SpecialToolGroup(Enum):
|
||||
"""Special tool groups with predefined functionality.
|
||||
|
||||
:cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval
|
||||
"""
|
||||
|
||||
rag_tool = "rag_tool"
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class ToolRuntime(Protocol):
|
||||
tool_store: ToolStore | None = None
|
||||
|
||||
# TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
|
||||
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_runtime_tools(
|
||||
self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
|
||||
) -> ListToolDefsResponse:
|
||||
"""List all tools in the runtime.
|
||||
|
||||
:param tool_group_id: The ID of the tool group to list tools for.
|
||||
:param mcp_endpoint: The MCP endpoint to use for the tool group.
|
||||
:returns: A ListToolDefsResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
|
||||
"""Run a tool with the given arguments.
|
||||
|
||||
:param tool_name: The name of the tool to invoke.
|
||||
:param kwargs: A dictionary of arguments to pass to the tool.
|
||||
:returns: A ToolInvocationResult.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .vector_io import *
|
||||
|
|
@ -1,872 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from fastapi import Body, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.tracing import telemetry_traceable
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack.strong_typing.schema import register_schema
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ChunkMetadata(BaseModel):
|
||||
"""
|
||||
`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
|
||||
will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
|
||||
is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after.
|
||||
Use `Chunk.metadata` for metadata that will be used in the context during inference.
|
||||
:param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content.
|
||||
:param document_id: The ID of the document this chunk belongs to.
|
||||
:param source: The source of the content, such as a URL, file path, or other identifier.
|
||||
:param created_timestamp: An optional timestamp indicating when the chunk was created.
|
||||
:param updated_timestamp: An optional timestamp indicating when the chunk was last updated.
|
||||
:param chunk_window: The window of the chunk, which can be used to group related chunks together.
|
||||
:param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken.
|
||||
:param chunk_embedding_model: The embedding model used to create the chunk's embedding.
|
||||
:param chunk_embedding_dimension: The dimension of the embedding vector for the chunk.
|
||||
:param content_token_count: The number of tokens in the content of the chunk.
|
||||
:param metadata_token_count: The number of tokens in the metadata of the chunk.
|
||||
"""
|
||||
|
||||
chunk_id: str | None = None
|
||||
document_id: str | None = None
|
||||
source: str | None = None
|
||||
created_timestamp: int | None = None
|
||||
updated_timestamp: int | None = None
|
||||
chunk_window: str | None = None
|
||||
chunk_tokenizer: str | None = None
|
||||
chunk_embedding_model: str | None = None
|
||||
chunk_embedding_dimension: int | None = None
|
||||
content_token_count: int | None = None
|
||||
metadata_token_count: int | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Chunk(BaseModel):
|
||||
"""
|
||||
A chunk of content that can be inserted into a vector database.
|
||||
:param content: The content of the chunk, which can be interleaved text, images, or other types.
|
||||
:param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
|
||||
:param metadata: Metadata associated with the chunk that will be used in the model context during inference.
|
||||
:param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
|
||||
:param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
|
||||
The `chunk_metadata` is required backend functionality.
|
||||
"""
|
||||
|
||||
content: InterleavedContent
|
||||
chunk_id: str
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
embedding: list[float] | None = None
|
||||
chunk_metadata: ChunkMetadata | None = None
|
||||
|
||||
@property
|
||||
def document_id(self) -> str | None:
|
||||
"""Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
|
||||
# Check metadata first (takes precedence)
|
||||
doc_id = self.metadata.get("document_id")
|
||||
if doc_id is not None:
|
||||
if not isinstance(doc_id, str):
|
||||
raise TypeError(f"metadata['document_id'] must be a string, got {type(doc_id).__name__}: {doc_id!r}")
|
||||
return doc_id
|
||||
|
||||
# Fall back to chunk_metadata if available (Pydantic ensures type safety)
|
||||
if self.chunk_metadata is not None:
|
||||
return self.chunk_metadata.document_id
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class QueryChunksResponse(BaseModel):
|
||||
"""Response from querying chunks in a vector database.
|
||||
|
||||
:param chunks: List of content chunks returned from the query
|
||||
:param scores: Relevance scores corresponding to each returned chunk
|
||||
"""
|
||||
|
||||
chunks: list[Chunk]
|
||||
scores: list[float]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreFileCounts(BaseModel):
|
||||
"""File processing status counts for a vector store.
|
||||
|
||||
:param completed: Number of files that have been successfully processed
|
||||
:param cancelled: Number of files that had their processing cancelled
|
||||
:param failed: Number of files that failed to process
|
||||
:param in_progress: Number of files currently being processed
|
||||
:param total: Total number of files in the vector store
|
||||
"""
|
||||
|
||||
completed: int
|
||||
cancelled: int
|
||||
failed: int
|
||||
in_progress: int
|
||||
total: int
|
||||
|
||||
|
||||
# TODO: rename this as OpenAIVectorStore
|
||||
@json_schema_type
|
||||
class VectorStoreObject(BaseModel):
|
||||
"""OpenAI Vector Store object.
|
||||
|
||||
:param id: Unique identifier for the vector store
|
||||
:param object: Object type identifier, always "vector_store"
|
||||
:param created_at: Timestamp when the vector store was created
|
||||
:param name: (Optional) Name of the vector store
|
||||
:param usage_bytes: Storage space used by the vector store in bytes
|
||||
:param file_counts: File processing status counts for the vector store
|
||||
:param status: Current status of the vector store
|
||||
:param expires_after: (Optional) Expiration policy for the vector store
|
||||
:param expires_at: (Optional) Timestamp when the vector store will expire
|
||||
:param last_active_at: (Optional) Timestamp of last activity on the vector store
|
||||
:param metadata: Set of key-value pairs that can be attached to the vector store
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store"
|
||||
created_at: int
|
||||
name: str | None = None
|
||||
usage_bytes: int = 0
|
||||
file_counts: VectorStoreFileCounts
|
||||
status: str = "completed"
|
||||
expires_after: dict[str, Any] | None = None
|
||||
expires_at: int | None = None
|
||||
last_active_at: int | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreCreateRequest(BaseModel):
|
||||
"""Request to create a vector store.
|
||||
|
||||
:param name: (Optional) Name for the vector store
|
||||
:param file_ids: List of file IDs to include in the vector store
|
||||
:param expires_after: (Optional) Expiration policy for the vector store
|
||||
:param chunking_strategy: (Optional) Strategy for splitting files into chunks
|
||||
:param metadata: Set of key-value pairs that can be attached to the vector store
|
||||
"""
|
||||
|
||||
name: str | None = None
|
||||
file_ids: list[str] = Field(default_factory=list)
|
||||
expires_after: dict[str, Any] | None = None
|
||||
chunking_strategy: dict[str, Any] | None = None
|
||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreModifyRequest(BaseModel):
|
||||
"""Request to modify a vector store.
|
||||
|
||||
:param name: (Optional) Updated name for the vector store
|
||||
:param expires_after: (Optional) Updated expiration policy for the vector store
|
||||
:param metadata: (Optional) Updated set of key-value pairs for the vector store
|
||||
"""
|
||||
|
||||
name: str | None = None
|
||||
expires_after: dict[str, Any] | None = None
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreListResponse(BaseModel):
|
||||
"""Response from listing vector stores.
|
||||
|
||||
:param object: Object type identifier, always "list"
|
||||
:param data: List of vector store objects
|
||||
:param first_id: (Optional) ID of the first vector store in the list for pagination
|
||||
:param last_id: (Optional) ID of the last vector store in the list for pagination
|
||||
:param has_more: Whether there are more vector stores available beyond this page
|
||||
"""
|
||||
|
||||
object: str = "list"
|
||||
data: list[VectorStoreObject]
|
||||
first_id: str | None = None
|
||||
last_id: str | None = None
|
||||
has_more: bool = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreSearchRequest(BaseModel):
|
||||
"""Request to search a vector store.
|
||||
|
||||
:param query: Search query as a string or list of strings
|
||||
:param filters: (Optional) Filters based on file attributes to narrow search results
|
||||
:param max_num_results: Maximum number of results to return, defaults to 10
|
||||
:param ranking_options: (Optional) Options for ranking and filtering search results
|
||||
:param rewrite_query: Whether to rewrite the query for better vector search performance
|
||||
"""
|
||||
|
||||
query: str | list[str]
|
||||
filters: dict[str, Any] | None = None
|
||||
max_num_results: int = 10
|
||||
ranking_options: dict[str, Any] | None = None
|
||||
rewrite_query: bool = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreContent(BaseModel):
|
||||
"""Content item from a vector store file or search result.
|
||||
|
||||
:param type: Content type, currently only "text" is supported
|
||||
:param text: The actual text content
|
||||
:param embedding: Optional embedding vector for this content chunk
|
||||
:param chunk_metadata: Optional chunk metadata
|
||||
:param metadata: Optional user-defined metadata
|
||||
"""
|
||||
|
||||
type: Literal["text"]
|
||||
text: str
|
||||
embedding: list[float] | None = None
|
||||
chunk_metadata: ChunkMetadata | None = None
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreSearchResponse(BaseModel):
|
||||
"""Response from searching a vector store.
|
||||
|
||||
:param file_id: Unique identifier of the file containing the result
|
||||
:param filename: Name of the file containing the result
|
||||
:param score: Relevance score for this search result
|
||||
:param attributes: (Optional) Key-value attributes associated with the file
|
||||
:param content: List of content items matching the search query
|
||||
"""
|
||||
|
||||
file_id: str
|
||||
filename: str
|
||||
score: float
|
||||
attributes: dict[str, str | float | bool] | None = None
|
||||
content: list[VectorStoreContent]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreSearchResponsePage(BaseModel):
|
||||
"""Paginated response from searching a vector store.
|
||||
|
||||
:param object: Object type identifier for the search results page
|
||||
:param search_query: The original search query that was executed
|
||||
:param data: List of search result objects
|
||||
:param has_more: Whether there are more results available beyond this page
|
||||
:param next_page: (Optional) Token for retrieving the next page of results
|
||||
"""
|
||||
|
||||
object: str = "vector_store.search_results.page"
|
||||
search_query: list[str]
|
||||
data: list[VectorStoreSearchResponse]
|
||||
has_more: bool = False
|
||||
next_page: str | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreDeleteResponse(BaseModel):
|
||||
"""Response from deleting a vector store.
|
||||
|
||||
:param id: Unique identifier of the deleted vector store
|
||||
:param object: Object type identifier for the deletion response
|
||||
:param deleted: Whether the deletion operation was successful
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store.deleted"
|
||||
deleted: bool = True
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreFileContentResponse(BaseModel):
|
||||
"""Represents the parsed content of a vector store file.
|
||||
|
||||
:param object: The object type, which is always `vector_store.file_content.page`
|
||||
:param data: Parsed content of the file
|
||||
:param has_more: Indicates if there are more content pages to fetch
|
||||
:param next_page: The token for the next page, if any
|
||||
"""
|
||||
|
||||
object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
|
||||
data: list[VectorStoreContent]
|
||||
has_more: bool = False
|
||||
next_page: str | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreChunkingStrategyAuto(BaseModel):
|
||||
"""Automatic chunking strategy for vector store files.
|
||||
|
||||
:param type: Strategy type, always "auto" for automatic chunking
|
||||
"""
|
||||
|
||||
type: Literal["auto"] = "auto"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreChunkingStrategyStaticConfig(BaseModel):
|
||||
"""Configuration for static chunking strategy.
|
||||
|
||||
:param chunk_overlap_tokens: Number of tokens to overlap between adjacent chunks
|
||||
:param max_chunk_size_tokens: Maximum number of tokens per chunk, must be between 100 and 4096
|
||||
"""
|
||||
|
||||
chunk_overlap_tokens: int = 400
|
||||
max_chunk_size_tokens: int = Field(800, ge=100, le=4096)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreChunkingStrategyStatic(BaseModel):
|
||||
"""Static chunking strategy with configurable parameters.
|
||||
|
||||
:param type: Strategy type, always "static" for static chunking
|
||||
:param static: Configuration parameters for the static chunking strategy
|
||||
"""
|
||||
|
||||
type: Literal["static"] = "static"
|
||||
static: VectorStoreChunkingStrategyStaticConfig
|
||||
|
||||
|
||||
VectorStoreChunkingStrategy = Annotated[
|
||||
VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")
|
||||
|
||||
|
||||
class SearchRankingOptions(BaseModel):
|
||||
"""Options for ranking and filtering search results.
|
||||
|
||||
:param ranker: (Optional) Name of the ranking algorithm to use
|
||||
:param score_threshold: (Optional) Minimum relevance score threshold for results
|
||||
"""
|
||||
|
||||
ranker: str | None = None
|
||||
# NOTE: OpenAI File Search Tool requires threshold to be between 0 and 1, however
|
||||
# we don't guarantee that the score is between 0 and 1, so will leave this unconstrained
|
||||
# and let the provider handle it
|
||||
score_threshold: float | None = Field(default=0.0)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreFileLastError(BaseModel):
|
||||
"""Error information for failed vector store file processing.
|
||||
|
||||
:param code: Error code indicating the type of failure
|
||||
:param message: Human-readable error message describing the failure
|
||||
"""
|
||||
|
||||
code: Literal["server_error"] | Literal["rate_limit_exceeded"]
|
||||
message: str
|
||||
|
||||
|
||||
VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
|
||||
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreFileObject(BaseModel):
|
||||
"""OpenAI Vector Store File object.
|
||||
|
||||
:param id: Unique identifier for the file
|
||||
:param object: Object type identifier, always "vector_store.file"
|
||||
:param attributes: Key-value attributes associated with the file
|
||||
:param chunking_strategy: Strategy used for splitting the file into chunks
|
||||
:param created_at: Timestamp when the file was added to the vector store
|
||||
:param last_error: (Optional) Error information if file processing failed
|
||||
:param status: Current processing status of the file
|
||||
:param usage_bytes: Storage space used by this file in bytes
|
||||
:param vector_store_id: ID of the vector store containing this file
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store.file"
|
||||
attributes: dict[str, Any] = Field(default_factory=dict)
|
||||
chunking_strategy: VectorStoreChunkingStrategy
|
||||
created_at: int
|
||||
last_error: VectorStoreFileLastError | None = None
|
||||
status: VectorStoreFileStatus
|
||||
usage_bytes: int = 0
|
||||
vector_store_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreListFilesResponse(BaseModel):
|
||||
"""Response from listing files in a vector store.
|
||||
|
||||
:param object: Object type identifier, always "list"
|
||||
:param data: List of vector store file objects
|
||||
:param first_id: (Optional) ID of the first file in the list for pagination
|
||||
:param last_id: (Optional) ID of the last file in the list for pagination
|
||||
:param has_more: Whether there are more files available beyond this page
|
||||
"""
|
||||
|
||||
object: str = "list"
|
||||
data: list[VectorStoreFileObject]
|
||||
first_id: str | None = None
|
||||
last_id: str | None = None
|
||||
has_more: bool = False
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreFileDeleteResponse(BaseModel):
|
||||
"""Response from deleting a vector store file.
|
||||
|
||||
:param id: Unique identifier of the deleted file
|
||||
:param object: Object type identifier for the deletion response
|
||||
:param deleted: Whether the deletion operation was successful
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store.file.deleted"
|
||||
deleted: bool = True
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreFileBatchObject(BaseModel):
|
||||
"""OpenAI Vector Store File Batch object.
|
||||
|
||||
:param id: Unique identifier for the file batch
|
||||
:param object: Object type identifier, always "vector_store.file_batch"
|
||||
:param created_at: Timestamp when the file batch was created
|
||||
:param vector_store_id: ID of the vector store containing the file batch
|
||||
:param status: Current processing status of the file batch
|
||||
:param file_counts: File processing status counts for the batch
|
||||
"""
|
||||
|
||||
id: str
|
||||
object: str = "vector_store.file_batch"
|
||||
created_at: int
|
||||
vector_store_id: str
|
||||
status: VectorStoreFileStatus
|
||||
file_counts: VectorStoreFileCounts
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorStoreFilesListInBatchResponse(BaseModel):
|
||||
"""Response from listing files in a vector store file batch.
|
||||
|
||||
:param object: Object type identifier, always "list"
|
||||
:param data: List of vector store file objects in the batch
|
||||
:param first_id: (Optional) ID of the first file in the list for pagination
|
||||
:param last_id: (Optional) ID of the last file in the list for pagination
|
||||
:param has_more: Whether there are more files available beyond this page
|
||||
"""
|
||||
|
||||
object: str = "list"
|
||||
data: list[VectorStoreFileObject]
|
||||
first_id: str | None = None
|
||||
last_id: str | None = None
|
||||
has_more: bool = False
|
||||
|
||||
|
||||
# extra_body can be accessed via .model_extra
|
||||
@json_schema_type
|
||||
class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
|
||||
"""Request to create a vector store with extra_body support.
|
||||
|
||||
:param name: (Optional) A name for the vector store
|
||||
:param file_ids: List of file IDs to include in the vector store
|
||||
:param expires_after: (Optional) Expiration policy for the vector store
|
||||
:param chunking_strategy: (Optional) Strategy for splitting files into chunks
|
||||
:param metadata: Set of key-value pairs that can be attached to the vector store
|
||||
"""
|
||||
|
||||
name: str | None = None
|
||||
file_ids: list[str] | None = None
|
||||
expires_after: dict[str, Any] | None = None
|
||||
chunking_strategy: VectorStoreChunkingStrategy | None = None
|
||||
metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
# extra_body can be accessed via .model_extra
|
||||
@json_schema_type
|
||||
class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"):
|
||||
"""Request to create a vector store file batch with extra_body support.
|
||||
|
||||
:param file_ids: A list of File IDs that the vector store should use
|
||||
:param attributes: (Optional) Key-value attributes to store with the files
|
||||
:param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto
|
||||
"""
|
||||
|
||||
file_ids: list[str]
|
||||
attributes: dict[str, Any] | None = None
|
||||
chunking_strategy: VectorStoreChunkingStrategy | None = None
|
||||
|
||||
|
||||
class VectorStoreTable(Protocol):
|
||||
def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@telemetry_traceable
|
||||
class VectorIO(Protocol):
|
||||
vector_store_table: VectorStoreTable | None = None
|
||||
|
||||
# this will just block now until chunks are inserted, but it should
|
||||
# probably return a Job instance which can be polled for completion
|
||||
# TODO: rename vector_store_id to vector_store_id once Stainless is working
|
||||
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def insert_chunks(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
chunks: list[Chunk],
|
||||
ttl_seconds: int | None = None,
|
||||
) -> None:
|
||||
"""Insert chunks into a vector database.
|
||||
|
||||
:param vector_store_id: The identifier of the vector database to insert the chunks into.
|
||||
:param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types.
|
||||
`metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional.
|
||||
If `metadata` is provided, you configure how Llama Stack formats the chunk during generation.
|
||||
If `embedding` is not provided, it will be computed later.
|
||||
:param ttl_seconds: The time to live of the chunks.
|
||||
"""
|
||||
...
|
||||
|
||||
# TODO: rename vector_store_id to vector_store_id once Stainless is working
|
||||
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def query_chunks(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
query: InterleavedContent,
|
||||
params: dict[str, Any] | None = None,
|
||||
) -> QueryChunksResponse:
|
||||
"""Query chunks from a vector database.
|
||||
|
||||
:param vector_store_id: The identifier of the vector database to query.
|
||||
:param query: The query to search for.
|
||||
:param params: The parameters of the query.
|
||||
:returns: A QueryChunksResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
# OpenAI Vector Stores API endpoints
|
||||
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
|
||||
) -> VectorStoreObject:
|
||||
"""Creates a vector store.
|
||||
|
||||
Generate an OpenAI-compatible vector store with the given parameters.
|
||||
:returns: A VectorStoreObject representing the created vector store.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_vector_stores(
|
||||
self,
|
||||
limit: int | None = 20,
|
||||
order: str | None = "desc",
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
) -> VectorStoreListResponse:
|
||||
"""Returns a list of vector stores.
|
||||
|
||||
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list.
|
||||
:param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list.
|
||||
:returns: A VectorStoreListResponse containing the list of vector stores.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreObject:
|
||||
"""Retrieves a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to retrieve.
|
||||
:returns: A VectorStoreObject representing the vector store.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_update_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
name: str | None = None,
|
||||
expires_after: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> VectorStoreObject:
|
||||
"""Updates a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to update.
|
||||
:param name: The name of the vector store.
|
||||
:param expires_after: The expiration policy for a vector store.
|
||||
:param metadata: Set of 16 key-value pairs that can be attached to an object.
|
||||
:returns: A VectorStoreObject representing the updated vector store.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_delete_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreDeleteResponse:
|
||||
"""Delete a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to delete.
|
||||
:returns: A VectorStoreDeleteResponse indicating the deletion status.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/search",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_search_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
query: str | list[str],
|
||||
filters: dict[str, Any] | None = None,
|
||||
max_num_results: int | None = 10,
|
||||
ranking_options: SearchRankingOptions | None = None,
|
||||
rewrite_query: bool | None = False,
|
||||
search_mode: (
|
||||
str | None
|
||||
) = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations
|
||||
) -> VectorStoreSearchResponsePage:
|
||||
"""Search for chunks in a vector store.
|
||||
|
||||
Searches a vector store for relevant chunks based on a query and optional file attribute filters.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to search.
|
||||
:param query: The query string or array for performing the search.
|
||||
:param filters: Filters based on file attributes to narrow the search results.
|
||||
:param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
|
||||
:param ranking_options: Ranking options for fine-tuning the search results.
|
||||
:param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
|
||||
:param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
|
||||
:returns: A VectorStoreSearchResponse containing the search results.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_attach_file_to_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
attributes: dict[str, Any] | None = None,
|
||||
chunking_strategy: VectorStoreChunkingStrategy | None = None,
|
||||
) -> VectorStoreFileObject:
|
||||
"""Attach a file to a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to attach the file to.
|
||||
:param file_id: The ID of the file to attach to the vector store.
|
||||
:param attributes: The key-value attributes stored with the file, which can be used for filtering.
|
||||
:param chunking_strategy: The chunking strategy to use for the file.
|
||||
:returns: A VectorStoreFileObject representing the attached file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_list_files_in_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
limit: int | None = 20,
|
||||
order: str | None = "desc",
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
filter: VectorStoreFileStatus | None = None,
|
||||
) -> VectorStoreListFilesResponse:
|
||||
"""List files in a vector store.
|
||||
|
||||
:param vector_store_id: The ID of the vector store to list files from.
|
||||
:param limit: (Optional) A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: (Optional) Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:param after: (Optional) A cursor for use in pagination. `after` is an object ID that defines your place in the list.
|
||||
:param before: (Optional) A cursor for use in pagination. `before` is an object ID that defines your place in the list.
|
||||
:param filter: (Optional) Filter by file status to only return files with the specified status.
|
||||
:returns: A VectorStoreListFilesResponse containing the list of files.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_retrieve_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileObject:
|
||||
"""Retrieves a vector store file.
|
||||
|
||||
:param vector_store_id: The ID of the vector store containing the file to retrieve.
|
||||
:param file_id: The ID of the file to retrieve.
|
||||
:returns: A VectorStoreFileObject representing the file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}/content",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_retrieve_vector_store_file_contents(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
include_embeddings: Annotated[bool | None, Query(default=False)] = False,
|
||||
include_metadata: Annotated[bool | None, Query(default=False)] = False,
|
||||
) -> VectorStoreFileContentResponse:
|
||||
"""Retrieves the contents of a vector store file.
|
||||
|
||||
:param vector_store_id: The ID of the vector store containing the file to retrieve.
|
||||
:param file_id: The ID of the file to retrieve.
|
||||
:param include_embeddings: Whether to include embedding vectors in the response.
|
||||
:param include_metadata: Whether to include chunk metadata in the response.
|
||||
:returns: File contents, optionally with embeddings and metadata based on query parameters.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_update_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
attributes: dict[str, Any],
|
||||
) -> VectorStoreFileObject:
|
||||
"""Updates a vector store file.
|
||||
|
||||
:param vector_store_id: The ID of the vector store containing the file to update.
|
||||
:param file_id: The ID of the file to update.
|
||||
:param attributes: The updated key-value attributes to store with the file.
|
||||
:returns: A VectorStoreFileObject representing the updated file.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_delete_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_id: str,
|
||||
) -> VectorStoreFileDeleteResponse:
|
||||
"""Delete a vector store file.
|
||||
|
||||
:param vector_store_id: The ID of the vector store containing the file to delete.
|
||||
:param file_id: The ID of the file to delete.
|
||||
:returns: A VectorStoreFileDeleteResponse indicating the deletion status.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_create_vector_store_file_batch(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
|
||||
) -> VectorStoreFileBatchObject:
|
||||
"""Create a vector store file batch.
|
||||
|
||||
Generate an OpenAI-compatible vector store file batch for the given vector store.
|
||||
:param vector_store_id: The ID of the vector store to create the file batch for.
|
||||
:returns: A VectorStoreFileBatchObject representing the created file batch.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_retrieve_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreFileBatchObject:
|
||||
"""Retrieve a vector store file batch.
|
||||
|
||||
:param batch_id: The ID of the file batch to retrieve.
|
||||
:param vector_store_id: The ID of the vector store containing the file batch.
|
||||
:returns: A VectorStoreFileBatchObject representing the file batch.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_list_files_in_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
filter: str | None = None,
|
||||
limit: int | None = 20,
|
||||
order: str | None = "desc",
|
||||
) -> VectorStoreFilesListInBatchResponse:
|
||||
"""Returns a list of vector store files in a batch.
|
||||
|
||||
:param batch_id: The ID of the file batch to list files from.
|
||||
:param vector_store_id: The ID of the vector store containing the file batch.
|
||||
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list.
|
||||
:param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list.
|
||||
:param filter: Filter by file status. One of in_progress, completed, failed, cancelled.
|
||||
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
|
||||
:param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
|
||||
:returns: A VectorStoreFilesListInBatchResponse containing the list of files in the batch.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(
|
||||
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
|
||||
method="POST",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_cancel_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
) -> VectorStoreFileBatchObject:
|
||||
"""Cancels a vector store file batch.
|
||||
|
||||
:param batch_id: The ID of the file batch to cancel.
|
||||
:param vector_store_id: The ID of the vector store containing the file batch.
|
||||
:returns: A VectorStoreFileBatchObject representing the cancelled file batch.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .vector_stores import *
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
|
||||
|
||||
# Internal resource type for storing the vector store routing and other information
|
||||
class VectorStore(Resource):
|
||||
"""Vector database resource for storing and querying vector embeddings.
|
||||
|
||||
:param type: Type of resource, always 'vector_store' for vector stores
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.vector_store] = ResourceType.vector_store
|
||||
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
vector_store_name: str | None = None
|
||||
|
||||
@property
|
||||
def vector_store_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_vector_store_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class VectorStoreInput(BaseModel):
|
||||
"""Input parameters for creating or configuring a vector database.
|
||||
|
||||
:param vector_store_id: Unique identifier for the vector store
|
||||
:param embedding_model: Name of the embedding model to use for vector generation
|
||||
:param embedding_dimension: Dimension of the embedding vectors
|
||||
:param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
|
||||
"""
|
||||
|
||||
vector_store_id: str
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
provider_id: str | None = None
|
||||
provider_vector_store_id: str | None = None
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
LLAMA_STACK_API_V1 = "v1"
|
||||
LLAMA_STACK_API_V1BETA = "v1beta"
|
||||
LLAMA_STACK_API_V1ALPHA = "v1alpha"
|
||||
|
|
@ -9,6 +9,7 @@ import sys
|
|||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from llama_stack_api import Api
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
|
|
@ -21,7 +22,6 @@ from llama_stack.core.datatypes import (
|
|||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.stack import replace_env_vars
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from functools import lru_cache
|
|||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from llama_stack_api import Api
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
|
|
@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import (
|
|||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
import sys
|
||||
|
||||
from llama_stack_api import Api
|
||||
from pydantic import BaseModel
|
||||
from termcolor import cprint
|
||||
|
||||
|
|
@ -13,7 +14,6 @@ from llama_stack.core.datatypes import BuildConfig
|
|||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.distributions.template import DistributionTemplate
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
log = get_logger(name=__name__, category="core")
|
||||
|
||||
|
|
|
|||
|
|
@ -12,11 +12,10 @@ from enum import Enum
|
|||
from typing import Any, Union, get_args, get_origin
|
||||
|
||||
import httpx
|
||||
from llama_stack_api import RemoteProviderConfig
|
||||
from pydantic import BaseModel, parse_obj_as
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.providers.datatypes import RemoteProviderConfig
|
||||
|
||||
_CLIENT_CLASSES = {}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
import textwrap
|
||||
from typing import Any
|
||||
|
||||
from llama_stack_api import Api, ProviderSpec
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
LLAMA_STACK_RUN_CONFIG_VERSION,
|
||||
DistributionSpec,
|
||||
|
|
@ -20,7 +22,6 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
|||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.prompt_for_config import prompt_for_config
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api, ProviderSpec
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
|
|
|||
|
|
@ -8,9 +8,7 @@ import secrets
|
|||
import time
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
from llama_stack.apis.conversations.conversations import (
|
||||
from llama_stack_api import (
|
||||
Conversation,
|
||||
ConversationDeletedResource,
|
||||
ConversationItem,
|
||||
|
|
@ -20,6 +18,8 @@ from llama_stack.apis.conversations.conversations import (
|
|||
Conversations,
|
||||
Metadata,
|
||||
)
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
from llama_stack.core.datatypes import AccessRule, StackRunConfig
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
|
||||
|
|
|
|||
|
|
@ -9,22 +9,34 @@ from pathlib import Path
|
|||
from typing import Annotated, Any, Literal, Self
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from llama_stack_api import (
|
||||
Api,
|
||||
Benchmark,
|
||||
BenchmarkInput,
|
||||
Dataset,
|
||||
DatasetInput,
|
||||
DatasetIO,
|
||||
Eval,
|
||||
Inference,
|
||||
Model,
|
||||
ModelInput,
|
||||
ProviderSpec,
|
||||
Resource,
|
||||
Safety,
|
||||
Scoring,
|
||||
ScoringFn,
|
||||
ScoringFnInput,
|
||||
Shield,
|
||||
ShieldInput,
|
||||
ToolGroup,
|
||||
ToolGroupInput,
|
||||
ToolRuntime,
|
||||
VectorIO,
|
||||
VectorStore,
|
||||
VectorStoreInput,
|
||||
)
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
|
||||
from llama_stack.apis.datasetio import DatasetIO
|
||||
from llama_stack.apis.datasets import Dataset, DatasetInput
|
||||
from llama_stack.apis.eval import Eval
|
||||
from llama_stack.apis.inference import Inference
|
||||
from llama_stack.apis.models import Model, ModelInput
|
||||
from llama_stack.apis.resource import Resource
|
||||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.scoring import Scoring
|
||||
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
|
||||
from llama_stack.apis.shields import Shield, ShieldInput
|
||||
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
|
||||
from llama_stack.core.access_control.datatypes import AccessRule
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
KVStoreReference,
|
||||
|
|
@ -32,7 +44,6 @@ from llama_stack.core.storage.datatypes import (
|
|||
StorageConfig,
|
||||
)
|
||||
from llama_stack.log import LoggingConfig
|
||||
from llama_stack.providers.datatypes import Api, ProviderSpec
|
||||
|
||||
LLAMA_STACK_BUILD_CONFIG_VERSION = 2
|
||||
LLAMA_STACK_RUN_CONFIG_VERSION = 2
|
||||
|
|
|
|||
|
|
@ -10,17 +10,17 @@ import os
|
|||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.core.datatypes import BuildConfig, DistributionSpec
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import (
|
||||
from llama_stack_api import (
|
||||
Api,
|
||||
InlineProviderSpec,
|
||||
ProviderSpec,
|
||||
RemoteProviderSpec,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.core.datatypes import BuildConfig, DistributionSpec
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@
|
|||
|
||||
|
||||
import yaml
|
||||
from llama_stack_api import Api, ExternalApiSpec
|
||||
|
||||
from llama_stack.apis.datatypes import Api, ExternalApiSpec
|
||||
from llama_stack.core.datatypes import BuildConfig, StackRunConfig
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
|
|
|
|||
|
|
@ -6,19 +6,19 @@
|
|||
|
||||
from importlib.metadata import version
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.inspect import (
|
||||
from llama_stack_api import (
|
||||
HealthInfo,
|
||||
HealthStatus,
|
||||
Inspect,
|
||||
ListRoutesResponse,
|
||||
RouteInfo,
|
||||
VersionInfo,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.core.datatypes import StackRunConfig
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.core.server.routes import get_all_api_routes
|
||||
from llama_stack.providers.datatypes import HealthStatus
|
||||
|
||||
|
||||
class DistributionInspectConfig(BaseModel):
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin
|
|||
import httpx
|
||||
import yaml
|
||||
from fastapi import Response as FastAPIResponse
|
||||
from llama_stack_api import is_unwrapped_body_param
|
||||
|
||||
try:
|
||||
from llama_stack_client import (
|
||||
|
|
@ -57,7 +58,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields
|
|||
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
||||
from llama_stack.core.utils.exec import in_notebook
|
||||
from llama_stack.log import get_logger, setup_logging
|
||||
from llama_stack.strong_typing.inspection import is_unwrapped_body_param
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@
|
|||
import json
|
||||
from typing import Any
|
||||
|
||||
from llama_stack_api import ListPromptsResponse, Prompt, Prompts
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
|
||||
from llama_stack.core.datatypes import StackRunConfig
|
||||
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
|
||||
|
||||
|
|
|
|||
|
|
@ -7,11 +7,10 @@
|
|||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import HealthResponse, HealthStatus
|
||||
|
||||
from .datatypes import StackRunConfig
|
||||
from .utils.config import redact_sensitive_fields
|
||||
|
|
|
|||
|
|
@ -8,29 +8,46 @@ import importlib.metadata
|
|||
import inspect
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.agents import Agents
|
||||
from llama_stack.apis.batches import Batches
|
||||
from llama_stack.apis.benchmarks import Benchmarks
|
||||
from llama_stack.apis.conversations import Conversations
|
||||
from llama_stack.apis.datasetio import DatasetIO
|
||||
from llama_stack.apis.datasets import Datasets
|
||||
from llama_stack.apis.datatypes import ExternalApiSpec
|
||||
from llama_stack.apis.eval import Eval
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference, InferenceProvider
|
||||
from llama_stack.apis.inspect import Inspect
|
||||
from llama_stack.apis.models import Models
|
||||
from llama_stack.apis.post_training import PostTraining
|
||||
from llama_stack.apis.prompts import Prompts
|
||||
from llama_stack.apis.providers import Providers as ProvidersAPI
|
||||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.scoring import Scoring
|
||||
from llama_stack.apis.scoring_functions import ScoringFunctions
|
||||
from llama_stack.apis.shields import Shields
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack_api import (
|
||||
LLAMA_STACK_API_V1ALPHA,
|
||||
Agents,
|
||||
Api,
|
||||
Batches,
|
||||
Benchmarks,
|
||||
BenchmarksProtocolPrivate,
|
||||
Conversations,
|
||||
DatasetIO,
|
||||
Datasets,
|
||||
DatasetsProtocolPrivate,
|
||||
Eval,
|
||||
ExternalApiSpec,
|
||||
Files,
|
||||
Inference,
|
||||
InferenceProvider,
|
||||
Inspect,
|
||||
Models,
|
||||
ModelsProtocolPrivate,
|
||||
PostTraining,
|
||||
Prompts,
|
||||
ProviderSpec,
|
||||
RemoteProviderConfig,
|
||||
RemoteProviderSpec,
|
||||
Safety,
|
||||
Scoring,
|
||||
ScoringFunctions,
|
||||
ScoringFunctionsProtocolPrivate,
|
||||
Shields,
|
||||
ShieldsProtocolPrivate,
|
||||
ToolGroups,
|
||||
ToolGroupsProtocolPrivate,
|
||||
ToolRuntime,
|
||||
VectorIO,
|
||||
VectorStore,
|
||||
)
|
||||
from llama_stack_api import (
|
||||
Providers as ProvidersAPI,
|
||||
)
|
||||
|
||||
from llama_stack.core.client import get_client_impl
|
||||
from llama_stack.core.datatypes import (
|
||||
AccessRule,
|
||||
|
|
@ -44,18 +61,6 @@ from llama_stack.core.external import load_external_apis
|
|||
from llama_stack.core.store import DistributionRegistry
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import (
|
||||
Api,
|
||||
BenchmarksProtocolPrivate,
|
||||
DatasetsProtocolPrivate,
|
||||
ModelsProtocolPrivate,
|
||||
ProviderSpec,
|
||||
RemoteProviderConfig,
|
||||
RemoteProviderSpec,
|
||||
ScoringFunctionsProtocolPrivate,
|
||||
ShieldsProtocolPrivate,
|
||||
ToolGroupsProtocolPrivate,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
|
|
|||
|
|
@ -6,13 +6,14 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack_api import Api, RoutingTable
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
AccessRule,
|
||||
RoutedProtocol,
|
||||
)
|
||||
from llama_stack.core.stack import StackRunConfig
|
||||
from llama_stack.core.store import DistributionRegistry
|
||||
from llama_stack.providers.datatypes import Api, RoutingTable
|
||||
from llama_stack.providers.utils.inference.inference_store import InferenceStore
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -6,11 +6,9 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.apis.datasetio import DatasetIO
|
||||
from llama_stack.apis.datasets import DatasetPurpose, DataSource
|
||||
from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import RoutingTable
|
||||
|
||||
logger = get_logger(name=__name__, category="core::routers")
|
||||
|
||||
|
|
|
|||
|
|
@ -6,15 +6,19 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
|
||||
from llama_stack.apis.scoring import (
|
||||
from llama_stack_api import (
|
||||
BenchmarkConfig,
|
||||
Eval,
|
||||
EvaluateResponse,
|
||||
Job,
|
||||
RoutingTable,
|
||||
ScoreBatchResponse,
|
||||
ScoreResponse,
|
||||
Scoring,
|
||||
ScoringFnParams,
|
||||
)
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import RoutingTable
|
||||
|
||||
logger = get_logger(name=__name__, category="core::routers")
|
||||
|
||||
|
|
|
|||
|
|
@ -11,17 +11,19 @@ from datetime import UTC, datetime
|
|||
from typing import Annotated, Any
|
||||
|
||||
from fastapi import Body
|
||||
from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
|
||||
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
|
||||
from llama_stack.apis.inference import (
|
||||
from llama_stack_api import (
|
||||
HealthResponse,
|
||||
HealthStatus,
|
||||
Inference,
|
||||
ListOpenAIChatCompletionResponse,
|
||||
ModelNotFoundError,
|
||||
ModelType,
|
||||
ModelTypeError,
|
||||
OpenAIAssistantMessageParam,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
OpenAIChatCompletionContentPartImageParam,
|
||||
OpenAIChatCompletionContentPartTextParam,
|
||||
OpenAIChatCompletionRequestWithExtraBody,
|
||||
OpenAIChatCompletionToolCall,
|
||||
OpenAIChatCompletionToolCallFunction,
|
||||
|
|
@ -35,18 +37,17 @@ from llama_stack.apis.inference import (
|
|||
OpenAIMessageParam,
|
||||
Order,
|
||||
RerankResponse,
|
||||
RoutingTable,
|
||||
)
|
||||
from llama_stack.apis.inference.inference import (
|
||||
OpenAIChatCompletionContentPartImageParam,
|
||||
OpenAIChatCompletionContentPartTextParam,
|
||||
)
|
||||
from llama_stack.apis.models import ModelType
|
||||
from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
|
||||
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
from llama_stack.core.telemetry.telemetry import MetricEvent
|
||||
from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.llama3.chat_format import ChatFormat
|
||||
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
|
||||
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
|
||||
from llama_stack.providers.utils.inference.inference_store import InferenceStore
|
||||
|
||||
logger = get_logger(name=__name__, category="core::routers")
|
||||
|
|
|
|||
|
|
@ -6,13 +6,10 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.inference import OpenAIMessageParam
|
||||
from llama_stack.apis.safety import RunShieldResponse, Safety
|
||||
from llama_stack.apis.safety.safety import ModerationObject
|
||||
from llama_stack.apis.shields import Shield
|
||||
from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
|
||||
|
||||
from llama_stack.core.datatypes import SafetyConfig
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import RoutingTable
|
||||
|
||||
logger = get_logger(name=__name__, category="core::routers")
|
||||
|
||||
|
|
|
|||
|
|
@ -6,13 +6,12 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
from llama_stack_api import (
|
||||
URL,
|
||||
)
|
||||
from llama_stack.apis.tools import (
|
||||
ListToolDefsResponse,
|
||||
ToolRuntime,
|
||||
)
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from ..routing_tables.toolgroups import ToolGroupsRoutingTable
|
||||
|
|
|
|||
|
|
@ -9,14 +9,16 @@ import uuid
|
|||
from typing import Annotated, Any
|
||||
|
||||
from fastapi import Body
|
||||
|
||||
from llama_stack.apis.common.content_types import InterleavedContent
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.apis.vector_io import (
|
||||
from llama_stack_api import (
|
||||
Chunk,
|
||||
HealthResponse,
|
||||
HealthStatus,
|
||||
InterleavedContent,
|
||||
ModelType,
|
||||
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
|
||||
OpenAICreateVectorStoreRequestWithExtraBody,
|
||||
QueryChunksResponse,
|
||||
RoutingTable,
|
||||
SearchRankingOptions,
|
||||
VectorIO,
|
||||
VectorStoreChunkingStrategy,
|
||||
|
|
@ -33,9 +35,9 @@ from llama_stack.apis.vector_io import (
|
|||
VectorStoreObject,
|
||||
VectorStoreSearchResponsePage,
|
||||
)
|
||||
|
||||
from llama_stack.core.datatypes import VectorStoresConfig
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
|
||||
|
||||
logger = get_logger(name=__name__, category="core::routers")
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,8 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
|
||||
from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BenchmarkWithOwner,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -6,9 +6,8 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.errors import ModelNotFoundError
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
|
||||
|
||||
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
|
||||
from llama_stack.core.access_control.datatypes import Action
|
||||
from llama_stack.core.datatypes import (
|
||||
|
|
@ -21,7 +20,6 @@ from llama_stack.core.datatypes import (
|
|||
from llama_stack.core.request_headers import get_authenticated_user
|
||||
from llama_stack.core.store import DistributionRegistry
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api, RoutingTable
|
||||
|
||||
logger = get_logger(name=__name__, category="core::routing_tables")
|
||||
|
||||
|
|
|
|||
|
|
@ -7,18 +7,19 @@
|
|||
import uuid
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.errors import DatasetNotFoundError
|
||||
from llama_stack.apis.datasets import (
|
||||
from llama_stack_api import (
|
||||
Dataset,
|
||||
DatasetNotFoundError,
|
||||
DatasetPurpose,
|
||||
Datasets,
|
||||
DatasetType,
|
||||
DataSource,
|
||||
ListDatasetsResponse,
|
||||
ResourceType,
|
||||
RowsDataSource,
|
||||
URIDataSource,
|
||||
)
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
DatasetWithOwner,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -7,8 +7,16 @@
|
|||
import time
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.errors import ModelNotFoundError
|
||||
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
|
||||
from llama_stack_api import (
|
||||
ListModelsResponse,
|
||||
Model,
|
||||
ModelNotFoundError,
|
||||
Models,
|
||||
ModelType,
|
||||
OpenAIListModelsResponse,
|
||||
OpenAIModel,
|
||||
)
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
ModelWithOwner,
|
||||
RegistryEntrySource,
|
||||
|
|
|
|||
|
|
@ -4,14 +4,15 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.common.type_system import ParamType
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
from llama_stack.apis.scoring_functions import (
|
||||
from llama_stack_api import (
|
||||
ListScoringFunctionsResponse,
|
||||
ParamType,
|
||||
ResourceType,
|
||||
ScoringFn,
|
||||
ScoringFnParams,
|
||||
ScoringFunctions,
|
||||
)
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
ScoringFnWithOwner,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
|
||||
from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
ShieldWithOwner,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -6,9 +6,16 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.common.errors import ToolGroupNotFoundError
|
||||
from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
|
||||
from llama_stack_api import (
|
||||
URL,
|
||||
ListToolDefsResponse,
|
||||
ListToolGroupsResponse,
|
||||
ToolDef,
|
||||
ToolGroup,
|
||||
ToolGroupNotFoundError,
|
||||
ToolGroups,
|
||||
)
|
||||
|
||||
from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
|
|
|
|||
|
|
@ -6,12 +6,12 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
|
||||
# Removed VectorStores import to avoid exposing public API
|
||||
from llama_stack.apis.vector_io.vector_io import (
|
||||
from llama_stack_api import (
|
||||
ModelNotFoundError,
|
||||
ModelType,
|
||||
ModelTypeError,
|
||||
ResourceType,
|
||||
SearchRankingOptions,
|
||||
VectorStoreChunkingStrategy,
|
||||
VectorStoreDeleteResponse,
|
||||
|
|
@ -22,6 +22,7 @@ from llama_stack.apis.vector_io.vector_io import (
|
|||
VectorStoreObject,
|
||||
VectorStoreSearchResponsePage,
|
||||
)
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
VectorStoreWithOwner,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@ from urllib.parse import parse_qs, urljoin, urlparse
|
|||
|
||||
import httpx
|
||||
import jwt
|
||||
from llama_stack_api import TokenValidationError
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.errors import TokenValidationError
|
||||
from llama_stack.core.datatypes import (
|
||||
AuthenticationConfig,
|
||||
CustomAuthConfig,
|
||||
|
|
|
|||
|
|
@ -10,11 +10,10 @@ from collections.abc import Callable
|
|||
from typing import Any
|
||||
|
||||
from aiohttp import hdrs
|
||||
from llama_stack_api import Api, ExternalApiSpec, WebMethod
|
||||
from starlette.routing import Route
|
||||
|
||||
from llama_stack.apis.datatypes import Api, ExternalApiSpec
|
||||
from llama_stack.core.resolver import api_protocol_map
|
||||
from llama_stack.schema_utils import WebMethod
|
||||
|
||||
EndpointFunc = Callable[..., Any]
|
||||
PathParams = dict[str, str]
|
||||
|
|
|
|||
|
|
@ -28,11 +28,10 @@ from fastapi import Path as FastapiPath
|
|||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
|
||||
from openai import BadRequestError
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.core.access_control.access_control import AccessDeniedError
|
||||
from llama_stack.core.datatypes import (
|
||||
AuthenticationRequiredError,
|
||||
|
|
@ -58,7 +57,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields
|
|||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
||||
from llama_stack.log import LoggingConfig, get_logger, setup_logging
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
from .auth import AuthenticationMiddleware
|
||||
from .quota import QuotaMiddleware
|
||||
|
|
|
|||
|
|
@ -12,27 +12,31 @@ import tempfile
|
|||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from llama_stack_api import (
|
||||
Agents,
|
||||
Api,
|
||||
Batches,
|
||||
Benchmarks,
|
||||
Conversations,
|
||||
DatasetIO,
|
||||
Datasets,
|
||||
Eval,
|
||||
Files,
|
||||
Inference,
|
||||
Inspect,
|
||||
Models,
|
||||
PostTraining,
|
||||
Prompts,
|
||||
Providers,
|
||||
Safety,
|
||||
Scoring,
|
||||
ScoringFunctions,
|
||||
Shields,
|
||||
ToolGroups,
|
||||
ToolRuntime,
|
||||
VectorIO,
|
||||
)
|
||||
|
||||
from llama_stack.apis.agents import Agents
|
||||
from llama_stack.apis.batches import Batches
|
||||
from llama_stack.apis.benchmarks import Benchmarks
|
||||
from llama_stack.apis.conversations import Conversations
|
||||
from llama_stack.apis.datasetio import DatasetIO
|
||||
from llama_stack.apis.datasets import Datasets
|
||||
from llama_stack.apis.eval import Eval
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference
|
||||
from llama_stack.apis.inspect import Inspect
|
||||
from llama_stack.apis.models import Models
|
||||
from llama_stack.apis.post_training import PostTraining
|
||||
from llama_stack.apis.prompts import Prompts
|
||||
from llama_stack.apis.providers import Providers
|
||||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.scoring import Scoring
|
||||
from llama_stack.apis.scoring_functions import ScoringFunctions
|
||||
from llama_stack.apis.shields import Shields
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
|
||||
from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
|
|
@ -54,7 +58,6 @@ from llama_stack.core.storage.datatypes import (
|
|||
from llama_stack.core.store.registry import create_dist_registry
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from typing import (
|
|||
cast,
|
||||
)
|
||||
|
||||
from llama_stack_api import json_schema_type, register_schema
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
|
|
@ -28,7 +29,6 @@ from pydantic import BaseModel, Field
|
|||
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.datatypes import Primitive
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema
|
||||
|
||||
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack_api import ModelType
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildProvider,
|
||||
ModelInput,
|
||||
|
|
|
|||
|
|
@ -6,7 +6,8 @@
|
|||
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack_api import ModelType
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildProvider,
|
||||
ModelInput,
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BenchmarkInput,
|
||||
BuildProvider,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack_api import RemoteProviderSpec
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildProvider,
|
||||
Provider,
|
||||
|
|
@ -19,7 +21,6 @@ from llama_stack.core.datatypes import (
|
|||
)
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
||||
from llama_stack.providers.datatypes import RemoteProviderSpec
|
||||
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
|
|
|
|||
|
|
@ -10,10 +10,9 @@ from typing import Any, Literal
|
|||
import jinja2
|
||||
import rich
|
||||
import yaml
|
||||
from llama_stack_api import DatasetPurpose, ModelType
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.datasets import DatasetPurpose
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.datatypes import (
|
||||
LLAMA_STACK_RUN_CONFIG_VERSION,
|
||||
Api,
|
||||
|
|
|
|||
|
|
@ -1,217 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import StrEnum
|
||||
from typing import Any, Protocol
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.benchmarks import Benchmark
|
||||
from llama_stack.apis.datasets import Dataset
|
||||
from llama_stack.apis.datatypes import Api
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.scoring_functions import ScoringFn
|
||||
from llama_stack.apis.shields import Shield
|
||||
from llama_stack.apis.tools import ToolGroup
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class ModelsProtocolPrivate(Protocol):
|
||||
"""
|
||||
Protocol for model management.
|
||||
|
||||
This allows users to register their preferred model identifiers.
|
||||
|
||||
Model registration requires -
|
||||
- a provider, used to route the registration request
|
||||
- a model identifier, user's intended name for the model during inference
|
||||
- a provider model identifier, a model identifier supported by the provider
|
||||
|
||||
Providers will only accept registration for provider model ids they support.
|
||||
|
||||
Example,
|
||||
register: provider x my-model-id x provider-model-id
|
||||
-> Error if provider does not support provider-model-id
|
||||
-> Error if my-model-id is already registered
|
||||
-> Success if provider supports provider-model-id
|
||||
inference: my-model-id x ...
|
||||
-> Provider uses provider-model-id for inference
|
||||
"""
|
||||
|
||||
# this should be called `on_model_register` or something like that.
|
||||
# the provider should _not_ be able to change the object in this
|
||||
# callback
|
||||
async def register_model(self, model: Model) -> Model: ...
|
||||
|
||||
async def unregister_model(self, model_id: str) -> None: ...
|
||||
|
||||
# the Stack router will query each provider for their list of models
|
||||
# if a `refresh_interval_seconds` is provided, this method will be called
|
||||
# periodically to refresh the list of models
|
||||
#
|
||||
# NOTE: each model returned will be registered with the model registry. this means
|
||||
# a callback to the `register_model()` method will be made. this is duplicative and
|
||||
# may be removed in the future.
|
||||
async def list_models(self) -> list[Model] | None: ...
|
||||
|
||||
async def should_refresh_models(self) -> bool: ...
|
||||
|
||||
|
||||
class ShieldsProtocolPrivate(Protocol):
|
||||
async def register_shield(self, shield: Shield) -> None: ...
|
||||
|
||||
async def unregister_shield(self, identifier: str) -> None: ...
|
||||
|
||||
|
||||
class VectorStoresProtocolPrivate(Protocol):
|
||||
async def register_vector_store(self, vector_store: VectorStore) -> None: ...
|
||||
|
||||
async def unregister_vector_store(self, vector_store_id: str) -> None: ...
|
||||
|
||||
|
||||
class DatasetsProtocolPrivate(Protocol):
|
||||
async def register_dataset(self, dataset: Dataset) -> None: ...
|
||||
|
||||
async def unregister_dataset(self, dataset_id: str) -> None: ...
|
||||
|
||||
|
||||
class ScoringFunctionsProtocolPrivate(Protocol):
|
||||
async def list_scoring_functions(self) -> list[ScoringFn]: ...
|
||||
|
||||
async def register_scoring_function(self, scoring_fn: ScoringFn) -> None: ...
|
||||
|
||||
|
||||
class BenchmarksProtocolPrivate(Protocol):
|
||||
async def register_benchmark(self, benchmark: Benchmark) -> None: ...
|
||||
|
||||
|
||||
class ToolGroupsProtocolPrivate(Protocol):
|
||||
async def register_toolgroup(self, toolgroup: ToolGroup) -> None: ...
|
||||
|
||||
async def unregister_toolgroup(self, toolgroup_id: str) -> None: ...
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ProviderSpec(BaseModel):
|
||||
api: Api
|
||||
provider_type: str
|
||||
config_class: str = Field(
|
||||
...,
|
||||
description="Fully-qualified classname of the config for this provider",
|
||||
)
|
||||
api_dependencies: list[Api] = Field(
|
||||
default_factory=list,
|
||||
description="Higher-level API surfaces may depend on other providers to provide their functionality",
|
||||
)
|
||||
optional_api_dependencies: list[Api] = Field(
|
||||
default_factory=list,
|
||||
)
|
||||
deprecation_warning: str | None = Field(
|
||||
default=None,
|
||||
description="If this provider is deprecated, specify the warning message here",
|
||||
)
|
||||
deprecation_error: str | None = Field(
|
||||
default=None,
|
||||
description="If this provider is deprecated and does NOT work, specify the error message here",
|
||||
)
|
||||
|
||||
module: str | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
Fully-qualified name of the module to import. The module is expected to have:
|
||||
|
||||
- `get_adapter_impl(config, deps)`: returns the adapter implementation
|
||||
|
||||
Example: `module: ramalama_stack`
|
||||
""",
|
||||
)
|
||||
|
||||
pip_packages: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="The pip dependencies needed for this implementation",
|
||||
)
|
||||
|
||||
provider_data_validator: str | None = Field(
|
||||
default=None,
|
||||
)
|
||||
|
||||
is_external: bool = Field(default=False, description="Notes whether this provider is an external provider.")
|
||||
|
||||
# used internally by the resolver; this is a hack for now
|
||||
deps__: list[str] = Field(default_factory=list)
|
||||
|
||||
@property
|
||||
def is_sample(self) -> bool:
|
||||
return self.provider_type in ("sample", "remote::sample")
|
||||
|
||||
|
||||
class RoutingTable(Protocol):
|
||||
async def get_provider_impl(self, routing_key: str) -> Any: ...
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class InlineProviderSpec(ProviderSpec):
|
||||
container_image: str | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
The container image to use for this implementation. If one is provided, pip_packages will be ignored.
|
||||
If a provider depends on other providers, the dependencies MUST NOT specify a container image.
|
||||
""",
|
||||
)
|
||||
description: str | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
A description of the provider. This is used to display in the documentation.
|
||||
""",
|
||||
)
|
||||
|
||||
|
||||
class RemoteProviderConfig(BaseModel):
|
||||
host: str = "localhost"
|
||||
port: int | None = None
|
||||
protocol: str = "http"
|
||||
|
||||
@property
|
||||
def url(self) -> str:
|
||||
if self.port is None:
|
||||
return f"{self.protocol}://{self.host}"
|
||||
return f"{self.protocol}://{self.host}:{self.port}"
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str) -> "RemoteProviderConfig":
|
||||
parsed = urlparse(url)
|
||||
attrs = {k: v for k, v in parsed._asdict().items() if v is not None}
|
||||
return cls(**attrs)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RemoteProviderSpec(ProviderSpec):
|
||||
adapter_type: str = Field(
|
||||
...,
|
||||
description="Unique identifier for this adapter",
|
||||
)
|
||||
|
||||
description: str | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
A description of the provider. This is used to display in the documentation.
|
||||
""",
|
||||
)
|
||||
|
||||
@property
|
||||
def container_image(self) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
class HealthStatus(StrEnum):
|
||||
OK = "OK"
|
||||
ERROR = "Error"
|
||||
NOT_IMPLEMENTED = "Not Implemented"
|
||||
|
||||
|
||||
HealthResponse = dict[str, Any]
|
||||
|
|
@ -5,25 +5,26 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.apis.agents import (
|
||||
from llama_stack_api import (
|
||||
Agents,
|
||||
Conversations,
|
||||
Inference,
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
OpenAIDeleteResponseObject,
|
||||
OpenAIResponseInput,
|
||||
OpenAIResponseInputTool,
|
||||
OpenAIResponseObject,
|
||||
OpenAIResponsePrompt,
|
||||
OpenAIResponseText,
|
||||
Order,
|
||||
ResponseGuardrail,
|
||||
Safety,
|
||||
ToolGroups,
|
||||
ToolRuntime,
|
||||
VectorIO,
|
||||
)
|
||||
from llama_stack.apis.agents.agents import ResponseGuardrail
|
||||
from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText
|
||||
from llama_stack.apis.conversations import Conversations
|
||||
from llama_stack.apis.inference import (
|
||||
Inference,
|
||||
)
|
||||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
|
||||
from llama_stack.core.datatypes import AccessRule
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
|
||||
|
|
|
|||
|
|
@ -8,14 +8,15 @@ import time
|
|||
import uuid
|
||||
from collections.abc import AsyncIterator
|
||||
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
from llama_stack.apis.agents import Order
|
||||
from llama_stack.apis.agents.agents import ResponseGuardrailSpec
|
||||
from llama_stack.apis.agents.openai_responses import (
|
||||
from llama_stack_api import (
|
||||
ConversationItem,
|
||||
Conversations,
|
||||
Inference,
|
||||
InvalidConversationIdError,
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
OpenAIDeleteResponseObject,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseInput,
|
||||
OpenAIResponseInputMessageContentText,
|
||||
OpenAIResponseInputTool,
|
||||
|
|
@ -25,20 +26,16 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponsePrompt,
|
||||
OpenAIResponseText,
|
||||
OpenAIResponseTextFormat,
|
||||
)
|
||||
from llama_stack.apis.common.errors import (
|
||||
InvalidConversationIdError,
|
||||
)
|
||||
from llama_stack.apis.conversations import Conversations
|
||||
from llama_stack.apis.conversations.conversations import ConversationItem
|
||||
from llama_stack.apis.inference import (
|
||||
Inference,
|
||||
OpenAIMessageParam,
|
||||
OpenAISystemMessageParam,
|
||||
Order,
|
||||
ResponseGuardrailSpec,
|
||||
Safety,
|
||||
ToolGroups,
|
||||
ToolRuntime,
|
||||
VectorIO,
|
||||
)
|
||||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.responses.responses_store import (
|
||||
ResponsesStore,
|
||||
|
|
|
|||
|
|
@ -8,10 +8,18 @@ import uuid
|
|||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.agents.openai_responses import (
|
||||
from llama_stack_api import (
|
||||
AllowedToolsFilter,
|
||||
ApprovalFilter,
|
||||
Inference,
|
||||
MCPListToolsTool,
|
||||
OpenAIAssistantMessageParam,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
OpenAIChatCompletionRequestWithExtraBody,
|
||||
OpenAIChatCompletionToolCall,
|
||||
OpenAIChoice,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseContentPartOutputText,
|
||||
OpenAIResponseContentPartReasoningText,
|
||||
OpenAIResponseContentPartRefusal,
|
||||
|
|
@ -56,16 +64,7 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseUsageOutputTokensDetails,
|
||||
WebSearchToolTypes,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
Inference,
|
||||
OpenAIAssistantMessageParam,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
OpenAIChatCompletionRequestWithExtraBody,
|
||||
OpenAIChatCompletionToolCall,
|
||||
OpenAIChoice,
|
||||
OpenAIMessageParam,
|
||||
)
|
||||
|
||||
from llama_stack.core.telemetry import tracing
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
||||
|
|
@ -1023,9 +1022,9 @@ class StreamingResponseOrchestrator:
|
|||
self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
|
||||
) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||
"""Process all tools and emit appropriate streaming events."""
|
||||
from llama_stack_api import ToolDef
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from llama_stack.apis.tools import ToolDef
|
||||
from llama_stack.models.llama.datatypes import ToolDefinition
|
||||
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,12 @@ import json
|
|||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.agents.openai_responses import (
|
||||
from llama_stack_api import (
|
||||
ImageContentItem,
|
||||
OpenAIChatCompletionContentPartImageParam,
|
||||
OpenAIChatCompletionContentPartTextParam,
|
||||
OpenAIChatCompletionToolCall,
|
||||
OpenAIImageURL,
|
||||
OpenAIResponseInputToolFileSearch,
|
||||
OpenAIResponseInputToolMCP,
|
||||
OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
|
||||
|
|
@ -23,22 +28,15 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseObjectStreamResponseWebSearchCallSearching,
|
||||
OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
OpenAIResponseOutputMessageFileSearchToolCallResults,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
)
|
||||
from llama_stack.apis.common.content_types import (
|
||||
ImageContentItem,
|
||||
TextContentItem,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
OpenAIChatCompletionContentPartImageParam,
|
||||
OpenAIChatCompletionContentPartTextParam,
|
||||
OpenAIChatCompletionToolCall,
|
||||
OpenAIImageURL,
|
||||
OpenAIToolMessageParam,
|
||||
TextContentItem,
|
||||
ToolGroups,
|
||||
ToolInvocationResult,
|
||||
ToolRuntime,
|
||||
VectorIO,
|
||||
)
|
||||
from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
|
||||
from llama_stack.core.telemetry import tracing
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
|
|
@ -398,6 +396,10 @@ class ToolExecutor:
|
|||
# Build output message
|
||||
message: Any
|
||||
if mcp_tool_to_server and function.name in mcp_tool_to_server:
|
||||
from llama_stack_api import (
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
)
|
||||
|
||||
message = OpenAIResponseOutputMessageMCPCall(
|
||||
id=item_id,
|
||||
arguments=function.arguments,
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue