Merge branch 'main' into vllm_health_check

Commit c18b585d32, authored by Sumit Jaiswal on 2025-06-05 18:09:36 +05:30 and committed via GitHub.
143 changed files with 9210 additions and 5347 deletions

View file

@@ -37,6 +37,7 @@ from .openai_responses import (
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseText,
)
# TODO: use enum.StrEnum when we drop support for python 3.10
@@ -603,7 +604,9 @@ class Agents(Protocol):
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a new OpenAI response.

View file

@@ -7,6 +7,7 @@
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field
from typing_extensions import TypedDict
from llama_stack.schema_utils import json_schema_type, register_schema
@@ -126,6 +127,32 @@ OpenAIResponseOutput = Annotated[
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
# This has to be a TypedDict because we need a "schema" field and our strong
# typing code in the schema generator doesn't support Pydantic aliases. That also
# means we can't use a discriminator field here, because TypedDicts don't support
# default values which the strong typing code requires for discriminators.
class OpenAIResponseTextFormat(TypedDict, total=False):
"""Configuration for Responses API text format.
:param type: Must be "text", "json_schema", or "json_object" to identify the format type
:param name: The name of the response format. Only used for json_schema.
:param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema.
:param description: (Optional) A description of the response format. Only used for json_schema.
:param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema.
"""
type: Literal["text"] | Literal["json_schema"] | Literal["json_object"]
name: str | None
schema: dict[str, Any] | None
description: str | None
strict: bool | None
@json_schema_type
class OpenAIResponseText(BaseModel):
format: OpenAIResponseTextFormat | None = None
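For illustration, a structured-output configuration built from these two models might look like the following sketch (the schema body and the "weather_report" name are assumptions, not from this diff):

text = OpenAIResponseText(
    format=OpenAIResponseTextFormat(
        type="json_schema",
        name="weather_report",  # hypothetical schema name
        schema={"type": "object", "properties": {"temperature": {"type": "number"}}},
        strict=True,
    )
)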
@json_schema_type
class OpenAIResponseObject(BaseModel):
created_at: int
@@ -138,6 +165,9 @@ class OpenAIResponseObject(BaseModel):
previous_response_id: str | None = None
status: str
temperature: float | None = None
# Default to text format to avoid breaking the loading of old responses
# before the field was added. New responses will have this set always.
text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
top_p: float | None = None
truncation: str | None = None
user: str | None = None
@@ -149,6 +179,30 @@ class OpenAIResponseObjectStreamResponseCreated(BaseModel):
type: Literal["response.created"] = "response.created"
@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
response: OpenAIResponseObject
type: Literal["response.completed"] = "response.completed"
@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
response_id: str
item: OpenAIResponseOutput
output_index: int
sequence_number: int
type: Literal["response.output_item.added"] = "response.output_item.added"
@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
response_id: str
item: OpenAIResponseOutput
output_index: int
sequence_number: int
type: Literal["response.output_item.done"] = "response.output_item.done"
@json_schema_type
class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
content_index: int
@@ -160,14 +214,132 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
response: OpenAIResponseObject
type: Literal["response.completed"] = "response.completed"
class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
content_index: int
text: str # final text of the output item
item_id: str
output_index: int
sequence_number: int
type: Literal["response.output_text.done"] = "response.output_text.done"
@json_schema_type
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
delta: str
item_id: str
output_index: int
sequence_number: int
type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta"
@json_schema_type
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
arguments: str # final arguments of the function call
item_id: str
output_index: int
sequence_number: int
type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done"
@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel):
item_id: str
output_index: int
sequence_number: int
type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress"
@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):
item_id: str
output_index: int
sequence_number: int
type: Literal["response.web_search_call.searching"] = "response.web_search_call.searching"
@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
item_id: str
output_index: int
sequence_number: int
type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel):
sequence_number: int
type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel):
sequence_number: int
type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel):
sequence_number: int
type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel):
delta: str
item_id: str
output_index: int
sequence_number: int
type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):
arguments: str # final arguments of the MCP call
item_id: str
output_index: int
sequence_number: int
type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
item_id: str
output_index: int
sequence_number: int
type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
sequence_number: int
type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed"
@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
sequence_number: int
type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed"
OpenAIResponseObjectStream = Annotated[
OpenAIResponseObjectStreamResponseCreated
| OpenAIResponseObjectStreamResponseOutputItemAdded
| OpenAIResponseObjectStreamResponseOutputItemDone
| OpenAIResponseObjectStreamResponseOutputTextDelta
| OpenAIResponseObjectStreamResponseOutputTextDone
| OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
| OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
| OpenAIResponseObjectStreamResponseWebSearchCallInProgress
| OpenAIResponseObjectStreamResponseWebSearchCallSearching
| OpenAIResponseObjectStreamResponseWebSearchCallCompleted
| OpenAIResponseObjectStreamResponseMcpListToolsInProgress
| OpenAIResponseObjectStreamResponseMcpListToolsFailed
| OpenAIResponseObjectStreamResponseMcpListToolsCompleted
| OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
| OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
| OpenAIResponseObjectStreamResponseMcpCallInProgress
| OpenAIResponseObjectStreamResponseMcpCallFailed
| OpenAIResponseObjectStreamResponseMcpCallCompleted
| OpenAIResponseObjectStreamResponseCompleted,
Field(discriminator="type"),
]
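A consumer can dispatch on the `type` discriminator of this union. A minimal sketch (it assumes the delta event carries a `delta` string and that OpenAIResponseObject has an `id` field, neither of which is fully shown in these hunks):

from collections.abc import AsyncIterator

async def print_response_stream(stream: AsyncIterator[OpenAIResponseObjectStream]) -> None:
    async for event in stream:
        # "type" is the discriminator field of the Annotated union above
        if event.type == "response.output_text.delta":
            print(event.delta, end="", flush=True)   # `delta` field assumed
        elif event.type == "response.completed":
            print(f"\n[completed] {event.response.id}")  # `id` field assumed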

View file

@@ -4,179 +4,158 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol, runtime_checkable
from enum import Enum
from typing import Annotated, Literal, Protocol, runtime_checkable
from fastapi import File, Form, Response, UploadFile
from pydantic import BaseModel
from llama_stack.apis.common.responses import Order
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
class FileUploadResponse(BaseModel):
# OpenAI Files API Models
class OpenAIFilePurpose(str, Enum):
"""
Valid purpose values for OpenAI Files API.
"""
Response after initiating a file upload session.
:param id: ID of the upload session
:param url: Upload URL for the file or file parts
:param offset: Upload content offset
:param size: Upload content size
ASSISTANTS = "assistants"
# TODO: Add other purposes as needed
@json_schema_type
class OpenAIFileObject(BaseModel):
"""
OpenAI File object as defined in the OpenAI Files API.
:param object: The object type, which is always "file"
:param id: The file identifier, which can be referenced in the API endpoints
:param bytes: The size of the file, in bytes
:param created_at: The Unix timestamp (in seconds) for when the file was created
:param expires_at: The Unix timestamp (in seconds) for when the file expires
:param filename: The name of the file
:param purpose: The intended purpose of the file
"""
object: Literal["file"] = "file"
id: str
bytes: int
created_at: int
expires_at: int
filename: str
purpose: OpenAIFilePurpose
@json_schema_type
class ListOpenAIFileResponse(BaseModel):
"""
Response for listing files in OpenAI Files API.
:param data: List of file objects
:param object: The object type, which is always "list"
"""
data: list[OpenAIFileObject]
has_more: bool
first_id: str
last_id: str
object: Literal["list"] = "list"
@json_schema_type
class OpenAIFileDeleteResponse(BaseModel):
"""
Response for deleting a file in OpenAI Files API.
:param id: The file identifier that was deleted
:param object: The object type, which is always "file"
:param deleted: Whether the file was successfully deleted
"""
id: str
url: str
offset: int
size: int
@json_schema_type
class BucketResponse(BaseModel):
name: str
@json_schema_type
class ListBucketResponse(BaseModel):
"""
Response representing a list of file entries.
:param data: List of FileResponse entries
"""
data: list[BucketResponse]
@json_schema_type
class FileResponse(BaseModel):
"""
Response representing a file entry.
:param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
:param mime_type: MIME type of the file
:param url: Upload URL for the file contents
:param bytes: Size of the file in bytes
:param created_at: Timestamp of when the file was created
"""
bucket: str
key: str
mime_type: str
url: str
bytes: int
created_at: int
@json_schema_type
class ListFileResponse(BaseModel):
"""
Response representing a list of file entries.
:param data: List of FileResponse entries
"""
data: list[FileResponse]
object: Literal["file"] = "file"
deleted: bool
@runtime_checkable
@trace_protocol
class Files(Protocol):
@webmethod(route="/files", method="POST")
async def create_upload_session(
# OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST")
async def openai_upload_file(
self,
bucket: str,
key: str,
mime_type: str,
size: int,
) -> FileUploadResponse:
file: Annotated[UploadFile, File()],
purpose: Annotated[OpenAIFilePurpose, Form()],
) -> OpenAIFileObject:
"""
Create a new upload session for a file identified by a bucket and key.
Upload a file that can be used across various endpoints.
:param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-).
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
:param mime_type: MIME type of the file.
:param size: File size in bytes.
:returns: A FileUploadResponse.
The file upload should be a multipart form request with:
- file: The File object (not file name) to be uploaded.
- purpose: The intended purpose of the uploaded file.
:param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
:param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
:returns: An OpenAIFileObject representing the uploaded file.
"""
...
@webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
async def upload_content_to_session(
@webmethod(route="/openai/v1/files", method="GET")
async def openai_list_files(
self,
upload_id: str,
) -> FileResponse | None:
after: str | None = None,
limit: int | None = 10000,
order: Order | None = Order.desc,
purpose: OpenAIFilePurpose | None = None,
) -> ListOpenAIFileResponse:
"""
Upload file content to an existing upload session.
On the server, the request body contains the raw bytes that are uploaded.
Returns a list of files that belong to the user's organization.
:param upload_id: ID of the upload session.
:returns: A FileResponse or None if the upload is not complete.
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
:param limit: A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000.
:param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
:param purpose: Only return files with the given purpose.
:returns: A ListOpenAIFileResponse containing the list of files.
"""
...
@webmethod(route="/files/session:{upload_id}", method="GET")
async def get_upload_session_info(
@webmethod(route="/openai/v1/files/{file_id}", method="GET")
async def openai_retrieve_file(
self,
upload_id: str,
) -> FileUploadResponse:
file_id: str,
) -> OpenAIFileObject:
"""
Returns information about an existing upload session.
Returns information about a specific file.
:param upload_id: ID of the upload session.
:returns: A FileUploadResponse.
:param file_id: The ID of the file to use for this request.
:returns: An OpenAIFileObject containing file information.
"""
...
@webmethod(route="/files", method="GET")
async def list_all_buckets(
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
async def openai_delete_file(
self,
bucket: str,
) -> ListBucketResponse:
file_id: str,
) -> OpenAIFileDeleteResponse:
"""
List all buckets.
Delete a file.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
:returns: A ListBucketResponse.
:param file_id: The ID of the file to use for this request.
:returns: An OpenAIFileDeleteResponse indicating successful deletion.
"""
...
@webmethod(route="/files/{bucket}", method="GET")
async def list_files_in_bucket(
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
async def openai_retrieve_file_content(
self,
bucket: str,
) -> ListFileResponse:
file_id: str,
) -> Response:
"""
List all files in a bucket.
Returns the contents of the specified file.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
:returns: A ListFileResponse.
"""
...
@webmethod(route="/files/{bucket}/{key:path}", method="GET")
async def get_file(
self,
bucket: str,
key: str,
) -> FileResponse:
"""
Get a file info identified by a bucket and key.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
:returns: A FileResponse.
"""
...
@webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
async def delete_file(
self,
bucket: str,
key: str,
) -> None:
"""
Delete a file identified by a bucket and key.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
:param file_id: The ID of the file to use for this request.
:returns: The raw file content as a binary response.
"""
...
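Because these routes mirror the OpenAI Files API under /openai/v1, the stock openai Python client can exercise them. A sketch, with the base URL, API key, and file name as assumptions:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")  # assumed deployment

with open("dataset.jsonl", "rb") as f:  # hypothetical file
    uploaded = client.files.create(file=f, purpose="assistants")
print(client.files.retrieve(uploaded.id).filename)
print([item.id for item in client.files.list(purpose="assistants").data])
raw = client.files.content(uploaded.id).read()  # raw bytes of the stored file
client.files.delete(uploaded.id)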

View file

@@ -35,7 +35,8 @@ class StackRun(Subcommand):
"config",
type=str,
nargs="?", # Make it optional
help="Path to config file to use for the run. Required for venv and conda environments.",
metavar="config | template",
help="Path to config file to use for the run or name of known template (`llama stack list` for a list).",
)
self.parser.add_argument(
"--port",
@@ -59,7 +60,7 @@ class StackRun(Subcommand):
"--image-type",
type=str,
help="Image Type used during the build. This can be either conda or container or venv.",
choices=[e.value for e in ImageType],
choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
)
self.parser.add_argument(
"--enable-ui",
@@ -154,7 +155,10 @@ class StackRun(Subcommand):
# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
if callable(getattr(args, arg)):
continue
setattr(server_args, arg, getattr(args, arg))
if arg == "config" and template_name:
server_args.config = str(config_file)
else:
setattr(server_args, arg, getattr(args, arg))
# Run the server
server_main(server_args)

View file

@@ -1,86 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.log import get_logger
logger = get_logger(__name__, category="core")
def check_access(
obj_identifier: str,
obj_attributes: AccessAttributes | None,
user_attributes: dict[str, Any] | None = None,
) -> bool:
"""Check if the current user has access to the given object, based on access attributes.
Access control algorithm:
1. If the resource has no access_attributes, access is GRANTED to all authenticated users
2. If the user has no attributes, access is DENIED to any object with access_attributes defined
3. For each attribute category in the resource's access_attributes:
a. If the user lacks that category, access is DENIED
b. If the user has the category but none of the required values, access is DENIED
c. If the user has at least one matching value in each required category, access is GRANTED
Example:
# Resource requires:
access_attributes = AccessAttributes(
roles=["admin", "data-scientist"],
teams=["ml-team"]
)
# User has:
user_attributes = {
"roles": ["data-scientist", "engineer"],
"teams": ["ml-team", "infra-team"],
"projects": ["llama-3"]
}
# Result: Access GRANTED
# - User has the "data-scientist" role (matches one of the required roles)
# - AND user is part of the "ml-team" (matches the required team)
# - The extra "projects" attribute is ignored
Args:
obj_identifier: The identifier of the resource object to check access for
obj_attributes: The access attributes of the resource object
user_attributes: The attributes of the current user
Returns:
bool: True if access is granted, False if denied
"""
# If object has no access attributes, allow access by default
if not obj_attributes:
return True
# If no user attributes, deny access to objects with access control
if not user_attributes:
return False
dict_attribs = obj_attributes.model_dump(exclude_none=True)
if not dict_attribs:
return True
# Check each attribute category (requires ALL categories to match)
# TODO: formalize this into a proper ABAC policy
for attr_key, required_values in dict_attribs.items():
user_values = user_attributes.get(attr_key, [])
if not user_values:
logger.debug(f"Access denied to {obj_identifier}: missing required attribute category '{attr_key}'")
return False
if not any(val in user_values for val in required_values):
logger.debug(
f"Access denied to {obj_identifier}: "
f"no match for attribute '{attr_key}', required one of {required_values}"
)
return False
logger.debug(f"Access granted to {obj_identifier}")
return True

View file

@@ -3,5 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .verification import get_distribution_template # noqa: F401

View file

@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.distribution.datatypes import User
from .conditions import (
Condition,
ProtectedResource,
parse_conditions,
)
from .datatypes import (
AccessRule,
Action,
Scope,
)
def matches_resource(resource_scope: str, actual_resource: str) -> bool:
if resource_scope == actual_resource:
return True
return resource_scope.endswith("::*") and actual_resource.startswith(resource_scope[:-1])
def matches_scope(
scope: Scope,
action: Action,
resource: str,
user: str | None,
) -> bool:
if scope.resource and not matches_resource(scope.resource, resource):
return False
if scope.principal and scope.principal != user:
return False
return action in scope.actions
def as_list(obj: Any) -> list[Any]:
if isinstance(obj, list):
return obj
return [obj]
def matches_conditions(
conditions: list[Condition],
resource: ProtectedResource,
user: User,
) -> bool:
for condition in conditions:
# must match all conditions
if not condition.matches(resource, user):
return False
return True
def default_policy() -> list[AccessRule]:
# for backwards compatibility, if no rules are provided, assume
# full access subject to previous attribute matching rules
return [
AccessRule(
permit=Scope(actions=list(Action)),
when=["user in owners " + name for name in ["roles", "teams", "projects", "namespaces"]],
),
]
def is_action_allowed(
policy: list[AccessRule],
action: Action,
resource: ProtectedResource,
user: User | None,
) -> bool:
# If user is not set, assume authentication is not enabled
if not user:
return True
if not len(policy):
policy = default_policy()
qualified_resource_id = resource.type + "::" + resource.identifier
for rule in policy:
if rule.forbid and matches_scope(rule.forbid, action, qualified_resource_id, user.principal):
if rule.when:
if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
return False
elif rule.unless:
if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
return False
else:
return False
elif rule.permit and matches_scope(rule.permit, action, qualified_resource_id, user.principal):
if rule.when:
if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
return True
elif rule.unless:
if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
return True
else:
return True
# assume access is denied unless we find a rule that permits access
return False
class AccessDeniedError(RuntimeError):
pass
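To see how these pieces compose, here is a minimal sketch of evaluating a policy. The module paths follow the imports used elsewhere in this diff; the stand-in resource class is hypothetical, written to satisfy the ProtectedResource protocol:

from llama_stack.distribution.access_control.access_control import is_action_allowed
from llama_stack.distribution.access_control.datatypes import AccessRule, Action, Scope
from llama_stack.distribution.datatypes import User

policy = [
    AccessRule(
        permit=Scope(actions=[Action.READ], resource="model::*"),
        when="user with admin in roles",
    ),
]

class FakeModel:  # hypothetical stand-in satisfying ProtectedResource
    type = "model"
    identifier = "llama-3"
    owner = None

user = User(principal="alice", attributes={"roles": ["admin"]})
assert is_action_allowed(policy, Action.READ, FakeModel(), user)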

View file

@@ -0,0 +1,129 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol
class User(Protocol):
principal: str
attributes: dict[str, list[str]] | None
class ProtectedResource(Protocol):
type: str
identifier: str
owner: User
class Condition(Protocol):
def matches(self, resource: ProtectedResource, user: User) -> bool: ...
class UserInOwnersList:
def __init__(self, name: str):
self.name = name
def owners_values(self, resource: ProtectedResource) -> list[str] | None:
if (
hasattr(resource, "owner")
and resource.owner
and resource.owner.attributes
and self.name in resource.owner.attributes
):
return resource.owner.attributes[self.name]
else:
return None
def matches(self, resource: ProtectedResource, user: User) -> bool:
required = self.owners_values(resource)
if not required:
return True
if not user.attributes or self.name not in user.attributes or not user.attributes[self.name]:
return False
user_values = user.attributes[self.name]
for value in required:
if value in user_values:
return True
return False
def __repr__(self):
return f"user in owners {self.name}"
class UserNotInOwnersList(UserInOwnersList):
def __init__(self, name: str):
super().__init__(name)
def matches(self, resource: ProtectedResource, user: User) -> bool:
return not super().matches(resource, user)
def __repr__(self):
return f"user not in owners {self.name}"
class UserWithValueInList:
def __init__(self, name: str, value: str):
self.name = name
self.value = value
def matches(self, resource: ProtectedResource, user: User) -> bool:
if user.attributes and self.name in user.attributes:
return self.value in user.attributes[self.name]
print(f"User does not have {self.value} in {self.name}")
return False
def __repr__(self):
return f"user with {self.value} in {self.name}"
class UserWithValueNotInList(UserWithValueInList):
def __init__(self, name: str, value: str):
super().__init__(name, value)
def matches(self, resource: ProtectedResource, user: User) -> bool:
return not super().matches(resource, user)
def __repr__(self):
return f"user with {self.value} not in {self.name}"
class UserIsOwner:
def matches(self, resource: ProtectedResource, user: User) -> bool:
return resource.owner.principal == user.principal if resource.owner else False
def __repr__(self):
return "user is owner"
class UserIsNotOwner:
def matches(self, resource: ProtectedResource, user: User) -> bool:
return not resource.owner or resource.owner.principal != user.principal
def __repr__(self):
return "user is not owner"
def parse_condition(condition: str) -> Condition:
words = condition.split()
match words:
case ["user", "is", "owner"]:
return UserIsOwner()
case ["user", "is", "not", "owner"]:
return UserIsNotOwner()
case ["user", "with", value, "in", name]:
return UserWithValueInList(name, value)
case ["user", "with", value, "not", "in", name]:
return UserWithValueNotInList(name, value)
case ["user", "in", "owners", name]:
return UserInOwnersList(name)
case ["user", "not", "in", "owners", name]:
return UserNotInOwnersList(name)
case _:
raise ValueError(f"Invalid condition: {condition}")
def parse_conditions(conditions: list[str]) -> list[Condition]:
return [parse_condition(c) for c in conditions]
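For example, each supported constraint string parses to one of the classes above:

parse_condition("user is owner")             # -> UserIsOwner()
parse_condition("user with admin in roles")  # -> UserWithValueInList("roles", "admin")
parse_condition("user in owners teams")      # -> UserInOwnersList("teams")
parse_condition("user likes pizza")          # raises ValueError("Invalid condition: ...")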

View file

@@ -0,0 +1,107 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from enum import Enum
from pydantic import BaseModel, model_validator
from typing_extensions import Self
from .conditions import parse_conditions
class Action(str, Enum):
CREATE = "create"
READ = "read"
UPDATE = "update"
DELETE = "delete"
class Scope(BaseModel):
principal: str | None = None
actions: Action | list[Action]
resource: str | None = None
def _mutually_exclusive(obj, a: str, b: str):
if getattr(obj, a) and getattr(obj, b):
raise ValueError(f"{a} and {b} are mutually exclusive")
def _require_one_of(obj, a: str, b: str):
if not getattr(obj, a) and not getattr(obj, b):
raise ValueError(f"on of {a} or {b} is required")
class AccessRule(BaseModel):
"""Access rule based loosely on cedar policy language
A rule defines a list of actions either to permit or to forbid. It may specify a
principal or a resource that must match for the rule to take effect. The resource
to match should be specified in the form of a type-qualified identifier, e.g.
model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
e.g. model::*. If the principal or resource is not specified, the rule matches all
requests.
A rule may also specify a condition, either a 'when' or an 'unless', with additional
constraints as to where the rule applies. The constraints supported at present are:
- 'user with <attr-value> in <attr-name>'
- 'user with <attr-value> not in <attr-name>'
- 'user is owner'
- 'user is not owner'
- 'user in owners <attr-name>'
- 'user not in owners <attr-name>'
Rules are tested in order to find a match. If a match is found, the request is
permitted or forbidden depending on the type of rule. If no match is found, the
request is denied. If no rules are specified, a rule that allows any action as
long as the resource attributes match the user attributes is added
(i.e. the previous behaviour is the default).
Some examples in yaml:
- permit:
principal: user-1
actions: [create, read, delete]
resource: model::*
description: user-1 has full access to all models
- permit:
principal: user-2
actions: [read]
resource: model::model-1
description: user-2 has read access to model-1 only
- permit:
actions: [read]
when: user in owner teams
description: any user has read access to any resource created by a member of their team
- forbid:
actions: [create, read, delete]
resource: vector_db::*
unless: user with admin in roles
description: only user with admin role can use vector_db resources
"""
permit: Scope | None = None
forbid: Scope | None = None
when: str | list[str] | None = None
unless: str | list[str] | None = None
description: str | None = None
@model_validator(mode="after")
def validate_rule_format(self) -> Self:
_require_one_of(self, "permit", "forbid")
_mutually_exclusive(self, "permit", "forbid")
_mutually_exclusive(self, "when", "unless")
if isinstance(self.when, list):
parse_conditions(self.when)
elif self.when:
parse_conditions([self.when])
if isinstance(self.unless, list):
parse_conditions(self.unless)
elif self.unless:
parse_conditions([self.unless])
return self
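Since AccessRule is a pydantic model, the YAML examples above can be loaded directly; a sketch:

import yaml

rules = yaml.safe_load(
    """
- permit:
    actions: [read]
  when: user with admin in roles
"""
)
policy = [AccessRule(**rule) for rule in rules]  # validated by the model_validator above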

View file

@@ -29,6 +29,8 @@ SERVER_DEPENDENCIES = [
"fire",
"httpx",
"uvicorn",
"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
]

View file

@@ -24,6 +24,7 @@ from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.access_control.datatypes import AccessRule
from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
@@ -35,126 +36,66 @@ LLAMA_STACK_RUN_CONFIG_VERSION = "2"
RoutingKey = str | list[str]
class AccessAttributes(BaseModel):
"""Structured representation of user attributes for access control.
class User(BaseModel):
principal: str
# further attributes that may be used for access control decisions
attributes: dict[str, list[str]] | None = None
This model defines a structured approach to representing user attributes
with common standard categories for access control.
Standard attribute categories include:
- roles: Role-based attributes (e.g., admin, data-scientist)
- teams: Team-based attributes (e.g., ml-team, infra-team)
- projects: Project access attributes (e.g., llama-3, customer-insights)
- namespaces: Namespace-based access control for resource isolation
"""
# Standard attribute categories - the minimal set we need now
roles: list[str] | None = Field(
default=None, description="Role-based attributes (e.g., 'admin', 'data-scientist', 'user')"
)
teams: list[str] | None = Field(default=None, description="Team-based attributes (e.g., 'ml-team', 'nlp-team')")
projects: list[str] | None = Field(
default=None, description="Project-based access attributes (e.g., 'llama-3', 'customer-insights')"
)
namespaces: list[str] | None = Field(
default=None, description="Namespace-based access control for resource isolation"
)
def __init__(self, principal: str, attributes: dict[str, list[str]] | None):
super().__init__(principal=principal, attributes=attributes)
class ResourceWithACL(Resource):
"""Extension of Resource that adds attribute-based access control capabilities.
class ResourceWithOwner(Resource):
"""Extension of Resource that adds an optional owner, i.e. the user that created the
resource. This can be used to constrain access to the resource."""
This class adds an optional access_attributes field that allows fine-grained control
over which users can access each resource. When attributes are defined, a user must have
matching attributes to access the resource.
Attribute Matching Algorithm:
1. If a resource has no access_attributes (None or empty dict), it's visible to all authenticated users
2. Each key in access_attributes represents an attribute category (e.g., "roles", "teams", "projects")
3. The matching algorithm requires ALL categories to match (AND relationship between categories)
4. Within each category, ANY value match is sufficient (OR relationship within a category)
Examples:
# Resource visible to everyone (no access control)
model = Model(identifier="llama-2", ...)
# Resource visible only to admins
model = Model(
identifier="gpt-4",
access_attributes=AccessAttributes(roles=["admin"])
)
# Resource visible to data scientists on the ML team
model = Model(
identifier="private-model",
access_attributes=AccessAttributes(
roles=["data-scientist", "researcher"],
teams=["ml-team"]
)
)
# ^ User must have at least one of the roles AND be on the ml-team
# Resource visible to users with specific project access
vector_db = VectorDB(
identifier="customer-embeddings",
access_attributes=AccessAttributes(
projects=["customer-insights"],
namespaces=["confidential"]
)
)
# ^ User must have access to the customer-insights project AND have confidential namespace
"""
access_attributes: AccessAttributes | None = None
owner: User | None = None
# Use the extended Resource for all routable objects
class ModelWithACL(Model, ResourceWithACL):
class ModelWithOwner(Model, ResourceWithOwner):
pass
class ShieldWithACL(Shield, ResourceWithACL):
class ShieldWithOwner(Shield, ResourceWithOwner):
pass
class VectorDBWithACL(VectorDB, ResourceWithACL):
class VectorDBWithOwner(VectorDB, ResourceWithOwner):
pass
class DatasetWithACL(Dataset, ResourceWithACL):
class DatasetWithOwner(Dataset, ResourceWithOwner):
pass
class ScoringFnWithACL(ScoringFn, ResourceWithACL):
class ScoringFnWithOwner(ScoringFn, ResourceWithOwner):
pass
class BenchmarkWithACL(Benchmark, ResourceWithACL):
class BenchmarkWithOwner(Benchmark, ResourceWithOwner):
pass
class ToolWithACL(Tool, ResourceWithACL):
class ToolWithOwner(Tool, ResourceWithOwner):
pass
class ToolGroupWithACL(ToolGroup, ResourceWithACL):
class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
pass
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | Tool | ToolGroup
RoutableObjectWithProvider = Annotated[
ModelWithACL
| ShieldWithACL
| VectorDBWithACL
| DatasetWithACL
| ScoringFnWithACL
| BenchmarkWithACL
| ToolWithACL
| ToolGroupWithACL,
ModelWithOwner
| ShieldWithOwner
| VectorDBWithOwner
| DatasetWithOwner
| ScoringFnWithOwner
| BenchmarkWithOwner
| ToolWithOwner
| ToolGroupWithOwner,
Field(discriminator="type"),
]
@@ -234,6 +175,7 @@ class AuthenticationConfig(BaseModel):
...,
description="Provider-specific configuration",
)
access_policy: list[AccessRule] = Field(default=[], description="Rules for determining access to resources")
class AuthenticationRequiredError(Exception):

View file

@@ -10,6 +10,8 @@ import logging
from contextlib import AbstractContextManager
from typing import Any
from llama_stack.distribution.datatypes import User
from .utils.dynamic import instantiate_class_type
log = logging.getLogger(__name__)
@@ -21,12 +23,10 @@ PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)
class RequestProviderDataContext(AbstractContextManager):
"""Context manager for request provider data"""
def __init__(
self, provider_data: dict[str, Any] | None = None, auth_attributes: dict[str, list[str]] | None = None
):
def __init__(self, provider_data: dict[str, Any] | None = None, user: User | None = None):
self.provider_data = provider_data or {}
if auth_attributes:
self.provider_data["__auth_attributes"] = auth_attributes
if user:
self.provider_data["__authenticated_user"] = user
self.token = None
@@ -95,9 +95,9 @@ def request_provider_data_context(
return RequestProviderDataContext(provider_data, auth_attributes)
def get_auth_attributes() -> dict[str, list[str]] | None:
def get_authenticated_user() -> User | None:
"""Helper to retrieve auth attributes from the provider data context"""
provider_data = PROVIDER_DATA_VAR.get()
if not provider_data:
return None
return provider_data.get("__auth_attributes")
return provider_data.get("__authenticated_user")
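A sketch of the round trip through this context, based on the call site shown later in this diff (`request_provider_data_context(request.headers, user)`); passing empty headers is an assumption:

from llama_stack.distribution.datatypes import User
from llama_stack.distribution.request_headers import (
    get_authenticated_user,
    request_provider_data_context,
)

user = User(principal="alice", attributes={"roles": ["admin"]})
with request_provider_data_context({}, user):
    assert get_authenticated_user().principal == "alice"
assert get_authenticated_user() is None  # context variable restored on exit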

View file

@@ -28,6 +28,7 @@ from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.client import get_client_impl
from llama_stack.distribution.datatypes import (
AccessRule,
AutoRoutedProviderSpec,
Provider,
RoutingTableProviderSpec,
@@ -118,6 +119,7 @@ async def resolve_impls(
run_config: StackRunConfig,
provider_registry: ProviderRegistry,
dist_registry: DistributionRegistry,
policy: list[AccessRule],
) -> dict[Api, Any]:
"""
Resolves provider implementations by:
@@ -140,7 +142,7 @@
sorted_providers = sort_providers_by_deps(providers_with_specs, run_config)
return await instantiate_providers(sorted_providers, router_apis, dist_registry, run_config)
return await instantiate_providers(sorted_providers, router_apis, dist_registry, run_config, policy)
def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str, dict[str, ProviderWithSpec]]:
@@ -247,6 +249,7 @@ async def instantiate_providers(
router_apis: set[Api],
dist_registry: DistributionRegistry,
run_config: StackRunConfig,
policy: list[AccessRule],
) -> dict:
"""Instantiates providers asynchronously while managing dependencies."""
impls: dict[Api, Any] = {}
@@ -261,7 +264,7 @@
if isinstance(provider.spec, RoutingTableProviderSpec):
inner_impls = inner_impls_by_provider_id[f"inner-{provider.spec.router_api.value}"]
impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config)
impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config, policy)
if api_str.startswith("inner-"):
inner_impls_by_provider_id[api_str][provider.provider_id] = impl
@@ -312,6 +315,7 @@ async def instantiate_provider(
inner_impls: dict[str, Any],
dist_registry: DistributionRegistry,
run_config: StackRunConfig,
policy: list[AccessRule],
):
provider_spec = provider.spec
if not hasattr(provider_spec, "module"):
@@ -336,13 +340,15 @@
method = "get_routing_table_impl"
config = None
args = [provider_spec.api, inner_impls, deps, dist_registry]
args = [provider_spec.api, inner_impls, deps, dist_registry, policy]
else:
method = "get_provider_impl"
config_type = instantiate_class_type(provider_spec.config_class)
config = config_type(**provider.config)
args = [config, deps]
if "policy" in inspect.signature(getattr(module, method)).parameters:
args.append(policy)
fn = getattr(module, method)
impl = await fn(*args)
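The conditional injection above — appending `policy` only when the factory function declares it — is a small, reusable pattern. A generic sketch (the helper name is hypothetical):

import inspect

def call_with_optional_arg(fn, args: list, name: str, value):
    # Append `value` only if `fn` declares a parameter named `name`
    if name in inspect.signature(fn).parameters:
        args = [*args, value]
    return fn(*args)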

View file

@@ -6,7 +6,7 @@
from typing import Any
from llama_stack.distribution.datatypes import RoutedProtocol
from llama_stack.distribution.datatypes import AccessRule, RoutedProtocol
from llama_stack.distribution.stack import StackRunConfig
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable
@@ -18,6 +18,7 @@ async def get_routing_table_impl(
impls_by_provider_id: dict[str, RoutedProtocol],
_deps,
dist_registry: DistributionRegistry,
policy: list[AccessRule],
) -> Any:
from ..routing_tables.benchmarks import BenchmarksRoutingTable
from ..routing_tables.datasets import DatasetsRoutingTable
@@ -40,7 +41,7 @@
if api.value not in api_to_tables:
raise ValueError(f"API {api.value} not found in router map")
impl = api_to_tables[api.value](impls_by_provider_id, dist_registry)
impl = api_to_tables[api.value](impls_by_provider_id, dist_registry, policy)
await impl.initialize()
return impl

View file

@@ -8,7 +8,7 @@ from typing import Any
from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
from llama_stack.distribution.datatypes import (
BenchmarkWithACL,
BenchmarkWithOwner,
)
from llama_stack.log import get_logger
@@ -47,7 +47,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
)
if provider_benchmark_id is None:
provider_benchmark_id = benchmark_id
benchmark = BenchmarkWithACL(
benchmark = BenchmarkWithOwner(
identifier=benchmark_id,
dataset_id=dataset_id,
scoring_functions=scoring_functions,

View file

@@ -8,14 +8,14 @@ from typing import Any
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.distribution.access_control import check_access
from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.distribution.datatypes import (
AccessAttributes,
AccessRule,
RoutableObject,
RoutableObjectWithProvider,
RoutedProtocol,
)
from llama_stack.distribution.request_headers import get_auth_attributes
from llama_stack.distribution.request_headers import get_authenticated_user
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, RoutingTable
@@ -73,9 +73,11 @@ class CommonRoutingTableImpl(RoutingTable):
self,
impls_by_provider_id: dict[str, RoutedProtocol],
dist_registry: DistributionRegistry,
policy: list[AccessRule],
) -> None:
self.impls_by_provider_id = impls_by_provider_id
self.dist_registry = dist_registry
self.policy = policy
async def initialize(self) -> None:
async def add_objects(objs: list[RoutableObjectWithProvider], provider_id: str, cls) -> None:
@@ -166,13 +168,15 @@ class CommonRoutingTableImpl(RoutingTable):
return None
# Check if user has permission to access this object
if not check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes()):
logger.debug(f"Access denied to {type} '{identifier}' based on attribute mismatch")
if not is_action_allowed(self.policy, "read", obj, get_authenticated_user()):
logger.debug(f"Access denied to {type} '{identifier}'")
return None
return obj
async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()):
raise AccessDeniedError()
await self.dist_registry.delete(obj.type, obj.identifier)
await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])
@@ -187,11 +191,12 @@ class CommonRoutingTableImpl(RoutingTable):
p = self.impls_by_provider_id[obj.provider_id]
# If object supports access control but no attributes set, use creator's attributes
if not obj.access_attributes:
creator_attributes = get_auth_attributes()
if creator_attributes:
obj.access_attributes = AccessAttributes(**creator_attributes)
logger.info(f"Setting access attributes for {obj.type} '{obj.identifier}' based on creator's identity")
creator = get_authenticated_user()
if not is_action_allowed(self.policy, "create", obj, creator):
raise AccessDeniedError()
if creator:
obj.owner = creator
logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")
registered_obj = await register_object_with_provider(obj, p)
# TODO: This needs to be fixed for all APIs once they return the registered object
@@ -210,9 +215,7 @@ class CommonRoutingTableImpl(RoutingTable):
# Apply attribute-based access control filtering
if filtered_objs:
filtered_objs = [
obj
for obj in filtered_objs
if check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes())
obj for obj in filtered_objs if is_action_allowed(self.policy, "read", obj, get_authenticated_user())
]
return filtered_objs

View file

@@ -19,7 +19,7 @@ from llama_stack.apis.datasets import (
)
from llama_stack.apis.resource import ResourceType
from llama_stack.distribution.datatypes import (
DatasetWithACL,
DatasetWithOwner,
)
from llama_stack.log import get_logger
@@ -74,7 +74,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
if metadata is None:
metadata = {}
dataset = DatasetWithACL(
dataset = DatasetWithOwner(
identifier=dataset_id,
provider_resource_id=provider_dataset_id,
provider_id=provider_id,

View file

@@ -9,7 +9,7 @@ from typing import Any
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
from llama_stack.distribution.datatypes import (
ModelWithACL,
ModelWithOwner,
)
from llama_stack.log import get_logger
@@ -65,7 +65,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
model_type = ModelType.llm
if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
raise ValueError("Embedding model must have an embedding dimension in its metadata")
model = ModelWithACL(
model = ModelWithOwner(
identifier=model_id,
provider_resource_id=provider_model_id,
provider_id=provider_id,

View file

@@ -13,7 +13,7 @@ from llama_stack.apis.scoring_functions import (
ScoringFunctions,
)
from llama_stack.distribution.datatypes import (
ScoringFnWithACL,
ScoringFnWithOwner,
)
from llama_stack.log import get_logger
@@ -50,7 +50,7 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
raise ValueError(
"No provider specified and multiple providers available. Please specify a provider_id."
)
scoring_fn = ScoringFnWithACL(
scoring_fn = ScoringFnWithOwner(
identifier=scoring_fn_id,
description=description,
return_type=return_type,

View file

@@ -9,7 +9,7 @@ from typing import Any
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
from llama_stack.distribution.datatypes import (
ShieldWithACL,
ShieldWithOwner,
)
from llama_stack.log import get_logger
@@ -47,7 +47,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
)
if params is None:
params = {}
shield = ShieldWithACL(
shield = ShieldWithOwner(
identifier=shield_id,
provider_resource_id=provider_shield_id,
provider_id=provider_id,

View file

@@ -8,7 +8,7 @@ from typing import Any
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
from llama_stack.distribution.datatypes import ToolGroupWithACL
from llama_stack.distribution.datatypes import ToolGroupWithOwner
from llama_stack.log import get_logger
from .common import CommonRoutingTableImpl
@@ -106,7 +106,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
mcp_endpoint: URL | None = None,
args: dict[str, Any] | None = None,
) -> None:
toolgroup = ToolGroupWithACL(
toolgroup = ToolGroupWithOwner(
identifier=toolgroup_id,
provider_id=provider_id,
provider_resource_id=toolgroup_id,

View file

@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs
from llama_stack.distribution.datatypes import (
VectorDBWithACL,
VectorDBWithOwner,
)
from llama_stack.log import get_logger
@@ -63,7 +63,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
"embedding_model": embedding_model,
"embedding_dimension": model.metadata["embedding_dimension"],
}
vector_db = TypeAdapter(VectorDBWithACL).validate_python(vector_db_data)
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
await self.register_object(vector_db)
return vector_db

View file

@@ -105,24 +105,16 @@ class AuthenticationMiddleware:
logger.exception("Error during authentication")
return await self._send_auth_error(send, "Authentication service error")
# Store attributes in request scope for access control
if validation_result.access_attributes:
user_attributes = validation_result.access_attributes.model_dump(exclude_none=True)
else:
logger.warning("No access attributes, setting namespace to token by default")
user_attributes = {
"roles": [token],
}
# Store the client ID in the request scope so that downstream middleware (like QuotaMiddleware)
# can identify the requester and enforce per-client rate limits.
scope["authenticated_client_id"] = token
# Store attributes in request scope
scope["user_attributes"] = user_attributes
scope["principal"] = validation_result.principal
if validation_result.attributes:
scope["user_attributes"] = validation_result.attributes
logger.debug(
f"Authentication successful: {validation_result.principal} with {len(scope['user_attributes'])} attributes"
f"Authentication successful: {validation_result.principal} with {len(validation_result.attributes)} attributes"
)
return await self.app(scope, receive, send)

View file

@@ -16,43 +16,18 @@ from jose import jwt
from pydantic import BaseModel, Field, field_validator, model_validator
from typing_extensions import Self
from llama_stack.distribution.datatypes import AccessAttributes, AuthenticationConfig, AuthProviderType
from llama_stack.distribution.datatypes import AuthenticationConfig, AuthProviderType, User
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="auth")
class TokenValidationResult(BaseModel):
principal: str | None = Field(
default=None,
description="The principal (username or persistent identifier) of the authenticated user",
)
access_attributes: AccessAttributes | None = Field(
default=None,
description="""
Structured user attributes for attribute-based access control.
These attributes determine which resources the user can access.
The model provides standard categories like "roles", "teams", "projects", and "namespaces".
Each attribute category contains a list of values that the user has for that category.
During access control checks, these values are compared against resource requirements.
Example with standard categories:
```json
{
"roles": ["admin", "data-scientist"],
"teams": ["ml-team"],
"projects": ["llama-3"],
"namespaces": ["research"]
}
```
""",
)
class AuthResponse(TokenValidationResult):
class AuthResponse(BaseModel):
"""The format of the authentication response from the auth endpoint."""
principal: str
# further attributes that may be used for access control decisions
attributes: dict[str, list[str]] | None = None
message: str | None = Field(
default=None, description="Optional message providing additional context about the authentication result."
)
@@ -78,7 +53,7 @@ class AuthProvider(ABC):
"""Abstract base class for authentication providers."""
@abstractmethod
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def validate_token(self, token: str, scope: dict | None = None) -> User:
"""Validate a token and return access attributes."""
pass
@@ -88,10 +63,10 @@
pass
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> AccessAttributes:
attributes = AccessAttributes()
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
attributes: dict[str, list[str]] = {}
for claim_key, attribute_key in mapping.items():
if claim_key not in claims or not hasattr(attributes, attribute_key):
if claim_key not in claims:
continue
claim = claims[claim_key]
if isinstance(claim, list):
@@ -99,11 +74,10 @@ def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str])
else:
values = claim.split()
current = getattr(attributes, attribute_key)
if current:
current.extend(values)
if attribute_key in attributes:
attributes[attribute_key].extend(values)
else:
setattr(attributes, attribute_key, values)
attributes[attribute_key] = values
return attributes
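Worked through by hand (the claim names and mapping below are assumptions), get_attributes_from_claims behaves like this: string claims are split on whitespace, list claims are taken as-is, and values for the same attribute key accumulate.

claims = {"sub": "alice", "scope": "read write", "groups": ["ml-team"]}
mapping = {"scope": "roles", "groups": "teams", "sub": "namespaces"}
attrs = get_attributes_from_claims(claims, mapping)
# attrs == {"roles": ["read", "write"], "teams": ["ml-team"], "namespaces": ["alice"]}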
@@ -145,8 +119,6 @@ class OAuth2TokenAuthProviderConfig(BaseModel):
for key, value in v.items():
if not value:
raise ValueError(f"claims_mapping value cannot be empty: {key}")
if value not in AccessAttributes.model_fields:
raise ValueError(f"claims_mapping value is not a valid attribute: {value}")
return v
@model_validator(mode="after")
@@ -171,14 +143,14 @@ class OAuth2TokenAuthProvider(AuthProvider):
self._jwks: dict[str, str] = {}
self._jwks_lock = Lock()
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def validate_token(self, token: str, scope: dict | None = None) -> User:
if self.config.jwks:
return await self.validate_jwt_token(token, scope)
if self.config.introspection:
return await self.introspect_token(token, scope)
raise ValueError("One of jwks or introspection must be configured")
async def validate_jwt_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def validate_jwt_token(self, token: str, scope: dict | None = None) -> User:
"""Validate a token using the JWT token."""
await self._refresh_jwks()
@@ -203,12 +175,12 @@ class OAuth2TokenAuthProvider(AuthProvider):
# We should incorporate these into the access attributes.
principal = claims["sub"]
access_attributes = get_attributes_from_claims(claims, self.config.claims_mapping)
return TokenValidationResult(
return User(
principal=principal,
access_attributes=access_attributes,
attributes=access_attributes,
)
async def introspect_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def introspect_token(self, token: str, scope: dict | None = None) -> User:
"""Validate a token using token introspection as defined by RFC 7662."""
form = {
"token": token,
@@ -242,9 +214,9 @@ class OAuth2TokenAuthProvider(AuthProvider):
raise ValueError("Token not active")
principal = fields["sub"] or fields["username"]
access_attributes = get_attributes_from_claims(fields, self.config.claims_mapping)
return TokenValidationResult(
return User(
principal=principal,
access_attributes=access_attributes,
attributes=access_attributes,
)
except httpx.TimeoutException:
logger.exception("Token introspection request timed out")
@@ -299,7 +271,7 @@ class CustomAuthProvider(AuthProvider):
self.config = config
self._client = None
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
async def validate_token(self, token: str, scope: dict | None = None) -> User:
"""Validate a token using the custom authentication endpoint."""
if scope is None:
scope = {}
@@ -341,7 +313,7 @@ class CustomAuthProvider(AuthProvider):
try:
response_data = response.json()
auth_response = AuthResponse(**response_data)
return auth_response
return User(auth_response.principal, auth_response.attributes)
except Exception as e:
logger.exception("Error parsing authentication response")
raise ValueError("Invalid authentication response format") from e

View file

@@ -18,7 +18,7 @@ from collections.abc import Callable
from contextlib import asynccontextmanager
from importlib.metadata import version as parse_version
from pathlib import Path
from typing import Annotated, Any
from typing import Annotated, Any, get_origin
import rich.pretty
import yaml
@@ -26,17 +26,13 @@ from aiohttp import hdrs
from fastapi import Body, FastAPI, HTTPException, Request
from fastapi import Path as FastapiPath
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from openai import BadRequestError
from pydantic import BaseModel, ValidationError
from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.request_headers import (
PROVIDER_DATA_VAR,
request_provider_data_context,
)
from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context
from llama_stack.distribution.resolver import InvalidProviderError
from llama_stack.distribution.server.routes import (
find_matching_route,
@@ -217,11 +213,13 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
async def route_handler(request: Request, **kwargs):
# Get auth attributes from the request scope
user_attributes = request.scope.get("user_attributes", {})
principal = request.scope.get("principal", "")
user = User(principal, user_attributes)
await log_request_pre_validation(request)
# Use context manager with both provider data and auth attributes
with request_provider_data_context(request.headers, user_attributes):
with request_provider_data_context(request.headers, user):
is_streaming = is_streaming_request(func.__name__, request, **kwargs)
try:
@ -244,15 +242,23 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
path_params = extract_path_params(route)
if method == "post":
# Annotate parameters that are in the path with Path(...) and others with Body(...)
new_params = [new_params[0]] + [
(
param.replace(annotation=Annotated[param.annotation, FastapiPath(..., title=param.name)])
if param.name in path_params
else param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)])
)
for param in new_params[1:]
]
# Annotate parameters that are in the path with Path(...) and others with Body(...),
# but preserve existing File() and Form() annotations for multipart form data
new_params = (
[new_params[0]]
+ [
(
param.replace(annotation=Annotated[param.annotation, FastapiPath(..., title=param.name)])
if param.name in path_params
else (
param # Keep original annotation if it's already an Annotated type
if get_origin(param.annotation) is Annotated
else param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)])
)
)
for param in new_params[1:]
]
)
route_handler.__signature__ = sig.replace(parameters=new_params)
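
A minimal sketch (not part of this diff) of the get_origin check introduced above: parameters that already carry an Annotated wrapper, such as multipart File() uploads, are detected and left as-is instead of being re-wrapped in Body(...).

from typing import Annotated, get_origin

from fastapi import File, UploadFile

multipart_param = Annotated[UploadFile, File()]
plain_param = int

assert get_origin(multipart_param) is Annotated   # preserved unchanged
assert get_origin(plain_param) is not Annotated   # would get wrapped in Body(...)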
@ -472,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
window_seconds=window_seconds,
)
# --- CORS middleware for local development ---
# TODO: move to reverse proxy
ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
app.add_middleware(
CORSMiddleware,
allow_origins=[f"http://localhost:{ui_port}"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
try:
impls = asyncio.run(construct_stack(config))
except InvalidProviderError as e:

View file

@ -223,7 +223,10 @@ async def construct_stack(
run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None
) -> dict[Api, Any]:
dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
impls = await resolve_impls(run_config, provider_registry or get_provider_registry(run_config), dist_registry)
policy = run_config.server.auth.access_policy if run_config.server.auth else []
impls = await resolve_impls(
run_config, provider_registry or get_provider_registry(run_config), dist_registry, policy
)
# Add internal implementations after all other providers are resolved
add_internal_implementations(impls, run_config)

View file

@ -7,10 +7,6 @@
# the root directory of this source tree.
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
CONTAINER_OPTS=${CONTAINER_OPTS:-}
LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
PYPI_VERSION=${PYPI_VERSION:-}
VIRTUAL_ENV=${VIRTUAL_ENV:-}
@ -132,63 +128,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
$env_vars \
$other_args
elif [[ "$env_type" == "container" ]]; then
set -x
# Check if container command is available
if ! is_command_available $CONTAINER_BINARY; then
printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
exit 1
fi
if is_command_available selinuxenabled &> /dev/null && selinuxenabled; then
# Disable SELinux labels
CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi
mounts=""
if [ -n "$LLAMA_STACK_DIR" ]; then
mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):/app/llama-stack-source"
fi
if [ -n "$LLAMA_CHECKPOINT_DIR" ]; then
mounts="$mounts -v $LLAMA_CHECKPOINT_DIR:/root/.llama"
CONTAINER_OPTS="$CONTAINER_OPTS --gpus=all"
fi
if [ -n "$PYPI_VERSION" ]; then
version_tag="$PYPI_VERSION"
elif [ -n "$LLAMA_STACK_DIR" ]; then
version_tag="dev"
elif [ -n "$TEST_PYPI_VERSION" ]; then
version_tag="test-$TEST_PYPI_VERSION"
else
if ! is_command_available jq; then
echo -e "${RED}Error: jq not found" >&2
exit 1
fi
URL="https://pypi.org/pypi/llama-stack/json"
version_tag=$(curl -s $URL | jq -r '.info.version')
fi
# Build the command with optional yaml config
cmd="$CONTAINER_BINARY run $CONTAINER_OPTS -it \
-p $port:$port \
$env_vars \
$mounts \
--env LLAMA_STACK_PORT=$port \
--entrypoint python \
$container_image:$version_tag \
-m llama_stack.distribution.server.server"
# Add yaml config if provided, otherwise use default
if [ -n "$yaml_config" ]; then
cmd="$cmd -v $yaml_config:/app/run.yaml --config /app/run.yaml"
else
cmd="$cmd --config /app/run.yaml"
fi
# Add any other args
cmd="$cmd $other_args"
# Execute the command
eval $cmd
echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}"
echo -e "Please refer to the documentation for more information: https://llama-stack.readthedocs.io/en/latest/distributions/building_distro.html#llama-stack-build"
exit 1
fi

View file

@ -23,11 +23,8 @@ from llama_stack.distribution.utils.image_types import LlamaStackImageType
def formulate_run_args(image_type, image_name, config, template_name) -> list:
env_name = ""
if image_type == LlamaStackImageType.CONTAINER.value:
env_name = (
f"distribution-{template_name}" if template_name else (config.container_image if config else image_name)
)
elif image_type == LlamaStackImageType.CONDA.value:
if image_type == LlamaStackImageType.CONDA.value:
current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
env_name = image_name or current_conda_env
if not env_name:

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
from collections.abc import Collection, Iterator, Sequence, Set
from logging import getLogger
from pathlib import Path
@ -14,7 +13,8 @@ from typing import (
)
import tiktoken
from tiktoken.load import load_tiktoken_bpe
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
logger = getLogger(__name__)
@ -48,19 +48,20 @@ class Tokenizer:
global _INSTANCE
if _INSTANCE is None:
_INSTANCE = Tokenizer(os.path.join(os.path.dirname(__file__), "tokenizer.model"))
_INSTANCE = Tokenizer(Path(__file__).parent / "tokenizer.model")
return _INSTANCE
def __init__(self, model_path: str):
def __init__(self, model_path: Path):
"""
Initializes the Tokenizer with a Tiktoken model.
Args:
model_path (str): The path to the Tiktoken model file.
model_path (Path): The path to the Tiktoken model file.
"""
assert os.path.isfile(model_path), model_path
if not model_path.exists():
raise FileNotFoundError(f"Tokenizer model file not found: {model_path}")
mergeable_ranks = load_tiktoken_bpe(model_path)
mergeable_ranks = load_bpe_file(model_path)
num_base_tokens = len(mergeable_ranks)
special_tokens = [
"<|begin_of_text|>",
@ -83,7 +84,7 @@ class Tokenizer:
self.special_tokens = {token: num_base_tokens + i for i, token in enumerate(special_tokens)}
self.model = tiktoken.Encoding(
name=Path(model_path).name,
name=model_path.name,
pat_str=self.pat_str,
mergeable_ranks=mergeable_ranks,
special_tokens=self.special_tokens,

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
from collections.abc import Collection, Iterator, Sequence, Set
from logging import getLogger
from pathlib import Path
@ -14,7 +13,8 @@ from typing import (
)
import tiktoken
from tiktoken.load import load_tiktoken_bpe
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
logger = getLogger(__name__)
@ -118,19 +118,20 @@ class Tokenizer:
global _INSTANCE
if _INSTANCE is None:
_INSTANCE = Tokenizer(os.path.join(os.path.dirname(__file__), "tokenizer.model"))
_INSTANCE = Tokenizer(Path(__file__).parent / "tokenizer.model")
return _INSTANCE
def __init__(self, model_path: str):
def __init__(self, model_path: Path):
"""
Initializes the Tokenizer with a Tiktoken model.
Args:
model_path (str): The path to the Tiktoken model file.
model_path (Path): The path to the Tiktoken model file.
"""
assert os.path.isfile(model_path), model_path
if not model_path.exists():
raise FileNotFoundError(f"Tokenizer model file not found: {model_path}")
mergeable_ranks = load_tiktoken_bpe(model_path)
mergeable_ranks = load_bpe_file(model_path)
num_base_tokens = len(mergeable_ranks)
special_tokens = BASIC_SPECIAL_TOKENS + LLAMA4_SPECIAL_TOKENS
@ -144,7 +145,7 @@ class Tokenizer:
self.special_tokens = {token: num_base_tokens + i for i, token in enumerate(special_tokens)}
self.model = tiktoken.Encoding(
name=Path(model_path).name,
name=model_path.name,
pat_str=self.O200K_PATTERN,
mergeable_ranks=mergeable_ranks,
special_tokens=self.special_tokens,

View file

@ -0,0 +1,40 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
from pathlib import Path
from llama_stack.log import get_logger
logger = get_logger(__name__, "tokenizer_utils")
def load_bpe_file(model_path: Path) -> dict[bytes, int]:
"""
Load BPE file directly and return mergeable ranks.
Args:
model_path (Path): Path to the BPE model file.
Returns:
dict[bytes, int]: Dictionary mapping byte sequences to their ranks.
"""
mergeable_ranks = {}
with open(model_path, encoding="utf-8") as f:
content = f.read()
for line in content.splitlines():
if not line.strip(): # Skip empty lines
continue
try:
token, rank = line.split()
mergeable_ranks[base64.b64decode(token)] = int(rank)
except Exception as e:
logger.warning(f"Failed to parse line '{line}': {e}")
continue
return mergeable_ranks
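
A short sketch, not part of the diff, of the file format load_bpe_file expects: one base64-encoded token and its integer rank per line, whitespace-separated.

import base64
from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as tmp:
    bpe_path = Path(tmp) / "tokenizer.model"
    tokens = [b"hello", b"world"]
    bpe_path.write_text(
        "\n".join(f"{base64.b64encode(t).decode()} {i}" for i, t in enumerate(tokens)),
        encoding="utf-8",
    )
    ranks = load_bpe_file(bpe_path)
    assert ranks == {b"hello": 0, b"world": 1}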

View file

@ -6,12 +6,12 @@
from typing import Any
from llama_stack.distribution.datatypes import Api
from llama_stack.distribution.datatypes import AccessRule, Api
from .config import MetaReferenceAgentsImplConfig
async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any]):
async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any], policy: list[AccessRule]):
from .agents import MetaReferenceAgentsImpl
impl = MetaReferenceAgentsImpl(
@ -21,6 +21,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
deps[Api.safety],
deps[Api.tool_runtime],
deps[Api.tool_groups],
policy,
)
await impl.initialize()
return impl

View file

@ -60,6 +60,7 @@ from llama_stack.apis.inference import (
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.datatypes import AccessRule
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import (
BuiltinTool,
@ -96,13 +97,14 @@ class ChatAgent(ShieldRunnerMixin):
vector_io_api: VectorIO,
persistence_store: KVStore,
created_at: str,
policy: list[AccessRule],
):
self.agent_id = agent_id
self.agent_config = agent_config
self.inference_api = inference_api
self.safety_api = safety_api
self.vector_io_api = vector_io_api
self.storage = AgentPersistence(agent_id, persistence_store)
self.storage = AgentPersistence(agent_id, persistence_store, policy)
self.tool_runtime_api = tool_runtime_api
self.tool_groups_api = tool_groups_api
self.created_at = created_at

View file

@ -29,6 +29,7 @@ from llama_stack.apis.agents import (
Session,
Turn,
)
from llama_stack.apis.agents.openai_responses import OpenAIResponseText
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.inference import (
Inference,
@ -40,6 +41,7 @@ from llama_stack.apis.inference import (
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.datatypes import AccessRule
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
@ -61,6 +63,7 @@ class MetaReferenceAgentsImpl(Agents):
safety_api: Safety,
tool_runtime_api: ToolRuntime,
tool_groups_api: ToolGroups,
policy: list[AccessRule],
):
self.config = config
self.inference_api = inference_api
@ -71,6 +74,7 @@ class MetaReferenceAgentsImpl(Agents):
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl: OpenAIResponsesImpl | None = None
self.policy = policy
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
@ -129,6 +133,7 @@ class MetaReferenceAgentsImpl(Agents):
self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
),
created_at=agent_info.created_at,
policy=self.policy,
)
async def create_agent_session(
@ -324,10 +329,12 @@ class MetaReferenceAgentsImpl(Agents):
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
input, model, instructions, previous_response_id, store, stream, temperature, tools
input, model, instructions, previous_response_id, store, stream, temperature, text, tools, max_infer_iters
)
async def list_openai_responses(

View file

@ -37,6 +37,8 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText,
OpenAIResponseTextFormat,
)
from llama_stack.apis.inference.inference import (
Inference,
@ -50,7 +52,12 @@ from llama_stack.apis.inference.inference import (
OpenAIChoice,
OpenAIDeveloperMessageParam,
OpenAIImageURL,
OpenAIJSONSchema,
OpenAIMessageParam,
OpenAIResponseFormatJSONObject,
OpenAIResponseFormatJSONSchema,
OpenAIResponseFormatParam,
OpenAIResponseFormatText,
OpenAISystemMessageParam,
OpenAIToolMessageParam,
OpenAIUserMessageParam,
@ -158,6 +165,21 @@ async def _convert_chat_choice_to_response_message(choice: OpenAIChoice) -> Open
)
async def _convert_response_text_to_chat_response_format(text: OpenAIResponseText) -> OpenAIResponseFormatParam:
"""
Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format.
"""
if not text.format or text.format["type"] == "text":
return OpenAIResponseFormatText(type="text")
if text.format["type"] == "json_object":
return OpenAIResponseFormatJSONObject()
if text.format["type"] == "json_schema":
return OpenAIResponseFormatJSONSchema(
json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"])
)
raise ValueError(f"Unsupported text format: {text.format}")
async def _get_message_type_by_role(role: str):
role_to_type = {
"user": OpenAIUserMessageParam,
@ -180,6 +202,7 @@ class ChatCompletionContext(BaseModel):
mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP]
stream: bool
temperature: float | None
response_format: OpenAIResponseFormatParam
class OpenAIResponsesImpl:
@ -258,6 +281,18 @@ class OpenAIResponsesImpl:
"""
return await self.responses_store.list_response_input_items(response_id, after, before, include, limit, order)
def _is_function_tool_call(
self,
tool_call: OpenAIChatCompletionToolCall,
tools: list[OpenAIResponseInputTool],
) -> bool:
if not tool_call.function:
return False
for t in tools:
if t.type == "function" and t.name == tool_call.function.name:
return True
return False
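
A structural sketch of the rule with stand-in dataclasses (not the stack's own types): a tool call counts as a "function" call only when the client registered a function tool with a matching name; anything else is executed server-side by the tool loop.

from dataclasses import dataclass

@dataclass
class _Fn:
    name: str

@dataclass
class _ToolCall:
    function: _Fn | None

@dataclass
class _Tool:
    type: str
    name: str

def _is_function(tool_call: _ToolCall, tools: list[_Tool]) -> bool:
    if not tool_call.function:
        return False
    return any(t.type == "function" and t.name == tool_call.function.name for t in tools)

assert _is_function(_ToolCall(_Fn("get_weather")), [_Tool("function", "get_weather")])
assert not _is_function(_ToolCall(_Fn("web_search_call")), [_Tool("function", "get_weather")])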
async def _process_response_choices(
self,
chat_response: OpenAIChatCompletion,
@ -270,7 +305,7 @@ class OpenAIResponsesImpl:
for choice in chat_response.choices:
if choice.message.tool_calls and tools:
# Assume if the first tool is a function, all tools are functions
if tools[0].type == "function":
if self._is_function_tool_call(choice.message.tool_calls[0], tools):
for tool_call in choice.message.tool_calls:
output_messages.append(
OpenAIResponseOutputMessageFunctionToolCall(
@ -331,9 +366,12 @@ class OpenAIResponsesImpl:
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10,
):
stream = False if stream is None else stream
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
output_messages: list[OpenAIResponseOutput] = []
@ -342,6 +380,9 @@ class OpenAIResponsesImpl:
messages = await _convert_response_input_to_chat_messages(input)
await self._prepend_instructions(messages, instructions)
# Structured outputs
response_format = await _convert_response_text_to_chat_response_format(text)
# Tool setup
chat_tools, mcp_tool_to_server, mcp_list_message = (
await self._convert_response_tools_to_chat_tools(tools) if tools else (None, {}, None)
@ -356,65 +397,111 @@ class OpenAIResponsesImpl:
mcp_tool_to_server=mcp_tool_to_server,
stream=stream,
temperature=temperature,
response_format=response_format,
)
inference_result = await self.inference_api.openai_chat_completion(
model=model,
messages=messages,
tools=chat_tools,
stream=stream,
temperature=temperature,
)
# Fork to streaming vs non-streaming - let each handle ALL inference rounds
if stream:
return self._create_streaming_response(
inference_result=inference_result,
ctx=ctx,
output_messages=output_messages,
input=input,
model=model,
store=store,
text=text,
tools=tools,
max_infer_iters=max_infer_iters,
)
else:
return await self._create_non_streaming_response(
inference_result=inference_result,
ctx=ctx,
output_messages=output_messages,
input=input,
model=model,
store=store,
text=text,
tools=tools,
max_infer_iters=max_infer_iters,
)
async def _create_non_streaming_response(
self,
inference_result: Any,
ctx: ChatCompletionContext,
output_messages: list[OpenAIResponseOutput],
input: str | list[OpenAIResponseInput],
model: str,
store: bool | None,
text: OpenAIResponseText,
tools: list[OpenAIResponseInputTool] | None,
max_infer_iters: int,
) -> OpenAIResponseObject:
chat_response = OpenAIChatCompletion(**inference_result.model_dump())
n_iter = 0
messages = ctx.messages.copy()
# Process response choices (tool execution and message creation)
output_messages.extend(
await self._process_response_choices(
chat_response=chat_response,
ctx=ctx,
tools=tools,
while True:
# Do inference (including the first one)
inference_result = await self.inference_api.openai_chat_completion(
model=ctx.model,
messages=messages,
tools=ctx.tools,
stream=False,
temperature=ctx.temperature,
response_format=ctx.response_format,
)
)
completion = OpenAIChatCompletion(**inference_result.model_dump())
# Separate function vs non-function tool calls
function_tool_calls = []
non_function_tool_calls = []
for choice in completion.choices:
if choice.message.tool_calls and tools:
for tool_call in choice.message.tool_calls:
if self._is_function_tool_call(tool_call, tools):
function_tool_calls.append(tool_call)
else:
non_function_tool_calls.append(tool_call)
# Process response choices based on tool call types
if function_tool_calls:
# For function tool calls, use existing logic and break out of the loop
current_output_messages = await self._process_response_choices(
chat_response=completion,
ctx=ctx,
tools=tools,
)
output_messages.extend(current_output_messages)
break
elif non_function_tool_calls:
# For non-function tool calls, execute them and continue loop
for choice in completion.choices:
tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx)
output_messages.extend(tool_outputs)
# Add assistant message and tool responses to messages for next iteration
messages.append(choice.message)
messages.extend(tool_response_messages)
n_iter += 1
if n_iter >= max_infer_iters:
break
# Continue with next iteration of the loop
continue
else:
# No tool calls - convert response to message and we're done
for choice in completion.choices:
output_messages.append(await _convert_chat_choice_to_response_message(choice))
break
response = OpenAIResponseObject(
created_at=chat_response.created,
created_at=completion.created,
id=f"resp-{uuid.uuid4()}",
model=model,
object="response",
status="completed",
output=output_messages,
text=text,
)
logger.debug(f"OpenAI Responses response: {response}")
@ -429,13 +516,14 @@ class OpenAIResponsesImpl:
async def _create_streaming_response(
self,
inference_result: Any,
ctx: ChatCompletionContext,
output_messages: list[OpenAIResponseOutput],
input: str | list[OpenAIResponseInput],
model: str,
store: bool | None,
text: OpenAIResponseText,
tools: list[OpenAIResponseInputTool] | None,
max_infer_iters: int | None,
) -> AsyncIterator[OpenAIResponseObjectStream]:
# Create initial response and emit response.created immediately
response_id = f"resp-{uuid.uuid4()}"
@ -448,92 +536,141 @@ class OpenAIResponsesImpl:
object="response",
status="in_progress",
output=output_messages.copy(),
text=text,
)
# Emit response.created immediately
yield OpenAIResponseObjectStreamResponseCreated(response=initial_response)
# For streaming, inference_result is an async iterator of chunks
# Stream chunks and emit delta events as they arrive
chat_response_id = ""
chat_response_content = []
chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
chunk_created = 0
chunk_model = ""
chunk_finish_reason = ""
sequence_number = 0
# Implement tool execution loop for streaming - handle ALL inference rounds including the first
n_iter = 0
messages = ctx.messages.copy()
# Create a placeholder message item for delta events
message_item_id = f"msg_{uuid.uuid4()}"
async for chunk in inference_result:
chat_response_id = chunk.id
chunk_created = chunk.created
chunk_model = chunk.model
for chunk_choice in chunk.choices:
# Emit incremental text content as delta events
if chunk_choice.delta.content:
sequence_number += 1
yield OpenAIResponseObjectStreamResponseOutputTextDelta(
content_index=0,
delta=chunk_choice.delta.content,
item_id=message_item_id,
output_index=0,
sequence_number=sequence_number,
)
# Collect content for final response
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
# Aggregate tool call arguments across chunks, using their index as the aggregation key
if chunk_choice.delta.tool_calls:
for tool_call in chunk_choice.delta.tool_calls:
response_tool_call = chat_response_tool_calls.get(tool_call.index, None)
if response_tool_call:
# Don't attempt to concatenate arguments if we don't have any new arguments
if tool_call.function.arguments:
# Guard against an initial None argument before we concatenate
response_tool_call.function.arguments = (
response_tool_call.function.arguments or ""
) + tool_call.function.arguments
else:
tool_call_dict: dict[str, Any] = tool_call.model_dump()
tool_call_dict.pop("type", None)
response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict)
chat_response_tool_calls[tool_call.index] = response_tool_call
# Convert collected chunks to complete response
if chat_response_tool_calls:
tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
else:
tool_calls = None
assistant_message = OpenAIAssistantMessageParam(
content="".join(chat_response_content),
tool_calls=tool_calls,
)
chat_response_obj = OpenAIChatCompletion(
id=chat_response_id,
choices=[
OpenAIChoice(
message=assistant_message,
finish_reason=chunk_finish_reason,
index=0,
)
],
created=chunk_created,
model=chunk_model,
)
# Process response choices (tool execution and message creation)
output_messages.extend(
await self._process_response_choices(
chat_response=chat_response_obj,
ctx=ctx,
tools=tools,
while True:
current_inference_result = await self.inference_api.openai_chat_completion(
model=ctx.model,
messages=messages,
tools=ctx.tools,
stream=True,
temperature=ctx.temperature,
response_format=ctx.response_format,
)
)
# Process streaming chunks and build complete response
chat_response_id = ""
chat_response_content = []
chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
chunk_created = 0
chunk_model = ""
chunk_finish_reason = ""
sequence_number = 0
# Create a placeholder message item for delta events
message_item_id = f"msg_{uuid.uuid4()}"
async for chunk in current_inference_result:
chat_response_id = chunk.id
chunk_created = chunk.created
chunk_model = chunk.model
for chunk_choice in chunk.choices:
# Emit incremental text content as delta events
if chunk_choice.delta.content:
sequence_number += 1
yield OpenAIResponseObjectStreamResponseOutputTextDelta(
content_index=0,
delta=chunk_choice.delta.content,
item_id=message_item_id,
output_index=0,
sequence_number=sequence_number,
)
# Collect content for final response
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
# Aggregate tool call arguments across chunks
if chunk_choice.delta.tool_calls:
for tool_call in chunk_choice.delta.tool_calls:
response_tool_call = chat_response_tool_calls.get(tool_call.index, None)
if response_tool_call:
# Don't attempt to concatenate arguments if we don't have any new arguments
if tool_call.function.arguments:
# Guard against an initial None argument before we concatenate
response_tool_call.function.arguments = (
response_tool_call.function.arguments or ""
) + tool_call.function.arguments
else:
tool_call_dict: dict[str, Any] = tool_call.model_dump()
tool_call_dict.pop("type", None)
response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict)
chat_response_tool_calls[tool_call.index] = response_tool_call
# Convert collected chunks to complete response
if chat_response_tool_calls:
tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
else:
tool_calls = None
assistant_message = OpenAIAssistantMessageParam(
content="".join(chat_response_content),
tool_calls=tool_calls,
)
current_response = OpenAIChatCompletion(
id=chat_response_id,
choices=[
OpenAIChoice(
message=assistant_message,
finish_reason=chunk_finish_reason,
index=0,
)
],
created=chunk_created,
model=chunk_model,
)
# Separate function vs non-function tool calls
function_tool_calls = []
non_function_tool_calls = []
for choice in current_response.choices:
if choice.message.tool_calls and tools:
for tool_call in choice.message.tool_calls:
if self._is_function_tool_call(tool_call, tools):
function_tool_calls.append(tool_call)
else:
non_function_tool_calls.append(tool_call)
# Process response choices based on tool call types
if function_tool_calls:
# For function tool calls, use existing logic and break
current_output_messages = await self._process_response_choices(
chat_response=current_response,
ctx=ctx,
tools=tools,
)
output_messages.extend(current_output_messages)
break
elif non_function_tool_calls:
# For non-function tool calls, execute them and continue loop
for choice in current_response.choices:
tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx)
output_messages.extend(tool_outputs)
# Add assistant message and tool responses to messages for next iteration
messages.append(choice.message)
messages.extend(tool_response_messages)
n_iter += 1
if n_iter >= (max_infer_iters or 10):
break
# Continue with next iteration of the loop
continue
else:
# No tool calls - convert response to message and we're done
for choice in current_response.choices:
output_messages.append(await _convert_chat_choice_to_response_message(choice))
break
# Create final response
final_response = OpenAIResponseObject(
@ -542,6 +679,7 @@ class OpenAIResponsesImpl:
model=model,
object="response",
status="completed",
text=text,
output=output_messages,
)
@ -646,6 +784,30 @@ class OpenAIResponsesImpl:
raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}")
return chat_tools, mcp_tool_to_server, mcp_list_message
async def _execute_tool_calls_only(
self,
choice: OpenAIChoice,
ctx: ChatCompletionContext,
) -> tuple[list[OpenAIResponseOutput], list[OpenAIMessageParam]]:
"""Execute tool calls and return output messages and tool response messages for next inference."""
output_messages: list[OpenAIResponseOutput] = []
tool_response_messages: list[OpenAIMessageParam] = []
if not isinstance(choice.message, OpenAIAssistantMessageParam):
return output_messages, tool_response_messages
if not choice.message.tool_calls:
return output_messages, tool_response_messages
for tool_call in choice.message.tool_calls:
tool_call_log, further_input = await self._execute_tool_call(tool_call, ctx)
if tool_call_log:
output_messages.append(tool_call_log)
if further_input:
tool_response_messages.append(further_input)
return output_messages, tool_response_messages
async def _execute_tool_and_return_final_output(
self,
choice: OpenAIChoice,
@ -772,5 +934,8 @@ class OpenAIResponsesImpl:
else:
raise ValueError(f"Unknown result content type: {type(result.content)}")
input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id)
else:
text = str(error_exc)
input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
return message, input_message

View file

@ -10,9 +10,10 @@ import uuid
from datetime import datetime, timezone
from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn
from llama_stack.distribution.access_control import check_access
from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.distribution.request_headers import get_auth_attributes
from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.distribution.access_control.datatypes import AccessRule
from llama_stack.distribution.datatypes import User
from llama_stack.distribution.request_headers import get_authenticated_user
from llama_stack.providers.utils.kvstore import KVStore
log = logging.getLogger(__name__)
@ -22,7 +23,9 @@ class AgentSessionInfo(Session):
# TODO: is this used anywhere?
vector_db_id: str | None = None
started_at: datetime
access_attributes: AccessAttributes | None = None
owner: User | None = None
identifier: str | None = None
type: str = "session"
class AgentInfo(AgentConfig):
@ -30,24 +33,27 @@ class AgentInfo(AgentConfig):
class AgentPersistence:
def __init__(self, agent_id: str, kvstore: KVStore):
def __init__(self, agent_id: str, kvstore: KVStore, policy: list[AccessRule]):
self.agent_id = agent_id
self.kvstore = kvstore
self.policy = policy
async def create_session(self, name: str) -> str:
session_id = str(uuid.uuid4())
# Get current user's auth attributes for new sessions
auth_attributes = get_auth_attributes()
access_attributes = AccessAttributes(**auth_attributes) if auth_attributes else None
user = get_authenticated_user()
session_info = AgentSessionInfo(
session_id=session_id,
session_name=name,
started_at=datetime.now(timezone.utc),
access_attributes=access_attributes,
owner=user,
turns=[],
identifier=name, # should this be qualified in any way?
)
if not is_action_allowed(self.policy, "create", session_info, user):
raise AccessDeniedError()
await self.kvstore.set(
key=f"session:{self.agent_id}:{session_id}",
@ -73,10 +79,10 @@ class AgentPersistence:
def _check_session_access(self, session_info: AgentSessionInfo) -> bool:
"""Check if current user has access to the session."""
# Handle backward compatibility for old sessions without access control
if not hasattr(session_info, "access_attributes"):
if not hasattr(session_info, "access_attributes") and not hasattr(session_info, "owner"):
return True
return check_access(session_info.session_id, session_info.access_attributes, get_auth_attributes())
return is_action_allowed(self.policy, "read", session_info, get_authenticated_user())
async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None:
"""Get session info if the user has access to it. For internal use by sub-session methods."""

View file

@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.distribution.datatypes import Api
from .config import LocalfsFilesImplConfig
from .files import LocalfsFilesImpl
__all__ = ["LocalfsFilesImpl", "LocalfsFilesImplConfig"]
async def get_provider_impl(config: LocalfsFilesImplConfig, deps: dict[Api, Any]):
impl = LocalfsFilesImpl(config)
await impl.initialize()
return impl

View file

@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
class LocalfsFilesImplConfig(BaseModel):
storage_dir: str = Field(
description="Directory to store uploaded files",
)
metadata_store: SqlStoreConfig = Field(
description="SQL store configuration for file metadata",
)
ttl_secs: int = 365 * 24 * 60 * 60 # 1 year
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return {
"storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}",
"metadata_store": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="files_metadata.db",
),
}
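
As a sketch, rendering this sample config for a hypothetical distro directory yields the env-substitution strings that the run.yaml templates later in this diff embed verbatim:

cfg = LocalfsFilesImplConfig.sample_run_config(__distro_dir__="~/.llama/distributions/demo")
# cfg["storage_dir"] == "${env.FILES_STORAGE_DIR:~/.llama/distributions/demo/files}"
# cfg["metadata_store"] points at files_metadata.db under the same directory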

View file

@ -0,0 +1,214 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import time
import uuid
from pathlib import Path
from typing import Annotated
from fastapi import File, Form, Response, UploadFile
from llama_stack.apis.common.responses import Order
from llama_stack.apis.files import (
Files,
ListOpenAIFileResponse,
OpenAIFileDeleteResponse,
OpenAIFileObject,
OpenAIFilePurpose,
)
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStore, sqlstore_impl
from .config import LocalfsFilesImplConfig
class LocalfsFilesImpl(Files):
def __init__(self, config: LocalfsFilesImplConfig) -> None:
self.config = config
self.sql_store: SqlStore | None = None
async def initialize(self) -> None:
"""Initialize the files provider by setting up storage directory and metadata database."""
# Create storage directory if it doesn't exist
storage_path = Path(self.config.storage_dir)
storage_path.mkdir(parents=True, exist_ok=True)
# Initialize SQL store for metadata
self.sql_store = sqlstore_impl(self.config.metadata_store)
await self.sql_store.create_table(
"openai_files",
{
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
"filename": ColumnType.STRING,
"purpose": ColumnType.STRING,
"bytes": ColumnType.INTEGER,
"created_at": ColumnType.INTEGER,
"expires_at": ColumnType.INTEGER,
"file_path": ColumnType.STRING, # Path to actual file on disk
},
)
def _generate_file_id(self) -> str:
"""Generate a unique file ID for OpenAI API."""
return f"file-{uuid.uuid4().hex}"
def _get_file_path(self, file_id: str) -> Path:
"""Get the filesystem path for a file ID."""
return Path(self.config.storage_dir) / file_id
# OpenAI Files API Implementation
async def openai_upload_file(
self,
file: Annotated[UploadFile, File()],
purpose: Annotated[OpenAIFilePurpose, Form()],
) -> OpenAIFileObject:
"""Upload a file that can be used across various endpoints."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
file_id = self._generate_file_id()
file_path = self._get_file_path(file_id)
content = await file.read()
file_size = len(content)
with open(file_path, "wb") as f:
f.write(content)
created_at = int(time.time())
expires_at = created_at + self.config.ttl_secs
await self.sql_store.insert(
"openai_files",
{
"id": file_id,
"filename": file.filename or "uploaded_file",
"purpose": purpose.value,
"bytes": file_size,
"created_at": created_at,
"expires_at": expires_at,
"file_path": file_path.as_posix(),
},
)
return OpenAIFileObject(
id=file_id,
filename=file.filename or "uploaded_file",
purpose=purpose,
bytes=file_size,
created_at=created_at,
expires_at=expires_at,
)
async def openai_list_files(
self,
after: str | None = None,
limit: int | None = 10000,
order: Order | None = Order.desc,
purpose: OpenAIFilePurpose | None = None,
) -> ListOpenAIFileResponse:
"""Returns a list of files that belong to the user's organization."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
# TODO: Implement 'after' pagination properly
if after:
raise NotImplementedError("After pagination not yet implemented")
where = None
if purpose:
where = {"purpose": purpose.value}
rows = await self.sql_store.fetch_all(
"openai_files",
where=where,
order_by=[("created_at", order.value if order else Order.desc.value)],
limit=limit,
)
files = [
OpenAIFileObject(
id=row["id"],
filename=row["filename"],
purpose=OpenAIFilePurpose(row["purpose"]),
bytes=row["bytes"],
created_at=row["created_at"],
expires_at=row["expires_at"],
)
for row in rows
]
return ListOpenAIFileResponse(
data=files,
has_more=False, # TODO: Implement proper pagination
first_id=files[0].id if files else "",
last_id=files[-1].id if files else "",
)
async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
"""Returns information about a specific file."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
if not row:
raise ValueError(f"File with id {file_id} not found")
return OpenAIFileObject(
id=row["id"],
filename=row["filename"],
purpose=OpenAIFilePurpose(row["purpose"]),
bytes=row["bytes"],
created_at=row["created_at"],
expires_at=row["expires_at"],
)
async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
"""Delete a file."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
if not row:
raise ValueError(f"File with id {file_id} not found")
# Delete physical file
file_path = Path(row["file_path"])
if file_path.exists():
file_path.unlink()
# Delete metadata from database
await self.sql_store.delete("openai_files", where={"id": file_id})
return OpenAIFileDeleteResponse(
id=file_id,
deleted=True,
)
async def openai_retrieve_file_content(self, file_id: str) -> Response:
"""Returns the contents of the specified file."""
if not self.sql_store:
raise RuntimeError("Files provider not initialized")
# Get file metadata
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
if not row:
raise ValueError(f"File with id {file_id} not found")
# Read file content
file_path = Path(row["file_path"])
if not file_path.exists():
raise ValueError(f"File content not found on disk: {file_path}")
with open(file_path, "rb") as f:
content = f.read()
# Return as binary response with appropriate content type
return Response(
content=content,
media_type="application/octet-stream",
headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'},
)
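
A hedged end-to-end sketch of driving the provider directly, bypassing the HTTP layer. The purpose member, the paths, and the sqlite config fields are assumptions for illustration, not confirmed by this diff.

import asyncio
import io

from fastapi import UploadFile

from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

async def _files_demo() -> None:
    config = LocalfsFilesImplConfig(
        storage_dir="/tmp/llama-files",  # hypothetical location
        metadata_store=SqliteSqlStoreConfig(db_path="/tmp/llama-files/files_metadata.db"),
    )
    impl = LocalfsFilesImpl(config)
    await impl.initialize()
    upload = UploadFile(file=io.BytesIO(b"hello"), filename="notes.txt")
    created = await impl.openai_upload_file(file=upload, purpose=OpenAIFilePurpose.ASSISTANTS)  # assumed member
    fetched = await impl.openai_retrieve_file(created.id)
    assert fetched.bytes == 5

asyncio.run(_files_demo())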

View file

@ -30,7 +30,7 @@ class TelemetryConfig(BaseModel):
)
service_name: str = Field(
# service name is always the same, use zero-width space to avoid clutter
default="",
default="\u200b",
description="The service name to use for telemetry",
)
sinks: list[TelemetrySink] = Field(
@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel):
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
return {
"service_name": "${env.OTEL_SERVICE_NAME:}",
"service_name": "${env.OTEL_SERVICE_NAME:\u200b}",
"sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
"sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
}
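
A sketch of the ${env.VAR:default} substitution these values rely on; the resolver below is illustrative only, not the stack's actual implementation.

import os
import re

def _resolve(value: str) -> str:
    # Replace ${env.NAME:default} with the env var's value, or the default.
    return re.sub(
        r"\$\{env\.([A-Za-z0-9_]+):?([^}]*)\}",
        lambda m: os.environ.get(m.group(1), m.group(2)),
        value,
    )

os.environ.pop("OTEL_SERVICE_NAME", None)
assert _resolve("${env.OTEL_SERVICE_NAME:\u200b}") == "\u200b"  # unset -> zero-width space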

View file

@ -4,8 +4,22 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.providers.datatypes import ProviderSpec
from llama_stack.providers.datatypes import (
Api,
InlineProviderSpec,
ProviderSpec,
)
from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
def available_providers() -> list[ProviderSpec]:
return []
return [
InlineProviderSpec(
api=Api.files,
provider_type="inline::localfs",
# TODO: make this dynamic according to the sql store type
pip_packages=sql_store_pip_packages,
module="llama_stack.providers.inline.files.localfs",
config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
),
]

View file

@ -15,7 +15,6 @@ from llama_stack.providers.datatypes import (
META_REFERENCE_DEPS = [
"accelerate",
"blobfile",
"fairscale",
"torch",
"torchvision",

View file

@ -20,7 +20,6 @@ def available_providers() -> list[ProviderSpec]:
api=Api.tool_runtime,
provider_type="inline::rag-runtime",
pip_packages=[
"blobfile",
"chardet",
"pypdf",
"tqdm",

View file

@ -255,7 +255,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
params = {
"model": request.model,
**input_dict,
"stream": request.stream,
"stream": bool(request.stream),
**self._build_options(request.sampling_params, request.response_format, request.logprobs),
}
logger.debug(f"params to fireworks: {params}")

View file

@ -12,7 +12,7 @@ from llama_stack.providers.utils.inference.model_registry import (
build_model_entry,
)
model_entries = [
MODEL_ENTRIES = [
build_hf_repo_model_entry(
"llama3.1:8b-instruct-fp16",
CoreModelId.llama3_1_8b_instruct.value,

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
import uuid
from collections.abc import AsyncGenerator, AsyncIterator
from typing import Any
@ -77,7 +78,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
request_has_media,
)
from .models import model_entries
from .models import MODEL_ENTRIES
logger = get_logger(name=__name__, category="inference")
@ -87,7 +88,7 @@ class OllamaInferenceAdapter(
ModelsProtocolPrivate,
):
def __init__(self, url: str) -> None:
self.register_helper = ModelRegistryHelper(model_entries)
self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
self.url = url
@property
@ -480,7 +481,25 @@ class OllamaInferenceAdapter(
top_p=top_p,
user=user,
)
return await self.openai_client.chat.completions.create(**params) # type: ignore
response = await self.openai_client.chat.completions.create(**params)
return await self._adjust_ollama_chat_completion_response_ids(response)
async def _adjust_ollama_chat_completion_response_ids(
self,
response: OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk],
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
id = f"chatcmpl-{uuid.uuid4()}"
if isinstance(response, AsyncIterator):
async def stream_with_chunk_ids() -> AsyncIterator[OpenAIChatCompletionChunk]:
async for chunk in response:
chunk.id = id
yield chunk
return stream_with_chunk_ids()
else:
response.id = id
return response
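
A toy sketch of the wrapping pattern above, with stand-in types: the adapter generates one chatcmpl-... id and stamps it onto the non-streaming response or onto every chunk of a streamed one, so all chunks of a completion share a single id.

import asyncio
import uuid
from collections.abc import AsyncIterator

class _Chunk:
    id: str = ""

async def _chunks() -> AsyncIterator[_Chunk]:
    for _ in range(3):
        yield _Chunk()

async def _stamp(stream: AsyncIterator[_Chunk]) -> AsyncIterator[_Chunk]:
    stamped = f"chatcmpl-{uuid.uuid4()}"
    async for chunk in stream:
        chunk.id = stamped
        yield chunk

async def _demo() -> None:
    ids = {c.id async for c in _stamp(_chunks())}
    assert len(ids) == 1  # all chunks share one id

asyncio.run(_demo())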
async def batch_completion(
self,

View file

@ -72,15 +72,15 @@ class PostgresKVStoreConfig(CommonConfig):
table_name: str = "llamastack_kvstore"
@classmethod
def sample_run_config(cls, table_name: str = "llamastack_kvstore"):
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": "postgres",
"namespace": None,
"host": "${env.POSTGRES_HOST:localhost}",
"port": "${env.POSTGRES_PORT:5432}",
"db": "${env.POSTGRES_DB}",
"user": "${env.POSTGRES_USER}",
"password": "${env.POSTGRES_PASSWORD}",
"db": "${env.POSTGRES_DB:llamastack}",
"user": "${env.POSTGRES_USER:llamastack}",
"password": "${env.POSTGRES_PASSWORD:llamastack}",
"table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}",
}

View file

@ -16,6 +16,8 @@ from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
from .api import SqlStore
sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]
class SqlStoreType(Enum):
sqlite = "sqlite"
@ -72,6 +74,17 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
def pip_packages(self) -> list[str]:
return super().pip_packages + ["asyncpg"]
@classmethod
def sample_run_config(cls, **kwargs):
return cls(
type="postgres",
host="${env.POSTGRES_HOST:localhost}",
port="${env.POSTGRES_PORT:5432}",
db="${env.POSTGRES_DB:llamastack}",
user="${env.POSTGRES_USER:llamastack}",
password="${env.POSTGRES_PASSWORD:llamastack}",
)
SqlStoreConfig = Annotated[
SqliteSqlStoreConfig | PostgresSqlStoreConfig,

View file

@ -42,7 +42,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db
eval:

View file

@ -82,7 +82,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db
tool_runtime:

View file

@ -45,7 +45,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
eval:

View file

@ -44,7 +44,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
eval:

View file

@ -24,6 +24,8 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
files:
- inline::localfs
tool_runtime:
- remote::brave-search
- remote::tavily-search

View file

@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -36,6 +37,7 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"files": ["inline::localfs"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
@ -62,6 +64,11 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
files_provider = Provider(
provider_id="meta-reference-files",
provider_type="inline::localfs",
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
available_models = {
"fireworks": MODEL_ENTRIES,
@ -104,6 +111,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": [inference_provider, embedding_provider],
"vector_io": [vector_io_provider],
"files": [files_provider],
},
default_models=default_models + [embedding_model],
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
@ -116,6 +124,7 @@ def get_distribution_template() -> DistributionTemplate:
embedding_provider,
],
"vector_io": [vector_io_provider],
"files": [files_provider],
"safety": [
Provider(
provider_id="llama-guard",

View file

@ -4,6 +4,7 @@ apis:
- agents
- datasetio
- eval
- files
- inference
- safety
- scoring
@ -53,7 +54,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
eval:
@ -90,6 +91,14 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search

View file

@ -4,6 +4,7 @@ apis:
- agents
- datasetio
- eval
- files
- inference
- safety
- scoring
@ -48,7 +49,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
eval:
@ -85,6 +86,14 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db
eval:
@ -112,7 +112,7 @@ models:
provider_model_id: groq/llama3-8b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
model_id: groq/meta-llama/Llama-3.1-8B-Instruct
provider_id: groq
provider_model_id: groq/llama3-8b-8192
model_type: llm
@ -127,7 +127,7 @@ models:
provider_model_id: groq/llama3-70b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct
model_id: groq/meta-llama/Llama-3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama3-70b-8192
model_type: llm
@ -137,7 +137,7 @@ models:
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
model_id: groq/meta-llama/Llama-3.3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
@ -147,7 +147,7 @@ models:
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
model_id: groq/meta-llama/Llama-3.2-3B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
@ -157,7 +157,7 @@ models:
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
@ -167,7 +167,7 @@ models:
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
@ -177,7 +177,7 @@ models:
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
@ -187,7 +187,7 @@ models:
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
eval:

View file

@ -57,7 +57,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db
eval:

View file

@ -63,7 +63,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
eval:

View file

@ -47,7 +47,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
eval:

View file

@ -45,7 +45,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
eval:

View file

@ -71,7 +71,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
eval:

View file

@ -3,8 +3,8 @@ distribution_spec:
description: Quick start template for running Llama Stack with several popular providers
providers:
inference:
- remote::fireworks
- remote::vllm
- inline::sentence-transformers
vector_io:
- remote::chromadb
safety:

View file

@ -5,64 +5,36 @@
# the root directory of this source tree.
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
from llama_stack.providers.remote.inference.fireworks.models import (
MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
)
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
get_model_registry,
)
def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
# in this template, we allow each API key to be optional
providers = [
(
"fireworks",
FIREWORKS_MODEL_ENTRIES,
FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
),
]
inference_providers = []
available_models = {}
for provider_id, model_entries, config in providers:
inference_providers.append(
Provider(
provider_id=provider_id,
provider_type=f"remote::{provider_id}",
config=config,
)
)
available_models[provider_id] = model_entries
inference_providers.append(
def get_distribution_template() -> DistributionTemplate:
inference_providers = [
Provider(
provider_id="vllm-inference",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:http://localhost:8000/v1}",
),
)
)
return inference_providers, available_models
def get_distribution_template() -> DistributionTemplate:
inference_providers, available_models = get_inference_providers()
),
]
providers = {
"inference": ([p.provider_type for p in inference_providers]),
"inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
"vector_io": ["remote::chromadb"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
@ -94,22 +66,26 @@ def get_distribution_template() -> DistributionTemplate:
),
]
default_models = get_model_registry(available_models)
default_models.append(
default_models = [
ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="vllm-inference",
)
]
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
postgres_config = {
"type": "postgres",
"host": "${env.POSTGRES_HOST:localhost}",
"port": "${env.POSTGRES_PORT:5432}",
"db": "${env.POSTGRES_DB:llamastack}",
"user": "${env.POSTGRES_USER:llamastack}",
"password": "${env.POSTGRES_PASSWORD:llamastack}",
}
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id=embedding_provider.provider_id,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
postgres_config = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -117,11 +93,11 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None,
template_path=None,
providers=providers,
available_models_by_provider=available_models,
available_models_by_provider={},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": inference_providers,
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
"agents": [
Provider(
@ -139,16 +115,17 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::meta-reference",
config=dict(
service_name="${env.OTEL_SERVICE_NAME:}",
sinks="${env.TELEMETRY_SINKS:console}",
sinks="${env.TELEMETRY_SINKS:console,otel_trace}",
otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}",
),
)
],
},
default_models=default_models,
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
metadata_store=PostgresKVStoreConfig.model_validate(postgres_config),
inference_store=PostgresSqlStoreConfig.model_validate(postgres_config),
metadata_store=PostgresKVStoreConfig.sample_run_config(),
inference_store=postgres_config,
),
},
run_config_env_vars={
@ -156,9 +133,5 @@ def get_distribution_template() -> DistributionTemplate:
"8321",
"Port for the Llama Stack distribution server",
),
"FIREWORKS_API_KEY": (
"",
"Fireworks API Key",
),
},
)

View file
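
Because the unified view above interleaves removed and added lines, here is the new code path assembled in one place; a sketch that mirrors the diff, not the shipped template module itself:

from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider
from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
)
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig

# The template now hardcodes a single remote vLLM provider plus an inline
# embedding provider, instead of enumerating Fireworks model entries.
inference_providers = [
    Provider(
        provider_id="vllm-inference",
        provider_type="remote::vllm",
        config=VLLMInferenceAdapterConfig.sample_run_config(
            url="${env.VLLM_URL:http://localhost:8000/v1}",
        ),
    ),
]
embedding_provider = Provider(
    provider_id="sentence-transformers",
    provider_type="inline::sentence-transformers",
    config=SentenceTransformersInferenceConfig.sample_run_config(),
)
# Models are registered explicitly: the serving model comes from
# ${env.INFERENCE_MODEL}, plus one sentence-transformers embedding model.
default_models = [
    ModelInput(model_id="${env.INFERENCE_MODEL}", provider_id="vllm-inference"),
]
embedding_model = ModelInput(
    model_id="all-MiniLM-L6-v2",
    provider_id=embedding_provider.provider_id,
    model_type=ModelType.embedding,
    metadata={"embedding_dimension": 384},
)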

@ -9,11 +9,6 @@ apis:
- vector_io
providers:
inference:
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: vllm-inference
provider_type: remote::vllm
config:
@ -21,6 +16,9 @@ providers:
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
tls_verify: ${env.VLLM_TLS_VERIFY:true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: ${env.ENABLE_CHROMADB+chromadb}
provider_type: remote::chromadb
@ -54,7 +52,8 @@ providers:
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
sinks: ${env.TELEMETRY_SINKS:console}
sinks: ${env.TELEMETRY_SINKS:console,otel_trace}
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
@ -79,7 +78,7 @@ metadata_store:
db: ${env.POSTGRES_DB:llamastack}
user: ${env.POSTGRES_USER:llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack}
table_name: llamastack_kvstore
table_name: ${env.POSTGRES_TABLE_NAME:llamastack_kvstore}
inference_store:
type: postgres
host: ${env.POSTGRES_HOST:localhost}
@ -88,127 +87,15 @@ inference_store:
user: ${env.POSTGRES_USER:llamastack}
password: ${env.POSTGRES_PASSWORD:llamastack}
models:
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-guard-3-8b
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-guard-3-11b-vision
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama4-scout-instruct-basic
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: nomic-ai/nomic-embed-text-v1.5
provider_id: fireworks
provider_model_id: nomic-ai/nomic-embed-text-v1.5
model_type: embedding
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []

View file
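
The run.yaml above now defaults the telemetry sinks to console,otel_trace and adds an OTLP trace endpoint. A small, hedged sketch of overriding those knobs from Python before launching the stack (an OTLP collector listening on :4318 is assumed to be running separately):

import os

# Values mirror the defaults in the run.yaml above; OTEL_SERVICE_NAME is an
# assumed example name, not something this diff prescribes.
os.environ.setdefault("OTEL_SERVICE_NAME", "llama-stack-demo")
os.environ.setdefault("TELEMETRY_SINKS", "console,otel_trace")
os.environ.setdefault("OTEL_TRACE_ENDPOINT", "http://localhost:4318/v1/traces")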

@ -91,7 +91,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
tool_runtime:

View file

@ -84,7 +84,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
tool_runtime:

View file

@ -58,7 +58,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db
tool_runtime:

View file

@ -5,10 +5,13 @@ distribution_spec:
inference:
- remote::openai
- remote::fireworks
- remote::together
- remote::ollama
- remote::anthropic
- remote::gemini
- remote::groq
- remote::sambanova
- remote::vllm
- inline::sentence-transformers
vector_io:
- inline::sqlite-vec
@ -37,4 +40,5 @@ distribution_spec:
image_type: conda
additional_pip_packages:
- aiosqlite
- asyncpg
- sqlalchemy[asyncio]

View file

@ -21,6 +21,15 @@ providers:
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:}
- provider_id: ollama
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:http://localhost:11434}
- provider_id: anthropic
provider_type: remote::anthropic
config:
@ -39,6 +48,13 @@ providers:
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:}
- provider_id: vllm
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
tls_verify: ${env.VLLM_TLS_VERIFY:true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
@ -79,7 +95,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db
eval:
@ -156,72 +172,72 @@ models:
provider_model_id: openai/chatgpt-4o-latest
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo-0125
model_id: openai/gpt-3.5-turbo-0125
provider_id: openai
provider_model_id: gpt-3.5-turbo-0125
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo
model_id: openai/gpt-3.5-turbo
provider_id: openai
provider_model_id: gpt-3.5-turbo
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo-instruct
model_id: openai/gpt-3.5-turbo-instruct
provider_id: openai
provider_model_id: gpt-3.5-turbo-instruct
model_type: llm
- metadata: {}
model_id: gpt-4
model_id: openai/gpt-4
provider_id: openai
provider_model_id: gpt-4
model_type: llm
- metadata: {}
model_id: gpt-4-turbo
model_id: openai/gpt-4-turbo
provider_id: openai
provider_model_id: gpt-4-turbo
model_type: llm
- metadata: {}
model_id: gpt-4o
model_id: openai/gpt-4o
provider_id: openai
provider_model_id: gpt-4o
model_type: llm
- metadata: {}
model_id: gpt-4o-2024-08-06
model_id: openai/gpt-4o-2024-08-06
provider_id: openai
provider_model_id: gpt-4o-2024-08-06
model_type: llm
- metadata: {}
model_id: gpt-4o-mini
model_id: openai/gpt-4o-mini
provider_id: openai
provider_model_id: gpt-4o-mini
model_type: llm
- metadata: {}
model_id: gpt-4o-audio-preview
model_id: openai/gpt-4o-audio-preview
provider_id: openai
provider_model_id: gpt-4o-audio-preview
model_type: llm
- metadata: {}
model_id: chatgpt-4o-latest
model_id: openai/chatgpt-4o-latest
provider_id: openai
provider_model_id: chatgpt-4o-latest
model_type: llm
- metadata: {}
model_id: o1
model_id: openai/o1
provider_id: openai
provider_model_id: o1
model_type: llm
- metadata: {}
model_id: o1-mini
model_id: openai/o1-mini
provider_id: openai
provider_model_id: o1-mini
model_type: llm
- metadata: {}
model_id: o3-mini
model_id: openai/o3-mini
provider_id: openai
provider_model_id: o3-mini
model_type: llm
- metadata: {}
model_id: o4-mini
model_id: openai/o4-mini
provider_id: openai
provider_model_id: o4-mini
model_type: llm
@ -242,14 +258,14 @@ models:
- metadata:
embedding_dimension: 1536
context_length: 8192
model_id: text-embedding-3-small
model_id: openai/text-embedding-3-small
provider_id: openai
provider_model_id: text-embedding-3-small
model_type: embedding
- metadata:
embedding_dimension: 3072
context_length: 8192
model_id: text-embedding-3-large
model_id: openai/text-embedding-3-large
provider_id: openai
provider_model_id: text-embedding-3-large
model_type: embedding
@ -259,7 +275,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
model_id: fireworks/meta-llama/Llama-3.1-8B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
@ -269,7 +285,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
model_id: fireworks/meta-llama/Llama-3.1-70B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
@ -279,7 +295,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
model_id: fireworks/meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
@ -289,7 +305,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
model_id: fireworks/meta-llama/Llama-3.2-3B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
@ -299,7 +315,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
model_id: fireworks/meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
@ -309,7 +325,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
model_id: fireworks/meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
@ -319,7 +335,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
model_id: fireworks/meta-llama/Llama-3.3-70B-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
@ -329,7 +345,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
model_id: fireworks/meta-llama/Llama-Guard-3-8B
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
@ -339,7 +355,7 @@ models:
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
model_id: fireworks/meta-llama/Llama-Guard-3-11B-Vision
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
@ -349,7 +365,7 @@ models:
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: fireworks/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
@ -359,17 +375,307 @@ models:
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: fireworks/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: nomic-ai/nomic-embed-text-v1.5
model_id: fireworks/nomic-ai/nomic-embed-text-v1.5
provider_id: fireworks
provider_model_id: nomic-ai/nomic-embed-text-v1.5
model_type: embedding
- metadata: {}
model_id: together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.1-8B-Instruct
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.1-70B-Instruct
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-3B-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-3.3-70B-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Meta-Llama-Guard-3-8B
provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-Guard-3-8B
provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-Guard-3-11B-Vision
provider_id: together
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: togethercomputer/m2-bert-80M-8k-retrieval
provider_id: together
provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval
model_type: embedding
- metadata:
embedding_dimension: 768
context_length: 32768
model_id: togethercomputer/m2-bert-80M-32k-retrieval
provider_id: together
provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval
model_type: embedding
- metadata: {}
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: together
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: together
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: together
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:8b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.1:8b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.1-8B-Instruct
provider_id: ollama
provider_model_id: llama3.1:8b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:8b
provider_id: ollama
provider_model_id: llama3.1:8b
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:70b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.1:70b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.1-70B-Instruct
provider_id: ollama
provider_model_id: llama3.1:70b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:70b
provider_id: ollama
provider_model_id: llama3.1:70b
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:405b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.1:405b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: ollama
provider_model_id: llama3.1:405b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.1:405b
provider_id: ollama
provider_model_id: llama3.1:405b
model_type: llm
- metadata: {}
model_id: ollama/llama3.2:1b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.2:1b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.2-1B-Instruct
provider_id: ollama
provider_model_id: llama3.2:1b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.2:1b
provider_id: ollama
provider_model_id: llama3.2:1b
model_type: llm
- metadata: {}
model_id: ollama/llama3.2:3b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.2:3b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.2-3B-Instruct
provider_id: ollama
provider_model_id: llama3.2:3b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.2:3b
provider_id: ollama
provider_model_id: llama3.2:3b
model_type: llm
- metadata: {}
model_id: ollama/llama3.2-vision:11b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.2-vision:11b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: ollama
provider_model_id: llama3.2-vision:11b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.2-vision:latest
provider_id: ollama
provider_model_id: llama3.2-vision:latest
model_type: llm
- metadata: {}
model_id: ollama/llama3.2-vision:90b-instruct-fp16
provider_id: ollama
provider_model_id: llama3.2-vision:90b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: ollama
provider_model_id: llama3.2-vision:90b-instruct-fp16
model_type: llm
- metadata: {}
model_id: ollama/llama3.2-vision:90b
provider_id: ollama
provider_model_id: llama3.2-vision:90b
model_type: llm
- metadata: {}
model_id: ollama/llama3.3:70b
provider_id: ollama
provider_model_id: llama3.3:70b
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-3.3-70B-Instruct
provider_id: ollama
provider_model_id: llama3.3:70b
model_type: llm
- metadata: {}
model_id: ollama/llama-guard3:8b
provider_id: ollama
provider_model_id: llama-guard3:8b
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-Guard-3-8B
provider_id: ollama
provider_model_id: llama-guard3:8b
model_type: llm
- metadata: {}
model_id: ollama/llama-guard3:1b
provider_id: ollama
provider_model_id: llama-guard3:1b
model_type: llm
- metadata: {}
model_id: ollama/meta-llama/Llama-Guard-3-1B
provider_id: ollama
provider_model_id: llama-guard3:1b
model_type: llm
- metadata:
embedding_dimension: 384
context_length: 512
model_id: ollama/all-minilm:latest
provider_id: ollama
provider_model_id: all-minilm:latest
model_type: embedding
- metadata:
embedding_dimension: 384
context_length: 512
model_id: ollama/all-minilm
provider_id: ollama
provider_model_id: all-minilm:latest
model_type: embedding
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: ollama/nomic-embed-text
provider_id: ollama
provider_model_id: nomic-embed-text
model_type: embedding
- metadata: {}
model_id: anthropic/claude-3-5-sonnet-latest
provider_id: anthropic
@ -429,7 +735,7 @@ models:
provider_model_id: groq/llama3-8b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
model_id: groq/meta-llama/Llama-3.1-8B-Instruct
provider_id: groq
provider_model_id: groq/llama3-8b-8192
model_type: llm
@ -444,7 +750,7 @@ models:
provider_model_id: groq/llama3-70b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct
model_id: groq/meta-llama/Llama-3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama3-70b-8192
model_type: llm
@ -454,7 +760,7 @@ models:
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
model_id: groq/meta-llama/Llama-3.3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
@ -464,7 +770,7 @@ models:
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
model_id: groq/meta-llama/Llama-3.2-3B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
@ -474,7 +780,7 @@ models:
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
@ -484,7 +790,7 @@ models:
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
@ -494,7 +800,7 @@ models:
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
@ -504,7 +810,7 @@ models:
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
@ -514,7 +820,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
model_id: sambanova/meta-llama/Llama-3.1-8B-Instruct
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
model_type: llm
@ -524,7 +830,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
model_id: sambanova/meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
model_type: llm
@ -534,7 +840,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
model_id: sambanova/meta-llama/Llama-3.2-1B-Instruct
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
model_type: llm
@ -544,7 +850,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
model_id: sambanova/meta-llama/Llama-3.2-3B-Instruct
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
model_type: llm
@ -554,7 +860,7 @@ models:
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
model_id: sambanova/meta-llama/Llama-3.3-70B-Instruct
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
model_type: llm
@ -564,7 +870,7 @@ models:
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
model_id: sambanova/meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
model_type: llm
@ -574,7 +880,7 @@ models:
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
model_id: sambanova/meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
model_type: llm
@ -584,7 +890,7 @@ models:
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_id: sambanova/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: sambanova
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
model_type: llm
@ -594,7 +900,7 @@ models:
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
model_id: sambanova/meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: sambanova
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
model_type: llm
@ -604,7 +910,7 @@ models:
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
model_id: sambanova/meta-llama/Llama-Guard-3-8B
provider_id: sambanova
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
model_type: llm

View file
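
With the conflict-prefixing in place, the starter distribution now exposes most model ids as <provider>/<model>. A hedged usage sketch, assuming the llama-stack-client SDK (installed separately) and the default server port (8321) listed in this template's env vars:

from llama_stack_client import LlamaStackClient  # assumed SDK

client = LlamaStackClient(base_url="http://localhost:8321")
response = client.inference.chat_completion(
    # Was "meta-llama/Llama-3.1-8B-Instruct"; now disambiguated per provider.
    model_id="fireworks/meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.completion_message.content)  # field names per the SDK at time of writing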

@ -34,6 +34,10 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
from llama_stack.providers.remote.inference.groq.models import (
MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
from llama_stack.providers.remote.inference.ollama.models import (
MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.models import (
MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
@ -42,11 +46,17 @@ from llama_stack.providers.remote.inference.sambanova.config import SambaNovaImp
from llama_stack.providers.remote.inference.sambanova.models import (
MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
from llama_stack.providers.remote.inference.together.models import (
MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.config import (
PGVectorVectorIOConfig,
)
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
@ -67,6 +77,16 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
FIREWORKS_MODEL_ENTRIES,
FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
),
(
"together",
TOGETHER_MODEL_ENTRIES,
TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
),
(
"ollama",
OLLAMA_MODEL_ENTRIES,
OllamaImplConfig.sample_run_config(),
),
(
"anthropic",
ANTHROPIC_MODEL_ENTRIES,
@ -87,6 +107,13 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
SAMBANOVA_MODEL_ENTRIES,
SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
),
(
"vllm",
[],
VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:http://localhost:8000/v1}",
),
),
]
inference_providers = []
available_models = {}
@ -169,6 +196,8 @@ def get_distribution_template() -> DistributionTemplate:
)
default_models = get_model_registry(available_models)
postgres_store = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -177,6 +206,7 @@ def get_distribution_template() -> DistributionTemplate:
template_path=None,
providers=providers,
available_models_by_provider=available_models,
additional_pip_packages=postgres_store.pip_packages,
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
@ -201,5 +231,25 @@ def get_distribution_template() -> DistributionTemplate:
"",
"OpenAI API Key",
),
"GROQ_API_KEY": (
"",
"Groq API Key",
),
"ANTHROPIC_API_KEY": (
"",
"Anthropic API Key",
),
"GEMINI_API_KEY": (
"",
"Gemini API Key",
),
"SAMBANOVA_API_KEY": (
"",
"SambaNova API Key",
),
"VLLM_URL": (
"http://localhost:8000/v1",
"VLLM URL",
),
},
)

View file

@ -8,6 +8,7 @@ from pathlib import Path
from typing import Literal
import jinja2
import rich
import yaml
from pydantic import BaseModel, Field
@ -36,13 +37,35 @@ def get_model_registry(
available_models: dict[str, list[ProviderModelEntry]],
) -> list[ModelInput]:
models = []
# check for conflicts in model ids
all_ids = set()
ids_conflict = False
for _, entries in available_models.items():
for entry in entries:
ids = [entry.provider_model_id] + entry.aliases
for model_id in ids:
if model_id in all_ids:
ids_conflict = True
rich.print(
f"[yellow]Model id {model_id} conflicts; all model ids will be prefixed with provider id[/yellow]"
)
break
all_ids.update(ids)
if ids_conflict:
break
if ids_conflict:
break
for provider_id, entries in available_models.items():
for entry in entries:
ids = [entry.provider_model_id] + entry.aliases
for model_id in ids:
identifier = f"{provider_id}/{model_id}" if ids_conflict and provider_id not in model_id else model_id
models.append(
ModelInput(
model_id=model_id,
model_id=identifier,
provider_model_id=entry.provider_model_id,
provider_id=provider_id,
model_type=entry.model_type,
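
The check above prefixes every model id with its provider once any id collides across providers. Note that "provider_id not in model_id" is a plain substring test, which is why accounts/fireworks/... and togethercomputer/... ids stay unprefixed in the starter run.yaml earlier, and presumably why distinct aliases that collapse to the same prefixed identifier yield the repeated together/meta-llama/Llama-4-Scout entries seen there. A self-contained toy reproduction of the rule, with a stand-in Entry type instead of ProviderModelEntry:

from dataclasses import dataclass, field

@dataclass
class Entry:  # stand-in for ProviderModelEntry, for illustration only
    provider_model_id: str
    aliases: list[str] = field(default_factory=list)

available = {
    "fireworks": [Entry("accounts/fireworks/models/llama-v3p1-8b-instruct",
                        ["meta-llama/Llama-3.1-8B-Instruct"])],
    "ollama": [Entry("llama3.1:8b-instruct-fp16",
                     ["meta-llama/Llama-3.1-8B-Instruct"])],  # alias collides
    "together": [Entry("togethercomputer/m2-bert-80M-8k-retrieval")],
}

# Pass 1: flag a conflict if any id is claimed more than once.
seen: set[str] = set()
ids_conflict = False
for entries in available.values():
    for entry in entries:
        for model_id in [entry.provider_model_id, *entry.aliases]:
            if model_id in seen:
                ids_conflict = True
            seen.add(model_id)

# Pass 2: prefix ids with their provider unless it already appears as a substring.
for provider_id, entries in available.items():
    for entry in entries:
        for model_id in [entry.provider_model_id, *entry.aliases]:
            if ids_conflict and provider_id not in model_id:
                print(f"{provider_id}/{model_id}")
            else:
                print(model_id)

# Prints:
#   accounts/fireworks/models/llama-v3p1-8b-instruct    (contains "fireworks" -> unprefixed)
#   fireworks/meta-llama/Llama-3.1-8B-Instruct
#   ollama/llama3.1:8b-instruct-fp16
#   ollama/meta-llama/Llama-3.1-8B-Instruct
#   togethercomputer/m2-bert-80M-8k-retrieval            (contains "together" -> unprefixed)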
@ -154,6 +177,11 @@ class DistributionTemplate(BaseModel):
available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None
# we may want to specify additional pip packages without necessarily indicating a
# specific "default" inference store (which is what typically used to dictate additional
# pip packages)
additional_pip_packages: list[str] | None = None
def build_config(self) -> BuildConfig:
additional_pip_packages: list[str] = []
for run_config in self.run_configs.values():
@ -161,6 +189,9 @@ class DistributionTemplate(BaseModel):
if run_config_.inference_store:
additional_pip_packages.extend(run_config_.inference_store.pip_packages)
if self.additional_pip_packages:
additional_pip_packages.extend(self.additional_pip_packages)
return BuildConfig(
distribution_spec=DistributionSpec(
description=self.description,

View file
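
The additional_pip_packages field introduced above lets a template contribute build-time packages beyond those implied by its run configs' inference stores; build_config() simply concatenates both lists. A toy rendering of that merge, with package values assumed from the starter build.yaml diff earlier:

# Assumed inputs: asyncpg/sqlalchemy[asyncio] via the Postgres inference
# store's pip_packages, aiosqlite via the template-level additional_pip_packages.
inference_store_packages = ["asyncpg", "sqlalchemy[asyncio]"]
template_extras = ["aiosqlite"]

additional_pip_packages: list[str] = []
additional_pip_packages.extend(inference_store_packages)
additional_pip_packages.extend(template_extras)
print(additional_pip_packages)  # ['asyncpg', 'sqlalchemy[asyncio]', 'aiosqlite']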

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
eval:

View file

@ -47,7 +47,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
eval:

View file

@ -53,7 +53,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
eval:

View file

@ -48,7 +48,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
eval:

View file

@ -1,40 +0,0 @@
version: '2'
distribution_spec:
description: Distribution for running e2e tests in CI
providers:
inference:
- remote::openai
- remote::fireworks-openai-compat
- remote::together-openai-compat
- remote::groq-openai-compat
- remote::sambanova-openai-compat
- remote::cerebras-openai-compat
- inline::sentence-transformers
vector_io:
- inline::sqlite-vec
- remote::chromadb
- remote::pgvector
safety:
- inline::llama-guard
agents:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]

View file

@ -1,731 +0,0 @@
version: '2'
image_name: verification
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:}
- provider_id: fireworks-openai-compat
provider_type: remote::fireworks-openai-compat
config:
openai_compat_api_base: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: together-openai-compat
provider_type: remote::together-openai-compat
config:
openai_compat_api_base: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:}
- provider_id: groq-openai-compat
provider_type: remote::groq-openai-compat
config:
openai_compat_api_base: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:}
- provider_id: sambanova-openai-compat
provider_type: remote::sambanova-openai-compat
config:
openai_compat_api_base: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:}
- provider_id: cerebras-openai-compat
provider_type: remote::cerebras-openai-compat
config:
openai_compat_api_base: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/sqlite_vec.db
- provider_id: ${env.ENABLE_CHROMADB+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:}
- provider_id: ${env.ENABLE_PGVECTOR+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:localhost}
port: ${env.PGVECTOR_PORT:5432}
db: ${env.PGVECTOR_DB:}
user: ${env.PGVECTOR_USER:}
password: ${env.PGVECTOR_PASSWORD:}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/trace_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/inference_store.db
models:
- metadata: {}
model_id: openai/gpt-4o
provider_id: openai
provider_model_id: openai/gpt-4o
model_type: llm
- metadata: {}
model_id: openai/gpt-4o-mini
provider_id: openai
provider_model_id: openai/gpt-4o-mini
model_type: llm
- metadata: {}
model_id: openai/chatgpt-4o-latest
provider_id: openai
provider_model_id: openai/chatgpt-4o-latest
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo-0125
provider_id: openai
provider_model_id: gpt-3.5-turbo-0125
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo
provider_id: openai
provider_model_id: gpt-3.5-turbo
model_type: llm
- metadata: {}
model_id: gpt-3.5-turbo-instruct
provider_id: openai
provider_model_id: gpt-3.5-turbo-instruct
model_type: llm
- metadata: {}
model_id: gpt-4
provider_id: openai
provider_model_id: gpt-4
model_type: llm
- metadata: {}
model_id: gpt-4-turbo
provider_id: openai
provider_model_id: gpt-4-turbo
model_type: llm
- metadata: {}
model_id: gpt-4o
provider_id: openai
provider_model_id: gpt-4o
model_type: llm
- metadata: {}
model_id: gpt-4o-2024-08-06
provider_id: openai
provider_model_id: gpt-4o-2024-08-06
model_type: llm
- metadata: {}
model_id: gpt-4o-mini
provider_id: openai
provider_model_id: gpt-4o-mini
model_type: llm
- metadata: {}
model_id: gpt-4o-audio-preview
provider_id: openai
provider_model_id: gpt-4o-audio-preview
model_type: llm
- metadata: {}
model_id: chatgpt-4o-latest
provider_id: openai
provider_model_id: chatgpt-4o-latest
model_type: llm
- metadata: {}
model_id: o1
provider_id: openai
provider_model_id: o1
model_type: llm
- metadata: {}
model_id: o1-mini
provider_id: openai
provider_model_id: o1-mini
model_type: llm
- metadata: {}
model_id: o3-mini
provider_id: openai
provider_model_id: o3-mini
model_type: llm
- metadata: {}
model_id: o4-mini
provider_id: openai
provider_model_id: o4-mini
model_type: llm
- metadata:
embedding_dimension: 1536
context_length: 8192
model_id: openai/text-embedding-3-small
provider_id: openai
provider_model_id: openai/text-embedding-3-small
model_type: embedding
- metadata:
embedding_dimension: 3072
context_length: 8192
model_id: openai/text-embedding-3-large
provider_id: openai
provider_model_id: openai/text-embedding-3-large
model_type: embedding
- metadata:
embedding_dimension: 1536
context_length: 8192
model_id: text-embedding-3-small
provider_id: openai
provider_model_id: text-embedding-3-small
model_type: embedding
- metadata:
embedding_dimension: 3072
context_length: 8192
model_id: text-embedding-3-large
provider_id: openai
provider_model_id: text-embedding-3-large
model_type: embedding
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-guard-3-8b
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama-guard-3-11b-vision
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama4-scout-instruct-basic
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
model_type: llm
- metadata: {}
model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: fireworks-openai-compat
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: nomic-ai/nomic-embed-text-v1.5
provider_id: fireworks-openai-compat
provider_model_id: nomic-ai/nomic-embed-text-v1.5
model_type: embedding
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-Guard-3-8B
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: together-openai-compat
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata:
embedding_dimension: 768
context_length: 8192
model_id: togethercomputer/m2-bert-80M-8k-retrieval
provider_id: together-openai-compat
provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval
model_type: embedding
- metadata:
embedding_dimension: 768
context_length: 32768
model_id: togethercomputer/m2-bert-80M-32k-retrieval
provider_id: together-openai-compat
provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval
model_type: embedding
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: together-openai-compat
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
model_type: llm
- metadata: {}
model_id: groq/llama3-8b-8192
provider_id: groq-openai-compat
provider_model_id: groq/llama3-8b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama3-8b-8192
model_type: llm
- metadata: {}
model_id: groq/llama-3.1-8b-instant
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.1-8b-instant
model_type: llm
- metadata: {}
model_id: groq/llama3-70b-8192
provider_id: groq-openai-compat
provider_model_id: groq/llama3-70b-8192
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama3-70b-8192
model_type: llm
- metadata: {}
model_id: groq/llama-3.3-70b-versatile
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: groq/llama-3.2-3b-preview
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-3.2-3b-preview
model_type: llm
- metadata: {}
model_id: groq/llama-4-scout-17b-16e-instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
provider_id: groq-openai-compat
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
provider_id: groq-openai-compat
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: groq-openai-compat
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.1-8B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.1-405B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.2-1B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.2-3B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-3.3-70B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
model_type: llm
- metadata: {}
model_id: sambanova/Meta-Llama-Guard-3-8B
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-8B
provider_id: sambanova-openai-compat
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {}
model_id: llama3.1-8b
provider_id: cerebras-openai-compat
provider_model_id: llama3.1-8b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: cerebras-openai-compat
provider_model_id: llama3.1-8b
model_type: llm
- metadata: {}
model_id: llama-3.3-70b
provider_id: cerebras-openai-compat
provider_model_id: llama-3.3-70b
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: cerebras-openai-compat
provider_model_id: llama-3.3-70b
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
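
Each models entry above binds a user-facing model_id to the provider_model_id the provider actually serves, which is why most models appear twice: once under the provider's native name (for example the Together Turbo IDs) and once under the canonical meta-llama alias. A minimal lookup sketch, assuming the file has been parsed with PyYAML; the helper name is illustrative:

import yaml

def resolve_provider_model(run_config_path: str, model_id: str) -> tuple[str, str]:
    # Hypothetical helper: return (provider_id, provider_model_id) for an alias.
    # First match wins; the same alias can be registered against several
    # providers (e.g. Llama-3.1-8B-Instruct appears under together, groq,
    # sambanova and cerebras above).
    with open(run_config_path) as f:
        config = yaml.safe_load(f)
    for entry in config["models"]:
        if entry["model_id"] == model_id:
            return entry["provider_id"], entry["provider_model_id"]
    raise KeyError(f"model {model_id!r} is not registered")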

View file

@@ -1,201 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
SQLiteVectorIOConfig,
)
from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES as CEREBRAS_MODEL_ENTRIES
from llama_stack.providers.remote.inference.cerebras_openai_compat.config import CerebrasCompatConfig
from llama_stack.providers.remote.inference.fireworks.models import (
MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.fireworks_openai_compat.config import FireworksCompatConfig
from llama_stack.providers.remote.inference.groq.models import (
MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.groq_openai_compat.config import GroqCompatConfig
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
from llama_stack.providers.remote.inference.openai.models import (
MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES
from llama_stack.providers.remote.inference.sambanova_openai_compat.config import SambaNovaCompatConfig
from llama_stack.providers.remote.inference.together.models import (
MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
)
from llama_stack.providers.remote.inference.together_openai_compat.config import TogetherCompatConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.config import (
PGVectorVectorIOConfig,
)
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
get_model_registry,
)


def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
# in this template, we allow each API key to be optional
providers = [
(
"openai",
OPENAI_MODEL_ENTRIES,
OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"),
),
(
"fireworks-openai-compat",
FIREWORKS_MODEL_ENTRIES,
FireworksCompatConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
),
(
"together-openai-compat",
TOGETHER_MODEL_ENTRIES,
TogetherCompatConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
),
(
"groq-openai-compat",
GROQ_MODEL_ENTRIES,
GroqCompatConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
),
(
"sambanova-openai-compat",
SAMBANOVA_MODEL_ENTRIES,
SambaNovaCompatConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
),
(
"cerebras-openai-compat",
CEREBRAS_MODEL_ENTRIES,
CerebrasCompatConfig.sample_run_config(api_key="${env.CEREBRAS_API_KEY:}"),
),
]
inference_providers = []
available_models = {}
for provider_id, model_entries, config in providers:
inference_providers.append(
Provider(
provider_id=provider_id,
provider_type=f"remote::{provider_id}",
config=config,
)
)
available_models[provider_id] = model_entries
return inference_providers, available_models


def get_distribution_template() -> DistributionTemplate:
inference_providers, available_models = get_inference_providers()
providers = {
"inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
"vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::rag-runtime",
"remote::model-context-protocol",
],
}
name = "verification"
vector_io_providers = [
Provider(
provider_id="sqlite-vec",
provider_type="inline::sqlite-vec",
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.ENABLE_CHROMADB+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"),
),
Provider(
provider_id="${env.ENABLE_PGVECTOR+pgvector}",
provider_type="remote::pgvector",
config=PGVectorVectorIOConfig.sample_run_config(
db="${env.PGVECTOR_DB:}",
user="${env.PGVECTOR_USER:}",
password="${env.PGVECTOR_PASSWORD:}",
),
),
]
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id=embedding_provider.provider_id,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
default_models = get_model_registry(available_models)
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Distribution for running e2e tests in CI",
container_image=None,
template_path=None,
providers=providers,
available_models_by_provider=available_models,
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"8321",
"Port for the Llama Stack distribution server",
),
"FIREWORKS_API_KEY": (
"",
"Fireworks API Key",
),
"OPENAI_API_KEY": (
"",
"OpenAI API Key",
),
},
)
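
The deleted template above pairs each OpenAI-compat provider with its MODEL_ENTRIES list and hands the result to get_model_registry, which produces the models: section of the generated run.yaml. A rough sketch of that flattening, assuming each entry carries a provider_model_id plus optional aliases (the real ProviderModelEntry fields may differ):

def flatten_model_registry(available_models: dict[str, list]) -> list[dict]:
    # Hedged sketch: emit one run.yaml row per (name, provider) pair, mirroring
    # the paired provider-native / canonical-alias blocks in the config above.
    rows = []
    for provider_id, entries in available_models.items():
        for entry in entries:
            names = [entry.provider_model_id, *getattr(entry, "aliases", [])]
            for name in names:
                rows.append({
                    "metadata": getattr(entry, "metadata", None) or {},
                    "model_id": name,
                    "provider_id": provider_id,
                    "provider_model_id": entry.provider_model_id,
                    "model_type": getattr(entry, "model_type", "llm"),
                })
    return rows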

View file

@@ -52,7 +52,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db
eval:

View file

@@ -49,7 +49,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db
eval:
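
Both telemetry hunks quote service_name and replace the empty default with a zero-width space (\u200B), presumably so the ${env.VAR:default} substitution yields a non-empty string when OTEL_SERVICE_NAME is unset, rather than a value some YAML consumers read as null. A small demonstration of that substitution pattern; the regex is illustrative, not the stack's actual parser:

import os
import re

_PATTERN = re.compile(r"\$\{env\.(\w+):([^}]*)\}")

def substitute(value: str) -> str:
    # Replace ${env.NAME:default} with the env var if set, else the default.
    return _PATTERN.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)

# With the old form the default is "", while "\u200b" stays a non-empty
# string that still renders as nothing.
print(repr(substitute("${env.OTEL_SERVICE_NAME:\u200b}")))  # '\u200b' when unset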

View file

@@ -0,0 +1,105 @@
import { NextRequest, NextResponse } from "next/server";

// Get backend URL from environment variable or default to localhost for development
const BACKEND_URL =
process.env.LLAMA_STACK_BACKEND_URL ||
`http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;

async function proxyRequest(request: NextRequest, method: string) {
try {
// Extract the path from the request URL
const url = new URL(request.url);
const pathSegments = url.pathname.split("/");
// Remove /api from the path to get the actual API path
// /api/v1/models/list -> /v1/models/list
const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;
console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);
// Prepare headers (exclude host and other problematic headers)
const headers = new Headers();
request.headers.forEach((value, key) => {
// Skip headers that might cause issues in proxy
if (
!["host", "connection", "content-length"].includes(key.toLowerCase())
) {
headers.set(key, value);
}
});
// Prepare the request options
const requestOptions: RequestInit = {
method,
headers,
};
// Add body for methods that support it
if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
requestOptions.body = await request.text();
}
// Make the request to the FastAPI backend
const response = await fetch(targetUrl, requestOptions);
// Get response data
const responseText = await response.text();
console.log(
`Response from FastAPI: ${response.status} ${response.statusText}`,
);
// Create response with same status and headers
const proxyResponse = new NextResponse(responseText, {
status: response.status,
statusText: response.statusText,
});
// Copy response headers (except problematic ones)
response.headers.forEach((value, key) => {
if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
proxyResponse.headers.set(key, value);
}
});
return proxyResponse;
} catch (error) {
console.error("Proxy request failed:", error);
return NextResponse.json(
{
error: "Proxy request failed",
message: error instanceof Error ? error.message : "Unknown error",
backend_url: BACKEND_URL,
timestamp: new Date().toISOString(),
},
{ status: 500 },
);
}
}

// HTTP method handlers
export async function GET(request: NextRequest) {
  return proxyRequest(request, "GET");
}

export async function POST(request: NextRequest) {
  return proxyRequest(request, "POST");
}

export async function PUT(request: NextRequest) {
  return proxyRequest(request, "PUT");
}

export async function DELETE(request: NextRequest) {
  return proxyRequest(request, "DELETE");
}

export async function PATCH(request: NextRequest) {
  return proxyRequest(request, "PATCH");
}

export async function OPTIONS(request: NextRequest) {
  return proxyRequest(request, "OPTIONS");
}
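
The handlers above all funnel into proxyRequest, which drops the leading /api segment, so /api/v1/models on the UI origin is forwarded to ${BACKEND_URL}/v1/models. A quick smoke test from Python, assuming the Next.js dev server is running on its default port 3000:

import json
import urllib.request

# Goes through the Next.js proxy; the route rewrites /api/v1/models to
# ${BACKEND_URL}/v1/models before forwarding. The response shape depends
# on the stack server, so we just print the parsed JSON.
with urllib.request.urlopen("http://localhost:3000/api/v1/models") as resp:
    print(json.load(resp))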

View file

@@ -1,12 +1,6 @@
import LlamaStackClient from "llama-stack-client";
import OpenAI from "openai";
export const client =
process.env.NEXT_PUBLIC_USE_OPENAI_CLIENT === "true" // useful for testing
? new OpenAI({
apiKey: process.env.NEXT_PUBLIC_OPENAI_API_KEY,
dangerouslyAllowBrowser: true,
})
: new LlamaStackClient({
baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
});
export const client = new LlamaStackClient({
baseURL:
typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
});
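
With this change the browser always talks to its own origin under /api, and the proxy route holds the only copy of the backend address (the server-side LLAMA_STACK_BACKEND_URL), removing the OpenAI fallback and the NEXT_PUBLIC_* base-URL knobs and sidestepping cross-origin configuration for the UI.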

View file

@@ -15,11 +15,10 @@
"@radix-ui/react-tooltip": "^1.2.6",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"llama-stack-client": "github:stainless-sdks/llama-stack-node#ehhuang/dev",
"llama-stack-client": "0.2.9",
"lucide-react": "^0.510.0",
"next": "15.3.2",
"next-themes": "^0.4.6",
"openai": "^4.103.0",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"tailwind-merge": "^3.3.0"
@@ -677,6 +676,406 @@
"tslib": "^2.4.0"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz",
"integrity": "sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==",
"cpu": [
"ppc64"
],
"license": "MIT",
"optional": true,
"os": [
"aix"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.5.tgz",
"integrity": "sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==",
"cpu": [
"arm"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.5.tgz",
"integrity": "sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.5.tgz",
"integrity": "sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.5.tgz",
"integrity": "sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.5.tgz",
"integrity": "sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.5.tgz",
"integrity": "sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.5.tgz",
"integrity": "sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.5.tgz",
"integrity": "sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==",
"cpu": [
"arm"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.5.tgz",
"integrity": "sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ia32": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.5.tgz",
"integrity": "sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==",
"cpu": [
"ia32"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-loong64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.5.tgz",
"integrity": "sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==",
"cpu": [
"loong64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-mips64el": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.5.tgz",
"integrity": "sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==",
"cpu": [
"mips64el"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ppc64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.5.tgz",
"integrity": "sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==",
"cpu": [
"ppc64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-riscv64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.5.tgz",
"integrity": "sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==",
"cpu": [
"riscv64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-s390x": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.5.tgz",
"integrity": "sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==",
"cpu": [
"s390x"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.5.tgz",
"integrity": "sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.5.tgz",
"integrity": "sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.5.tgz",
"integrity": "sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.5.tgz",
"integrity": "sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.5.tgz",
"integrity": "sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/sunos-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.5.tgz",
"integrity": "sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-arm64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.5.tgz",
"integrity": "sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-ia32": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.5.tgz",
"integrity": "sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==",
"cpu": [
"ia32"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-x64": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.5.tgz",
"integrity": "sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@eslint-community/eslint-utils": {
"version": "4.7.0",
"resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz",
@@ -5601,6 +6000,46 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/esbuild": {
"version": "0.25.5",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.5.tgz",
"integrity": "sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==",
"hasInstallScript": true,
"license": "MIT",
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"@esbuild/aix-ppc64": "0.25.5",
"@esbuild/android-arm": "0.25.5",
"@esbuild/android-arm64": "0.25.5",
"@esbuild/android-x64": "0.25.5",
"@esbuild/darwin-arm64": "0.25.5",
"@esbuild/darwin-x64": "0.25.5",
"@esbuild/freebsd-arm64": "0.25.5",
"@esbuild/freebsd-x64": "0.25.5",
"@esbuild/linux-arm": "0.25.5",
"@esbuild/linux-arm64": "0.25.5",
"@esbuild/linux-ia32": "0.25.5",
"@esbuild/linux-loong64": "0.25.5",
"@esbuild/linux-mips64el": "0.25.5",
"@esbuild/linux-ppc64": "0.25.5",
"@esbuild/linux-riscv64": "0.25.5",
"@esbuild/linux-s390x": "0.25.5",
"@esbuild/linux-x64": "0.25.5",
"@esbuild/netbsd-arm64": "0.25.5",
"@esbuild/netbsd-x64": "0.25.5",
"@esbuild/openbsd-arm64": "0.25.5",
"@esbuild/openbsd-x64": "0.25.5",
"@esbuild/sunos-x64": "0.25.5",
"@esbuild/win32-arm64": "0.25.5",
"@esbuild/win32-ia32": "0.25.5",
"@esbuild/win32-x64": "0.25.5"
}
},
"node_modules/escalade": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
@@ -6555,7 +6994,6 @@
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
@@ -6717,7 +7155,6 @@
"version": "4.10.0",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz",
"integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==",
"dev": true,
"license": "MIT",
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
@@ -9092,8 +9529,9 @@
"license": "MIT"
},
"node_modules/llama-stack-client": {
"version": "0.0.1-alpha.0",
"resolved": "git+ssh://git@github.com/stainless-sdks/llama-stack-node.git#5d34d229fb53b6dad02da0f19f4b310b529c6b15",
"version": "0.2.9",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.9.tgz",
"integrity": "sha512-7+2WuPYt2j/k/Twh5IGn8hd8q4W6lVEK+Ql4PpICGLj4N8YmooCfydI1UvdT2UlX7PNYKNeyeFqTifWT2MjWKg==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
@@ -9102,7 +9540,8 @@
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
"node-fetch": "^2.6.7",
"tsx": "^4.19.2"
}
},
"node_modules/llama-stack-client/node_modules/@types/node": {
@@ -9805,51 +10244,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/openai": {
"version": "4.103.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.103.0.tgz",
"integrity": "sha512-eWcz9kdurkGOFDtd5ySS5y251H2uBgq9+1a2lTBnjMMzlexJ40Am5t6Mu76SSE87VvitPa0dkIAp75F+dZVC0g==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
},
"bin": {
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
"zod": "^3.23.8"
},
"peerDependenciesMeta": {
"ws": {
"optional": true
},
"zod": {
"optional": true
}
}
},
"node_modules/openai/node_modules/@types/node": {
"version": "18.19.103",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.103.tgz",
"integrity": "sha512-hHTHp+sEz6SxFsp+SA+Tqrua3AbmlAw+Y//aEwdHrdZkYVRWdvWD3y5uPZ0flYOkgskaFWqZ/YGFm3FaFQ0pRw==",
"license": "MIT",
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/openai/node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"license": "MIT"
},
"node_modules/optionator": {
"version": "0.9.4",
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@@ -10631,7 +11025,6 @@
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
@@ -11682,6 +12075,25 @@
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD"
},
"node_modules/tsx": {
"version": "4.19.4",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.4.tgz",
"integrity": "sha512-gK5GVzDkJK1SI1zwHf32Mqxf2tSJkNx+eYcNly5+nHvWqXUJYUkWBQtKauoESz3ymezAI++ZwT855x5p5eop+Q==",
"license": "MIT",
"dependencies": {
"esbuild": "~0.25.0",
"get-tsconfig": "^4.7.5"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
"node_modules/tw-animate-css": {
"version": "1.2.9",
"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz",
@@ -12269,7 +12681,7 @@
"version": "8.18.2",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz",
"integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==",
"devOptional": true,
"dev": true,
"license": "MIT",
"engines": {
"node": ">=10.0.0"
@@ -12380,7 +12792,7 @@
"version": "3.24.4",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.4.tgz",
"integrity": "sha512-OdqJE9UDRPwWsrHjLN2F8bPxvwJBK22EHLWtanu0LSYr5YqzsaaW3RMgmjwr8Rypg5k+meEJdSPXJZXE/yqOMg==",
"devOptional": true,
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"