Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-20 04:38:42 +00:00)

Merge branch 'main' into vllm_health_check
Commit: c18b585d32
143 changed files with 9210 additions and 5347 deletions
@@ -37,6 +37,7 @@ from .openai_responses import (
    OpenAIResponseInputTool,
    OpenAIResponseObject,
    OpenAIResponseObjectStream,
    OpenAIResponseText,
)

# TODO: use enum.StrEnum when we drop support for python 3.10

@@ -603,7 +604,9 @@ class Agents(Protocol):
        store: bool | None = True,
        stream: bool | None = False,
        temperature: float | None = None,
        text: OpenAIResponseText | None = None,
        tools: list[OpenAIResponseInputTool] | None = None,
        max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
        """Create a new OpenAI response.
@@ -7,6 +7,7 @@
from typing import Annotated, Any, Literal

from pydantic import BaseModel, Field
from typing_extensions import TypedDict

from llama_stack.schema_utils import json_schema_type, register_schema

@@ -126,6 +127,32 @@ OpenAIResponseOutput = Annotated[
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")


# This has to be a TypedDict because we need a "schema" field and our strong
# typing code in the schema generator doesn't support Pydantic aliases. That also
# means we can't use a discriminator field here, because TypedDicts don't support
# default values which the strong typing code requires for discriminators.
class OpenAIResponseTextFormat(TypedDict, total=False):
    """Configuration for Responses API text format.

    :param type: Must be "text", "json_schema", or "json_object" to identify the format type
    :param name: The name of the response format. Only used for json_schema.
    :param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema.
    :param description: (Optional) A description of the response format. Only used for json_schema.
    :param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema.
    """

    type: Literal["text"] | Literal["json_schema"] | Literal["json_object"]
    name: str | None
    schema: dict[str, Any] | None
    description: str | None
    strict: bool | None


@json_schema_type
class OpenAIResponseText(BaseModel):
    format: OpenAIResponseTextFormat | None = None


@json_schema_type
class OpenAIResponseObject(BaseModel):
    created_at: int
@@ -138,6 +165,9 @@ class OpenAIResponseObject(BaseModel):
    previous_response_id: str | None = None
    status: str
    temperature: float | None = None
    # Default to text format to avoid breaking the loading of old responses
    # before the field was added. New responses will have this set always.
    text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
    top_p: float | None = None
    truncation: str | None = None
    user: str | None = None
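Note: the OpenAIResponseTextFormat/OpenAIResponseText types above are what let Responses API callers request structured output through the new text field. A minimal sketch of how a JSON-schema format might be built with them; the schema and its name are illustrative placeholders, not taken from this commit:

# Hypothetical usage of the types added in this hunk
fmt = OpenAIResponseTextFormat(
    type="json_schema",
    name="weather_report",  # placeholder name
    schema={"type": "object", "properties": {"city": {"type": "string"}}},
    strict=True,
)
text = OpenAIResponseText(format=fmt)
# `text` would then be passed as the new `text` argument of the
# response-creation method shown in the Agents protocol hunk above.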
@@ -149,6 +179,30 @@ class OpenAIResponseObjectStreamResponseCreated(BaseModel):
    type: Literal["response.created"] = "response.created"


@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
    response: OpenAIResponseObject
    type: Literal["response.completed"] = "response.completed"


@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
    response_id: str
    item: OpenAIResponseOutput
    output_index: int
    sequence_number: int
    type: Literal["response.output_item.added"] = "response.output_item.added"


@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
    response_id: str
    item: OpenAIResponseOutput
    output_index: int
    sequence_number: int
    type: Literal["response.output_item.done"] = "response.output_item.done"


@json_schema_type
class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
    content_index: int
@@ -160,14 +214,132 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):


@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
    response: OpenAIResponseObject
    type: Literal["response.completed"] = "response.completed"
class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
    content_index: int
    text: str  # final text of the output item
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.output_text.done"] = "response.output_text.done"


@json_schema_type
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
    delta: str
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta"


@json_schema_type
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
    arguments: str  # final arguments of the function call
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done"


@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel):
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress"


@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.web_search_call.searching"] = "response.web_search_call.searching"


@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel):
    sequence_number: int
    type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel):
    sequence_number: int
    type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel):
    sequence_number: int
    type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel):
    delta: str
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):
    arguments: str  # final arguments of the MCP call
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
    item_id: str
    output_index: int
    sequence_number: int
    type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
    sequence_number: int
    type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
    sequence_number: int
    type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed"


OpenAIResponseObjectStream = Annotated[
    OpenAIResponseObjectStreamResponseCreated
    | OpenAIResponseObjectStreamResponseOutputItemAdded
    | OpenAIResponseObjectStreamResponseOutputItemDone
    | OpenAIResponseObjectStreamResponseOutputTextDelta
    | OpenAIResponseObjectStreamResponseOutputTextDone
    | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
    | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
    | OpenAIResponseObjectStreamResponseWebSearchCallInProgress
    | OpenAIResponseObjectStreamResponseWebSearchCallSearching
    | OpenAIResponseObjectStreamResponseWebSearchCallCompleted
    | OpenAIResponseObjectStreamResponseMcpListToolsInProgress
    | OpenAIResponseObjectStreamResponseMcpListToolsFailed
    | OpenAIResponseObjectStreamResponseMcpListToolsCompleted
    | OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
    | OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
    | OpenAIResponseObjectStreamResponseMcpCallInProgress
    | OpenAIResponseObjectStreamResponseMcpCallFailed
    | OpenAIResponseObjectStreamResponseMcpCallCompleted
    | OpenAIResponseObjectStreamResponseCompleted,
    Field(discriminator="type"),
]
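Note: every stream event above carries a literal type field and the union is discriminated on it, so a consumer can simply branch on the event class. A rough sketch of a client loop, assuming `stream` is an AsyncIterator[OpenAIResponseObjectStream] obtained from a streaming response call:

async def consume(stream):
    # Sketch only: branches use fields that appear in the classes above.
    async for event in stream:
        if isinstance(event, OpenAIResponseObjectStreamResponseOutputTextDone):
            print(event.text)
        elif isinstance(event, OpenAIResponseObjectStreamResponseMcpCallFailed):
            print("MCP call failed")
        elif isinstance(event, OpenAIResponseObjectStreamResponseCompleted):
            return event.response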
@@ -4,179 +4,158 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Protocol, runtime_checkable
from enum import Enum
from typing import Annotated, Literal, Protocol, runtime_checkable

from fastapi import File, Form, Response, UploadFile
from pydantic import BaseModel

from llama_stack.apis.common.responses import Order
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod


@json_schema_type
class FileUploadResponse(BaseModel):
# OpenAI Files API Models
class OpenAIFilePurpose(str, Enum):
    """
    Valid purpose values for OpenAI Files API.
    """
    Response after initiating a file upload session.

    :param id: ID of the upload session
    :param url: Upload URL for the file or file parts
    :param offset: Upload content offset
    :param size: Upload content size
    ASSISTANTS = "assistants"
    # TODO: Add other purposes as needed


@json_schema_type
class OpenAIFileObject(BaseModel):
    """
    OpenAI File object as defined in the OpenAI Files API.

    :param object: The object type, which is always "file"
    :param id: The file identifier, which can be referenced in the API endpoints
    :param bytes: The size of the file, in bytes
    :param created_at: The Unix timestamp (in seconds) for when the file was created
    :param expires_at: The Unix timestamp (in seconds) for when the file expires
    :param filename: The name of the file
    :param purpose: The intended purpose of the file
    """

    object: Literal["file"] = "file"
    id: str
    bytes: int
    created_at: int
    expires_at: int
    filename: str
    purpose: OpenAIFilePurpose


@json_schema_type
class ListOpenAIFileResponse(BaseModel):
    """
    Response for listing files in OpenAI Files API.

    :param data: List of file objects
    :param object: The object type, which is always "list"
    """

    data: list[OpenAIFileObject]
    has_more: bool
    first_id: str
    last_id: str
    object: Literal["list"] = "list"


@json_schema_type
class OpenAIFileDeleteResponse(BaseModel):
    """
    Response for deleting a file in OpenAI Files API.

    :param id: The file identifier that was deleted
    :param object: The object type, which is always "file"
    :param deleted: Whether the file was successfully deleted
    """

    id: str
    url: str
    offset: int
    size: int


@json_schema_type
class BucketResponse(BaseModel):
    name: str


@json_schema_type
class ListBucketResponse(BaseModel):
    """
    Response representing a list of file entries.

    :param data: List of FileResponse entries
    """

    data: list[BucketResponse]


@json_schema_type
class FileResponse(BaseModel):
    """
    Response representing a file entry.

    :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
    :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
    :param mime_type: MIME type of the file
    :param url: Upload URL for the file contents
    :param bytes: Size of the file in bytes
    :param created_at: Timestamp of when the file was created
    """

    bucket: str
    key: str
    mime_type: str
    url: str
    bytes: int
    created_at: int


@json_schema_type
class ListFileResponse(BaseModel):
    """
    Response representing a list of file entries.

    :param data: List of FileResponse entries
    """

    data: list[FileResponse]
    object: Literal["file"] = "file"
    deleted: bool


@runtime_checkable
@trace_protocol
class Files(Protocol):
    @webmethod(route="/files", method="POST")
    async def create_upload_session(
    # OpenAI Files API Endpoints
    @webmethod(route="/openai/v1/files", method="POST")
    async def openai_upload_file(
        self,
        bucket: str,
        key: str,
        mime_type: str,
        size: int,
    ) -> FileUploadResponse:
        file: Annotated[UploadFile, File()],
        purpose: Annotated[OpenAIFilePurpose, Form()],
    ) -> OpenAIFileObject:
        """
        Create a new upload session for a file identified by a bucket and key.
        Upload a file that can be used across various endpoints.

        :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-).
        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
        :param mime_type: MIME type of the file.
        :param size: File size in bytes.
        :returns: A FileUploadResponse.
        The file upload should be a multipart form request with:
        - file: The File object (not file name) to be uploaded.
        - purpose: The intended purpose of the uploaded file.

        :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
        :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
        :returns: An OpenAIFileObject representing the uploaded file.
        """
        ...

    @webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
    async def upload_content_to_session(
    @webmethod(route="/openai/v1/files", method="GET")
    async def openai_list_files(
        self,
        upload_id: str,
    ) -> FileResponse | None:
        after: str | None = None,
        limit: int | None = 10000,
        order: Order | None = Order.desc,
        purpose: OpenAIFilePurpose | None = None,
    ) -> ListOpenAIFileResponse:
        """
        Upload file content to an existing upload session.
        On the server, request body will have the raw bytes that are uploaded.
        Returns a list of files that belong to the user's organization.

        :param upload_id: ID of the upload session.
        :returns: A FileResponse or None if the upload is not complete.
        :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 10,000, and the default is 10,000.
        :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
        :param purpose: Only return files with the given purpose.
        :returns: An ListOpenAIFileResponse containing the list of files.
        """
        ...

    @webmethod(route="/files/session:{upload_id}", method="GET")
    async def get_upload_session_info(
    @webmethod(route="/openai/v1/files/{file_id}", method="GET")
    async def openai_retrieve_file(
        self,
        upload_id: str,
    ) -> FileUploadResponse:
        file_id: str,
    ) -> OpenAIFileObject:
        """
        Returns information about an existsing upload session.
        Returns information about a specific file.

        :param upload_id: ID of the upload session.
        :returns: A FileUploadResponse.
        :param file_id: The ID of the file to use for this request.
        :returns: An OpenAIFileObject containing file information.
        """
        ...

    @webmethod(route="/files", method="GET")
    async def list_all_buckets(
    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
    async def openai_delete_file(
        self,
        bucket: str,
    ) -> ListBucketResponse:
        file_id: str,
    ) -> OpenAIFileDeleteResponse:
        """
        List all buckets.
        Delete a file.

        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
        :returns: A ListBucketResponse.
        :param file_id: The ID of the file to use for this request.
        :returns: An OpenAIFileDeleteResponse indicating successful deletion.
        """
        ...

    @webmethod(route="/files/{bucket}", method="GET")
    async def list_files_in_bucket(
    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
    async def openai_retrieve_file_content(
        self,
        bucket: str,
    ) -> ListFileResponse:
        file_id: str,
    ) -> Response:
        """
        List all files in a bucket.
        Returns the contents of the specified file.

        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
        :returns: A ListFileResponse.
        """
        ...

    @webmethod(route="/files/{bucket}/{key:path}", method="GET")
    async def get_file(
        self,
        bucket: str,
        key: str,
    ) -> FileResponse:
        """
        Get a file info identified by a bucket and key.

        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
        :returns: A FileResponse.
        """
        ...

    @webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
    async def delete_file(
        self,
        bucket: str,
        key: str,
    ) -> None:
        """
        Delete a file identified by a bucket and key.

        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-).
        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.).
        :param file_id: The ID of the file to use for this request.
        :returns: The raw file content as a binary response.
        """
        ...
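Note: because the new endpoints mirror the OpenAI Files API, a plain multipart POST is enough to exercise them. A hedged sketch using httpx; the base URL, port and filename are placeholders, only the routes and the "purpose" value come from this diff:

import httpx

BASE_URL = "http://localhost:8321"  # placeholder for wherever the stack server listens

with open("notes.txt", "rb") as f:
    resp = httpx.post(
        f"{BASE_URL}/openai/v1/files",
        files={"file": ("notes.txt", f)},
        data={"purpose": "assistants"},
    )
resp.raise_for_status()
file_id = resp.json()["id"]

# Metadata and raw contents for the uploaded file
meta = httpx.get(f"{BASE_URL}/openai/v1/files/{file_id}").json()
content = httpx.get(f"{BASE_URL}/openai/v1/files/{file_id}/content").content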
@@ -35,7 +35,8 @@ class StackRun(Subcommand):
            "config",
            type=str,
            nargs="?",  # Make it optional
            help="Path to config file to use for the run. Required for venv and conda environments.",
            metavar="config | template",
            help="Path to config file to use for the run or name of known template (`llama stack list` for a list).",
        )
        self.parser.add_argument(
            "--port",

@@ -59,7 +60,7 @@ class StackRun(Subcommand):
            "--image-type",
            type=str,
            help="Image Type used during the build. This can be either conda or container or venv.",
            choices=[e.value for e in ImageType],
            choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
        )
        self.parser.add_argument(
            "--enable-ui",

@@ -154,7 +155,10 @@ class StackRun(Subcommand):
            # func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
            if callable(getattr(args, arg)):
                continue
            setattr(server_args, arg, getattr(args, arg))
            if arg == "config" and template_name:
                server_args.config = str(config_file)
            else:
                setattr(server_args, arg, getattr(args, arg))

        # Run the server
        server_main(server_args)
@@ -1,86 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.log import get_logger

logger = get_logger(__name__, category="core")


def check_access(
    obj_identifier: str,
    obj_attributes: AccessAttributes | None,
    user_attributes: dict[str, Any] | None = None,
) -> bool:
    """Check if the current user has access to the given object, based on access attributes.

    Access control algorithm:
    1. If the resource has no access_attributes, access is GRANTED to all authenticated users
    2. If the user has no attributes, access is DENIED to any object with access_attributes defined
    3. For each attribute category in the resource's access_attributes:
       a. If the user lacks that category, access is DENIED
       b. If the user has the category but none of the required values, access is DENIED
       c. If the user has at least one matching value in each required category, access is GRANTED

    Example:
        # Resource requires:
        access_attributes = AccessAttributes(
            roles=["admin", "data-scientist"],
            teams=["ml-team"]
        )

        # User has:
        user_attributes = {
            "roles": ["data-scientist", "engineer"],
            "teams": ["ml-team", "infra-team"],
            "projects": ["llama-3"]
        }

        # Result: Access GRANTED
        # - User has the "data-scientist" role (matches one of the required roles)
        # - AND user is part of the "ml-team" (matches the required team)
        # - The extra "projects" attribute is ignored

    Args:
        obj_identifier: The identifier of the resource object to check access for
        obj_attributes: The access attributes of the resource object
        user_attributes: The attributes of the current user

    Returns:
        bool: True if access is granted, False if denied
    """
    # If object has no access attributes, allow access by default
    if not obj_attributes:
        return True

    # If no user attributes, deny access to objects with access control
    if not user_attributes:
        return False

    dict_attribs = obj_attributes.model_dump(exclude_none=True)
    if not dict_attribs:
        return True

    # Check each attribute category (requires ALL categories to match)
    # TODO: formalize this into a proper ABAC policy
    for attr_key, required_values in dict_attribs.items():
        user_values = user_attributes.get(attr_key, [])

        if not user_values:
            logger.debug(f"Access denied to {obj_identifier}: missing required attribute category '{attr_key}'")
            return False

        if not any(val in user_values for val in required_values):
            logger.debug(
                f"Access denied to {obj_identifier}: "
                f"no match for attribute '{attr_key}', required one of {required_values}"
            )
            return False

    logger.debug(f"Access granted to {obj_identifier}")
    return True

@@ -3,5 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .verification import get_distribution_template  # noqa: F401
llama_stack/distribution/access_control/access_control.py (new file, 109 lines)
@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from llama_stack.distribution.datatypes import User

from .conditions import (
    Condition,
    ProtectedResource,
    parse_conditions,
)
from .datatypes import (
    AccessRule,
    Action,
    Scope,
)


def matches_resource(resource_scope: str, actual_resource: str) -> bool:
    if resource_scope == actual_resource:
        return True
    return resource_scope.endswith("::*") and actual_resource.startswith(resource_scope[:-1])


def matches_scope(
    scope: Scope,
    action: Action,
    resource: str,
    user: str | None,
) -> bool:
    if scope.resource and not matches_resource(scope.resource, resource):
        return False
    if scope.principal and scope.principal != user:
        return False
    return action in scope.actions


def as_list(obj: Any) -> list[Any]:
    if isinstance(obj, list):
        return obj
    return [obj]


def matches_conditions(
    conditions: list[Condition],
    resource: ProtectedResource,
    user: User,
) -> bool:
    for condition in conditions:
        # must match all conditions
        if not condition.matches(resource, user):
            return False
    return True


def default_policy() -> list[AccessRule]:
    # for backwards compatibility, if no rules are provided, assume
    # full access subject to previous attribute matching rules
    return [
        AccessRule(
            permit=Scope(actions=list(Action)),
            when=["user in owners " + name for name in ["roles", "teams", "projects", "namespaces"]],
        ),
    ]


def is_action_allowed(
    policy: list[AccessRule],
    action: Action,
    resource: ProtectedResource,
    user: User | None,
) -> bool:
    # If user is not set, assume authentication is not enabled
    if not user:
        return True

    if not len(policy):
        policy = default_policy()

    qualified_resource_id = resource.type + "::" + resource.identifier
    for rule in policy:
        if rule.forbid and matches_scope(rule.forbid, action, qualified_resource_id, user.principal):
            if rule.when:
                if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
                    return False
            elif rule.unless:
                if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
                    return False
            else:
                return False
        elif rule.permit and matches_scope(rule.permit, action, qualified_resource_id, user.principal):
            if rule.when:
                if matches_conditions(parse_conditions(as_list(rule.when)), resource, user):
                    return True
            elif rule.unless:
                if not matches_conditions(parse_conditions(as_list(rule.unless)), resource, user):
                    return True
            else:
                return True
    # assume access is denied unless we find a rule that permits access
    return False


class AccessDeniedError(RuntimeError):
    pass
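Note: a small sketch of how the evaluator above behaves with a one-rule policy. The `_Resource` stand-in and the user attributes are made up for illustration and only need to satisfy the ProtectedResource protocol; User comes from llama_stack.distribution.datatypes:

# Hypothetical resource satisfying ProtectedResource
class _Resource:
    type = "model"
    identifier = "my-model"
    owner = None

policy = [
    AccessRule(
        permit=Scope(actions=[Action.READ], resource="model::*"),
        when="user with admin in roles",
    )
]

admin = User(principal="alice", attributes={"roles": ["admin"]})
guest = User(principal="bob", attributes={"roles": ["viewer"]})

assert is_action_allowed(policy, Action.READ, _Resource(), admin)
assert not is_action_allowed(policy, Action.READ, _Resource(), guest)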
llama_stack/distribution/access_control/conditions.py (new file, 129 lines)
@@ -0,0 +1,129 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Protocol


class User(Protocol):
    principal: str
    attributes: dict[str, list[str]] | None


class ProtectedResource(Protocol):
    type: str
    identifier: str
    owner: User


class Condition(Protocol):
    def matches(self, resource: ProtectedResource, user: User) -> bool: ...


class UserInOwnersList:
    def __init__(self, name: str):
        self.name = name

    def owners_values(self, resource: ProtectedResource) -> list[str] | None:
        if (
            hasattr(resource, "owner")
            and resource.owner
            and resource.owner.attributes
            and self.name in resource.owner.attributes
        ):
            return resource.owner.attributes[self.name]
        else:
            return None

    def matches(self, resource: ProtectedResource, user: User) -> bool:
        required = self.owners_values(resource)
        if not required:
            return True
        if not user.attributes or self.name not in user.attributes or not user.attributes[self.name]:
            return False
        user_values = user.attributes[self.name]
        for value in required:
            if value in user_values:
                return True
        return False

    def __repr__(self):
        return f"user in owners {self.name}"


class UserNotInOwnersList(UserInOwnersList):
    def __init__(self, name: str):
        super().__init__(name)

    def matches(self, resource: ProtectedResource, user: User) -> bool:
        return not super().matches(resource, user)

    def __repr__(self):
        return f"user not in owners {self.name}"


class UserWithValueInList:
    def __init__(self, name: str, value: str):
        self.name = name
        self.value = value

    def matches(self, resource: ProtectedResource, user: User) -> bool:
        if user.attributes and self.name in user.attributes:
            return self.value in user.attributes[self.name]
        print(f"User does not have {self.value} in {self.name}")
        return False

    def __repr__(self):
        return f"user with {self.value} in {self.name}"


class UserWithValueNotInList(UserWithValueInList):
    def __init__(self, name: str, value: str):
        super().__init__(name, value)

    def matches(self, resource: ProtectedResource, user: User) -> bool:
        return not super().matches(resource, user)

    def __repr__(self):
        return f"user with {self.value} not in {self.name}"


class UserIsOwner:
    def matches(self, resource: ProtectedResource, user: User) -> bool:
        return resource.owner.principal == user.principal if resource.owner else False

    def __repr__(self):
        return "user is owner"


class UserIsNotOwner:
    def matches(self, resource: ProtectedResource, user: User) -> bool:
        return not resource.owner or resource.owner.principal != user.principal

    def __repr__(self):
        return "user is not owner"


def parse_condition(condition: str) -> Condition:
    words = condition.split()
    match words:
        case ["user", "is", "owner"]:
            return UserIsOwner()
        case ["user", "is", "not", "owner"]:
            return UserIsNotOwner()
        case ["user", "with", value, "in", name]:
            return UserWithValueInList(name, value)
        case ["user", "with", value, "not", "in", name]:
            return UserWithValueNotInList(name, value)
        case ["user", "in", "owners", name]:
            return UserInOwnersList(name)
        case ["user", "not", "in", "owners", name]:
            return UserNotInOwnersList(name)
        case _:
            raise ValueError(f"Invalid condition: {condition}")


def parse_conditions(conditions: list[str]) -> list[Condition]:
    return [parse_condition(c) for c in conditions]
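Note: the condition grammar is deliberately tiny; parse_condition just pattern-matches on the whitespace-split words. A quick sketch of what the supported strings parse to, using the __repr__ of each class above:

assert repr(parse_condition("user is owner")) == "user is owner"
assert repr(parse_condition("user in owners teams")) == "user in owners teams"
assert repr(parse_condition("user with admin in roles")) == "user with admin in roles"

# Anything outside the grammar is rejected at parse time.
try:
    parse_condition("user likes cats")
except ValueError as e:
    print(e)  # Invalid condition: user likes cats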
llama_stack/distribution/access_control/datatypes.py (new file, 107 lines)
@@ -0,0 +1,107 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import Enum

from pydantic import BaseModel, model_validator
from typing_extensions import Self

from .conditions import parse_conditions


class Action(str, Enum):
    CREATE = "create"
    READ = "read"
    UPDATE = "update"
    DELETE = "delete"


class Scope(BaseModel):
    principal: str | None = None
    actions: Action | list[Action]
    resource: str | None = None


def _mutually_exclusive(obj, a: str, b: str):
    if getattr(obj, a) and getattr(obj, b):
        raise ValueError(f"{a} and {b} are mutually exclusive")


def _require_one_of(obj, a: str, b: str):
    if not getattr(obj, a) and not getattr(obj, b):
        raise ValueError(f"on of {a} or {b} is required")


class AccessRule(BaseModel):
    """Access rule based loosely on cedar policy language

    A rule defines a list of action either to permit or to forbid. It may specify a
    principal or a resource that must match for the rule to take effect. The resource
    to match should be specified in the form of a type qualified identifier, e.g.
    model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
    e.g. model::*. If the principal or resource are not specified, they will match all
    requests.

    A rule may also specify a condition, either a 'when' or an 'unless', with additional
    constraints as to where the rule applies. The constraints supported at present are:

    - 'user with <attr-value> in <attr-name>'
    - 'user with <attr-value> not in <attr-name>'
    - 'user is owner'
    - 'user is not owner'
    - 'user in owners <attr-name>'
    - 'user not in owners <attr-name>'

    Rules are tested in order to find a match. If a match is found, the request is
    permitted or forbidden depending on the type of rule. If no match is found, the
    request is denied. If no rules are specified, a rule that allows any action as
    long as the resource attributes match the user attributes is added
    (i.e. the previous behaviour is the default).

    Some examples in yaml:

    - permit:
        principal: user-1
        actions: [create, read, delete]
        resource: model::*
      description: user-1 has full access to all models
    - permit:
        principal: user-2
        actions: [read]
        resource: model::model-1
      description: user-2 has read access to model-1 only
    - permit:
        actions: [read]
      when: user in owner teams
      description: any user has read access to any resource created by a member of their team
    - forbid:
        actions: [create, read, delete]
        resource: vector_db::*
      unless: user with admin in roles
      description: only user with admin role can use vector_db resources

    """

    permit: Scope | None = None
    forbid: Scope | None = None
    when: str | list[str] | None = None
    unless: str | list[str] | None = None
    description: str | None = None

    @model_validator(mode="after")
    def validate_rule_format(self) -> Self:
        _require_one_of(self, "permit", "forbid")
        _mutually_exclusive(self, "permit", "forbid")
        _mutually_exclusive(self, "when", "unless")
        if isinstance(self.when, list):
            parse_conditions(self.when)
        elif self.when:
            parse_conditions([self.when])
        if isinstance(self.unless, list):
            parse_conditions(self.unless)
        elif self.unless:
            parse_conditions([self.unless])
        return self
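Note: because validate_rule_format parses any when/unless strings eagerly, a malformed rule fails when the policy is loaded rather than when a request is evaluated. A short sketch, with illustrative rule contents:

# A well-formed rule validates cleanly...
rule = AccessRule(
    permit=Scope(actions=[Action.READ], resource="model::*"),
    unless="user with banned in roles",
)

# ...while a bad condition string is rejected as soon as the model is built.
try:
    AccessRule(permit=Scope(actions=[Action.READ]), when="sometimes, maybe")
except ValueError:
    pass  # pydantic surfaces the "Invalid condition: ..." error from parse_conditions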
@@ -29,6 +29,8 @@ SERVER_DEPENDENCIES = [
    "fire",
    "httpx",
    "uvicorn",
    "opentelemetry-sdk",
    "opentelemetry-exporter-otlp-proto-http",
]
@@ -24,6 +24,7 @@ from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.access_control.datatypes import AccessRule
from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig

@@ -35,126 +36,66 @@ LLAMA_STACK_RUN_CONFIG_VERSION = "2"
RoutingKey = str | list[str]


class AccessAttributes(BaseModel):
    """Structured representation of user attributes for access control.
class User(BaseModel):
    principal: str
    # further attributes that may be used for access control decisions
    attributes: dict[str, list[str]] | None = None

    This model defines a structured approach to representing user attributes
    with common standard categories for access control.

    Standard attribute categories include:
    - roles: Role-based attributes (e.g., admin, data-scientist)
    - teams: Team-based attributes (e.g., ml-team, infra-team)
    - projects: Project access attributes (e.g., llama-3, customer-insights)
    - namespaces: Namespace-based access control for resource isolation
    """

    # Standard attribute categories - the minimal set we need now
    roles: list[str] | None = Field(
        default=None, description="Role-based attributes (e.g., 'admin', 'data-scientist', 'user')"
    )

    teams: list[str] | None = Field(default=None, description="Team-based attributes (e.g., 'ml-team', 'nlp-team')")

    projects: list[str] | None = Field(
        default=None, description="Project-based access attributes (e.g., 'llama-3', 'customer-insights')"
    )

    namespaces: list[str] | None = Field(
        default=None, description="Namespace-based access control for resource isolation"
    )
    def __init__(self, principal: str, attributes: dict[str, list[str]] | None):
        super().__init__(principal=principal, attributes=attributes)


class ResourceWithACL(Resource):
    """Extension of Resource that adds attribute-based access control capabilities.
class ResourceWithOwner(Resource):
    """Extension of Resource that adds an optional owner, i.e. the user that created the
    resource. This can be used to constrain access to the resource."""

    This class adds an optional access_attributes field that allows fine-grained control
    over which users can access each resource. When attributes are defined, a user must have
    matching attributes to access the resource.

    Attribute Matching Algorithm:
    1. If a resource has no access_attributes (None or empty dict), it's visible to all authenticated users
    2. Each key in access_attributes represents an attribute category (e.g., "roles", "teams", "projects")
    3. The matching algorithm requires ALL categories to match (AND relationship between categories)
    4. Within each category, ANY value match is sufficient (OR relationship within a category)

    Examples:
        # Resource visible to everyone (no access control)
        model = Model(identifier="llama-2", ...)

        # Resource visible only to admins
        model = Model(
            identifier="gpt-4",
            access_attributes=AccessAttributes(roles=["admin"])
        )

        # Resource visible to data scientists on the ML team
        model = Model(
            identifier="private-model",
            access_attributes=AccessAttributes(
                roles=["data-scientist", "researcher"],
                teams=["ml-team"]
            )
        )
        # ^ User must have at least one of the roles AND be on the ml-team

        # Resource visible to users with specific project access
        vector_db = VectorDB(
            identifier="customer-embeddings",
            access_attributes=AccessAttributes(
                projects=["customer-insights"],
                namespaces=["confidential"]
            )
        )
        # ^ User must have access to the customer-insights project AND have confidential namespace
    """

    access_attributes: AccessAttributes | None = None
    owner: User | None = None


# Use the extended Resource for all routable objects
class ModelWithACL(Model, ResourceWithACL):
class ModelWithOwner(Model, ResourceWithOwner):
    pass


class ShieldWithACL(Shield, ResourceWithACL):
class ShieldWithOwner(Shield, ResourceWithOwner):
    pass


class VectorDBWithACL(VectorDB, ResourceWithACL):
class VectorDBWithOwner(VectorDB, ResourceWithOwner):
    pass


class DatasetWithACL(Dataset, ResourceWithACL):
class DatasetWithOwner(Dataset, ResourceWithOwner):
    pass


class ScoringFnWithACL(ScoringFn, ResourceWithACL):
class ScoringFnWithOwner(ScoringFn, ResourceWithOwner):
    pass


class BenchmarkWithACL(Benchmark, ResourceWithACL):
class BenchmarkWithOwner(Benchmark, ResourceWithOwner):
    pass


class ToolWithACL(Tool, ResourceWithACL):
class ToolWithOwner(Tool, ResourceWithOwner):
    pass


class ToolGroupWithACL(ToolGroup, ResourceWithACL):
class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
    pass


RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | Tool | ToolGroup

RoutableObjectWithProvider = Annotated[
    ModelWithACL
    | ShieldWithACL
    | VectorDBWithACL
    | DatasetWithACL
    | ScoringFnWithACL
    | BenchmarkWithACL
    | ToolWithACL
    | ToolGroupWithACL,
    ModelWithOwner
    | ShieldWithOwner
    | VectorDBWithOwner
    | DatasetWithOwner
    | ScoringFnWithOwner
    | BenchmarkWithOwner
    | ToolWithOwner
    | ToolGroupWithOwner,
    Field(discriminator="type"),
]
@@ -234,6 +175,7 @@ class AuthenticationConfig(BaseModel):
        ...,
        description="Provider-specific configuration",
    )
    access_policy: list[AccessRule] = Field(default=[], description="Rules for determining access to resources")


class AuthenticationRequiredError(Exception):
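Note: with the new access_policy field, rules in the format documented in access_control/datatypes.py can sit alongside the auth provider settings. A hedged sketch of what that might look like in a run config; the surrounding server/auth nesting is assumed from the existing AuthenticationConfig layout and is not part of this diff, while the rule syntax follows the YAML examples above:

server:
  auth:
    # ...existing provider settings for AuthenticationConfig go here...
    access_policy:
      - permit:
          actions: [read]
        when: user in owners teams
      - forbid:
          actions: [create, read, delete]
          resource: vector_db::*
        unless: user with admin in roles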
@@ -10,6 +10,8 @@ import logging
from contextlib import AbstractContextManager
from typing import Any

from llama_stack.distribution.datatypes import User

from .utils.dynamic import instantiate_class_type

log = logging.getLogger(__name__)

@@ -21,12 +23,10 @@ PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)
class RequestProviderDataContext(AbstractContextManager):
    """Context manager for request provider data"""

    def __init__(
        self, provider_data: dict[str, Any] | None = None, auth_attributes: dict[str, list[str]] | None = None
    ):
    def __init__(self, provider_data: dict[str, Any] | None = None, user: User | None = None):
        self.provider_data = provider_data or {}
        if auth_attributes:
            self.provider_data["__auth_attributes"] = auth_attributes
        if user:
            self.provider_data["__authenticated_user"] = user

        self.token = None

@@ -95,9 +95,9 @@ def request_provider_data_context(
    return RequestProviderDataContext(provider_data, auth_attributes)


def get_auth_attributes() -> dict[str, list[str]] | None:
def get_authenticated_user() -> User | None:
    """Helper to retrieve auth attributes from the provider data context"""
    provider_data = PROVIDER_DATA_VAR.get()
    if not provider_data:
        return None
    return provider_data.get("__auth_attributes")
    return provider_data.get("__authenticated_user")
@@ -28,6 +28,7 @@ from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.distribution.client import get_client_impl
from llama_stack.distribution.datatypes import (
    AccessRule,
    AutoRoutedProviderSpec,
    Provider,
    RoutingTableProviderSpec,

@@ -118,6 +119,7 @@ async def resolve_impls(
    run_config: StackRunConfig,
    provider_registry: ProviderRegistry,
    dist_registry: DistributionRegistry,
    policy: list[AccessRule],
) -> dict[Api, Any]:
    """
    Resolves provider implementations by:

@@ -140,7 +142,7 @@ async def resolve_impls(

    sorted_providers = sort_providers_by_deps(providers_with_specs, run_config)

    return await instantiate_providers(sorted_providers, router_apis, dist_registry, run_config)
    return await instantiate_providers(sorted_providers, router_apis, dist_registry, run_config, policy)


def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str, dict[str, ProviderWithSpec]]:

@@ -247,6 +249,7 @@ async def instantiate_providers(
    router_apis: set[Api],
    dist_registry: DistributionRegistry,
    run_config: StackRunConfig,
    policy: list[AccessRule],
) -> dict:
    """Instantiates providers asynchronously while managing dependencies."""
    impls: dict[Api, Any] = {}

@@ -261,7 +264,7 @@ async def instantiate_providers(
        if isinstance(provider.spec, RoutingTableProviderSpec):
            inner_impls = inner_impls_by_provider_id[f"inner-{provider.spec.router_api.value}"]

        impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config)
        impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config, policy)

        if api_str.startswith("inner-"):
            inner_impls_by_provider_id[api_str][provider.provider_id] = impl

@@ -312,6 +315,7 @@ async def instantiate_provider(
    inner_impls: dict[str, Any],
    dist_registry: DistributionRegistry,
    run_config: StackRunConfig,
    policy: list[AccessRule],
):
    provider_spec = provider.spec
    if not hasattr(provider_spec, "module"):

@@ -336,13 +340,15 @@ async def instantiate_provider(
        method = "get_routing_table_impl"

        config = None
        args = [provider_spec.api, inner_impls, deps, dist_registry]
        args = [provider_spec.api, inner_impls, deps, dist_registry, policy]
    else:
        method = "get_provider_impl"

        config_type = instantiate_class_type(provider_spec.config_class)
        config = config_type(**provider.config)
        args = [config, deps]
        if "policy" in inspect.signature(getattr(module, method)).parameters:
            args.append(policy)

    fn = getattr(module, method)
    impl = await fn(*args)
@@ -6,7 +6,7 @@

from typing import Any

from llama_stack.distribution.datatypes import RoutedProtocol
from llama_stack.distribution.datatypes import AccessRule, RoutedProtocol
from llama_stack.distribution.stack import StackRunConfig
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable

@@ -18,6 +18,7 @@ async def get_routing_table_impl(
    impls_by_provider_id: dict[str, RoutedProtocol],
    _deps,
    dist_registry: DistributionRegistry,
    policy: list[AccessRule],
) -> Any:
    from ..routing_tables.benchmarks import BenchmarksRoutingTable
    from ..routing_tables.datasets import DatasetsRoutingTable

@@ -40,7 +41,7 @@ async def get_routing_table_impl(
    if api.value not in api_to_tables:
        raise ValueError(f"API {api.value} not found in router map")

    impl = api_to_tables[api.value](impls_by_provider_id, dist_registry)
    impl = api_to_tables[api.value](impls_by_provider_id, dist_registry, policy)
    await impl.initialize()
    return impl
@@ -8,7 +8,7 @@ from typing import Any

from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
from llama_stack.distribution.datatypes import (
    BenchmarkWithACL,
    BenchmarkWithOwner,
)
from llama_stack.log import get_logger

@@ -47,7 +47,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
            )
        if provider_benchmark_id is None:
            provider_benchmark_id = benchmark_id
        benchmark = BenchmarkWithACL(
        benchmark = BenchmarkWithOwner(
            identifier=benchmark_id,
            dataset_id=dataset_id,
            scoring_functions=scoring_functions,
@@ -8,14 +8,14 @@ from typing import Any

from llama_stack.apis.resource import ResourceType
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.distribution.access_control import check_access
from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
from llama_stack.distribution.datatypes import (
    AccessAttributes,
    AccessRule,
    RoutableObject,
    RoutableObjectWithProvider,
    RoutedProtocol,
)
from llama_stack.distribution.request_headers import get_auth_attributes
from llama_stack.distribution.request_headers import get_authenticated_user
from llama_stack.distribution.store import DistributionRegistry
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, RoutingTable

@@ -73,9 +73,11 @@ class CommonRoutingTableImpl(RoutingTable):
        self,
        impls_by_provider_id: dict[str, RoutedProtocol],
        dist_registry: DistributionRegistry,
        policy: list[AccessRule],
    ) -> None:
        self.impls_by_provider_id = impls_by_provider_id
        self.dist_registry = dist_registry
        self.policy = policy

    async def initialize(self) -> None:
        async def add_objects(objs: list[RoutableObjectWithProvider], provider_id: str, cls) -> None:

@@ -166,13 +168,15 @@ class CommonRoutingTableImpl(RoutingTable):
            return None

        # Check if user has permission to access this object
        if not check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes()):
            logger.debug(f"Access denied to {type} '{identifier}' based on attribute mismatch")
        if not is_action_allowed(self.policy, "read", obj, get_authenticated_user()):
            logger.debug(f"Access denied to {type} '{identifier}'")
            return None

        return obj

    async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
        if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()):
            raise AccessDeniedError()
        await self.dist_registry.delete(obj.type, obj.identifier)
        await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])

@@ -187,11 +191,12 @@ class CommonRoutingTableImpl(RoutingTable):
        p = self.impls_by_provider_id[obj.provider_id]

        # If object supports access control but no attributes set, use creator's attributes
        if not obj.access_attributes:
            creator_attributes = get_auth_attributes()
            if creator_attributes:
                obj.access_attributes = AccessAttributes(**creator_attributes)
                logger.info(f"Setting access attributes for {obj.type} '{obj.identifier}' based on creator's identity")
        creator = get_authenticated_user()
        if not is_action_allowed(self.policy, "create", obj, creator):
            raise AccessDeniedError()
        if creator:
            obj.owner = creator
            logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")

        registered_obj = await register_object_with_provider(obj, p)
        # TODO: This needs to be fixed for all APIs once they return the registered object

@@ -210,9 +215,7 @@ class CommonRoutingTableImpl(RoutingTable):
        # Apply attribute-based access control filtering
        if filtered_objs:
            filtered_objs = [
                obj
                for obj in filtered_objs
                if check_access(obj.identifier, getattr(obj, "access_attributes", None), get_auth_attributes())
                obj for obj in filtered_objs if is_action_allowed(self.policy, "read", obj, get_authenticated_user())
            ]

        return filtered_objs
@@ -19,7 +19,7 @@ from llama_stack.apis.datasets import (
)
from llama_stack.apis.resource import ResourceType
from llama_stack.distribution.datatypes import (
    DatasetWithACL,
    DatasetWithOwner,
)
from llama_stack.log import get_logger

@@ -74,7 +74,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
        if metadata is None:
            metadata = {}

        dataset = DatasetWithACL(
        dataset = DatasetWithOwner(
            identifier=dataset_id,
            provider_resource_id=provider_dataset_id,
            provider_id=provider_id,
@@ -9,7 +9,7 @@ from typing import Any

from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
from llama_stack.distribution.datatypes import (
    ModelWithACL,
    ModelWithOwner,
)
from llama_stack.log import get_logger

@@ -65,7 +65,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
            model_type = ModelType.llm
        if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
            raise ValueError("Embedding model must have an embedding dimension in its metadata")
        model = ModelWithACL(
        model = ModelWithOwner(
            identifier=model_id,
            provider_resource_id=provider_model_id,
            provider_id=provider_id,
@@ -13,7 +13,7 @@ from llama_stack.apis.scoring_functions import (
    ScoringFunctions,
)
from llama_stack.distribution.datatypes import (
    ScoringFnWithACL,
    ScoringFnWithOwner,
)
from llama_stack.log import get_logger

@@ -50,7 +50,7 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
            raise ValueError(
                "No provider specified and multiple providers available. Please specify a provider_id."
            )
        scoring_fn = ScoringFnWithACL(
        scoring_fn = ScoringFnWithOwner(
            identifier=scoring_fn_id,
            description=description,
            return_type=return_type,
@@ -9,7 +9,7 @@ from typing import Any
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
from llama_stack.distribution.datatypes import (
    ShieldWithACL,
    ShieldWithOwner,
)
from llama_stack.log import get_logger

@@ -47,7 +47,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
            )
        if params is None:
            params = {}
        shield = ShieldWithACL(
        shield = ShieldWithOwner(
            identifier=shield_id,
            provider_resource_id=provider_shield_id,
            provider_id=provider_id,
@ -8,7 +8,7 @@ from typing import Any
|
|||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
|
||||
from llama_stack.distribution.datatypes import ToolGroupWithACL
|
||||
from llama_stack.distribution.datatypes import ToolGroupWithOwner
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from .common import CommonRoutingTableImpl
|
||||
|
|
@ -106,7 +106,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
|
|||
mcp_endpoint: URL | None = None,
|
||||
args: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
toolgroup = ToolGroupWithACL(
|
||||
toolgroup = ToolGroupWithOwner(
|
||||
identifier=toolgroup_id,
|
||||
provider_id=provider_id,
|
||||
provider_resource_id=toolgroup_id,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelType
|
|||
from llama_stack.apis.resource import ResourceType
|
||||
from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs
|
||||
from llama_stack.distribution.datatypes import (
|
||||
VectorDBWithACL,
|
||||
VectorDBWithOwner,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
|
|||
"embedding_model": embedding_model,
|
||||
"embedding_dimension": model.metadata["embedding_dimension"],
|
||||
}
|
||||
vector_db = TypeAdapter(VectorDBWithACL).validate_python(vector_db_data)
|
||||
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
|
||||
await self.register_object(vector_db)
|
||||
return vector_db
|
||||
|
||||
|
|
|
|||
|
|
@ -105,24 +105,16 @@ class AuthenticationMiddleware:
|
|||
logger.exception("Error during authentication")
|
||||
return await self._send_auth_error(send, "Authentication service error")
|
||||
|
||||
# Store attributes in request scope for access control
|
||||
if validation_result.access_attributes:
|
||||
user_attributes = validation_result.access_attributes.model_dump(exclude_none=True)
|
||||
else:
|
||||
logger.warning("No access attributes, setting namespace to token by default")
|
||||
user_attributes = {
|
||||
"roles": [token],
|
||||
}
|
||||
|
||||
# Store the client ID in the request scope so that downstream middleware (like QuotaMiddleware)
|
||||
# can identify the requester and enforce per-client rate limits.
|
||||
scope["authenticated_client_id"] = token
|
||||
|
||||
# Store attributes in request scope
|
||||
scope["user_attributes"] = user_attributes
|
||||
scope["principal"] = validation_result.principal
|
||||
if validation_result.attributes:
|
||||
scope["user_attributes"] = validation_result.attributes
|
||||
logger.debug(
|
||||
f"Authentication successful: {validation_result.principal} with {len(scope['user_attributes'])} attributes"
|
||||
f"Authentication successful: {validation_result.principal} with {len(validation_result.attributes)} attributes"
|
||||
)
|
||||
|
||||
return await self.app(scope, receive, send)
|
||||
|
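For context, the middleware now carries the validated principal and attributes through the ASGI scope instead of synthesizing a namespace from the raw token. A minimal, illustrative sketch of what a downstream component might read back (names follow the scope keys assigned above; the handler itself is made up):

# Illustrative ASGI snippet: reading the values the authentication middleware stores.
async def downstream(scope, receive, send):
    principal = scope.get("principal", "")
    attributes = scope.get("user_attributes", {})
    client_id = scope.get("authenticated_client_id")
    # e.g. QuotaMiddleware can key rate limits on client_id, while route handlers
    # rebuild a User(principal, attributes) for access-control checks.
    ...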
|
|
|||
|
|
@ -16,43 +16,18 @@ from jose import jwt
|
|||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
from typing_extensions import Self
|
||||
|
||||
from llama_stack.distribution.datatypes import AccessAttributes, AuthenticationConfig, AuthProviderType
|
||||
from llama_stack.distribution.datatypes import AuthenticationConfig, AuthProviderType, User
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(name=__name__, category="auth")
|
||||
|
||||
|
||||
class TokenValidationResult(BaseModel):
|
||||
principal: str | None = Field(
|
||||
default=None,
|
||||
description="The principal (username or persistent identifier) of the authenticated user",
|
||||
)
|
||||
access_attributes: AccessAttributes | None = Field(
|
||||
default=None,
|
||||
description="""
|
||||
Structured user attributes for attribute-based access control.
|
||||
|
||||
These attributes determine which resources the user can access.
|
||||
The model provides standard categories like "roles", "teams", "projects", and "namespaces".
|
||||
Each attribute category contains a list of values that the user has for that category.
|
||||
During access control checks, these values are compared against resource requirements.
|
||||
|
||||
Example with standard categories:
|
||||
```json
|
||||
{
|
||||
"roles": ["admin", "data-scientist"],
|
||||
"teams": ["ml-team"],
|
||||
"projects": ["llama-3"],
|
||||
"namespaces": ["research"]
|
||||
}
|
||||
```
|
||||
""",
|
||||
)
|
||||
|
||||
|
||||
class AuthResponse(TokenValidationResult):
|
||||
class AuthResponse(BaseModel):
|
||||
"""The format of the authentication response from the auth endpoint."""
|
||||
|
||||
principal: str
|
||||
# further attributes that may be used for access control decisions
|
||||
attributes: dict[str, list[str]] | None = None
|
||||
message: str | None = Field(
|
||||
default=None, description="Optional message providing additional context about the authentication result."
|
||||
)
|
||||
|
|
@ -78,7 +53,7 @@ class AuthProvider(ABC):
|
|||
"""Abstract base class for authentication providers."""
|
||||
|
||||
@abstractmethod
|
||||
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
|
||||
async def validate_token(self, token: str, scope: dict | None = None) -> User:
|
||||
"""Validate a token and return access attributes."""
|
||||
pass
|
||||
|
||||
|
|
@ -88,10 +63,10 @@ class AuthProvider(ABC):
|
|||
pass
|
||||
|
||||
|
||||
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> AccessAttributes:
|
||||
attributes = AccessAttributes()
|
||||
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
|
||||
attributes: dict[str, list[str]] = {}
|
||||
for claim_key, attribute_key in mapping.items():
|
||||
if claim_key not in claims or not hasattr(attributes, attribute_key):
|
||||
if claim_key not in claims:
|
||||
continue
|
||||
claim = claims[claim_key]
|
||||
if isinstance(claim, list):
|
||||
|
|
@ -99,11 +74,10 @@ def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str])
|
|||
else:
|
||||
values = claim.split()
|
||||
|
||||
current = getattr(attributes, attribute_key)
|
||||
if current:
|
||||
current.extend(values)
|
||||
if attribute_key in attributes:
|
||||
attributes[attribute_key].extend(values)
|
||||
else:
|
||||
setattr(attributes, attribute_key, values)
|
||||
attributes[attribute_key] = values
|
||||
return attributes
|
||||
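A quick worked example of the reworked claims mapping. The decoded JWT payload and the claims_mapping below are illustrative (the provider's actual defaults are not shown in this diff); the loop mirrors get_attributes_from_claims: list claims are taken as-is, string claims are split on whitespace, and values merge per attribute key.

claims = {"sub": "user-123", "groups": ["ml-team", "infra"], "scope": "read write"}
claims_mapping = {"groups": "teams", "scope": "roles"}

attributes: dict[str, list[str]] = {}
for claim_key, attribute_key in claims_mapping.items():
    if claim_key not in claims:
        continue
    claim = claims[claim_key]
    values = claim if isinstance(claim, list) else claim.split()
    attributes.setdefault(attribute_key, []).extend(values)

assert attributes == {"teams": ["ml-team", "infra"], "roles": ["read", "write"]}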
|
||||
|
||||
|
|
@ -145,8 +119,6 @@ class OAuth2TokenAuthProviderConfig(BaseModel):
|
|||
for key, value in v.items():
|
||||
if not value:
|
||||
raise ValueError(f"claims_mapping value cannot be empty: {key}")
|
||||
if value not in AccessAttributes.model_fields:
|
||||
raise ValueError(f"claims_mapping value is not a valid attribute: {value}")
|
||||
return v
|
||||
|
||||
@model_validator(mode="after")
|
||||
|
|
@ -171,14 +143,14 @@ class OAuth2TokenAuthProvider(AuthProvider):
|
|||
self._jwks: dict[str, str] = {}
|
||||
self._jwks_lock = Lock()
|
||||
|
||||
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
|
||||
async def validate_token(self, token: str, scope: dict | None = None) -> User:
|
||||
if self.config.jwks:
|
||||
return await self.validate_jwt_token(token, scope)
|
||||
if self.config.introspection:
|
||||
return await self.introspect_token(token, scope)
|
||||
raise ValueError("One of jwks or introspection must be configured")
|
||||
|
||||
async def validate_jwt_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
|
||||
async def validate_jwt_token(self, token: str, scope: dict | None = None) -> User:
|
||||
"""Validate a token using the JWT token."""
|
||||
await self._refresh_jwks()
|
||||
|
||||
|
|
@ -203,12 +175,12 @@ class OAuth2TokenAuthProvider(AuthProvider):
|
|||
# We should incorporate these into the access attributes.
|
||||
principal = claims["sub"]
|
||||
access_attributes = get_attributes_from_claims(claims, self.config.claims_mapping)
|
||||
return TokenValidationResult(
|
||||
return User(
|
||||
principal=principal,
|
||||
access_attributes=access_attributes,
|
||||
attributes=access_attributes,
|
||||
)
|
||||
|
||||
async def introspect_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
|
||||
async def introspect_token(self, token: str, scope: dict | None = None) -> User:
|
||||
"""Validate a token using token introspection as defined by RFC 7662."""
|
||||
form = {
|
||||
"token": token,
|
||||
|
|
@ -242,9 +214,9 @@ class OAuth2TokenAuthProvider(AuthProvider):
|
|||
raise ValueError("Token not active")
|
||||
principal = fields["sub"] or fields["username"]
|
||||
access_attributes = get_attributes_from_claims(fields, self.config.claims_mapping)
|
||||
return TokenValidationResult(
|
||||
return User(
|
||||
principal=principal,
|
||||
access_attributes=access_attributes,
|
||||
attributes=access_attributes,
|
||||
)
|
||||
except httpx.TimeoutException:
|
||||
logger.exception("Token introspection request timed out")
|
||||
|
|
@ -299,7 +271,7 @@ class CustomAuthProvider(AuthProvider):
|
|||
self.config = config
|
||||
self._client = None
|
||||
|
||||
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
|
||||
async def validate_token(self, token: str, scope: dict | None = None) -> User:
|
||||
"""Validate a token using the custom authentication endpoint."""
|
||||
if scope is None:
|
||||
scope = {}
|
||||
|
|
@ -341,7 +313,7 @@ class CustomAuthProvider(AuthProvider):
|
|||
try:
|
||||
response_data = response.json()
|
||||
auth_response = AuthResponse(**response_data)
|
||||
return auth_response
|
||||
return User(auth_response.principal, auth_response.attributes)
|
||||
except Exception as e:
|
||||
logger.exception("Error parsing authentication response")
|
||||
raise ValueError("Invalid authentication response format") from e
|
||||
|
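With the trimmed-down AuthResponse above, the external auth endpoint is expected to return a payload of roughly this shape, which is then turned into a User(principal, attributes). The values here are purely illustrative:

# Illustrative response body from the custom auth endpoint, parsed as AuthResponse.
auth_response_body = {
    "principal": "alice@example.com",
    "attributes": {"roles": ["admin"], "teams": ["ml-team"]},
    "message": "token accepted",
}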
|
|
|||
|
|
@ -18,7 +18,7 @@ from collections.abc import Callable
|
|||
from contextlib import asynccontextmanager
|
||||
from importlib.metadata import version as parse_version
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Any
|
||||
from typing import Annotated, Any, get_origin
|
||||
|
||||
import rich.pretty
|
||||
import yaml
|
||||
|
|
@ -26,17 +26,13 @@ from aiohttp import hdrs
|
|||
from fastapi import Body, FastAPI, HTTPException, Request
|
||||
from fastapi import Path as FastapiPath
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from openai import BadRequestError
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig
|
||||
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
|
||||
from llama_stack.distribution.request_headers import (
|
||||
PROVIDER_DATA_VAR,
|
||||
request_provider_data_context,
|
||||
)
|
||||
from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context
|
||||
from llama_stack.distribution.resolver import InvalidProviderError
|
||||
from llama_stack.distribution.server.routes import (
|
||||
find_matching_route,
|
||||
|
|
@ -217,11 +213,13 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
|
|||
async def route_handler(request: Request, **kwargs):
|
||||
# Get auth attributes from the request scope
|
||||
user_attributes = request.scope.get("user_attributes", {})
|
||||
principal = request.scope.get("principal", "")
|
||||
user = User(principal, user_attributes)
|
||||
|
||||
await log_request_pre_validation(request)
|
||||
|
||||
# Use context manager with both provider data and auth attributes
|
||||
with request_provider_data_context(request.headers, user_attributes):
|
||||
with request_provider_data_context(request.headers, user):
|
||||
is_streaming = is_streaming_request(func.__name__, request, **kwargs)
|
||||
|
||||
try:
|
||||
|
|
@ -244,15 +242,23 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
|
|||
|
||||
path_params = extract_path_params(route)
|
||||
if method == "post":
|
||||
# Annotate parameters that are in the path with Path(...) and others with Body(...)
|
||||
new_params = [new_params[0]] + [
|
||||
(
|
||||
param.replace(annotation=Annotated[param.annotation, FastapiPath(..., title=param.name)])
|
||||
if param.name in path_params
|
||||
else param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)])
|
||||
)
|
||||
for param in new_params[1:]
|
||||
]
|
||||
# Annotate parameters that are in the path with Path(...) and others with Body(...),
|
||||
# but preserve existing File() and Form() annotations for multipart form data
|
||||
new_params = (
|
||||
[new_params[0]]
|
||||
+ [
|
||||
(
|
||||
param.replace(annotation=Annotated[param.annotation, FastapiPath(..., title=param.name)])
|
||||
if param.name in path_params
|
||||
else (
|
||||
param # Keep original annotation if it's already an Annotated type
|
||||
if get_origin(param.annotation) is Annotated
|
||||
else param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)])
|
||||
)
|
||||
)
|
||||
for param in new_params[1:]
|
||||
]
|
||||
)
|
||||
|
||||
route_handler.__signature__ = sig.replace(parameters=new_params)
|
||||
|
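The signature rewrite above now leaves parameters that already carry an Annotated marker (for example File()/Form() used by multipart uploads) untouched, and only wraps the rest in Path(...) or Body(...). A small stand-alone check of the get_origin test it relies on, assuming fastapi is installed:

from typing import Annotated, get_origin

from fastapi import File, UploadFile

# Parameters like this one are already Annotated, so the route builder keeps them as-is
# instead of wrapping them in Body(...).
already_annotated = Annotated[UploadFile, File()]
plain = int

print(get_origin(already_annotated) is Annotated)  # True  -> preserved
print(get_origin(plain) is Annotated)              # False -> wrapped with Body(..., embed=True)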
||||
|
|
@ -472,17 +478,6 @@ def main(args: argparse.Namespace | None = None):
|
|||
window_seconds=window_seconds,
|
||||
)
|
||||
|
||||
# --- CORS middleware for local development ---
|
||||
# TODO: move to reverse proxy
|
||||
ui_port = os.environ.get("LLAMA_STACK_UI_PORT", 8322)
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=[f"http://localhost:{ui_port}"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
try:
|
||||
impls = asyncio.run(construct_stack(config))
|
||||
except InvalidProviderError as e:
|
||||
|
|
|
|||
|
|
@ -223,7 +223,10 @@ async def construct_stack(
|
|||
run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None
|
||||
) -> dict[Api, Any]:
|
||||
dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
|
||||
impls = await resolve_impls(run_config, provider_registry or get_provider_registry(run_config), dist_registry)
|
||||
policy = run_config.server.auth.access_policy if run_config.server.auth else []
|
||||
impls = await resolve_impls(
|
||||
run_config, provider_registry or get_provider_registry(run_config), dist_registry, policy
|
||||
)
|
||||
|
||||
# Add internal implementations after all other providers are resolved
|
||||
add_internal_implementations(impls, run_config)
|
||||
|
|
|
|||
|
|
@ -7,10 +7,6 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
|
||||
CONTAINER_OPTS=${CONTAINER_OPTS:-}
|
||||
LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
|
||||
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
|
||||
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
|
||||
PYPI_VERSION=${PYPI_VERSION:-}
|
||||
VIRTUAL_ENV=${VIRTUAL_ENV:-}
|
||||
|
|
@ -132,63 +128,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
|
|||
$env_vars \
|
||||
$other_args
|
||||
elif [[ "$env_type" == "container" ]]; then
|
||||
set -x
|
||||
|
||||
# Check if container command is available
|
||||
if ! is_command_available $CONTAINER_BINARY; then
|
||||
printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if is_command_available selinuxenabled &> /dev/null && selinuxenabled; then
|
||||
# Disable SELinux labels
|
||||
CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
|
||||
fi
|
||||
|
||||
mounts=""
|
||||
if [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):/app/llama-stack-source"
|
||||
fi
|
||||
if [ -n "$LLAMA_CHECKPOINT_DIR" ]; then
|
||||
mounts="$mounts -v $LLAMA_CHECKPOINT_DIR:/root/.llama"
|
||||
CONTAINER_OPTS="$CONTAINER_OPTS --gpus=all"
|
||||
fi
|
||||
|
||||
if [ -n "$PYPI_VERSION" ]; then
|
||||
version_tag="$PYPI_VERSION"
|
||||
elif [ -n "$LLAMA_STACK_DIR" ]; then
|
||||
version_tag="dev"
|
||||
elif [ -n "$TEST_PYPI_VERSION" ]; then
|
||||
version_tag="test-$TEST_PYPI_VERSION"
|
||||
else
|
||||
if ! is_command_available jq; then
|
||||
echo -e "${RED}Error: jq not found" >&2
|
||||
exit 1
|
||||
fi
|
||||
URL="https://pypi.org/pypi/llama-stack/json"
|
||||
version_tag=$(curl -s $URL | jq -r '.info.version')
|
||||
fi
|
||||
|
||||
# Build the command with optional yaml config
|
||||
cmd="$CONTAINER_BINARY run $CONTAINER_OPTS -it \
|
||||
-p $port:$port \
|
||||
$env_vars \
|
||||
$mounts \
|
||||
--env LLAMA_STACK_PORT=$port \
|
||||
--entrypoint python \
|
||||
$container_image:$version_tag \
|
||||
-m llama_stack.distribution.server.server"
|
||||
|
||||
# Add yaml config if provided, otherwise use default
|
||||
if [ -n "$yaml_config" ]; then
|
||||
cmd="$cmd -v $yaml_config:/app/run.yaml --config /app/run.yaml"
|
||||
else
|
||||
cmd="$cmd --config /app/run.yaml"
|
||||
fi
|
||||
|
||||
# Add any other args
|
||||
cmd="$cmd $other_args"
|
||||
|
||||
# Execute the command
|
||||
eval $cmd
|
||||
echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}"
|
||||
echo -e "Please refer to the documentation for more information: https://llama-stack.readthedocs.io/en/latest/distributions/building_distro.html#llama-stack-build"
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -23,11 +23,8 @@ from llama_stack.distribution.utils.image_types import LlamaStackImageType
|
|||
|
||||
def formulate_run_args(image_type, image_name, config, template_name) -> list:
|
||||
env_name = ""
|
||||
if image_type == LlamaStackImageType.CONTAINER.value:
|
||||
env_name = (
|
||||
f"distribution-{template_name}" if template_name else (config.container_image if config else image_name)
|
||||
)
|
||||
elif image_type == LlamaStackImageType.CONDA.value:
|
||||
|
||||
if image_type == LlamaStackImageType.CONDA.value:
|
||||
current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
|
||||
env_name = image_name or current_conda_env
|
||||
if not env_name:
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
from collections.abc import Collection, Iterator, Sequence, Set
|
||||
from logging import getLogger
|
||||
from pathlib import Path
|
||||
|
|
@ -14,7 +13,8 @@ from typing import (
|
|||
)
|
||||
|
||||
import tiktoken
|
||||
from tiktoken.load import load_tiktoken_bpe
|
||||
|
||||
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
|
@ -48,19 +48,20 @@ class Tokenizer:
|
|||
global _INSTANCE
|
||||
|
||||
if _INSTANCE is None:
|
||||
_INSTANCE = Tokenizer(os.path.join(os.path.dirname(__file__), "tokenizer.model"))
|
||||
_INSTANCE = Tokenizer(Path(__file__).parent / "tokenizer.model")
|
||||
return _INSTANCE
|
||||
|
||||
def __init__(self, model_path: str):
|
||||
def __init__(self, model_path: Path):
|
||||
"""
|
||||
Initializes the Tokenizer with a Tiktoken model.
|
||||
|
||||
Args:
|
||||
model_path (str): The path to the Tiktoken model file.
|
||||
"""
|
||||
assert os.path.isfile(model_path), model_path
|
||||
if not model_path.exists():
|
||||
raise FileNotFoundError(f"Tokenizer model file not found: {model_path}")
|
||||
|
||||
mergeable_ranks = load_tiktoken_bpe(model_path)
|
||||
mergeable_ranks = load_bpe_file(model_path)
|
||||
num_base_tokens = len(mergeable_ranks)
|
||||
special_tokens = [
|
||||
"<|begin_of_text|>",
|
||||
|
|
@ -83,7 +84,7 @@ class Tokenizer:
|
|||
|
||||
self.special_tokens = {token: num_base_tokens + i for i, token in enumerate(special_tokens)}
|
||||
self.model = tiktoken.Encoding(
|
||||
name=Path(model_path).name,
|
||||
name=model_path.name,
|
||||
pat_str=self.pat_str,
|
||||
mergeable_ranks=mergeable_ranks,
|
||||
special_tokens=self.special_tokens,
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
from collections.abc import Collection, Iterator, Sequence, Set
|
||||
from logging import getLogger
|
||||
from pathlib import Path
|
||||
|
|
@ -14,7 +13,8 @@ from typing import (
|
|||
)
|
||||
|
||||
import tiktoken
|
||||
from tiktoken.load import load_tiktoken_bpe
|
||||
|
||||
from llama_stack.models.llama.tokenizer_utils import load_bpe_file
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
|
@ -118,19 +118,20 @@ class Tokenizer:
|
|||
global _INSTANCE
|
||||
|
||||
if _INSTANCE is None:
|
||||
_INSTANCE = Tokenizer(os.path.join(os.path.dirname(__file__), "tokenizer.model"))
|
||||
_INSTANCE = Tokenizer(Path(__file__).parent / "tokenizer.model")
|
||||
return _INSTANCE
|
||||
|
||||
def __init__(self, model_path: str):
|
||||
def __init__(self, model_path: Path):
|
||||
"""
|
||||
Initializes the Tokenizer with a Tiktoken model.
|
||||
|
||||
Args:
|
||||
model_path (str): The path to the Tiktoken model file.
|
||||
model_path (Path): The path to the Tiktoken model file.
|
||||
"""
|
||||
assert os.path.isfile(model_path), model_path
|
||||
if not model_path.exists():
|
||||
raise FileNotFoundError(f"Tokenizer model file not found: {model_path}")
|
||||
|
||||
mergeable_ranks = load_tiktoken_bpe(model_path)
|
||||
mergeable_ranks = load_bpe_file(model_path)
|
||||
num_base_tokens = len(mergeable_ranks)
|
||||
|
||||
special_tokens = BASIC_SPECIAL_TOKENS + LLAMA4_SPECIAL_TOKENS
|
||||
|
|
@ -144,7 +145,7 @@ class Tokenizer:
|
|||
|
||||
self.special_tokens = {token: num_base_tokens + i for i, token in enumerate(special_tokens)}
|
||||
self.model = tiktoken.Encoding(
|
||||
name=Path(model_path).name,
|
||||
name=model_path.name,
|
||||
pat_str=self.O200K_PATTERN,
|
||||
mergeable_ranks=mergeable_ranks,
|
||||
special_tokens=self.special_tokens,
|
||||
|
|
|
|||
40
llama_stack/models/llama/tokenizer_utils.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(__name__, "tokenizer_utils")
|
||||
|
||||
|
||||
def load_bpe_file(model_path: Path) -> dict[bytes, int]:
|
||||
"""
|
||||
Load BPE file directly and return mergeable ranks.
|
||||
|
||||
Args:
|
||||
model_path (Path): Path to the BPE model file.
|
||||
|
||||
Returns:
|
||||
dict[bytes, int]: Dictionary mapping byte sequences to their ranks.
|
||||
"""
|
||||
mergeable_ranks = {}
|
||||
|
||||
with open(model_path, encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
||||
for line in content.splitlines():
|
||||
if not line.strip(): # Skip empty lines
|
||||
continue
|
||||
try:
|
||||
token, rank = line.split()
|
||||
mergeable_ranks[base64.b64decode(token)] = int(rank)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to parse line '{line}': {e}")
|
||||
continue
|
||||
|
||||
return mergeable_ranks
|
||||
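The new load_bpe_file helper replaces tiktoken's load_tiktoken_bpe with a direct parse of "base64-token rank" lines. A tiny self-contained round trip showing the file format it expects (the temporary path and token contents are made up for illustration):

import base64
from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as tmp:
    model_path = Path(tmp) / "tokenizer.model"
    # Each line is "<base64-encoded token bytes> <rank>"
    model_path.write_text(
        f"{base64.b64encode(b'hello').decode()} 0\n"
        f"{base64.b64encode(b' world').decode()} 1\n",
        encoding="utf-8",
    )

    mergeable_ranks = {}
    for line in model_path.read_text(encoding="utf-8").splitlines():
        if not line.strip():
            continue
        token, rank = line.split()
        mergeable_ranks[base64.b64decode(token)] = int(rank)

    assert mergeable_ranks == {b"hello": 0, b" world": 1}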
|
|
@ -6,12 +6,12 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.distribution.datatypes import Api
|
||||
from llama_stack.distribution.datatypes import AccessRule, Api
|
||||
|
||||
from .config import MetaReferenceAgentsImplConfig
|
||||
|
||||
|
||||
async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any]):
|
||||
async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any], policy: list[AccessRule]):
|
||||
from .agents import MetaReferenceAgentsImpl
|
||||
|
||||
impl = MetaReferenceAgentsImpl(
|
||||
|
|
@ -21,6 +21,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
|
|||
deps[Api.safety],
|
||||
deps[Api.tool_runtime],
|
||||
deps[Api.tool_groups],
|
||||
policy,
|
||||
)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ from llama_stack.apis.inference import (
|
|||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.distribution.datatypes import AccessRule
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.datatypes import (
|
||||
BuiltinTool,
|
||||
|
|
@ -96,13 +97,14 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
vector_io_api: VectorIO,
|
||||
persistence_store: KVStore,
|
||||
created_at: str,
|
||||
policy: list[AccessRule],
|
||||
):
|
||||
self.agent_id = agent_id
|
||||
self.agent_config = agent_config
|
||||
self.inference_api = inference_api
|
||||
self.safety_api = safety_api
|
||||
self.vector_io_api = vector_io_api
|
||||
self.storage = AgentPersistence(agent_id, persistence_store)
|
||||
self.storage = AgentPersistence(agent_id, persistence_store, policy)
|
||||
self.tool_runtime_api = tool_runtime_api
|
||||
self.tool_groups_api = tool_groups_api
|
||||
self.created_at = created_at
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ from llama_stack.apis.agents import (
|
|||
Session,
|
||||
Turn,
|
||||
)
|
||||
from llama_stack.apis.agents.openai_responses import OpenAIResponseText
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.apis.inference import (
|
||||
Inference,
|
||||
|
|
@ -40,6 +41,7 @@ from llama_stack.apis.inference import (
|
|||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.distribution.datatypes import AccessRule
|
||||
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
|
||||
from llama_stack.providers.utils.pagination import paginate_records
|
||||
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
|
||||
|
|
@ -61,6 +63,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
safety_api: Safety,
|
||||
tool_runtime_api: ToolRuntime,
|
||||
tool_groups_api: ToolGroups,
|
||||
policy: list[AccessRule],
|
||||
):
|
||||
self.config = config
|
||||
self.inference_api = inference_api
|
||||
|
|
@ -71,6 +74,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
|
||||
self.in_memory_store = InmemoryKVStoreImpl()
|
||||
self.openai_responses_impl: OpenAIResponsesImpl | None = None
|
||||
self.policy = policy
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.persistence_store = await kvstore_impl(self.config.persistence_store)
|
||||
|
|
@ -129,6 +133,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
|
||||
),
|
||||
created_at=agent_info.created_at,
|
||||
policy=self.policy,
|
||||
)
|
||||
|
||||
async def create_agent_session(
|
||||
|
|
@ -324,10 +329,12 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
store: bool | None = True,
|
||||
stream: bool | None = False,
|
||||
temperature: float | None = None,
|
||||
text: OpenAIResponseText | None = None,
|
||||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
max_infer_iters: int | None = 10,
|
||||
) -> OpenAIResponseObject:
|
||||
return await self.openai_responses_impl.create_openai_response(
|
||||
input, model, instructions, previous_response_id, store, stream, temperature, tools
|
||||
input, model, instructions, previous_response_id, store, stream, temperature, text, tools, max_infer_iters
|
||||
)
|
||||
|
||||
async def list_openai_responses(
|
||||
|
|
|
|||
|
|
@ -37,6 +37,8 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseOutputMessageFunctionToolCall,
|
||||
OpenAIResponseOutputMessageMCPListTools,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
OpenAIResponseText,
|
||||
OpenAIResponseTextFormat,
|
||||
)
|
||||
from llama_stack.apis.inference.inference import (
|
||||
Inference,
|
||||
|
|
@ -50,7 +52,12 @@ from llama_stack.apis.inference.inference import (
|
|||
OpenAIChoice,
|
||||
OpenAIDeveloperMessageParam,
|
||||
OpenAIImageURL,
|
||||
OpenAIJSONSchema,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseFormatJSONObject,
|
||||
OpenAIResponseFormatJSONSchema,
|
||||
OpenAIResponseFormatParam,
|
||||
OpenAIResponseFormatText,
|
||||
OpenAISystemMessageParam,
|
||||
OpenAIToolMessageParam,
|
||||
OpenAIUserMessageParam,
|
||||
|
|
@ -158,6 +165,21 @@ async def _convert_chat_choice_to_response_message(choice: OpenAIChoice) -> Open
|
|||
)
|
||||
|
||||
|
||||
async def _convert_response_text_to_chat_response_format(text: OpenAIResponseText) -> OpenAIResponseFormatParam:
|
||||
"""
|
||||
Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format.
|
||||
"""
|
||||
if not text.format or text.format["type"] == "text":
|
||||
return OpenAIResponseFormatText(type="text")
|
||||
if text.format["type"] == "json_object":
|
||||
return OpenAIResponseFormatJSONObject()
|
||||
if text.format["type"] == "json_schema":
|
||||
return OpenAIResponseFormatJSONSchema(
|
||||
json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"])
|
||||
)
|
||||
raise ValueError(f"Unsupported text format: {text.format}")
|
||||
|
||||
|
||||
async def _get_message_type_by_role(role: str):
|
||||
role_to_type = {
|
||||
"user": OpenAIUserMessageParam,
|
||||
|
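To make the new text parameter concrete, here is roughly how the three supported formats map onto chat-completion response formats. The dict shapes follow the OpenAIResponseTextFormat TypedDict, and the mapping mirrors _convert_response_text_to_chat_response_format; the schema contents are illustrative:

# Sketched as plain dicts rather than the pydantic models.
text_plain = {"format": {"type": "text"}}           # -> {"type": "text"}
text_json = {"format": {"type": "json_object"}}     # -> {"type": "json_object"}
text_schema = {
    "format": {
        "type": "json_schema",
        "name": "capital_info",
        "schema": {
            "type": "object",
            "properties": {"capital": {"type": "string"}},
            "required": ["capital"],
        },
        "strict": True,
    }
}
# -> {"type": "json_schema", "json_schema": {"name": "capital_info", "schema": {...}}}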
|
@ -180,6 +202,7 @@ class ChatCompletionContext(BaseModel):
|
|||
mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP]
|
||||
stream: bool
|
||||
temperature: float | None
|
||||
response_format: OpenAIResponseFormatParam
|
||||
|
||||
|
||||
class OpenAIResponsesImpl:
|
||||
|
|
@ -258,6 +281,18 @@ class OpenAIResponsesImpl:
|
|||
"""
|
||||
return await self.responses_store.list_response_input_items(response_id, after, before, include, limit, order)
|
||||
|
||||
def _is_function_tool_call(
|
||||
self,
|
||||
tool_call: OpenAIChatCompletionToolCall,
|
||||
tools: list[OpenAIResponseInputTool],
|
||||
) -> bool:
|
||||
if not tool_call.function:
|
||||
return False
|
||||
for t in tools:
|
||||
if t.type == "function" and t.name == tool_call.function.name:
|
||||
return True
|
||||
return False
|
||||
|
||||
async def _process_response_choices(
|
||||
self,
|
||||
chat_response: OpenAIChatCompletion,
|
||||
|
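The new _is_function_tool_call helper keys off the tool list supplied with the request rather than assuming the first tool's type applies to every call. A stripped-down equivalent, using simple stand-in objects instead of the real pydantic types:

from types import SimpleNamespace


def is_function_tool_call(tool_call, tools) -> bool:
    # A call counts as a "function" call only if it names a function tool
    # that was actually passed in the request.
    if not tool_call.function:
        return False
    return any(t.type == "function" and t.name == tool_call.function.name for t in tools)


tools = [
    SimpleNamespace(type="function", name="get_weather"),
    SimpleNamespace(type="web_search", name=None),
]
call = SimpleNamespace(function=SimpleNamespace(name="get_weather"))
print(is_function_tool_call(call, tools))  # True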
|
@ -270,7 +305,7 @@ class OpenAIResponsesImpl:
|
|||
for choice in chat_response.choices:
|
||||
if choice.message.tool_calls and tools:
|
||||
# Assume if the first tool is a function, all tools are functions
|
||||
if tools[0].type == "function":
|
||||
if self._is_function_tool_call(choice.message.tool_calls[0], tools):
|
||||
for tool_call in choice.message.tool_calls:
|
||||
output_messages.append(
|
||||
OpenAIResponseOutputMessageFunctionToolCall(
|
||||
|
|
@ -331,9 +366,12 @@ class OpenAIResponsesImpl:
|
|||
store: bool | None = True,
|
||||
stream: bool | None = False,
|
||||
temperature: float | None = None,
|
||||
text: OpenAIResponseText | None = None,
|
||||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
max_infer_iters: int | None = 10,
|
||||
):
|
||||
stream = False if stream is None else stream
|
||||
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
|
||||
|
||||
output_messages: list[OpenAIResponseOutput] = []
|
||||
|
||||
|
|
@ -342,6 +380,9 @@ class OpenAIResponsesImpl:
|
|||
messages = await _convert_response_input_to_chat_messages(input)
|
||||
await self._prepend_instructions(messages, instructions)
|
||||
|
||||
# Structured outputs
|
||||
response_format = await _convert_response_text_to_chat_response_format(text)
|
||||
|
||||
# Tool setup
|
||||
chat_tools, mcp_tool_to_server, mcp_list_message = (
|
||||
await self._convert_response_tools_to_chat_tools(tools) if tools else (None, {}, None)
|
||||
|
|
@ -356,65 +397,111 @@ class OpenAIResponsesImpl:
|
|||
mcp_tool_to_server=mcp_tool_to_server,
|
||||
stream=stream,
|
||||
temperature=temperature,
|
||||
response_format=response_format,
|
||||
)
|
||||
|
||||
inference_result = await self.inference_api.openai_chat_completion(
|
||||
model=model,
|
||||
messages=messages,
|
||||
tools=chat_tools,
|
||||
stream=stream,
|
||||
temperature=temperature,
|
||||
)
|
||||
|
||||
# Fork to streaming vs non-streaming - let each handle ALL inference rounds
|
||||
if stream:
|
||||
return self._create_streaming_response(
|
||||
inference_result=inference_result,
|
||||
ctx=ctx,
|
||||
output_messages=output_messages,
|
||||
input=input,
|
||||
model=model,
|
||||
store=store,
|
||||
text=text,
|
||||
tools=tools,
|
||||
max_infer_iters=max_infer_iters,
|
||||
)
|
||||
else:
|
||||
return await self._create_non_streaming_response(
|
||||
inference_result=inference_result,
|
||||
ctx=ctx,
|
||||
output_messages=output_messages,
|
||||
input=input,
|
||||
model=model,
|
||||
store=store,
|
||||
text=text,
|
||||
tools=tools,
|
||||
max_infer_iters=max_infer_iters,
|
||||
)
|
||||
|
||||
async def _create_non_streaming_response(
|
||||
self,
|
||||
inference_result: Any,
|
||||
ctx: ChatCompletionContext,
|
||||
output_messages: list[OpenAIResponseOutput],
|
||||
input: str | list[OpenAIResponseInput],
|
||||
model: str,
|
||||
store: bool | None,
|
||||
text: OpenAIResponseText,
|
||||
tools: list[OpenAIResponseInputTool] | None,
|
||||
max_infer_iters: int,
|
||||
) -> OpenAIResponseObject:
|
||||
chat_response = OpenAIChatCompletion(**inference_result.model_dump())
|
||||
n_iter = 0
|
||||
messages = ctx.messages.copy()
|
||||
|
||||
# Process response choices (tool execution and message creation)
|
||||
output_messages.extend(
|
||||
await self._process_response_choices(
|
||||
chat_response=chat_response,
|
||||
ctx=ctx,
|
||||
tools=tools,
|
||||
while True:
|
||||
# Do inference (including the first one)
|
||||
inference_result = await self.inference_api.openai_chat_completion(
|
||||
model=ctx.model,
|
||||
messages=messages,
|
||||
tools=ctx.tools,
|
||||
stream=False,
|
||||
temperature=ctx.temperature,
|
||||
response_format=ctx.response_format,
|
||||
)
|
||||
)
|
||||
completion = OpenAIChatCompletion(**inference_result.model_dump())
|
||||
|
||||
# Separate function vs non-function tool calls
|
||||
function_tool_calls = []
|
||||
non_function_tool_calls = []
|
||||
|
||||
for choice in completion.choices:
|
||||
if choice.message.tool_calls and tools:
|
||||
for tool_call in choice.message.tool_calls:
|
||||
if self._is_function_tool_call(tool_call, tools):
|
||||
function_tool_calls.append(tool_call)
|
||||
else:
|
||||
non_function_tool_calls.append(tool_call)
|
||||
|
||||
# Process response choices based on tool call types
|
||||
if function_tool_calls:
|
||||
# For function tool calls, use existing logic and return immediately
|
||||
current_output_messages = await self._process_response_choices(
|
||||
chat_response=completion,
|
||||
ctx=ctx,
|
||||
tools=tools,
|
||||
)
|
||||
output_messages.extend(current_output_messages)
|
||||
break
|
||||
elif non_function_tool_calls:
|
||||
# For non-function tool calls, execute them and continue loop
|
||||
for choice in completion.choices:
|
||||
tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx)
|
||||
output_messages.extend(tool_outputs)
|
||||
|
||||
# Add assistant message and tool responses to messages for next iteration
|
||||
messages.append(choice.message)
|
||||
messages.extend(tool_response_messages)
|
||||
|
||||
n_iter += 1
|
||||
if n_iter >= max_infer_iters:
|
||||
break
|
||||
|
||||
# Continue with next iteration of the loop
|
||||
continue
|
||||
else:
|
||||
# No tool calls - convert response to message and we're done
|
||||
for choice in completion.choices:
|
||||
output_messages.append(await _convert_chat_choice_to_response_message(choice))
|
||||
break
|
||||
|
||||
response = OpenAIResponseObject(
|
||||
created_at=chat_response.created,
|
||||
created_at=completion.created,
|
||||
id=f"resp-{uuid.uuid4()}",
|
||||
model=model,
|
||||
object="response",
|
||||
status="completed",
|
||||
output=output_messages,
|
||||
text=text,
|
||||
)
|
||||
logger.debug(f"OpenAI Responses response: {response}")
|
||||
|
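The non-streaming path now drives the whole loop itself: run one chat completion, execute non-function tool calls and feed their outputs back in, and stop on function calls, plain messages, or after max_infer_iters rounds. A compressed, hypothetical skeleton of that control flow, with the inference and tool helpers injected as callables rather than the provider's real methods:

# Hypothetical skeleton only; complete_chat, classify_calls and execute_tools stand in
# for openai_chat_completion, _is_function_tool_call and tool execution respectively.
async def run_until_done(complete_chat, classify_calls, execute_tools,
                         messages, tools, max_infer_iters=10):
    output_messages, n_iter = [], 0
    while True:
        completion = await complete_chat(messages, tools)          # one inference round
        function_calls, other_calls = classify_calls(completion, tools)
        if function_calls:
            output_messages.extend(function_calls)                  # surfaced to the caller
            break
        if other_calls:
            tool_outputs, tool_messages = await execute_tools(other_calls)
            output_messages.extend(tool_outputs)
            messages.append(completion.choices[0].message)
            messages.extend(tool_messages)
            n_iter += 1
            if n_iter >= max_infer_iters:
                break
            continue
        output_messages.append(completion.choices[0].message)       # no tool calls: done
        break
    return output_messages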
||||
|
|
@ -429,13 +516,14 @@ class OpenAIResponsesImpl:
|
|||
|
||||
async def _create_streaming_response(
|
||||
self,
|
||||
inference_result: Any,
|
||||
ctx: ChatCompletionContext,
|
||||
output_messages: list[OpenAIResponseOutput],
|
||||
input: str | list[OpenAIResponseInput],
|
||||
model: str,
|
||||
store: bool | None,
|
||||
text: OpenAIResponseText,
|
||||
tools: list[OpenAIResponseInputTool] | None,
|
||||
max_infer_iters: int | None,
|
||||
) -> AsyncIterator[OpenAIResponseObjectStream]:
|
||||
# Create initial response and emit response.created immediately
|
||||
response_id = f"resp-{uuid.uuid4()}"
|
||||
|
|
@ -448,92 +536,141 @@ class OpenAIResponsesImpl:
|
|||
object="response",
|
||||
status="in_progress",
|
||||
output=output_messages.copy(),
|
||||
text=text,
|
||||
)
|
||||
|
||||
# Emit response.created immediately
|
||||
yield OpenAIResponseObjectStreamResponseCreated(response=initial_response)
|
||||
|
||||
# For streaming, inference_result is an async iterator of chunks
|
||||
# Stream chunks and emit delta events as they arrive
|
||||
chat_response_id = ""
|
||||
chat_response_content = []
|
||||
chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
|
||||
chunk_created = 0
|
||||
chunk_model = ""
|
||||
chunk_finish_reason = ""
|
||||
sequence_number = 0
|
||||
# Implement tool execution loop for streaming - handle ALL inference rounds including the first
|
||||
n_iter = 0
|
||||
messages = ctx.messages.copy()
|
||||
|
||||
# Create a placeholder message item for delta events
|
||||
message_item_id = f"msg_{uuid.uuid4()}"
|
||||
|
||||
async for chunk in inference_result:
|
||||
chat_response_id = chunk.id
|
||||
chunk_created = chunk.created
|
||||
chunk_model = chunk.model
|
||||
for chunk_choice in chunk.choices:
|
||||
# Emit incremental text content as delta events
|
||||
if chunk_choice.delta.content:
|
||||
sequence_number += 1
|
||||
yield OpenAIResponseObjectStreamResponseOutputTextDelta(
|
||||
content_index=0,
|
||||
delta=chunk_choice.delta.content,
|
||||
item_id=message_item_id,
|
||||
output_index=0,
|
||||
sequence_number=sequence_number,
|
||||
)
|
||||
|
||||
# Collect content for final response
|
||||
chat_response_content.append(chunk_choice.delta.content or "")
|
||||
if chunk_choice.finish_reason:
|
||||
chunk_finish_reason = chunk_choice.finish_reason
|
||||
|
||||
# Aggregate tool call arguments across chunks, using their index as the aggregation key
|
||||
if chunk_choice.delta.tool_calls:
|
||||
for tool_call in chunk_choice.delta.tool_calls:
|
||||
response_tool_call = chat_response_tool_calls.get(tool_call.index, None)
|
||||
if response_tool_call:
|
||||
# Don't attempt to concatenate arguments if we don't have any new arguments
|
||||
if tool_call.function.arguments:
|
||||
# Guard against an initial None argument before we concatenate
|
||||
response_tool_call.function.arguments = (
|
||||
response_tool_call.function.arguments or ""
|
||||
) + tool_call.function.arguments
|
||||
else:
|
||||
tool_call_dict: dict[str, Any] = tool_call.model_dump()
|
||||
tool_call_dict.pop("type", None)
|
||||
response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict)
|
||||
chat_response_tool_calls[tool_call.index] = response_tool_call
|
||||
|
||||
# Convert collected chunks to complete response
|
||||
if chat_response_tool_calls:
|
||||
tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
|
||||
else:
|
||||
tool_calls = None
|
||||
assistant_message = OpenAIAssistantMessageParam(
|
||||
content="".join(chat_response_content),
|
||||
tool_calls=tool_calls,
|
||||
)
|
||||
chat_response_obj = OpenAIChatCompletion(
|
||||
id=chat_response_id,
|
||||
choices=[
|
||||
OpenAIChoice(
|
||||
message=assistant_message,
|
||||
finish_reason=chunk_finish_reason,
|
||||
index=0,
|
||||
)
|
||||
],
|
||||
created=chunk_created,
|
||||
model=chunk_model,
|
||||
)
|
||||
|
||||
# Process response choices (tool execution and message creation)
|
||||
output_messages.extend(
|
||||
await self._process_response_choices(
|
||||
chat_response=chat_response_obj,
|
||||
ctx=ctx,
|
||||
tools=tools,
|
||||
while True:
|
||||
current_inference_result = await self.inference_api.openai_chat_completion(
|
||||
model=ctx.model,
|
||||
messages=messages,
|
||||
tools=ctx.tools,
|
||||
stream=True,
|
||||
temperature=ctx.temperature,
|
||||
response_format=ctx.response_format,
|
||||
)
|
||||
)
|
||||
|
||||
# Process streaming chunks and build complete response
|
||||
chat_response_id = ""
|
||||
chat_response_content = []
|
||||
chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
|
||||
chunk_created = 0
|
||||
chunk_model = ""
|
||||
chunk_finish_reason = ""
|
||||
sequence_number = 0
|
||||
|
||||
# Create a placeholder message item for delta events
|
||||
message_item_id = f"msg_{uuid.uuid4()}"
|
||||
|
||||
async for chunk in current_inference_result:
|
||||
chat_response_id = chunk.id
|
||||
chunk_created = chunk.created
|
||||
chunk_model = chunk.model
|
||||
for chunk_choice in chunk.choices:
|
||||
# Emit incremental text content as delta events
|
||||
if chunk_choice.delta.content:
|
||||
sequence_number += 1
|
||||
yield OpenAIResponseObjectStreamResponseOutputTextDelta(
|
||||
content_index=0,
|
||||
delta=chunk_choice.delta.content,
|
||||
item_id=message_item_id,
|
||||
output_index=0,
|
||||
sequence_number=sequence_number,
|
||||
)
|
||||
|
||||
# Collect content for final response
|
||||
chat_response_content.append(chunk_choice.delta.content or "")
|
||||
if chunk_choice.finish_reason:
|
||||
chunk_finish_reason = chunk_choice.finish_reason
|
||||
|
||||
# Aggregate tool call arguments across chunks
|
||||
if chunk_choice.delta.tool_calls:
|
||||
for tool_call in chunk_choice.delta.tool_calls:
|
||||
response_tool_call = chat_response_tool_calls.get(tool_call.index, None)
|
||||
if response_tool_call:
|
||||
# Don't attempt to concatenate arguments if we don't have any new arguments

|
||||
if tool_call.function.arguments:
|
||||
# Guard against an initial None argument before we concatenate
|
||||
response_tool_call.function.arguments = (
|
||||
response_tool_call.function.arguments or ""
|
||||
) + tool_call.function.arguments
|
||||
else:
|
||||
tool_call_dict: dict[str, Any] = tool_call.model_dump()
|
||||
tool_call_dict.pop("type", None)
|
||||
response_tool_call = OpenAIChatCompletionToolCall(**tool_call_dict)
|
||||
chat_response_tool_calls[tool_call.index] = response_tool_call
|
||||
|
||||
# Convert collected chunks to complete response
|
||||
if chat_response_tool_calls:
|
||||
tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
|
||||
else:
|
||||
tool_calls = None
|
||||
assistant_message = OpenAIAssistantMessageParam(
|
||||
content="".join(chat_response_content),
|
||||
tool_calls=tool_calls,
|
||||
)
|
||||
current_response = OpenAIChatCompletion(
|
||||
id=chat_response_id,
|
||||
choices=[
|
||||
OpenAIChoice(
|
||||
message=assistant_message,
|
||||
finish_reason=chunk_finish_reason,
|
||||
index=0,
|
||||
)
|
||||
],
|
||||
created=chunk_created,
|
||||
model=chunk_model,
|
||||
)
|
||||
|
||||
# Separate function vs non-function tool calls
|
||||
function_tool_calls = []
|
||||
non_function_tool_calls = []
|
||||
|
||||
for choice in current_response.choices:
|
||||
if choice.message.tool_calls and tools:
|
||||
for tool_call in choice.message.tool_calls:
|
||||
if self._is_function_tool_call(tool_call, tools):
|
||||
function_tool_calls.append(tool_call)
|
||||
else:
|
||||
non_function_tool_calls.append(tool_call)
|
||||
|
||||
# Process response choices based on tool call types
|
||||
if function_tool_calls:
|
||||
# For function tool calls, use existing logic and break
|
||||
current_output_messages = await self._process_response_choices(
|
||||
chat_response=current_response,
|
||||
ctx=ctx,
|
||||
tools=tools,
|
||||
)
|
||||
output_messages.extend(current_output_messages)
|
||||
break
|
||||
elif non_function_tool_calls:
|
||||
# For non-function tool calls, execute them and continue loop
|
||||
for choice in current_response.choices:
|
||||
tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx)
|
||||
output_messages.extend(tool_outputs)
|
||||
|
||||
# Add assistant message and tool responses to messages for next iteration
|
||||
messages.append(choice.message)
|
||||
messages.extend(tool_response_messages)
|
||||
|
||||
n_iter += 1
|
||||
if n_iter >= (max_infer_iters or 10):
|
||||
break
|
||||
|
||||
# Continue with next iteration of the loop
|
||||
continue
|
||||
else:
|
||||
# No tool calls - convert response to message and we're done
|
||||
for choice in current_response.choices:
|
||||
output_messages.append(await _convert_chat_choice_to_response_message(choice))
|
||||
break
|
||||
|
||||
# Create final response
|
||||
final_response = OpenAIResponseObject(
|
||||
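Within each streaming round, tool-call argument fragments arrive across chunks and are stitched together by their index before the assembled completion is classified. A small self-contained illustration of that aggregation step, with plain dicts standing in for the chunk delta objects:

# Chunks as they might arrive from a streamed chat completion, reduced to the
# fields the aggregation cares about (index, function name, argument fragment).
chunk_deltas = [
    {"index": 0, "name": "get_weather", "arguments": '{"city": '},
    {"index": 0, "name": None,          "arguments": '"Paris"}'},
]

tool_calls: dict[int, dict] = {}
for delta in chunk_deltas:
    existing = tool_calls.get(delta["index"])
    if existing is None:
        tool_calls[delta["index"]] = {"name": delta["name"], "arguments": delta["arguments"] or ""}
    elif delta["arguments"]:
        existing["arguments"] = (existing["arguments"] or "") + delta["arguments"]

assert tool_calls[0] == {"name": "get_weather", "arguments": '{"city": "Paris"}'}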
|
|
@ -542,6 +679,7 @@ class OpenAIResponsesImpl:
|
|||
model=model,
|
||||
object="response",
|
||||
status="completed",
|
||||
text=text,
|
||||
output=output_messages,
|
||||
)
|
||||
|
||||
|
|
@ -646,6 +784,30 @@ class OpenAIResponsesImpl:
|
|||
raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}")
|
||||
return chat_tools, mcp_tool_to_server, mcp_list_message
|
||||
|
||||
async def _execute_tool_calls_only(
|
||||
self,
|
||||
choice: OpenAIChoice,
|
||||
ctx: ChatCompletionContext,
|
||||
) -> tuple[list[OpenAIResponseOutput], list[OpenAIMessageParam]]:
|
||||
"""Execute tool calls and return output messages and tool response messages for next inference."""
|
||||
output_messages: list[OpenAIResponseOutput] = []
|
||||
tool_response_messages: list[OpenAIMessageParam] = []
|
||||
|
||||
if not isinstance(choice.message, OpenAIAssistantMessageParam):
|
||||
return output_messages, tool_response_messages
|
||||
|
||||
if not choice.message.tool_calls:
|
||||
return output_messages, tool_response_messages
|
||||
|
||||
for tool_call in choice.message.tool_calls:
|
||||
tool_call_log, further_input = await self._execute_tool_call(tool_call, ctx)
|
||||
if tool_call_log:
|
||||
output_messages.append(tool_call_log)
|
||||
if further_input:
|
||||
tool_response_messages.append(further_input)
|
||||
|
||||
return output_messages, tool_response_messages
|
||||
|
||||
async def _execute_tool_and_return_final_output(
|
||||
self,
|
||||
choice: OpenAIChoice,
|
||||
|
|
@ -772,5 +934,8 @@ class OpenAIResponsesImpl:
|
|||
else:
|
||||
raise ValueError(f"Unknown result content type: {type(result.content)}")
|
||||
input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id)
|
||||
else:
|
||||
text = str(error_exc)
|
||||
input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
|
||||
|
||||
return message, input_message
|
||||
|
|
|
|||
|
|
@ -10,9 +10,10 @@ import uuid
|
|||
from datetime import datetime, timezone
|
||||
|
||||
from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn
|
||||
from llama_stack.distribution.access_control import check_access
|
||||
from llama_stack.distribution.datatypes import AccessAttributes
|
||||
from llama_stack.distribution.request_headers import get_auth_attributes
|
||||
from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
|
||||
from llama_stack.distribution.access_control.datatypes import AccessRule
|
||||
from llama_stack.distribution.datatypes import User
|
||||
from llama_stack.distribution.request_headers import get_authenticated_user
|
||||
from llama_stack.providers.utils.kvstore import KVStore
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
|
@ -22,7 +23,9 @@ class AgentSessionInfo(Session):
|
|||
# TODO: is this used anywhere?
|
||||
vector_db_id: str | None = None
|
||||
started_at: datetime
|
||||
access_attributes: AccessAttributes | None = None
|
||||
owner: User | None = None
|
||||
identifier: str | None = None
|
||||
type: str = "session"
|
||||
|
||||
|
||||
class AgentInfo(AgentConfig):
|
||||
|
|
@ -30,24 +33,27 @@ class AgentInfo(AgentConfig):
|
|||
|
||||
|
||||
class AgentPersistence:
|
||||
def __init__(self, agent_id: str, kvstore: KVStore):
|
||||
def __init__(self, agent_id: str, kvstore: KVStore, policy: list[AccessRule]):
|
||||
self.agent_id = agent_id
|
||||
self.kvstore = kvstore
|
||||
self.policy = policy
|
||||
|
||||
async def create_session(self, name: str) -> str:
|
||||
session_id = str(uuid.uuid4())
|
||||
|
||||
# Get current user's auth attributes for new sessions
|
||||
auth_attributes = get_auth_attributes()
|
||||
access_attributes = AccessAttributes(**auth_attributes) if auth_attributes else None
|
||||
user = get_authenticated_user()
|
||||
|
||||
session_info = AgentSessionInfo(
|
||||
session_id=session_id,
|
||||
session_name=name,
|
||||
started_at=datetime.now(timezone.utc),
|
||||
access_attributes=access_attributes,
|
||||
owner=user,
|
||||
turns=[],
|
||||
identifier=name, # should this be qualified in any way?
|
||||
)
|
||||
if not is_action_allowed(self.policy, "create", session_info, user):
|
||||
raise AccessDeniedError()
|
||||
|
||||
await self.kvstore.set(
|
||||
key=f"session:{self.agent_id}:{session_id}",
|
||||
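Concretely, a newly created session record now carries the creator as owner (instead of copied access attributes) plus an identifier and type, so the policy can evaluate it like any other resource before it is persisted. A sketch of the resulting shape, using plain dicts in place of the pydantic models; all values here are made up:

# Illustrative only: field names mirror AgentSessionInfo above.
session_info = {
    "session_id": "6f6d2c1e-...",            # uuid4
    "session_name": "demo",
    "started_at": "2025-05-30T12:00:00+00:00",
    "owner": {"principal": "alice", "attributes": {"roles": ["admin"]}},
    "turns": [],
    "identifier": "demo",                     # used by the access-control policy
    "type": "session",
}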
|
|
@ -73,10 +79,10 @@ class AgentPersistence:
|
|||
def _check_session_access(self, session_info: AgentSessionInfo) -> bool:
|
||||
"""Check if current user has access to the session."""
|
||||
# Handle backward compatibility for old sessions without access control
|
||||
if not hasattr(session_info, "access_attributes"):
|
||||
if not hasattr(session_info, "access_attributes") and not hasattr(session_info, "owner"):
|
||||
return True
|
||||
|
||||
return check_access(session_info.session_id, session_info.access_attributes, get_auth_attributes())
|
||||
return is_action_allowed(self.policy, "read", session_info, get_authenticated_user())
|
||||
|
||||
async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None:
|
||||
"""Get session info if the user has access to it. For internal use by sub-session methods."""
|
||||
|
|
|
|||
20
llama_stack/providers/inline/files/localfs/__init__.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.distribution.datatypes import Api
|
||||
|
||||
from .config import LocalfsFilesImplConfig
|
||||
from .files import LocalfsFilesImpl
|
||||
|
||||
__all__ = ["LocalfsFilesImpl", "LocalfsFilesImplConfig"]
|
||||
|
||||
|
||||
async def get_provider_impl(config: LocalfsFilesImplConfig, deps: dict[Api, Any]):
|
||||
impl = LocalfsFilesImpl(config)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
31
llama_stack/providers/inline/files/localfs/config.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
|
||||
|
||||
|
||||
class LocalfsFilesImplConfig(BaseModel):
|
||||
storage_dir: str = Field(
|
||||
description="Directory to store uploaded files",
|
||||
)
|
||||
metadata_store: SqlStoreConfig = Field(
|
||||
description="SQL store configuration for file metadata",
|
||||
)
|
||||
ttl_secs: int = 365 * 24 * 60 * 60 # 1 year
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
|
||||
return {
|
||||
"storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}",
|
||||
"metadata_store": SqliteSqlStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name="files_metadata.db",
|
||||
),
|
||||
}
|
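The new localfs files provider keeps file bytes on disk and metadata in a SQL store. As a small usage sketch, the sample_run_config classmethod defined above can be called directly; the distro directory below is a placeholder, and the exact shape of the metadata_store entry is assumed to be whatever SqliteSqlStoreConfig.sample_run_config returns:

from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig

cfg = LocalfsFilesImplConfig.sample_run_config(__distro_dir__="/tmp/llama-stack-demo")
print(cfg["storage_dir"])     # "${env.FILES_STORAGE_DIR:/tmp/llama-stack-demo/files}"
print(cfg["metadata_store"])  # sqlite store config pointing at files_metadata.db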
||||
214
llama_stack/providers/inline/files/localfs/files.py
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import File, Form, Response, UploadFile
|
||||
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.files import (
|
||||
Files,
|
||||
ListOpenAIFileResponse,
|
||||
OpenAIFileDeleteResponse,
|
||||
OpenAIFileObject,
|
||||
OpenAIFilePurpose,
|
||||
)
|
||||
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStore, sqlstore_impl
|
||||
|
||||
from .config import LocalfsFilesImplConfig
|
||||
|
||||
|
||||
class LocalfsFilesImpl(Files):
|
||||
def __init__(self, config: LocalfsFilesImplConfig) -> None:
|
||||
self.config = config
|
||||
self.sql_store: SqlStore | None = None
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Initialize the files provider by setting up storage directory and metadata database."""
|
||||
# Create storage directory if it doesn't exist
|
||||
storage_path = Path(self.config.storage_dir)
|
||||
storage_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize SQL store for metadata
|
||||
self.sql_store = sqlstore_impl(self.config.metadata_store)
|
||||
await self.sql_store.create_table(
|
||||
"openai_files",
|
||||
{
|
||||
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
|
||||
"filename": ColumnType.STRING,
|
||||
"purpose": ColumnType.STRING,
|
||||
"bytes": ColumnType.INTEGER,
|
||||
"created_at": ColumnType.INTEGER,
|
||||
"expires_at": ColumnType.INTEGER,
|
||||
"file_path": ColumnType.STRING, # Path to actual file on disk
|
||||
},
|
||||
)
|
||||
|
||||
def _generate_file_id(self) -> str:
|
||||
"""Generate a unique file ID for OpenAI API."""
|
||||
return f"file-{uuid.uuid4().hex}"
|
||||
|
||||
def _get_file_path(self, file_id: str) -> Path:
|
||||
"""Get the filesystem path for a file ID."""
|
||||
return Path(self.config.storage_dir) / file_id
|
||||
|
||||
# OpenAI Files API Implementation
|
||||
async def openai_upload_file(
|
||||
self,
|
||||
file: Annotated[UploadFile, File()],
|
||||
purpose: Annotated[OpenAIFilePurpose, Form()],
|
||||
) -> OpenAIFileObject:
|
||||
"""Upload a file that can be used across various endpoints."""
|
||||
if not self.sql_store:
|
||||
raise RuntimeError("Files provider not initialized")
|
||||
|
||||
file_id = self._generate_file_id()
|
||||
file_path = self._get_file_path(file_id)
|
||||
|
||||
content = await file.read()
|
||||
file_size = len(content)
|
||||
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
created_at = int(time.time())
|
||||
expires_at = created_at + self.config.ttl_secs
|
||||
|
||||
await self.sql_store.insert(
|
||||
"openai_files",
|
||||
{
|
||||
"id": file_id,
|
||||
"filename": file.filename or "uploaded_file",
|
||||
"purpose": purpose.value,
|
||||
"bytes": file_size,
|
||||
"created_at": created_at,
|
||||
"expires_at": expires_at,
|
||||
"file_path": file_path.as_posix(),
|
||||
},
|
||||
)
|
||||
|
||||
return OpenAIFileObject(
|
||||
id=file_id,
|
||||
filename=file.filename or "uploaded_file",
|
||||
purpose=purpose,
|
||||
bytes=file_size,
|
||||
created_at=created_at,
|
||||
expires_at=expires_at,
|
||||
)
|
||||
|
||||
async def openai_list_files(
|
||||
self,
|
||||
after: str | None = None,
|
||||
limit: int | None = 10000,
|
||||
order: Order | None = Order.desc,
|
||||
purpose: OpenAIFilePurpose | None = None,
|
||||
) -> ListOpenAIFileResponse:
|
||||
"""Returns a list of files that belong to the user's organization."""
|
||||
if not self.sql_store:
|
||||
raise RuntimeError("Files provider not initialized")
|
||||
|
||||
# TODO: Implement 'after' pagination properly
|
||||
if after:
|
||||
raise NotImplementedError("After pagination not yet implemented")
|
||||
|
||||
where = None
|
||||
if purpose:
|
||||
where = {"purpose": purpose.value}
|
||||
|
||||
rows = await self.sql_store.fetch_all(
|
||||
"openai_files",
|
||||
where=where,
|
||||
order_by=[("created_at", order.value if order else Order.desc.value)],
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
files = [
|
||||
OpenAIFileObject(
|
||||
id=row["id"],
|
||||
filename=row["filename"],
|
||||
purpose=OpenAIFilePurpose(row["purpose"]),
|
||||
bytes=row["bytes"],
|
||||
created_at=row["created_at"],
|
||||
expires_at=row["expires_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return ListOpenAIFileResponse(
|
||||
data=files,
|
||||
has_more=False, # TODO: Implement proper pagination
|
||||
first_id=files[0].id if files else "",
|
||||
last_id=files[-1].id if files else "",
|
||||
)
|
||||
|
||||
async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
|
||||
"""Returns information about a specific file."""
|
||||
if not self.sql_store:
|
||||
raise RuntimeError("Files provider not initialized")
|
||||
|
||||
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
|
||||
if not row:
|
||||
raise ValueError(f"File with id {file_id} not found")
|
||||
|
||||
return OpenAIFileObject(
|
||||
id=row["id"],
|
||||
filename=row["filename"],
|
||||
purpose=OpenAIFilePurpose(row["purpose"]),
|
||||
bytes=row["bytes"],
|
||||
created_at=row["created_at"],
|
||||
expires_at=row["expires_at"],
|
||||
)
|
||||
|
||||
async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
|
||||
"""Delete a file."""
|
||||
if not self.sql_store:
|
||||
raise RuntimeError("Files provider not initialized")
|
||||
|
||||
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
|
||||
if not row:
|
||||
raise ValueError(f"File with id {file_id} not found")
|
||||
|
||||
# Delete physical file
|
||||
file_path = Path(row["file_path"])
|
||||
if file_path.exists():
|
||||
file_path.unlink()
|
||||
|
||||
# Delete metadata from database
|
||||
await self.sql_store.delete("openai_files", where={"id": file_id})
|
||||
|
||||
return OpenAIFileDeleteResponse(
|
||||
id=file_id,
|
||||
deleted=True,
|
||||
)
|
||||
|
||||
async def openai_retrieve_file_content(self, file_id: str) -> Response:
|
||||
"""Returns the contents of the specified file."""
|
||||
if not self.sql_store:
|
||||
raise RuntimeError("Files provider not initialized")
|
||||
|
||||
# Get file metadata
|
||||
row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
|
||||
if not row:
|
||||
raise ValueError(f"File with id {file_id} not found")
|
||||
|
||||
# Read file content
|
||||
file_path = Path(row["file_path"])
|
||||
if not file_path.exists():
|
||||
raise ValueError(f"File content not found on disk: {file_path}")
|
||||
|
||||
with open(file_path, "rb") as f:
|
||||
content = f.read()
|
||||
|
||||
# Return as binary response with appropriate content type
|
||||
return Response(
|
||||
content=content,
|
||||
media_type="application/octet-stream",
|
||||
headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'},
|
||||
)
|
||||
|
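A minimal, hypothetical smoke test of the provider above, run outside the stack. The SqliteSqlStoreConfig field name db_path and the OpenAIFilePurpose.ASSISTANTS member are assumptions inferred from the sample configs in this diff and OpenAI's file purposes; only the LocalfsFilesImpl calls themselves are taken from the code above.

import asyncio
import io

from fastapi import UploadFile

from llama_stack.apis.files import OpenAIFilePurpose
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.files.localfs.files import LocalfsFilesImpl
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig


async def main() -> None:
    config = LocalfsFilesImplConfig(
        storage_dir="/tmp/llama-stack-files",
        # db_path is assumed from the sqlite metadata_store entries in this diff
        metadata_store=SqliteSqlStoreConfig(db_path="/tmp/llama-stack-files/files_metadata.db"),
    )
    impl = LocalfsFilesImpl(config)
    await impl.initialize()

    # Upload an in-memory file, then list it back through the OpenAI-style API.
    upload = UploadFile(file=io.BytesIO(b"hello files"), filename="notes.txt")
    created = await impl.openai_upload_file(file=upload, purpose=OpenAIFilePurpose.ASSISTANTS)
    listed = await impl.openai_list_files()
    print(created.id, [f.filename for f in listed.data])


asyncio.run(main())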
@@ -30,7 +30,7 @@ class TelemetryConfig(BaseModel):
     )
     service_name: str = Field(
         # service name is always the same, use zero-width space to avoid clutter
-        default="",
+        default="\u200b",
         description="The service name to use for telemetry",
     )
     sinks: list[TelemetrySink] = Field(
@@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
         return {
-            "service_name": "${env.OTEL_SERVICE_NAME:}",
+            "service_name": "${env.OTEL_SERVICE_NAME:\u200b}",
             "sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
             "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
         }
@@ -4,8 +4,22 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.providers.datatypes import ProviderSpec
+from llama_stack.providers.datatypes import (
+    Api,
+    InlineProviderSpec,
+    ProviderSpec,
+)
+from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages


 def available_providers() -> list[ProviderSpec]:
-    return []
+    return [
+        InlineProviderSpec(
+            api=Api.files,
+            provider_type="inline::localfs",
+            # TODO: make this dynamic according to the sql store type
+            pip_packages=sql_store_pip_packages,
+            module="llama_stack.providers.inline.files.localfs",
+            config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
+        ),
+    ]
@@ -15,7 +15,6 @@ from llama_stack.providers.datatypes import (

META_REFERENCE_DEPS = [
    "accelerate",
    "blobfile",
    "fairscale",
    "torch",
    "torchvision",
@@ -20,7 +20,6 @@ def available_providers() -> list[ProviderSpec]:
            api=Api.tool_runtime,
            provider_type="inline::rag-runtime",
            pip_packages=[
                "blobfile",
                "chardet",
                "pypdf",
                "tqdm",
@@ -255,7 +255,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
         params = {
             "model": request.model,
             **input_dict,
-            "stream": request.stream,
+            "stream": bool(request.stream),
             **self._build_options(request.sampling_params, request.response_format, request.logprobs),
         }
         logger.debug(f"params to fireworks: {params}")
@@ -12,7 +12,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_model_entry,
 )

-model_entries = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1:8b-instruct-fp16",
         CoreModelId.llama3_1_8b_instruct.value,
@@ -5,6 +5,7 @@
 # the root directory of this source tree.


+import uuid
 from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any

@@ -77,7 +78,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     request_has_media,
 )

-from .models import model_entries
+from .models import MODEL_ENTRIES

 logger = get_logger(name=__name__, category="inference")

@@ -87,7 +88,7 @@ class OllamaInferenceAdapter(
     ModelsProtocolPrivate,
 ):
     def __init__(self, url: str) -> None:
-        self.register_helper = ModelRegistryHelper(model_entries)
+        self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
         self.url = url

     @property
@@ -480,7 +481,25 @@ class OllamaInferenceAdapter(
             top_p=top_p,
             user=user,
         )
-        return await self.openai_client.chat.completions.create(**params)  # type: ignore
+        response = await self.openai_client.chat.completions.create(**params)
+        return await self._adjust_ollama_chat_completion_response_ids(response)
+
+    async def _adjust_ollama_chat_completion_response_ids(
+        self,
+        response: OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk],
+    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
+        id = f"chatcmpl-{uuid.uuid4()}"
+        if isinstance(response, AsyncIterator):
+
+            async def stream_with_chunk_ids() -> AsyncIterator[OpenAIChatCompletionChunk]:
+                async for chunk in response:
+                    chunk.id = id
+                    yield chunk
+
+            return stream_with_chunk_ids()
+        else:
+            response.id = id
+            return response
+
     async def batch_completion(
         self,
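The ID-stamping pattern above, shown in isolation: one chatcmpl-style ID is generated per completion and applied to every streamed chunk. The dataclass below is a plain stand-in for illustration only, not the actual OpenAIChatCompletionChunk type.

import asyncio
import uuid
from collections.abc import AsyncIterator
from dataclasses import dataclass


@dataclass
class Chunk:  # stand-in for OpenAIChatCompletionChunk
    id: str
    text: str


async def stamp_ids(chunks: AsyncIterator[Chunk]) -> AsyncIterator[Chunk]:
    # One ID for the whole completion, applied to every streamed chunk,
    # mirroring _adjust_ollama_chat_completion_response_ids above.
    new_id = f"chatcmpl-{uuid.uuid4()}"
    async for chunk in chunks:
        chunk.id = new_id
        yield chunk


async def main() -> None:
    async def fake_stream() -> AsyncIterator[Chunk]:
        for text in ("Hel", "lo"):
            yield Chunk(id="ollama-123", text=text)

    async for chunk in stamp_ids(fake_stream()):
        print(chunk.id, chunk.text)


asyncio.run(main())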
@@ -72,15 +72,15 @@ class PostgresKVStoreConfig(CommonConfig):
     table_name: str = "llamastack_kvstore"

     @classmethod
-    def sample_run_config(cls, table_name: str = "llamastack_kvstore"):
+    def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
         return {
             "type": "postgres",
             "namespace": None,
             "host": "${env.POSTGRES_HOST:localhost}",
             "port": "${env.POSTGRES_PORT:5432}",
-            "db": "${env.POSTGRES_DB}",
-            "user": "${env.POSTGRES_USER}",
-            "password": "${env.POSTGRES_PASSWORD}",
+            "db": "${env.POSTGRES_DB:llamastack}",
+            "user": "${env.POSTGRES_USER:llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:llamastack}",
             "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}",
         }

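The ${env.VAR:default} placeholders that appear throughout these sample configs follow a simple convention: use the environment variable when it is set, otherwise fall back to the text after the colon; without a colon the variable is effectively required. The resolver below is a rough, purely illustrative sketch of that convention; the real substitution logic lives in llama-stack's distribution code and is not part of this diff.

import os
import re

_ENV_RE = re.compile(r"\$\{env\.(?P<name>[A-Za-z_][A-Za-z0-9_]*)(?::(?P<default>[^}]*))?\}")


def resolve_env_placeholders(value: str) -> str:
    """Expand ${env.VAR:default} placeholders the way the sample configs above use them."""

    def _sub(match: re.Match) -> str:
        name = match.group("name")
        default = match.group("default")
        if name in os.environ:
            return os.environ[name]
        if default is None:
            # No ':default' part: treat the variable as required (illustrative behavior).
            raise KeyError(f"environment variable {name} is required")
        return default

    return _ENV_RE.sub(_sub, value)


print(resolve_env_placeholders("${env.POSTGRES_HOST:localhost}:${env.POSTGRES_PORT:5432}"))
# -> "localhost:5432" unless POSTGRES_HOST / POSTGRES_PORT are set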
@@ -16,6 +16,8 @@ from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR

 from .api import SqlStore

+sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]
+

 class SqlStoreType(Enum):
     sqlite = "sqlite"

@@ -72,6 +74,17 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
     def pip_packages(self) -> list[str]:
         return super().pip_packages + ["asyncpg"]

+    @classmethod
+    def sample_run_config(cls, **kwargs):
+        return cls(
+            type="postgres",
+            host="${env.POSTGRES_HOST:localhost}",
+            port="${env.POSTGRES_PORT:5432}",
+            db="${env.POSTGRES_DB:llamastack}",
+            user="${env.POSTGRES_USER:llamastack}",
+            password="${env.POSTGRES_PASSWORD:llamastack}",
+        )
+

 SqlStoreConfig = Annotated[
     SqliteSqlStoreConfig | PostgresSqlStoreConfig,
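As a small illustration of how both SQL store backends plug into the same factory, a hypothetical helper is sketched below. The PostgresSqlStoreConfig constructor arguments mirror the sample_run_config just added; the SqliteSqlStoreConfig db_path field is inferred from the run.yaml snippets later in this diff, and sqlstore_impl is the same factory the localfs files provider uses above.

from llama_stack.providers.utils.sqlstore.sqlstore import (
    PostgresSqlStoreConfig,
    SqliteSqlStoreConfig,
    sqlstore_impl,
)


def make_metadata_store(use_postgres: bool = False):
    """Pick a SQL store backend; both configs resolve through the same sqlstore_impl factory."""
    if use_postgres:
        config = PostgresSqlStoreConfig(
            type="postgres",
            host="localhost",
            port="5432",
            db="llamastack",
            user="llamastack",
            password="llamastack",
        )
    else:
        # db_path field name inferred from the sqlite metadata_store entries in this diff
        config = SqliteSqlStoreConfig(db_path="/tmp/llamastack_metadata.db")
    return sqlstore_impl(config)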
@ -42,7 +42,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db
|
||||
tool_runtime:
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@ distribution_spec:
|
|||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
files:
|
||||
- inline::localfs
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from llama_stack.distribution.datatypes import (
|
|||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
|
|
@ -36,6 +37,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"files": ["inline::localfs"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
|
|
@ -62,6 +64,11 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::faiss",
|
||||
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
)
|
||||
files_provider = Provider(
|
||||
provider_id="meta-reference-files",
|
||||
provider_type="inline::localfs",
|
||||
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
)
|
||||
|
||||
available_models = {
|
||||
"fireworks": MODEL_ENTRIES,
|
||||
|
|
@ -104,6 +111,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
"files": [files_provider],
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
|
|
@ -116,6 +124,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
embedding_provider,
|
||||
],
|
||||
"vector_io": [vector_io_provider],
|
||||
"files": [files_provider],
|
||||
"safety": [
|
||||
Provider(
|
||||
provider_id="llama-guard",
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ apis:
|
|||
- agents
|
||||
- datasetio
|
||||
- eval
|
||||
- files
|
||||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
|
|
@ -53,7 +54,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
|
||||
eval:
|
||||
|
|
@ -90,6 +91,14 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
files:
|
||||
- provider_id: meta-reference-files
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ apis:
|
|||
- agents
|
||||
- datasetio
|
||||
- eval
|
||||
- files
|
||||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
|
|
@ -48,7 +49,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
|
||||
eval:
|
||||
|
|
@ -85,6 +86,14 @@ providers:
|
|||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
files:
|
||||
- provider_id: meta-reference-files
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db
|
||||
eval:
|
||||
|
|
@ -112,7 +112,7 @@ models:
|
|||
provider_model_id: groq/llama3-8b-8192
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
model_id: groq/meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama3-8b-8192
|
||||
model_type: llm
|
||||
|
|
@ -127,7 +127,7 @@ models:
|
|||
provider_model_id: groq/llama3-70b-8192
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3-70B-Instruct
|
||||
model_id: groq/meta-llama/Llama-3-70B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama3-70b-8192
|
||||
model_type: llm
|
||||
|
|
@ -137,7 +137,7 @@ models:
|
|||
provider_model_id: groq/llama-3.3-70b-versatile
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
model_id: groq/meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-3.3-70b-versatile
|
||||
model_type: llm
|
||||
|
|
@ -147,7 +147,7 @@ models:
|
|||
provider_model_id: groq/llama-3.2-3b-preview
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
model_id: groq/meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-3.2-3b-preview
|
||||
model_type: llm
|
||||
|
|
@ -157,7 +157,7 @@ models:
|
|||
provider_model_id: groq/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
|
|
@ -167,7 +167,7 @@ models:
|
|||
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
|
|
@ -177,7 +177,7 @@ models:
|
|||
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
|
|
@ -187,7 +187,7 @@ models:
|
|||
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ distribution_spec:
|
|||
description: Quick start template for running Llama Stack with several popular providers
|
||||
providers:
|
||||
inference:
|
||||
- remote::fireworks
|
||||
- remote::vllm
|
||||
- inline::sentence-transformers
|
||||
vector_io:
|
||||
- remote::chromadb
|
||||
safety:
|
||||
|
|
|
|||
|
|
@ -5,64 +5,36 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
|
||||
from llama_stack.providers.remote.inference.fireworks.models import (
|
||||
MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
|
||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
|
||||
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
|
||||
# in this template, we allow each API key to be optional
|
||||
providers = [
|
||||
(
|
||||
"fireworks",
|
||||
FIREWORKS_MODEL_ENTRIES,
|
||||
FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
|
||||
),
|
||||
]
|
||||
inference_providers = []
|
||||
available_models = {}
|
||||
for provider_id, model_entries, config in providers:
|
||||
inference_providers.append(
|
||||
Provider(
|
||||
provider_id=provider_id,
|
||||
provider_type=f"remote::{provider_id}",
|
||||
config=config,
|
||||
)
|
||||
)
|
||||
available_models[provider_id] = model_entries
|
||||
inference_providers.append(
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
inference_providers = [
|
||||
Provider(
|
||||
provider_id="vllm-inference",
|
||||
provider_type="remote::vllm",
|
||||
config=VLLMInferenceAdapterConfig.sample_run_config(
|
||||
url="${env.VLLM_URL:http://localhost:8000/v1}",
|
||||
),
|
||||
)
|
||||
)
|
||||
return inference_providers, available_models
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
inference_providers, available_models = get_inference_providers()
|
||||
),
|
||||
]
|
||||
providers = {
|
||||
"inference": ([p.provider_type for p in inference_providers]),
|
||||
"inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
|
||||
"vector_io": ["remote::chromadb"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
|
|
@ -94,22 +66,26 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
),
|
||||
]
|
||||
|
||||
default_models = get_model_registry(available_models)
|
||||
default_models.append(
|
||||
default_models = [
|
||||
ModelInput(
|
||||
model_id="${env.INFERENCE_MODEL}",
|
||||
provider_id="vllm-inference",
|
||||
)
|
||||
]
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
postgres_config = {
|
||||
"type": "postgres",
|
||||
"host": "${env.POSTGRES_HOST:localhost}",
|
||||
"port": "${env.POSTGRES_PORT:5432}",
|
||||
"db": "${env.POSTGRES_DB:llamastack}",
|
||||
"user": "${env.POSTGRES_USER:llamastack}",
|
||||
"password": "${env.POSTGRES_PASSWORD:llamastack}",
|
||||
}
|
||||
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
provider_id=embedding_provider.provider_id,
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
postgres_config = PostgresSqlStoreConfig.sample_run_config()
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
|
|
@ -117,11 +93,11 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
container_image=None,
|
||||
template_path=None,
|
||||
providers=providers,
|
||||
available_models_by_provider=available_models,
|
||||
available_models_by_provider={},
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": inference_providers,
|
||||
"inference": inference_providers + [embedding_provider],
|
||||
"vector_io": vector_io_providers,
|
||||
"agents": [
|
||||
Provider(
|
||||
|
|
@ -139,16 +115,17 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::meta-reference",
|
||||
config=dict(
|
||||
service_name="${env.OTEL_SERVICE_NAME:}",
|
||||
sinks="${env.TELEMETRY_SINKS:console}",
|
||||
sinks="${env.TELEMETRY_SINKS:console,otel_trace}",
|
||||
otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}",
|
||||
),
|
||||
)
|
||||
],
|
||||
},
|
||||
default_models=default_models,
|
||||
default_models=default_models + [embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
metadata_store=PostgresKVStoreConfig.model_validate(postgres_config),
|
||||
inference_store=PostgresSqlStoreConfig.model_validate(postgres_config),
|
||||
metadata_store=PostgresKVStoreConfig.sample_run_config(),
|
||||
inference_store=postgres_config,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
@ -156,9 +133,5 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"FIREWORKS_API_KEY": (
|
||||
"",
|
||||
"Fireworks API Key",
|
||||
),
|
||||
},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -9,11 +9,6 @@ apis:
|
|||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: fireworks
|
||||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:}
|
||||
- provider_id: vllm-inference
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
|
|
@ -21,6 +16,9 @@ providers:
|
|||
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:true}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: ${env.ENABLE_CHROMADB+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
|
|
@ -54,7 +52,8 @@ providers:
|
|||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
sinks: ${env.TELEMETRY_SINKS:console}
|
||||
sinks: ${env.TELEMETRY_SINKS:console,otel_trace}
|
||||
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
|
|
@ -79,7 +78,7 @@ metadata_store:
|
|||
db: ${env.POSTGRES_DB:llamastack}
|
||||
user: ${env.POSTGRES_USER:llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:llamastack}
|
||||
table_name: llamastack_kvstore
|
||||
table_name: ${env.POSTGRES_TABLE_NAME:llamastack_kvstore}
|
||||
inference_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:localhost}
|
||||
|
|
@ -88,127 +87,15 @@ inference_store:
|
|||
user: ${env.POSTGRES_USER:llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:llamastack}
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-70B-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-8B
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-11B-Vision
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama4-scout-instruct-basic
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 8192
|
||||
model_id: nomic-ai/nomic-embed-text-v1.5
|
||||
provider_id: fireworks
|
||||
provider_model_id: nomic-ai/nomic-embed-text-v1.5
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: vllm-inference
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
|
||||
tool_runtime:
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
|
||||
tool_runtime:
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db
|
||||
tool_runtime:
|
||||
|
|
|
|||
|
|
@ -5,10 +5,13 @@ distribution_spec:
|
|||
inference:
|
||||
- remote::openai
|
||||
- remote::fireworks
|
||||
- remote::together
|
||||
- remote::ollama
|
||||
- remote::anthropic
|
||||
- remote::gemini
|
||||
- remote::groq
|
||||
- remote::sambanova
|
||||
- remote::vllm
|
||||
- inline::sentence-transformers
|
||||
vector_io:
|
||||
- inline::sqlite-vec
|
||||
|
|
@ -37,4 +40,5 @@ distribution_spec:
|
|||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- asyncpg
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
|||
|
|
@ -21,6 +21,15 @@ providers:
|
|||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:}
|
||||
- provider_id: ollama
|
||||
provider_type: remote::ollama
|
||||
config:
|
||||
url: ${env.OLLAMA_URL:http://localhost:11434}
|
||||
- provider_id: anthropic
|
||||
provider_type: remote::anthropic
|
||||
config:
|
||||
|
|
@ -39,6 +48,13 @@ providers:
|
|||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:}
|
||||
- provider_id: vllm
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:http://localhost:8000/v1}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:true}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
|
|
@ -79,7 +95,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db
|
||||
eval:
|
||||
|
|
@ -156,72 +172,72 @@ models:
|
|||
provider_model_id: openai/chatgpt-4o-latest
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-3.5-turbo-0125
|
||||
model_id: openai/gpt-3.5-turbo-0125
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-3.5-turbo-0125
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-3.5-turbo
|
||||
model_id: openai/gpt-3.5-turbo
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-3.5-turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-3.5-turbo-instruct
|
||||
model_id: openai/gpt-3.5-turbo-instruct
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-3.5-turbo-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4
|
||||
model_id: openai/gpt-4
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4-turbo
|
||||
model_id: openai/gpt-4-turbo
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4-turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4o
|
||||
model_id: openai/gpt-4o
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4o-2024-08-06
|
||||
model_id: openai/gpt-4o-2024-08-06
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o-2024-08-06
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4o-mini
|
||||
model_id: openai/gpt-4o-mini
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o-mini
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4o-audio-preview
|
||||
model_id: openai/gpt-4o-audio-preview
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o-audio-preview
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: chatgpt-4o-latest
|
||||
model_id: openai/chatgpt-4o-latest
|
||||
provider_id: openai
|
||||
provider_model_id: chatgpt-4o-latest
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: o1
|
||||
model_id: openai/o1
|
||||
provider_id: openai
|
||||
provider_model_id: o1
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: o1-mini
|
||||
model_id: openai/o1-mini
|
||||
provider_id: openai
|
||||
provider_model_id: o1-mini
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: o3-mini
|
||||
model_id: openai/o3-mini
|
||||
provider_id: openai
|
||||
provider_model_id: o3-mini
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: o4-mini
|
||||
model_id: openai/o4-mini
|
||||
provider_id: openai
|
||||
provider_model_id: o4-mini
|
||||
model_type: llm
|
||||
|
|
@ -242,14 +258,14 @@ models:
|
|||
- metadata:
|
||||
embedding_dimension: 1536
|
||||
context_length: 8192
|
||||
model_id: text-embedding-3-small
|
||||
model_id: openai/text-embedding-3-small
|
||||
provider_id: openai
|
||||
provider_model_id: text-embedding-3-small
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 3072
|
||||
context_length: 8192
|
||||
model_id: text-embedding-3-large
|
||||
model_id: openai/text-embedding-3-large
|
||||
provider_id: openai
|
||||
provider_model_id: text-embedding-3-large
|
||||
model_type: embedding
|
||||
|
|
@ -259,7 +275,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
model_id: fireworks/meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
model_type: llm
|
||||
|
|
@ -269,7 +285,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-70B-Instruct
|
||||
model_id: fireworks/meta-llama/Llama-3.1-70B-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||
model_type: llm
|
||||
|
|
@ -279,7 +295,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
model_id: fireworks/meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||
model_type: llm
|
||||
|
|
@ -289,7 +305,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
model_id: fireworks/meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
model_type: llm
|
||||
|
|
@ -299,7 +315,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
model_id: fireworks/meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
model_type: llm
|
||||
|
|
@ -309,7 +325,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
model_id: fireworks/meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
model_type: llm
|
||||
|
|
@ -319,7 +335,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
model_id: fireworks/meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||
model_type: llm
|
||||
|
|
@ -329,7 +345,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-8B
|
||||
model_id: fireworks/meta-llama/Llama-Guard-3-8B
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||
model_type: llm
|
||||
|
|
@ -339,7 +355,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-11B-Vision
|
||||
model_id: fireworks/meta-llama/Llama-Guard-3-11B-Vision
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
|
|
@ -349,7 +365,7 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_id: fireworks/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
|
||||
model_type: llm
|
||||
|
|
@ -359,17 +375,307 @@ models:
|
|||
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_id: fireworks/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: fireworks
|
||||
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 8192
|
||||
model_id: nomic-ai/nomic-embed-text-v1.5
|
||||
model_id: fireworks/nomic-ai/nomic-embed-text-v1.5
|
||||
provider_id: fireworks
|
||||
provider_model_id: nomic-ai/nomic-embed-text-v1.5
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.1-70B-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.2-3B-Instruct-Turbo
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Meta-Llama-Guard-3-8B
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-Guard-3-8B
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-Guard-3-11B-Vision-Turbo
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-Guard-3-11B-Vision
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 8192
|
||||
model_id: togethercomputer/m2-bert-80M-8k-retrieval
|
||||
provider_id: together
|
||||
provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 32768
|
||||
model_id: togethercomputer/m2-bert-80M-32k-retrieval
|
||||
provider_id: together
|
||||
provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
provider_id: together
|
||||
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.1:8b-instruct-fp16
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:8b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:8b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.1:8b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.1:70b-instruct-fp16
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:70b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-3.1-70B-Instruct
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:70b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.1:70b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:70b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.1:405b-instruct-fp16
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:405b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:405b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.1:405b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.1:405b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.2:1b-instruct-fp16
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2:1b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-3.2-1B-Instruct
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2:1b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.2:1b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2:1b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.2:3b-instruct-fp16
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2:3b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2:3b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.2:3b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2:3b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.2-vision:11b-instruct-fp16
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2-vision:11b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2-vision:11b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.2-vision:latest
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2-vision:latest
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.2-vision:90b-instruct-fp16
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2-vision:90b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2-vision:90b-instruct-fp16
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.2-vision:90b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.2-vision:90b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama3.3:70b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.3:70b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: ollama
|
||||
provider_model_id: llama3.3:70b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama-guard3:8b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama-guard3:8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-Guard-3-8B
|
||||
provider_id: ollama
|
||||
provider_model_id: llama-guard3:8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/llama-guard3:1b
|
||||
provider_id: ollama
|
||||
provider_model_id: llama-guard3:1b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: ollama/meta-llama/Llama-Guard-3-1B
|
||||
provider_id: ollama
|
||||
provider_model_id: llama-guard3:1b
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
context_length: 512
|
||||
model_id: ollama/all-minilm:latest
|
||||
provider_id: ollama
|
||||
provider_model_id: all-minilm:latest
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
context_length: 512
|
||||
model_id: ollama/all-minilm
|
||||
provider_id: ollama
|
||||
provider_model_id: all-minilm:latest
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 8192
|
||||
model_id: ollama/nomic-embed-text
|
||||
provider_id: ollama
|
||||
provider_model_id: nomic-embed-text
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: anthropic/claude-3-5-sonnet-latest
|
||||
provider_id: anthropic
|
||||
|
|
@@ -429,7 +735,7 @@ models:
|
|||
provider_model_id: groq/llama3-8b-8192
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
model_id: groq/meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama3-8b-8192
|
||||
model_type: llm
|
||||
|
|
@@ -444,7 +750,7 @@ models:
|
|||
provider_model_id: groq/llama3-70b-8192
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3-70B-Instruct
|
||||
model_id: groq/meta-llama/Llama-3-70B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama3-70b-8192
|
||||
model_type: llm
|
||||
|
|
@@ -454,7 +760,7 @@ models:
|
|||
provider_model_id: groq/llama-3.3-70b-versatile
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
model_id: groq/meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-3.3-70b-versatile
|
||||
model_type: llm
|
||||
|
|
@@ -464,7 +770,7 @@ models:
|
|||
provider_model_id: groq/llama-3.2-3b-preview
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
model_id: groq/meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-3.2-3b-preview
|
||||
model_type: llm
|
||||
|
|
@@ -474,7 +780,7 @@ models:
|
|||
provider_model_id: groq/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
|
|
@@ -484,7 +790,7 @@ models:
|
|||
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
|
|
@@ -494,7 +800,7 @@ models:
|
|||
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
|
|
@@ -504,7 +810,7 @@ models:
|
|||
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: groq
|
||||
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
|
|
@@ -514,7 +820,7 @@ models:
|
|||
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
model_id: sambanova/meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -524,7 +830,7 @@ models:
|
|||
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
model_id: sambanova/meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -534,7 +840,7 @@ models:
|
|||
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-1B-Instruct
|
||||
model_id: sambanova/meta-llama/Llama-3.2-1B-Instruct
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -544,7 +850,7 @@ models:
|
|||
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
model_id: sambanova/meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -554,7 +860,7 @@ models:
|
|||
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
model_id: sambanova/meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -564,7 +870,7 @@ models:
|
|||
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
model_id: sambanova/meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -574,7 +880,7 @@ models:
|
|||
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
model_id: sambanova/meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -584,7 +890,7 @@ models:
|
|||
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_id: sambanova/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -594,7 +900,7 @@ models:
|
|||
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_id: sambanova/meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_type: llm
|
||||
|
|
@@ -604,7 +910,7 @@ models:
|
|||
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-8B
|
||||
model_id: sambanova/meta-llama/Llama-Guard-3-8B
|
||||
provider_id: sambanova
|
||||
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
|
|
|
|||
|
|
@@ -34,6 +34,10 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
|
|||
from llama_stack.providers.remote.inference.groq.models import (
|
||||
MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
|
||||
from llama_stack.providers.remote.inference.ollama.models import (
|
||||
MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
|
||||
from llama_stack.providers.remote.inference.openai.models import (
|
||||
MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
|
||||
|
|
@@ -42,11 +46,17 @@ from llama_stack.providers.remote.inference.sambanova.config import SambaNovaImp
|
|||
from llama_stack.providers.remote.inference.sambanova.models import (
|
||||
MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
|
||||
from llama_stack.providers.remote.inference.together.models import (
|
||||
MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||
PGVectorVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
|
|
@@ -67,6 +77,16 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
|
|||
FIREWORKS_MODEL_ENTRIES,
|
||||
FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
|
||||
),
|
||||
(
|
||||
"together",
|
||||
TOGETHER_MODEL_ENTRIES,
|
||||
TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
|
||||
),
|
||||
(
|
||||
"ollama",
|
||||
OLLAMA_MODEL_ENTRIES,
|
||||
OllamaImplConfig.sample_run_config(),
|
||||
),
|
||||
(
|
||||
"anthropic",
|
||||
ANTHROPIC_MODEL_ENTRIES,
|
||||
|
|
@@ -87,6 +107,13 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
|
|||
SAMBANOVA_MODEL_ENTRIES,
|
||||
SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
|
||||
),
|
||||
(
|
||||
"vllm",
|
||||
[],
|
||||
VLLMInferenceAdapterConfig.sample_run_config(
|
||||
url="${env.VLLM_URL:http://localhost:8000/v1}",
|
||||
),
|
||||
),
|
||||
]
|
||||
inference_providers = []
|
||||
available_models = {}
|
||||
|
|
@@ -169,6 +196,8 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
)
|
||||
|
||||
default_models = get_model_registry(available_models)
|
||||
|
||||
postgres_store = PostgresSqlStoreConfig.sample_run_config()
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
|
|
@@ -177,6 +206,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
template_path=None,
|
||||
providers=providers,
|
||||
available_models_by_provider=available_models,
|
||||
additional_pip_packages=postgres_store.pip_packages,
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
|
|
@@ -201,5 +231,25 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"",
|
||||
"OpenAI API Key",
|
||||
),
|
||||
"GROQ_API_KEY": (
|
||||
"",
|
||||
"Groq API Key",
|
||||
),
|
||||
"ANTHROPIC_API_KEY": (
|
||||
"",
|
||||
"Anthropic API Key",
|
||||
),
|
||||
"GEMINI_API_KEY": (
|
||||
"",
|
||||
"Gemini API Key",
|
||||
),
|
||||
"SAMBANOVA_API_KEY": (
|
||||
"",
|
||||
"SambaNova API Key",
|
||||
),
|
||||
"VLLM_URL": (
|
||||
"http://localhost:8000/v1",
|
||||
"VLLM URL",
|
||||
),
|
||||
},
|
||||
)
|
||||
|
|
|
|||
|
|
@@ -8,6 +8,7 @@ from pathlib import Path
|
|||
from typing import Literal
|
||||
|
||||
import jinja2
|
||||
import rich
|
||||
import yaml
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
|
@@ -36,13 +37,35 @@ def get_model_registry(
|
|||
available_models: dict[str, list[ProviderModelEntry]],
|
||||
) -> list[ModelInput]:
|
||||
models = []
|
||||
|
||||
# check for conflicts in model ids
|
||||
all_ids = set()
|
||||
ids_conflict = False
|
||||
|
||||
for _, entries in available_models.items():
|
||||
for entry in entries:
|
||||
ids = [entry.provider_model_id] + entry.aliases
|
||||
for model_id in ids:
|
||||
if model_id in all_ids:
|
||||
ids_conflict = True
|
||||
rich.print(
|
||||
f"[yellow]Model id {model_id} conflicts; all model ids will be prefixed with provider id[/yellow]"
|
||||
)
|
||||
break
|
||||
all_ids.update(ids)
|
||||
if ids_conflict:
|
||||
break
|
||||
if ids_conflict:
|
||||
break
|
||||
|
||||
for provider_id, entries in available_models.items():
|
||||
for entry in entries:
|
||||
ids = [entry.provider_model_id] + entry.aliases
|
||||
for model_id in ids:
|
||||
identifier = f"{provider_id}/{model_id}" if ids_conflict and provider_id not in model_id else model_id
|
||||
models.append(
|
||||
ModelInput(
|
||||
model_id=model_id,
|
||||
model_id=identifier,
|
||||
provider_model_id=entry.provider_model_id,
|
||||
provider_id=provider_id,
|
||||
model_type=entry.model_type,
|
||||
|
|
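
The hunk above changes get_model_registry so that, once any model id is shared by two providers, every registered identifier is prefixed with its provider id (unless the provider id already appears in the id). A minimal standalone sketch of that rule follows; the entries are hypothetical strings, not real ProviderModelEntry objects from the registry.

# Illustrative sketch only; the provider/model entries below are made up.
available = {
    "together": ["meta-llama/Llama-3.3-70B-Instruct"],
    "groq": ["meta-llama/Llama-3.3-70B-Instruct"],
}

all_ids: set[str] = set()
ids_conflict = False
for ids in available.values():
    for model_id in ids:
        if model_id in all_ids:
            ids_conflict = True
        all_ids.add(model_id)

for provider_id, ids in available.items():
    for model_id in ids:
        # Same rule as the diff: prefix with the provider id on conflict,
        # unless the provider id is already part of the model id.
        identifier = f"{provider_id}/{model_id}" if ids_conflict and provider_id not in model_id else model_id
        print(identifier)
# together/meta-llama/Llama-3.3-70B-Instruct
# groq/meta-llama/Llama-3.3-70B-Instruct
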
@@ -154,6 +177,11 @@ class DistributionTemplate(BaseModel):
|
|||
|
||||
available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None
|
||||
|
||||
# we may want to specify additional pip packages without necessarily indicating a
|
||||
# specific "default" inference store (which is what typically used to dictate additional
|
||||
# pip packages)
|
||||
additional_pip_packages: list[str] | None = None
|
||||
|
||||
def build_config(self) -> BuildConfig:
|
||||
additional_pip_packages: list[str] = []
|
||||
for run_config in self.run_configs.values():
|
||||
|
|
@@ -161,6 +189,9 @@ class DistributionTemplate(BaseModel):
|
|||
if run_config_.inference_store:
|
||||
additional_pip_packages.extend(run_config_.inference_store.pip_packages)
|
||||
|
||||
if self.additional_pip_packages:
|
||||
additional_pip_packages.extend(self.additional_pip_packages)
|
||||
|
||||
return BuildConfig(
|
||||
distribution_spec=DistributionSpec(
|
||||
description=self.description,
|
||||
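
With the two hunks above, build_config merges the pip packages required by each run config's inference store with the template-level additional_pip_packages (set earlier in this commit via postgres_store.pip_packages). A rough sketch of that aggregation; the package names are illustrative, not the exact contents of those lists.

# Rough sketch of the aggregation in build_config; package names are examples.
run_config_pip_packages = [["aiosqlite"], ["sqlalchemy[asyncio]"]]  # from inference stores
template_pip_packages = ["psycopg2-binary"]                         # hypothetical additional_pip_packages

additional_pip_packages: list[str] = []
for pkgs in run_config_pip_packages:
    additional_pip_packages.extend(pkgs)
additional_pip_packages.extend(template_pip_packages)

print(sorted(set(additional_pip_packages)))
# ['aiosqlite', 'psycopg2-binary', 'sqlalchemy[asyncio]']
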
|
|
|
|||
|
|
@@ -48,7 +48,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
|
||||
eval:
|
||||
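
The telemetry hunks in this commit all make the same change: the empty default in ${env.OTEL_SERVICE_NAME:} is replaced by a quoted zero-width space (U+200B), i.e. an empty-looking but non-empty fallback. The motivation is not stated in the diff, so treat that reading as an assumption. A simplified stand-in for the "${env.VAR:default}" substitution (not the stack's real resolver) shows the difference:

# Simplified illustration of "${env.VAR:default}" resolution; this is not
# the stack's actual substitution code.
import os
import re

def resolve(value: str) -> str:
    return re.sub(
        r"\$\{env\.([A-Za-z_]+):([^}]*)\}",
        lambda m: os.environ.get(m.group(1), m.group(2)),
        value,
    )

print(repr(resolve("${env.OTEL_SERVICE_NAME:}")))         # '' -> empty service name
print(repr(resolve("${env.OTEL_SERVICE_NAME:\u200b}")))   # '\u200b' -> non-empty fallback
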
|
|
|
|||
|
|
@@ -47,7 +47,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@@ -53,7 +53,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@@ -48,7 +48,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@@ -1,40 +0,0 @@
|
|||
version: '2'
|
||||
distribution_spec:
|
||||
description: Distribution for running e2e tests in CI
|
||||
providers:
|
||||
inference:
|
||||
- remote::openai
|
||||
- remote::fireworks-openai-compat
|
||||
- remote::together-openai-compat
|
||||
- remote::groq-openai-compat
|
||||
- remote::sambanova-openai-compat
|
||||
- remote::cerebras-openai-compat
|
||||
- inline::sentence-transformers
|
||||
vector_io:
|
||||
- inline::sqlite-vec
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
eval:
|
||||
- inline::meta-reference
|
||||
datasetio:
|
||||
- remote::huggingface
|
||||
- inline::localfs
|
||||
scoring:
|
||||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
@@ -1,731 +0,0 @@
|
|||
version: '2'
|
||||
image_name: verification
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
api_key: ${env.OPENAI_API_KEY:}
|
||||
- provider_id: fireworks-openai-compat
|
||||
provider_type: remote::fireworks-openai-compat
|
||||
config:
|
||||
openai_compat_api_base: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:}
|
||||
- provider_id: together-openai-compat
|
||||
provider_type: remote::together-openai-compat
|
||||
config:
|
||||
openai_compat_api_base: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:}
|
||||
- provider_id: groq-openai-compat
|
||||
provider_type: remote::groq-openai-compat
|
||||
config:
|
||||
openai_compat_api_base: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY:}
|
||||
- provider_id: sambanova-openai-compat
|
||||
provider_type: remote::sambanova-openai-compat
|
||||
config:
|
||||
openai_compat_api_base: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:}
|
||||
- provider_id: cerebras-openai-compat
|
||||
provider_type: remote::cerebras-openai-compat
|
||||
config:
|
||||
openai_compat_api_base: https://api.cerebras.ai/v1
|
||||
api_key: ${env.CEREBRAS_API_KEY:}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: sqlite-vec
|
||||
provider_type: inline::sqlite-vec
|
||||
config:
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/sqlite_vec.db
|
||||
- provider_id: ${env.ENABLE_CHROMADB+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:}
|
||||
- provider_id: ${env.ENABLE_PGVECTOR+pgvector}
|
||||
provider_type: remote::pgvector
|
||||
config:
|
||||
host: ${env.PGVECTOR_HOST:localhost}
|
||||
port: ${env.PGVECTOR_PORT:5432}
|
||||
db: ${env.PGVECTOR_DB:}
|
||||
user: ${env.PGVECTOR_USER:}
|
||||
password: ${env.PGVECTOR_PASSWORD:}
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
config:
|
||||
excluded_categories: []
|
||||
agents:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/agents_store.db
|
||||
responses_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/responses_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/trace_store.db
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/meta_reference_eval.db
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/huggingface_datasetio.db
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/localfs_datasetio.db
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db
|
||||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/inference_store.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: openai/gpt-4o
|
||||
provider_id: openai
|
||||
provider_model_id: openai/gpt-4o
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: openai/gpt-4o-mini
|
||||
provider_id: openai
|
||||
provider_model_id: openai/gpt-4o-mini
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: openai/chatgpt-4o-latest
|
||||
provider_id: openai
|
||||
provider_model_id: openai/chatgpt-4o-latest
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-3.5-turbo-0125
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-3.5-turbo-0125
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-3.5-turbo
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-3.5-turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-3.5-turbo-instruct
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-3.5-turbo-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4-turbo
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4-turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4o
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4o-2024-08-06
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o-2024-08-06
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4o-mini
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o-mini
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: gpt-4o-audio-preview
|
||||
provider_id: openai
|
||||
provider_model_id: gpt-4o-audio-preview
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: chatgpt-4o-latest
|
||||
provider_id: openai
|
||||
provider_model_id: chatgpt-4o-latest
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: o1
|
||||
provider_id: openai
|
||||
provider_model_id: o1
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: o1-mini
|
||||
provider_id: openai
|
||||
provider_model_id: o1-mini
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: o3-mini
|
||||
provider_id: openai
|
||||
provider_model_id: o3-mini
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: o4-mini
|
||||
provider_id: openai
|
||||
provider_model_id: o4-mini
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 1536
|
||||
context_length: 8192
|
||||
model_id: openai/text-embedding-3-small
|
||||
provider_id: openai
|
||||
provider_model_id: openai/text-embedding-3-small
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 3072
|
||||
context_length: 8192
|
||||
model_id: openai/text-embedding-3-large
|
||||
provider_id: openai
|
||||
provider_model_id: openai/text-embedding-3-large
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 1536
|
||||
context_length: 8192
|
||||
model_id: text-embedding-3-small
|
||||
provider_id: openai
|
||||
provider_model_id: text-embedding-3-small
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 3072
|
||||
context_length: 8192
|
||||
model_id: text-embedding-3-large
|
||||
provider_id: openai
|
||||
provider_model_id: text-embedding-3-large
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-70B-Instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-8B
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-11B-Vision
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama4-scout-instruct-basic
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 8192
|
||||
model_id: nomic-ai/nomic-embed-text-v1.5
|
||||
provider_id: fireworks-openai-compat
|
||||
provider_model_id: nomic-ai/nomic-embed-text-v1.5
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-70B-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Meta-Llama-Guard-3-8B
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-8B
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-11B-Vision
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 8192
|
||||
model_id: togethercomputer/m2-bert-80M-8k-retrieval
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval
|
||||
model_type: embedding
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
context_length: 32768
|
||||
model_id: togethercomputer/m2-bert-80M-32k-retrieval
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval
|
||||
model_type: embedding
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
provider_id: together-openai-compat
|
||||
provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/llama3-8b-8192
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama3-8b-8192
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama3-8b-8192
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/llama-3.1-8b-instant
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-3.1-8b-instant
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/llama3-70b-8192
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama3-70b-8192
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3-70B-Instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama3-70b-8192
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/llama-3.3-70b-versatile
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-3.3-70b-versatile
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-3.3-70b-versatile
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/llama-3.2-3b-preview
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-3.2-3b-preview
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-3.2-3b-preview
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/llama-4-scout-17b-16e-instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/llama-4-maverick-17b-128e-instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: groq-openai-compat
|
||||
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Meta-Llama-3.1-8B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Meta-Llama-3.1-405B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Meta-Llama-3.2-1B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-1B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Meta-Llama-3.2-3B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Meta-Llama-3.3-70B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: sambanova/Meta-Llama-Guard-3-8B
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-Guard-3-8B
|
||||
provider_id: sambanova-openai-compat
|
||||
provider_model_id: sambanova/Meta-Llama-Guard-3-8B
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: llama3.1-8b
|
||||
provider_id: cerebras-openai-compat
|
||||
provider_model_id: llama3.1-8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||
provider_id: cerebras-openai-compat
|
||||
provider_model_id: llama3.1-8b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: llama-3.3-70b
|
||||
provider_id: cerebras-openai-compat
|
||||
provider_model_id: llama-3.3-70b
|
||||
model_type: llm
|
||||
- metadata: {}
|
||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||
provider_id: cerebras-openai-compat
|
||||
provider_model_id: llama-3.3-70b
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
||||
|
|
@@ -1,201 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
|
||||
SQLiteVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES as CEREBRAS_MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.inference.cerebras_openai_compat.config import CerebrasCompatConfig
|
||||
from llama_stack.providers.remote.inference.fireworks.models import (
|
||||
MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.fireworks_openai_compat.config import FireworksCompatConfig
|
||||
from llama_stack.providers.remote.inference.groq.models import (
|
||||
MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.groq_openai_compat.config import GroqCompatConfig
|
||||
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
|
||||
from llama_stack.providers.remote.inference.openai.models import (
|
||||
MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.inference.sambanova_openai_compat.config import SambaNovaCompatConfig
|
||||
from llama_stack.providers.remote.inference.together.models import (
|
||||
MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.together_openai_compat.config import TogetherCompatConfig
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||
PGVectorVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
|
||||
# in this template, we allow each API key to be optional
|
||||
providers = [
|
||||
(
|
||||
"openai",
|
||||
OPENAI_MODEL_ENTRIES,
|
||||
OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"),
|
||||
),
|
||||
(
|
||||
"fireworks-openai-compat",
|
||||
FIREWORKS_MODEL_ENTRIES,
|
||||
FireworksCompatConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"),
|
||||
),
|
||||
(
|
||||
"together-openai-compat",
|
||||
TOGETHER_MODEL_ENTRIES,
|
||||
TogetherCompatConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
|
||||
),
|
||||
(
|
||||
"groq-openai-compat",
|
||||
GROQ_MODEL_ENTRIES,
|
||||
GroqCompatConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
|
||||
),
|
||||
(
|
||||
"sambanova-openai-compat",
|
||||
SAMBANOVA_MODEL_ENTRIES,
|
||||
SambaNovaCompatConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"),
|
||||
),
|
||||
(
|
||||
"cerebras-openai-compat",
|
||||
CEREBRAS_MODEL_ENTRIES,
|
||||
CerebrasCompatConfig.sample_run_config(api_key="${env.CEREBRAS_API_KEY:}"),
|
||||
),
|
||||
]
|
||||
inference_providers = []
|
||||
available_models = {}
|
||||
for provider_id, model_entries, config in providers:
|
||||
inference_providers.append(
|
||||
Provider(
|
||||
provider_id=provider_id,
|
||||
provider_type=f"remote::{provider_id}",
|
||||
config=config,
|
||||
)
|
||||
)
|
||||
available_models[provider_id] = model_entries
|
||||
return inference_providers, available_models
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
inference_providers, available_models = get_inference_providers()
|
||||
providers = {
|
||||
"inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
|
||||
"vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::rag-runtime",
|
||||
"remote::model-context-protocol",
|
||||
],
|
||||
}
|
||||
name = "verification"
|
||||
|
||||
vector_io_providers = [
|
||||
Provider(
|
||||
provider_id="sqlite-vec",
|
||||
provider_type="inline::sqlite-vec",
|
||||
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.ENABLE_CHROMADB+chromadb}",
|
||||
provider_type="remote::chromadb",
|
||||
config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.ENABLE_PGVECTOR+pgvector}",
|
||||
provider_type="remote::pgvector",
|
||||
config=PGVectorVectorIOConfig.sample_run_config(
|
||||
db="${env.PGVECTOR_DB:}",
|
||||
user="${env.PGVECTOR_USER:}",
|
||||
password="${env.PGVECTOR_PASSWORD:}",
|
||||
),
|
||||
),
|
||||
]
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
provider_id=embedding_provider.provider_id,
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
|
||||
default_models = get_model_registry(available_models)
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
description="Distribution for running e2e tests in CI",
|
||||
container_image=None,
|
||||
template_path=None,
|
||||
providers=providers,
|
||||
available_models_by_provider=available_models,
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": inference_providers + [embedding_provider],
|
||||
"vector_io": vector_io_providers,
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"FIREWORKS_API_KEY": (
|
||||
"",
|
||||
"Fireworks API Key",
|
||||
),
|
||||
"OPENAI_API_KEY": (
|
||||
"",
|
||||
"OpenAI API Key",
|
||||
),
|
||||
},
|
||||
)
|
||||
|
|
@@ -52,7 +52,7 @@ providers:
|
|||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:}
|
||||
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db
|
||||
eval:
|
||||
|
|
|
|||
|
|
@@ -49,7 +49,7 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db
   eval:
105 llama_stack/ui/app/api/v1/[...path]/route.ts Normal file
@@ -0,0 +1,105 @@
import { NextRequest, NextResponse } from "next/server";

// Get backend URL from environment variable or default to localhost for development
const BACKEND_URL =
  process.env.LLAMA_STACK_BACKEND_URL ||
  `http://localhost:${process.env.LLAMA_STACK_PORT || 8321}`;

async function proxyRequest(request: NextRequest, method: string) {
  try {
    // Extract the path from the request URL
    const url = new URL(request.url);
    const pathSegments = url.pathname.split("/");

    // Remove /api from the path to get the actual API path
    // /api/v1/models/list -> /v1/models/list
    const apiPath = pathSegments.slice(2).join("/"); // Remove 'api' segment
    const targetUrl = `${BACKEND_URL}/${apiPath}${url.search}`;

    console.log(`Proxying ${method} ${url.pathname} -> ${targetUrl}`);

    // Prepare headers (exclude host and other problematic headers)
    const headers = new Headers();
    request.headers.forEach((value, key) => {
      // Skip headers that might cause issues in proxy
      if (
        !["host", "connection", "content-length"].includes(key.toLowerCase())
      ) {
        headers.set(key, value);
      }
    });

    // Prepare the request options
    const requestOptions: RequestInit = {
      method,
      headers,
    };

    // Add body for methods that support it
    if (["POST", "PUT", "PATCH"].includes(method) && request.body) {
      requestOptions.body = await request.text();
    }

    // Make the request to FastAPI backend
    const response = await fetch(targetUrl, requestOptions);

    // Get response data
    const responseText = await response.text();

    console.log(
      `Response from FastAPI: ${response.status} ${response.statusText}`,
    );

    // Create response with same status and headers
    const proxyResponse = new NextResponse(responseText, {
      status: response.status,
      statusText: response.statusText,
    });

    // Copy response headers (except problematic ones)
    response.headers.forEach((value, key) => {
      if (!["connection", "transfer-encoding"].includes(key.toLowerCase())) {
        proxyResponse.headers.set(key, value);
      }
    });

    return proxyResponse;
  } catch (error) {
    console.error("Proxy request failed:", error);

    return NextResponse.json(
      {
        error: "Proxy request failed",
        message: error instanceof Error ? error.message : "Unknown error",
        backend_url: BACKEND_URL,
        timestamp: new Date().toISOString(),
      },
      { status: 500 },
    );
  }
}

// HTTP method handlers
export async function GET(request: NextRequest) {
  return proxyRequest(request, "GET");
}

export async function POST(request: NextRequest) {
  return proxyRequest(request, "POST");
}

export async function PUT(request: NextRequest) {
  return proxyRequest(request, "PUT");
}

export async function DELETE(request: NextRequest) {
  return proxyRequest(request, "DELETE");
}

export async function PATCH(request: NextRequest) {
  return proxyRequest(request, "PATCH");
}

export async function OPTIONS(request: NextRequest) {
  return proxyRequest(request, "OPTIONS");
}
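With this route in place, every `/api/v1/...` call made against the UI's own origin is forwarded to the stack server, so the browser never needs to know the backend address. A minimal way to sanity-check the proxy during local development is sketched below; it assumes the Next.js dev server is on port 3000 and a Llama Stack server is reachable at the default backend URL, and the `requests` dependency is just an illustrative choice.

# Minimal smoke test for the proxy route (assumptions: UI dev server on
# http://localhost:3000, Llama Stack server on the default backend URL).
import requests

resp = requests.get("http://localhost:3000/api/v1/models")
print(resp.status_code)  # 200 if the request reached the backend
print(resp.json())       # registered models returned by the stack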
@@ -1,12 +1,6 @@
 import LlamaStackClient from "llama-stack-client";
-import OpenAI from "openai";

-export const client =
-  process.env.NEXT_PUBLIC_USE_OPENAI_CLIENT === "true" // useful for testing
-    ? new OpenAI({
-        apiKey: process.env.NEXT_PUBLIC_OPENAI_API_KEY,
-        dangerouslyAllowBrowser: true,
-      })
-    : new LlamaStackClient({
-        baseURL: process.env.NEXT_PUBLIC_LLAMA_STACK_BASE_URL,
-      });
+export const client = new LlamaStackClient({
+  baseURL:
+    typeof window !== "undefined" ? `${window.location.origin}/api` : "/api",
+});
522 llama_stack/ui/package-lock.json generated
@@ -15,11 +15,10 @@
         "@radix-ui/react-tooltip": "^1.2.6",
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
-        "llama-stack-client": "github:stainless-sdks/llama-stack-node#ehhuang/dev",
+        "llama-stack-client": "0.2.9",
         "lucide-react": "^0.510.0",
         "next": "15.3.2",
         "next-themes": "^0.4.6",
-        "openai": "^4.103.0",
         "react": "^19.0.0",
         "react-dom": "^19.0.0",
         "tailwind-merge": "^3.3.0"
@ -677,6 +676,406 @@
|
|||
"tslib": "^2.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz",
|
||||
"integrity": "sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"aix"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.5.tgz",
|
||||
"integrity": "sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.5.tgz",
|
||||
"integrity": "sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-x64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.5.tgz",
|
||||
"integrity": "sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-arm64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.5.tgz",
|
||||
"integrity": "sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-x64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.5.tgz",
|
||||
"integrity": "sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-arm64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.5.tgz",
|
||||
"integrity": "sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-x64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.5.tgz",
|
||||
"integrity": "sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.5.tgz",
|
||||
"integrity": "sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.5.tgz",
|
||||
"integrity": "sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ia32": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.5.tgz",
|
||||
"integrity": "sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-loong64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.5.tgz",
|
||||
"integrity": "sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-mips64el": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.5.tgz",
|
||||
"integrity": "sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==",
|
||||
"cpu": [
|
||||
"mips64el"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ppc64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.5.tgz",
|
||||
"integrity": "sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-riscv64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.5.tgz",
|
||||
"integrity": "sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-s390x": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.5.tgz",
|
||||
"integrity": "sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-x64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.5.tgz",
|
||||
"integrity": "sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-arm64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.5.tgz",
|
||||
"integrity": "sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-x64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.5.tgz",
|
||||
"integrity": "sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-arm64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.5.tgz",
|
||||
"integrity": "sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-x64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.5.tgz",
|
||||
"integrity": "sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/sunos-x64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.5.tgz",
|
||||
"integrity": "sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"sunos"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-arm64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.5.tgz",
|
||||
"integrity": "sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-ia32": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.5.tgz",
|
||||
"integrity": "sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-x64": {
|
||||
"version": "0.25.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.5.tgz",
|
||||
"integrity": "sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@eslint-community/eslint-utils": {
|
||||
"version": "4.7.0",
|
||||
"resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz",
|
||||
|
|
@@ -5601,6 +6000,46 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/esbuild": {
+      "version": "0.25.5",
+      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.5.tgz",
+      "integrity": "sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==",
+      "hasInstallScript": true,
+      "license": "MIT",
+      "bin": {
+        "esbuild": "bin/esbuild"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "@esbuild/aix-ppc64": "0.25.5",
+        "@esbuild/android-arm": "0.25.5",
+        "@esbuild/android-arm64": "0.25.5",
+        "@esbuild/android-x64": "0.25.5",
+        "@esbuild/darwin-arm64": "0.25.5",
+        "@esbuild/darwin-x64": "0.25.5",
+        "@esbuild/freebsd-arm64": "0.25.5",
+        "@esbuild/freebsd-x64": "0.25.5",
+        "@esbuild/linux-arm": "0.25.5",
+        "@esbuild/linux-arm64": "0.25.5",
+        "@esbuild/linux-ia32": "0.25.5",
+        "@esbuild/linux-loong64": "0.25.5",
+        "@esbuild/linux-mips64el": "0.25.5",
+        "@esbuild/linux-ppc64": "0.25.5",
+        "@esbuild/linux-riscv64": "0.25.5",
+        "@esbuild/linux-s390x": "0.25.5",
+        "@esbuild/linux-x64": "0.25.5",
+        "@esbuild/netbsd-arm64": "0.25.5",
+        "@esbuild/netbsd-x64": "0.25.5",
+        "@esbuild/openbsd-arm64": "0.25.5",
+        "@esbuild/openbsd-x64": "0.25.5",
+        "@esbuild/sunos-x64": "0.25.5",
+        "@esbuild/win32-arm64": "0.25.5",
+        "@esbuild/win32-ia32": "0.25.5",
+        "@esbuild/win32-x64": "0.25.5"
+      }
+    },
     "node_modules/escalade": {
       "version": "3.2.0",
       "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
@@ -6555,7 +6994,6 @@
       "version": "2.3.3",
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
       "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
-      "dev": true,
       "hasInstallScript": true,
       "license": "MIT",
       "optional": true,
@@ -6717,7 +7155,6 @@
       "version": "4.10.0",
       "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz",
       "integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "resolve-pkg-maps": "^1.0.0"
@@ -9092,8 +9529,9 @@
       "license": "MIT"
     },
     "node_modules/llama-stack-client": {
-      "version": "0.0.1-alpha.0",
-      "resolved": "git+ssh://git@github.com/stainless-sdks/llama-stack-node.git#5d34d229fb53b6dad02da0f19f4b310b529c6b15",
+      "version": "0.2.9",
+      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.9.tgz",
+      "integrity": "sha512-7+2WuPYt2j/k/Twh5IGn8hd8q4W6lVEK+Ql4PpICGLj4N8YmooCfydI1UvdT2UlX7PNYKNeyeFqTifWT2MjWKg==",
       "license": "Apache-2.0",
       "dependencies": {
         "@types/node": "^18.11.18",
@@ -9102,7 +9540,8 @@
         "agentkeepalive": "^4.2.1",
         "form-data-encoder": "1.7.2",
         "formdata-node": "^4.3.2",
-        "node-fetch": "^2.6.7"
+        "node-fetch": "^2.6.7",
+        "tsx": "^4.19.2"
       }
     },
     "node_modules/llama-stack-client/node_modules/@types/node": {
@@ -9805,51 +10244,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/openai": {
-      "version": "4.103.0",
-      "resolved": "https://registry.npmjs.org/openai/-/openai-4.103.0.tgz",
-      "integrity": "sha512-eWcz9kdurkGOFDtd5ySS5y251H2uBgq9+1a2lTBnjMMzlexJ40Am5t6Mu76SSE87VvitPa0dkIAp75F+dZVC0g==",
-      "license": "Apache-2.0",
-      "dependencies": {
-        "@types/node": "^18.11.18",
-        "@types/node-fetch": "^2.6.4",
-        "abort-controller": "^3.0.0",
-        "agentkeepalive": "^4.2.1",
-        "form-data-encoder": "1.7.2",
-        "formdata-node": "^4.3.2",
-        "node-fetch": "^2.6.7"
-      },
-      "bin": {
-        "openai": "bin/cli"
-      },
-      "peerDependencies": {
-        "ws": "^8.18.0",
-        "zod": "^3.23.8"
-      },
-      "peerDependenciesMeta": {
-        "ws": {
-          "optional": true
-        },
-        "zod": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/openai/node_modules/@types/node": {
-      "version": "18.19.103",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.103.tgz",
-      "integrity": "sha512-hHTHp+sEz6SxFsp+SA+Tqrua3AbmlAw+Y//aEwdHrdZkYVRWdvWD3y5uPZ0flYOkgskaFWqZ/YGFm3FaFQ0pRw==",
-      "license": "MIT",
-      "dependencies": {
-        "undici-types": "~5.26.4"
-      }
-    },
-    "node_modules/openai/node_modules/undici-types": {
-      "version": "5.26.5",
-      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
-      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
-      "license": "MIT"
-    },
     "node_modules/optionator": {
       "version": "0.9.4",
       "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@@ -10631,7 +11025,6 @@
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
       "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
-      "dev": true,
       "license": "MIT",
       "funding": {
         "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
@@ -11682,6 +12075,25 @@
       "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
       "license": "0BSD"
     },
+    "node_modules/tsx": {
+      "version": "4.19.4",
+      "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.4.tgz",
+      "integrity": "sha512-gK5GVzDkJK1SI1zwHf32Mqxf2tSJkNx+eYcNly5+nHvWqXUJYUkWBQtKauoESz3ymezAI++ZwT855x5p5eop+Q==",
+      "license": "MIT",
+      "dependencies": {
+        "esbuild": "~0.25.0",
+        "get-tsconfig": "^4.7.5"
+      },
+      "bin": {
+        "tsx": "dist/cli.mjs"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      },
+      "optionalDependencies": {
+        "fsevents": "~2.3.3"
+      }
+    },
     "node_modules/tw-animate-css": {
       "version": "1.2.9",
       "resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz",
@@ -12269,7 +12681,7 @@
       "version": "8.18.2",
       "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz",
       "integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==",
-      "devOptional": true,
+      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=10.0.0"
@@ -12380,7 +12792,7 @@
       "version": "3.24.4",
       "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.4.tgz",
       "integrity": "sha512-OdqJE9UDRPwWsrHjLN2F8bPxvwJBK22EHLWtanu0LSYr5YqzsaaW3RMgmjwr8Rypg5k+meEJdSPXJZXE/yqOMg==",
-      "devOptional": true,
+      "dev": true,
      "license": "MIT",
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"