Merge remote-tracking branch 'origin/main' into TamiTakamiya/tool-param-definition-update

This commit is contained in:
Ashwin Bharambe 2025-09-27 10:47:08 -07:00
commit c1818350c8
479 changed files with 74743 additions and 8997 deletions

View file

@ -54,7 +54,7 @@ class InferenceStore:
async def initialize(self):
"""Create the necessary tables if they don't exist."""
self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config))
self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy)
await self.sql_store.create_table(
"chat_completions",
{
@ -202,7 +202,6 @@ class InferenceStore:
order_by=[("created", order.value)],
cursor=("id", after) if after else None,
limit=limit,
policy=self.policy,
)
data = [
@ -229,7 +228,6 @@ class InferenceStore:
row = await self.sql_store.fetch_one(
table="chat_completions",
where={"id": completion_id},
policy=self.policy,
)
if not row:

View file

@ -40,7 +40,7 @@ from llama_stack.apis.inference import (
)
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
from llama_stack.providers.utils.inference.openai_compat import (
b64_encode_openai_embeddings_response,
convert_message_to_openai_dict_new,
@ -67,10 +67,10 @@ class LiteLLMOpenAIMixin(
# when calling litellm.
def __init__(
self,
model_entries,
litellm_provider_name: str,
api_key_from_config: str | None,
provider_data_api_key_field: str,
model_entries: list[ProviderModelEntry] | None = None,
openai_compat_api_base: str | None = None,
download_images: bool = False,
json_schema_strict: bool = True,
@ -86,7 +86,7 @@ class LiteLLMOpenAIMixin(
:param download_images: Whether to download images and convert to base64 for message conversion.
:param json_schema_strict: Whether to use strict mode for JSON schema validation.
"""
ModelRegistryHelper.__init__(self, model_entries)
ModelRegistryHelper.__init__(self, model_entries=model_entries)
self.litellm_provider_name = litellm_provider_name
self.api_key_from_config = api_key_from_config

View file

@ -11,7 +11,6 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.errors import UnsupportedModelError
from llama_stack.apis.models import ModelType
from llama_stack.log import get_logger
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
from llama_stack.providers.utils.inference import (
ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
@ -21,7 +20,7 @@ logger = get_logger(name=__name__, category="providers::utils")
class RemoteInferenceProviderConfig(BaseModel):
allowed_models: list[str] | None = Field(
allowed_models: list[str] | None = Field( # TODO: make this non-optional and give a list() default
default=None,
description="List of models that should be registered with the model registry. If None, all models are allowed.",
)
@ -37,13 +36,6 @@ class ProviderModelEntry(BaseModel):
metadata: dict[str, Any] = Field(default_factory=dict)
def get_huggingface_repo(model_descriptor: str) -> str | None:
for model in all_registered_models():
if model.descriptor() == model_descriptor:
return model.huggingface_repo
return None
def build_hf_repo_model_entry(
provider_model_id: str,
model_descriptor: str,
@ -63,25 +55,20 @@ def build_hf_repo_model_entry(
)
def build_model_entry(provider_model_id: str, model_descriptor: str) -> ProviderModelEntry:
return ProviderModelEntry(
provider_model_id=provider_model_id,
aliases=[],
llama_model=model_descriptor,
model_type=ModelType.llm,
)
class ModelRegistryHelper(ModelsProtocolPrivate):
__provider_id__: str
def __init__(self, model_entries: list[ProviderModelEntry], allowed_models: list[str] | None = None):
self.model_entries = model_entries
def __init__(
self,
model_entries: list[ProviderModelEntry] | None = None,
allowed_models: list[str] | None = None,
):
self.allowed_models = allowed_models
self.alias_to_provider_id_map = {}
self.provider_id_to_llama_model_map = {}
for entry in model_entries:
self.model_entries = model_entries or []
for entry in self.model_entries:
for alias in entry.aliases:
self.alias_to_provider_id_map[alias] = entry.provider_model_id
@ -103,7 +90,7 @@ class ModelRegistryHelper(ModelsProtocolPrivate):
Model(
identifier=id,
provider_resource_id=entry.provider_model_id,
model_type=ModelType.llm,
model_type=entry.model_type,
metadata=entry.metadata,
provider_id=self.__provider_id__,
)

View file

@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import uuid
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator
from typing import Any
import openai
from openai import NOT_GIVEN, AsyncOpenAI
from llama_stack.apis.inference import (
@ -23,13 +23,16 @@ from llama_stack.apis.inference import (
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.apis.models import ModelType
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
logger = get_logger(name=__name__, category="providers::utils")
class OpenAIMixin(ABC):
class OpenAIMixin(ModelRegistryHelper, ABC):
"""
Mixin class that provides OpenAI-specific functionality for inference providers.
This class handles direct OpenAI API calls using the AsyncOpenAI client.
@ -50,6 +53,22 @@ class OpenAIMixin(ABC):
# This is useful for providers that do not return a unique id in the response.
overwrite_completion_id: bool = False
# Allow subclasses to control whether to download images and convert to base64
# for providers that require base64 encoded images instead of URLs.
download_images: bool = False
# Embedding model metadata for this provider
# Can be set by subclasses or instances to provide embedding models
# Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}}
embedding_model_metadata: dict[str, dict[str, int]] = {}
# Cache of available models keyed by model ID
# This is set in list_models() and used in check_model_availability()
_model_cache: dict[str, Model] = {}
# List of allowed models for this provider, if empty all models allowed
allowed_models: list[str] = []
@abstractmethod
def get_api_key(self) -> str:
"""
@ -226,6 +245,24 @@ class OpenAIMixin(ABC):
"""
Direct OpenAI chat completion API call.
"""
if self.download_images:
async def _localize_image_url(m: OpenAIMessageParam) -> OpenAIMessageParam:
if isinstance(m.content, list):
for c in m.content:
if c.type == "image_url" and c.image_url and c.image_url.url and "http" in c.image_url.url:
localize_result = await localize_image_content(c.image_url.url)
if localize_result is None:
raise ValueError(
f"Failed to localize image content from {c.image_url.url[:42]}{'...' if len(c.image_url.url) > 42 else ''}"
)
content, format = localize_result
c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
# else it's a string and we don't need to modify it
return m
messages = [await _localize_image_url(m) for m in messages]
resp = await self.client.chat.completions.create(
**await prepare_openai_completion_params(
model=await self._get_provider_model_id(model),
@ -292,26 +329,53 @@ class OpenAIMixin(ABC):
return OpenAIEmbeddingsResponse(
data=data,
model=response.model,
model=model,
usage=usage,
)
async def list_models(self) -> list[Model] | None:
"""
List available models from the provider's /v1/models endpoint augmented with static embedding model metadata.
Also, caches the models in self._model_cache for use in check_model_availability().
:return: A list of Model instances representing available models.
"""
self._model_cache = {}
async for m in self.client.models.list():
if self.allowed_models and m.id not in self.allowed_models:
logger.info(f"Skipping model {m.id} as it is not in the allowed models list")
continue
if metadata := self.embedding_model_metadata.get(m.id):
# This is an embedding model - augment with metadata
model = Model(
provider_id=self.__provider_id__, # type: ignore[attr-defined]
provider_resource_id=m.id,
identifier=m.id,
model_type=ModelType.embedding,
metadata=metadata,
)
else:
# This is an LLM
model = Model(
provider_id=self.__provider_id__, # type: ignore[attr-defined]
provider_resource_id=m.id,
identifier=m.id,
model_type=ModelType.llm,
)
self._model_cache[m.id] = model
return list(self._model_cache.values())
async def check_model_availability(self, model: str) -> bool:
"""
Check if a specific model is available from OpenAI.
Check if a specific model is available from the provider's /v1/models.
:param model: The model identifier to check.
:return: True if the model is available dynamically, False otherwise.
"""
try:
# Direct model lookup - returns model or raises NotFoundError
await self.client.models.retrieve(model)
return True
except openai.NotFoundError:
# Model doesn't exist - this is expected for unavailable models
pass
except Exception as e:
# All other errors (auth, rate limit, network, etc.)
logger.warning(f"Failed to check model availability for {model}: {e}")
if not self._model_cache:
await self.list_models()
return False
return model in self._model_cache

View file

@ -28,7 +28,7 @@ class CommonConfig(BaseModel):
class RedisKVStoreConfig(CommonConfig):
type: Literal[KVStoreType.redis.value] = KVStoreType.redis.value
type: Literal["redis"] = KVStoreType.redis.value
host: str = "localhost"
port: int = 6379
@ -50,7 +50,7 @@ class RedisKVStoreConfig(CommonConfig):
class SqliteKVStoreConfig(CommonConfig):
type: Literal[KVStoreType.sqlite.value] = KVStoreType.sqlite.value
type: Literal["sqlite"] = KVStoreType.sqlite.value
db_path: str = Field(
default=(RUNTIME_BASE_DIR / "kvstore.db").as_posix(),
description="File path for the sqlite database",
@ -69,7 +69,7 @@ class SqliteKVStoreConfig(CommonConfig):
class PostgresKVStoreConfig(CommonConfig):
type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value
type: Literal["postgres"] = KVStoreType.postgres.value
host: str = "localhost"
port: int = 5432
db: str = "llamastack"
@ -113,11 +113,11 @@ class PostgresKVStoreConfig(CommonConfig):
class MongoDBKVStoreConfig(CommonConfig):
type: Literal[KVStoreType.mongodb.value] = KVStoreType.mongodb.value
type: Literal["mongodb"] = KVStoreType.mongodb.value
host: str = "localhost"
port: int = 27017
db: str = "llamastack"
user: str = None
user: str | None = None
password: str | None = None
collection_name: str = "llamastack_kvstore"

View file

@ -7,6 +7,7 @@
from datetime import datetime
from pymongo import AsyncMongoClient
from pymongo.asynchronous.collection import AsyncCollection
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore
@ -19,8 +20,13 @@ log = get_logger(name=__name__, category="providers::utils")
class MongoDBKVStoreImpl(KVStore):
def __init__(self, config: MongoDBKVStoreConfig):
self.config = config
self.conn = None
self.collection = None
self.conn: AsyncMongoClient | None = None
@property
def collection(self) -> AsyncCollection:
if self.conn is None:
raise RuntimeError("MongoDB connection is not initialized")
return self.conn[self.config.db][self.config.collection_name]
async def initialize(self) -> None:
try:
@ -32,7 +38,6 @@ class MongoDBKVStoreImpl(KVStore):
}
conn_creds = {k: v for k, v in conn_creds.items() if v is not None}
self.conn = AsyncMongoClient(**conn_creds)
self.collection = self.conn[self.config.db][self.config.collection_name]
except Exception as e:
log.exception("Could not connect to MongoDB database server")
raise RuntimeError("Could not connect to MongoDB database server") from e

View file

@ -9,9 +9,13 @@ from datetime import datetime
import aiosqlite
from llama_stack.log import get_logger
from ..api import KVStore
from ..config import SqliteKVStoreConfig
logger = get_logger(name=__name__, category="providers::utils")
class SqliteKVStoreImpl(KVStore):
def __init__(self, config: SqliteKVStoreConfig):
@ -50,6 +54,9 @@ class SqliteKVStoreImpl(KVStore):
if row is None:
return None
value, expiration = row
if not isinstance(value, str):
logger.warning(f"Expected string value for key {key}, got {type(value)}, returning None")
return None
return value
async def delete(self, key: str) -> None:

View file

@ -28,8 +28,7 @@ class ResponsesStore:
sql_store_config = SqliteSqlStoreConfig(
db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
)
self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config))
self.policy = policy
self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config), policy)
async def initialize(self):
"""Create the necessary tables if they don't exist."""
@ -87,7 +86,6 @@ class ResponsesStore:
order_by=[("created_at", order.value)],
cursor=("id", after) if after else None,
limit=limit,
policy=self.policy,
)
data = [OpenAIResponseObjectWithInput(**row["response_object"]) for row in paginated_result.data]
@ -105,7 +103,6 @@ class ResponsesStore:
row = await self.sql_store.fetch_one(
"openai_responses",
where={"id": response_id},
policy=self.policy,
)
if not row:
@ -116,7 +113,7 @@ class ResponsesStore:
return OpenAIResponseObjectWithInput(**row["response_object"])
async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject:
row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}, policy=self.policy)
row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id})
if not row:
raise ValueError(f"Response with id {response_id} not found")
await self.sql_store.delete("openai_responses", where={"id": response_id})

View file

@ -53,13 +53,15 @@ class AuthorizedSqlStore:
access control policies, user attribute capture, and SQL filtering optimization.
"""
def __init__(self, sql_store: SqlStore):
def __init__(self, sql_store: SqlStore, policy: list[AccessRule]):
"""
Initialize the authorization layer.
:param sql_store: Base SqlStore implementation to wrap
:param policy: Access control policy to use for authorization
"""
self.sql_store = sql_store
self.policy = policy
self._detect_database_type()
self._validate_sql_optimized_policy()
@ -117,14 +119,13 @@ class AuthorizedSqlStore:
async def fetch_all(
self,
table: str,
policy: list[AccessRule],
where: Mapping[str, Any] | None = None,
limit: int | None = None,
order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None,
cursor: tuple[str, str] | None = None,
) -> PaginatedResponse:
"""Fetch all rows with automatic access control filtering."""
access_where = self._build_access_control_where_clause(policy)
access_where = self._build_access_control_where_clause(self.policy)
rows = await self.sql_store.fetch_all(
table=table,
where=where,
@ -146,7 +147,7 @@ class AuthorizedSqlStore:
str(record_id), table, User(principal=stored_owner_principal, attributes=stored_access_attrs)
)
if is_action_allowed(policy, Action.READ, sql_record, current_user):
if is_action_allowed(self.policy, Action.READ, sql_record, current_user):
filtered_rows.append(row)
return PaginatedResponse(
@ -157,14 +158,12 @@ class AuthorizedSqlStore:
async def fetch_one(
self,
table: str,
policy: list[AccessRule],
where: Mapping[str, Any] | None = None,
order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None,
) -> dict[str, Any] | None:
"""Fetch one row with automatic access control checking."""
results = await self.fetch_all(
table=table,
policy=policy,
where=where,
limit=1,
order_by=order_by,

View file

@ -8,7 +8,7 @@ import asyncio
import contextvars
import logging # allow-direct-logging
import queue
import random
import secrets
import sys
import threading
import time
@ -76,16 +76,16 @@ def span_id_to_str(span_id: int) -> str:
def generate_span_id() -> str:
span_id = random.getrandbits(64)
span_id = secrets.randbits(64)
while span_id == INVALID_SPAN_ID:
span_id = random.getrandbits(64)
span_id = secrets.randbits(64)
return span_id_to_str(span_id)
def generate_trace_id() -> str:
trace_id = random.getrandbits(128)
trace_id = secrets.randbits(128)
while trace_id == INVALID_TRACE_ID:
trace_id = random.getrandbits(128)
trace_id = secrets.randbits(128)
return trace_id_to_str(trace_id)