From b4974d411d3a2557ad679822c2372e355cd49532 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 19 Sep 2025 14:59:30 -0700 Subject: [PATCH 01/33] chore: simplify authorized sqlstore # What does this PR do? ## Test Plan --- .../providers/inline/files/localfs/files.py | 5 ++--- .../providers/remote/files/s3/files.py | 5 ++--- .../utils/inference/inference_store.py | 4 +--- .../utils/responses/responses_store.py | 7 ++----- .../utils/sqlstore/authorized_sqlstore.py | 11 +++++------ .../sqlstore/test_authorized_sqlstore.py | 19 +++++++++++-------- tests/unit/utils/test_authorized_sqlstore.py | 18 +++++++++--------- 7 files changed, 32 insertions(+), 37 deletions(-) diff --git a/llama_stack/providers/inline/files/localfs/files.py b/llama_stack/providers/inline/files/localfs/files.py index 9c610c1ba..65cf8d815 100644 --- a/llama_stack/providers/inline/files/localfs/files.py +++ b/llama_stack/providers/inline/files/localfs/files.py @@ -44,7 +44,7 @@ class LocalfsFilesImpl(Files): storage_path.mkdir(parents=True, exist_ok=True) # Initialize SQL store for metadata - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.config.metadata_store)) + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.config.metadata_store), self.policy) await self.sql_store.create_table( "openai_files", { @@ -74,7 +74,7 @@ class LocalfsFilesImpl(Files): if not self.sql_store: raise RuntimeError("Files provider not initialized") - row = await self.sql_store.fetch_one("openai_files", policy=self.policy, where={"id": file_id}) + row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}) if not row: raise ResourceNotFoundError(file_id, "File", "client.files.list()") @@ -150,7 +150,6 @@ class LocalfsFilesImpl(Files): paginated_result = await self.sql_store.fetch_all( table="openai_files", - policy=self.policy, where=where_conditions if where_conditions else None, order_by=[("created_at", order.value)], cursor=("id", after) if after else None, diff --git a/llama_stack/providers/remote/files/s3/files.py b/llama_stack/providers/remote/files/s3/files.py index 54742d900..8ea96af9e 100644 --- a/llama_stack/providers/remote/files/s3/files.py +++ b/llama_stack/providers/remote/files/s3/files.py @@ -137,7 +137,7 @@ class S3FilesImpl(Files): where: dict[str, str | dict] = {"id": file_id} if not return_expired: where["expires_at"] = {">": self._now()} - if not (row := await self.sql_store.fetch_one("openai_files", policy=self.policy, where=where)): + if not (row := await self.sql_store.fetch_one("openai_files", where=where)): raise ResourceNotFoundError(file_id, "File", "files.list()") return row @@ -164,7 +164,7 @@ class S3FilesImpl(Files): self._client = _create_s3_client(self._config) await _create_bucket_if_not_exists(self._client, self._config) - self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store)) + self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store), self.policy) await self._sql_store.create_table( "openai_files", { @@ -268,7 +268,6 @@ class S3FilesImpl(Files): paginated_result = await self.sql_store.fetch_all( table="openai_files", - policy=self.policy, where=where_conditions, order_by=[("created_at", order.value)], cursor=("id", after) if after else None, diff --git a/llama_stack/providers/utils/inference/inference_store.py b/llama_stack/providers/utils/inference/inference_store.py index 17f4c6268..ffc9f3e11 100644 --- a/llama_stack/providers/utils/inference/inference_store.py +++ b/llama_stack/providers/utils/inference/inference_store.py @@ -54,7 +54,7 @@ class InferenceStore: async def initialize(self): """Create the necessary tables if they don't exist.""" - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config)) + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) await self.sql_store.create_table( "chat_completions", { @@ -202,7 +202,6 @@ class InferenceStore: order_by=[("created", order.value)], cursor=("id", after) if after else None, limit=limit, - policy=self.policy, ) data = [ @@ -229,7 +228,6 @@ class InferenceStore: row = await self.sql_store.fetch_one( table="chat_completions", where={"id": completion_id}, - policy=self.policy, ) if not row: diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 04778ed1c..829cd8a62 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -28,8 +28,7 @@ class ResponsesStore: sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config)) - self.policy = policy + self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config), policy) async def initialize(self): """Create the necessary tables if they don't exist.""" @@ -87,7 +86,6 @@ class ResponsesStore: order_by=[("created_at", order.value)], cursor=("id", after) if after else None, limit=limit, - policy=self.policy, ) data = [OpenAIResponseObjectWithInput(**row["response_object"]) for row in paginated_result.data] @@ -105,7 +103,6 @@ class ResponsesStore: row = await self.sql_store.fetch_one( "openai_responses", where={"id": response_id}, - policy=self.policy, ) if not row: @@ -116,7 +113,7 @@ class ResponsesStore: return OpenAIResponseObjectWithInput(**row["response_object"]) async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject: - row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}, policy=self.policy) + row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}) if not row: raise ValueError(f"Response with id {response_id} not found") await self.sql_store.delete("openai_responses", where={"id": response_id}) diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index acb688f96..ab67f7052 100644 --- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -53,13 +53,15 @@ class AuthorizedSqlStore: access control policies, user attribute capture, and SQL filtering optimization. """ - def __init__(self, sql_store: SqlStore): + def __init__(self, sql_store: SqlStore, policy: list[AccessRule]): """ Initialize the authorization layer. :param sql_store: Base SqlStore implementation to wrap + :param policy: Access control policy to use for authorization """ self.sql_store = sql_store + self.policy = policy self._detect_database_type() self._validate_sql_optimized_policy() @@ -117,14 +119,13 @@ class AuthorizedSqlStore: async def fetch_all( self, table: str, - policy: list[AccessRule], where: Mapping[str, Any] | None = None, limit: int | None = None, order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, cursor: tuple[str, str] | None = None, ) -> PaginatedResponse: """Fetch all rows with automatic access control filtering.""" - access_where = self._build_access_control_where_clause(policy) + access_where = self._build_access_control_where_clause(self.policy) rows = await self.sql_store.fetch_all( table=table, where=where, @@ -146,7 +147,7 @@ class AuthorizedSqlStore: str(record_id), table, User(principal=stored_owner_principal, attributes=stored_access_attrs) ) - if is_action_allowed(policy, Action.READ, sql_record, current_user): + if is_action_allowed(self.policy, Action.READ, sql_record, current_user): filtered_rows.append(row) return PaginatedResponse( @@ -157,14 +158,12 @@ class AuthorizedSqlStore: async def fetch_one( self, table: str, - policy: list[AccessRule], where: Mapping[str, Any] | None = None, order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, ) -> dict[str, Any] | None: """Fetch one row with automatic access control checking.""" results = await self.fetch_all( table=table, - policy=policy, where=where, limit=1, order_by=order_by, diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py index 4002f2e1f..98bef0f2c 100644 --- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py +++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py @@ -57,7 +57,7 @@ def authorized_store(backend_config): config = config_func() base_sqlstore = sqlstore_impl(config) - authorized_store = AuthorizedSqlStore(base_sqlstore) + authorized_store = AuthorizedSqlStore(base_sqlstore, default_policy()) yield authorized_store @@ -106,7 +106,7 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz await authorized_store.insert(table_name, {"id": "1", "data": "public_data"}) # Test fetching with no user - should not error on JSON comparison - result = await authorized_store.fetch_all(table_name, policy=default_policy()) + result = await authorized_store.fetch_all(table_name) assert len(result.data) == 1 assert result.data[0]["id"] == "1" assert result.data[0]["access_attributes"] is None @@ -119,7 +119,7 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz await authorized_store.insert(table_name, {"id": "2", "data": "admin_data"}) # Fetch all - admin should see both - result = await authorized_store.fetch_all(table_name, policy=default_policy()) + result = await authorized_store.fetch_all(table_name) assert len(result.data) == 2 # Test with non-admin user @@ -127,7 +127,7 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz mock_get_authenticated_user.return_value = regular_user # Should only see public record - result = await authorized_store.fetch_all(table_name, policy=default_policy()) + result = await authorized_store.fetch_all(table_name) assert len(result.data) == 1 assert result.data[0]["id"] == "1" @@ -156,7 +156,7 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz # Now test with the multi-user who has both roles=admin and teams=dev mock_get_authenticated_user.return_value = multi_user - result = await authorized_store.fetch_all(table_name, policy=default_policy()) + result = await authorized_store.fetch_all(table_name) # Should see: # - public record (1) - no access_attributes @@ -217,21 +217,24 @@ async def test_user_ownership_policy(mock_get_authenticated_user, authorized_sto ), ] + # Create a new authorized store with the owner-only policy + owner_only_store = AuthorizedSqlStore(authorized_store.sql_store, owner_only_policy) + # Test user1 access - should only see their own record mock_get_authenticated_user.return_value = user1 - result = await authorized_store.fetch_all(table_name, policy=owner_only_policy) + result = await owner_only_store.fetch_all(table_name) assert len(result.data) == 1, f"Expected user1 to see 1 record, got {len(result.data)}" assert result.data[0]["id"] == "1", f"Expected user1's record, got {result.data[0]['id']}" # Test user2 access - should only see their own record mock_get_authenticated_user.return_value = user2 - result = await authorized_store.fetch_all(table_name, policy=owner_only_policy) + result = await owner_only_store.fetch_all(table_name) assert len(result.data) == 1, f"Expected user2 to see 1 record, got {len(result.data)}" assert result.data[0]["id"] == "2", f"Expected user2's record, got {result.data[0]['id']}" # Test with anonymous user - should see no records mock_get_authenticated_user.return_value = None - result = await authorized_store.fetch_all(table_name, policy=owner_only_policy) + result = await owner_only_store.fetch_all(table_name) assert len(result.data) == 0, f"Expected anonymous user to see 0 records, got {len(result.data)}" finally: diff --git a/tests/unit/utils/test_authorized_sqlstore.py b/tests/unit/utils/test_authorized_sqlstore.py index 90eb706e4..d85e784a9 100644 --- a/tests/unit/utils/test_authorized_sqlstore.py +++ b/tests/unit/utils/test_authorized_sqlstore.py @@ -26,7 +26,7 @@ async def test_authorized_fetch_with_where_sql_access_control(mock_get_authentic db_path=tmp_dir + "/" + db_name, ) ) - sqlstore = AuthorizedSqlStore(base_sqlstore) + sqlstore = AuthorizedSqlStore(base_sqlstore, default_policy()) # Create table with access control await sqlstore.create_table( @@ -56,24 +56,24 @@ async def test_authorized_fetch_with_where_sql_access_control(mock_get_authentic mock_get_authenticated_user.return_value = admin_user # Admin should see both documents - result = await sqlstore.fetch_all("documents", policy=default_policy(), where={"id": 1}) + result = await sqlstore.fetch_all("documents", where={"id": 1}) assert len(result.data) == 1 assert result.data[0]["title"] == "Admin Document" # User should only see their document mock_get_authenticated_user.return_value = regular_user - result = await sqlstore.fetch_all("documents", policy=default_policy(), where={"id": 1}) + result = await sqlstore.fetch_all("documents", where={"id": 1}) assert len(result.data) == 0 - result = await sqlstore.fetch_all("documents", policy=default_policy(), where={"id": 2}) + result = await sqlstore.fetch_all("documents", where={"id": 2}) assert len(result.data) == 1 assert result.data[0]["title"] == "User Document" - row = await sqlstore.fetch_one("documents", policy=default_policy(), where={"id": 1}) + row = await sqlstore.fetch_one("documents", where={"id": 1}) assert row is None - row = await sqlstore.fetch_one("documents", policy=default_policy(), where={"id": 2}) + row = await sqlstore.fetch_one("documents", where={"id": 2}) assert row is not None assert row["title"] == "User Document" @@ -88,7 +88,7 @@ async def test_sql_policy_consistency(mock_get_authenticated_user): db_path=tmp_dir + "/" + db_name, ) ) - sqlstore = AuthorizedSqlStore(base_sqlstore) + sqlstore = AuthorizedSqlStore(base_sqlstore, default_policy()) await sqlstore.create_table( table="resources", @@ -144,7 +144,7 @@ async def test_sql_policy_consistency(mock_get_authenticated_user): user = User(principal=user_data["principal"], attributes=user_data["attributes"]) mock_get_authenticated_user.return_value = user - sql_results = await sqlstore.fetch_all("resources", policy=policy) + sql_results = await sqlstore.fetch_all("resources") sql_ids = {row["id"] for row in sql_results.data} policy_ids = set() for scenario in test_scenarios: @@ -174,7 +174,7 @@ async def test_authorized_store_user_attribute_capture(mock_get_authenticated_us db_path=tmp_dir + "/" + db_name, ) ) - authorized_store = AuthorizedSqlStore(base_sqlstore) + authorized_store = AuthorizedSqlStore(base_sqlstore, default_policy()) await authorized_store.create_table( table="user_data", From f0da887e793dce0a084a2239c1cef4ab11cd0156 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 19 Sep 2025 15:49:40 -0700 Subject: [PATCH 02/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 ++ .../utils/responses/responses_store.py | 102 ++++++++++++++++-- .../utils/responses/test_responses_store.py | 21 ++++ 3 files changed, 123 insertions(+), 6 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 04778ed1c..367b8aa94 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,25 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config)) + self.sql_store = None self.policy = policy + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) + async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config)) await self.sql_store.create_table( "openai_responses", { @@ -43,9 +72,70 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning( + f"Write queue full; adding response id={getattr(response_object, 'id', '')}" + ) + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From b93b7798adb380db33a8f0eeb5f742b279339e26 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 19 Sep 2025 15:53:26 -0700 Subject: [PATCH 03/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 ++ .../utils/responses/responses_store.py | 102 ++++++++++++++++-- .../meta_reference/test_openai_responses.py | 2 +- .../utils/responses/test_responses_store.py | 21 ++++ 4 files changed, 124 insertions(+), 7 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 04778ed1c..367b8aa94 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,25 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config)) + self.sql_store = None self.policy = policy + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) + async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config)) await self.sql_store.create_table( "openai_responses", { @@ -43,9 +72,70 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning( + f"Write queue full; adding response id={getattr(response_object, 'id', '')}" + ) + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a964bc219..fd128f585 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -677,7 +677,7 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() - responses_store = ResponsesStore(sql_store_config=None, policy=default_policy()) + responses_store = ResponsesStore(None, policy=default_policy()) responses_store.sql_store = mock_sql_store # Setup test data - multiple input items diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From b0115674a4e0100c1449720e9e5e8fa4c0fc5f46 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 19 Sep 2025 15:59:36 -0700 Subject: [PATCH 04/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 ++ .../utils/responses/responses_store.py | 102 ++++++++++++++++-- .../meta_reference/test_openai_responses.py | 3 +- .../utils/responses/test_responses_store.py | 21 ++++ 4 files changed, 125 insertions(+), 7 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 04778ed1c..367b8aa94 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,25 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config)) + self.sql_store = None self.policy = policy + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) + async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config)) await self.sql_store.create_table( "openai_responses", { @@ -43,9 +72,70 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning( + f"Write queue full; adding response id={getattr(response_object, 'id', '')}" + ) + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a964bc219..df89986af 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,6 +42,7 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.datatypes import ResponsesStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) @@ -677,7 +678,7 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() - responses_store = ResponsesStore(sql_store_config=None, policy=default_policy()) + responses_store = ResponsesStore(ResponsesStoreConfig(), policy=default_policy()) responses_store.sql_store = mock_sql_store # Setup test data - multiple input items diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From 7660ba844f64d01d40bbf9631f309acb05067cfd Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 19 Sep 2025 16:02:02 -0700 Subject: [PATCH 05/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 ++ .../utils/responses/responses_store.py | 102 ++++++++++++++++-- .../meta_reference/test_openai_responses.py | 6 +- .../utils/responses/test_responses_store.py | 21 ++++ 4 files changed, 128 insertions(+), 7 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 04778ed1c..367b8aa94 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,25 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config)) + self.sql_store = None self.policy = policy + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) + async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config)) await self.sql_store.create_table( "openai_responses", { @@ -43,9 +72,70 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning( + f"Write queue full; adding response id={getattr(response_object, 'id', '')}" + ) + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a964bc219..e467e910d 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,10 +42,12 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.datatypes import ResponsesStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) from llama_stack.providers.utils.responses.responses_store import ResponsesStore +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture @@ -677,7 +679,9 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() - responses_store = ResponsesStore(sql_store_config=None, policy=default_policy()) + responses_store = ResponsesStore( + ResponsesStoreConfig(SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy() + ) responses_store.sql_store = mock_sql_store # Setup test data - multiple input items diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From 04fd837d2fc26a26e1655c4ba80cc3652eab3d2b Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 19 Sep 2025 16:13:43 -0700 Subject: [PATCH 06/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 ++ .../utils/responses/responses_store.py | 102 ++++++++++++++++-- .../meta_reference/test_openai_responses.py | 7 +- .../utils/responses/test_responses_store.py | 21 ++++ 4 files changed, 129 insertions(+), 7 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 04778ed1c..367b8aa94 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,25 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config)) + self.sql_store = None self.policy = policy + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) + async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config)) await self.sql_store.create_table( "openai_responses", { @@ -43,9 +72,70 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning( + f"Write queue full; adding response id={getattr(response_object, 'id', '')}" + ) + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a964bc219..67ab87504 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,10 +42,12 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.datatypes import ResponsesStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) from llama_stack.providers.utils.responses.responses_store import ResponsesStore +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture @@ -677,7 +679,10 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() - responses_store = ResponsesStore(sql_store_config=None, policy=default_policy()) + responses_store = ResponsesStore( + ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), + policy=default_policy() + ) responses_store.sql_store = mock_sql_store # Setup test data - multiple input items diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From ce9a62aa840933e83c47825a0d207da02c4fc153 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Sun, 21 Sep 2025 20:37:58 -0700 Subject: [PATCH 07/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 ++ .../utils/responses/responses_store.py | 100 ++++++++++++++++-- .../meta_reference/test_openai_responses.py | 6 +- .../utils/responses/test_responses_store.py | 21 ++++ 4 files changed, 126 insertions(+), 7 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 04778ed1c..f952d0880 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,25 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config)) + self.sql_store = None self.policy = policy + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) + async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config)) await self.sql_store.create_table( "openai_responses", { @@ -43,9 +72,68 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '')}") + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a964bc219..38ce365c1 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,10 +42,12 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.datatypes import ResponsesStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) from llama_stack.providers.utils.responses.responses_store import ResponsesStore +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture @@ -677,7 +679,9 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() - responses_store = ResponsesStore(sql_store_config=None, policy=default_policy()) + responses_store = ResponsesStore( + ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy() + ) responses_store.sql_store = mock_sql_store # Setup test data - multiple input items diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From c0b6c9d7179dc83da9efeef70a20260a30f64e30 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Sun, 21 Sep 2025 20:40:25 -0700 Subject: [PATCH 08/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 ++ .../utils/responses/responses_store.py | 101 ++++++++++++++++-- .../meta_reference/test_openai_responses.py | 6 +- .../utils/responses/test_responses_store.py | 21 ++++ 4 files changed, 127 insertions(+), 7 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 829cd8a62..8dec807a3 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,24 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config), policy) + self.sql_store = None + self.policy = policy + + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) await self.sql_store.create_table( "openai_responses", { @@ -42,9 +72,68 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '')}") + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a964bc219..38ce365c1 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,10 +42,12 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.datatypes import ResponsesStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) from llama_stack.providers.utils.responses.responses_store import ResponsesStore +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture @@ -677,7 +679,9 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() - responses_store = ResponsesStore(sql_store_config=None, policy=default_policy()) + responses_store = ResponsesStore( + ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy() + ) responses_store.sql_store = mock_sql_store # Setup test data - multiple input items diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From a772f0a42dc165ee9993fc1eaebfc422e4666b85 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Sun, 21 Sep 2025 20:46:34 -0700 Subject: [PATCH 09/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 + .../utils/responses/responses_store.py | 110 +++++++++++++++++- .../meta_reference/test_openai_responses.py | 6 +- .../utils/responses/test_responses_store.py | 21 ++++ 4 files changed, 136 insertions(+), 7 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 829cd8a62..b9fceb1ab 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,24 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config), policy) + self.sql_store = None + self.policy = policy + + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) await self.sql_store.create_table( "openai_responses", { @@ -42,9 +72,68 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '')}") + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] @@ -73,6 +162,9 @@ class ResponsesStore: :param model: The model to filter by. :param order: The order to sort the responses by. """ + if not self.sql_store: + raise ValueError("Responses store is not initialized") + if not order: order = Order.desc @@ -100,6 +192,9 @@ class ResponsesStore: """ Get a response object with automatic access control checking. """ + if not self.sql_store: + raise ValueError("Responses store is not initialized") + row = await self.sql_store.fetch_one( "openai_responses", where={"id": response_id}, @@ -113,6 +208,9 @@ class ResponsesStore: return OpenAIResponseObjectWithInput(**row["response_object"]) async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject: + if not self.sql_store: + raise ValueError("Responses store is not initialized") + row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}) if not row: raise ValueError(f"Response with id {response_id} not found") diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a964bc219..38ce365c1 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,10 +42,12 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.datatypes import ResponsesStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) from llama_stack.providers.utils.responses.responses_store import ResponsesStore +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture @@ -677,7 +679,9 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() - responses_store = ResponsesStore(sql_store_config=None, policy=default_policy()) + responses_store = ResponsesStore( + ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy() + ) responses_store.sql_store = mock_sql_store # Setup test data - multiple input items diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From f0211ffb7004e1aec58fd88c85741317d08b46fc Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 22 Sep 2025 21:25:09 -0700 Subject: [PATCH 10/33] chore: fix build # What does this PR do? ## Test Plan --- llama_stack/core/build_container.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/core/build_container.sh b/llama_stack/core/build_container.sh index 424b40a9d..29964f324 100755 --- a/llama_stack/core/build_container.sh +++ b/llama_stack/core/build_container.sh @@ -164,7 +164,7 @@ RUN apt-get update && apt-get install -y \ procps psmisc lsof \ traceroute \ bubblewrap \ - gcc \ + gcc g++ \ && rm -rf /var/lib/apt/lists/* ENV UV_SYSTEM_PYTHON=1 From 7650d2c96a122479ebe629c613d6f11fc3e8c9f7 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 22 Sep 2025 21:33:14 -0700 Subject: [PATCH 11/33] chore: fix build # What does this PR do? ## Test Plan --- llama_stack/core/build_container.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/core/build_container.sh b/llama_stack/core/build_container.sh index 424b40a9d..8e47fc592 100755 --- a/llama_stack/core/build_container.sh +++ b/llama_stack/core/build_container.sh @@ -147,7 +147,7 @@ WORKDIR /app RUN dnf -y update && dnf install -y iputils git net-tools wget \ vim-minimal python3.12 python3.12-pip python3.12-wheel \ - python3.12-setuptools python3.12-devel gcc make && \ + python3.12-setuptools python3.12-devel gcc gcc-c++ make && \ ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all ENV UV_SYSTEM_PYTHON=1 @@ -164,7 +164,7 @@ RUN apt-get update && apt-get install -y \ procps psmisc lsof \ traceroute \ bubblewrap \ - gcc \ + gcc g++ \ && rm -rf /var/lib/apt/lists/* ENV UV_SYSTEM_PYTHON=1 From 88ad5d6d7319667eae1c0a04d8e99349cb98c13b Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 26 Sep 2025 10:35:11 -0700 Subject: [PATCH 12/33] chore: introduce write queue for response_store # What does this PR do? ## Test Plan --- llama_stack/core/datatypes.py | 6 + .../utils/responses/responses_store.py | 110 +++++++++++++++++- .../meta_reference/test_openai_responses.py | 6 +- .../utils/responses/test_responses_store.py | 21 ++++ 4 files changed, 136 insertions(+), 7 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b5558c66f..6a297f012 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -433,6 +433,12 @@ class InferenceStoreConfig(BaseModel): num_writers: int = Field(default=4, description="Number of concurrent background writers") +class ResponsesStoreConfig(BaseModel): + sql_store_config: SqlStoreConfig + max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store") + num_writers: int = Field(default=4, description="Number of concurrent background writers") + + class StackRunConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 829cd8a62..b9fceb1ab 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -3,6 +3,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio +from typing import Any + from llama_stack.apis.agents import ( Order, ) @@ -14,24 +17,51 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.core.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl +from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl + +logger = get_logger(name=__name__, category="responses_store") class ResponsesStore: - def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]): - if not sql_store_config: - sql_store_config = SqliteSqlStoreConfig( + def __init__( + self, + config: ResponsesStoreConfig | SqlStoreConfig, + policy: list[AccessRule], + ): + # Handle backward compatibility + if not isinstance(config, ResponsesStoreConfig): + # Legacy: SqlStoreConfig passed directly as config + config = ResponsesStoreConfig( + sql_store_config=config, + ) + + self.config = config + self.sql_store_config = config.sql_store_config + if not self.sql_store_config: + self.sql_store_config = SqliteSqlStoreConfig( db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), ) - self.sql_store = AuthorizedSqlStore(sqlstore_impl(sql_store_config), policy) + self.sql_store = None + self.policy = policy + + # Disable write queue for SQLite to avoid concurrency issues + self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + + # Async write queue and worker control + self._queue: asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput]]] | None = None + self._worker_tasks: list[asyncio.Task[Any]] = [] + self._max_write_queue_size: int = config.max_write_queue_size + self._num_writers: int = max(1, config.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" + self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) await self.sql_store.create_table( "openai_responses", { @@ -42,9 +72,68 @@ class ResponsesStore: }, ) + if self.enable_write_queue: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + for _ in range(self._num_writers): + self._worker_tasks.append(asyncio.create_task(self._worker_loop())) + else: + logger.info("Write queue disabled for SQLite to avoid concurrency issues") + + async def shutdown(self) -> None: + if not self._worker_tasks: + return + if self._queue is not None: + await self._queue.join() + for t in self._worker_tasks: + if not t.done(): + t.cancel() + for t in self._worker_tasks: + try: + await t + except asyncio.CancelledError: + pass + self._worker_tasks.clear() + + async def flush(self) -> None: + """Wait for all queued writes to complete. Useful for testing.""" + if self.enable_write_queue and self._queue is not None: + await self._queue.join() + async def store_response_object( self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] ) -> None: + if self.enable_write_queue: + if self._queue is None: + raise ValueError("Responses store is not initialized") + try: + self._queue.put_nowait((response_object, input)) + except asyncio.QueueFull: + logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '')}") + await self._queue.put((response_object, input)) + else: + await self._write_response_object(response_object, input) + + async def _worker_loop(self) -> None: + assert self._queue is not None + while True: + try: + item = await self._queue.get() + except asyncio.CancelledError: + break + response_object, input = item + try: + await self._write_response_object(response_object, input) + except Exception as e: # noqa: BLE001 + logger.error(f"Error writing response object: {e}") + finally: + self._queue.task_done() + + async def _write_response_object( + self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput] + ) -> None: + if self.sql_store is None: + raise ValueError("Responses store is not initialized") + data = response_object.model_dump() data["input"] = [input_item.model_dump() for input_item in input] @@ -73,6 +162,9 @@ class ResponsesStore: :param model: The model to filter by. :param order: The order to sort the responses by. """ + if not self.sql_store: + raise ValueError("Responses store is not initialized") + if not order: order = Order.desc @@ -100,6 +192,9 @@ class ResponsesStore: """ Get a response object with automatic access control checking. """ + if not self.sql_store: + raise ValueError("Responses store is not initialized") + row = await self.sql_store.fetch_one( "openai_responses", where={"id": response_id}, @@ -113,6 +208,9 @@ class ResponsesStore: return OpenAIResponseObjectWithInput(**row["response_object"]) async def delete_response_object(self, response_id: str) -> OpenAIDeleteResponseObject: + if not self.sql_store: + raise ValueError("Responses store is not initialized") + row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id}) if not row: raise ValueError(f"Response with id {response_id} not found") diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a964bc219..38ce365c1 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,10 +42,12 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.datatypes import ResponsesStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) from llama_stack.providers.utils.responses.responses_store import ResponsesStore +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture @@ -677,7 +679,9 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() - responses_store = ResponsesStore(sql_store_config=None, policy=default_policy()) + responses_store = ResponsesStore( + ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy() + ) responses_store.sql_store = mock_sql_store # Setup test data - multiple input items diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 44d4b30da..4e5256c1b 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -67,6 +67,9 @@ async def test_responses_store_pagination_basic(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test 1: First page with limit=2, descending order (default) result = await store.list_responses(limit=2, order=Order.desc) assert len(result.data) == 2 @@ -110,6 +113,9 @@ async def test_responses_store_pagination_ascending(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test ascending order pagination result = await store.list_responses(limit=1, order=Order.asc) assert len(result.data) == 1 @@ -145,6 +151,9 @@ async def test_responses_store_pagination_with_model_filter(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test pagination with model filter result = await store.list_responses(limit=1, model="model-a", order=Order.desc) assert len(result.data) == 1 @@ -192,6 +201,9 @@ async def test_responses_store_pagination_no_limit(): input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test without limit (should use default of 50) result = await store.list_responses(order=Order.desc) assert len(result.data) == 2 @@ -212,6 +224,9 @@ async def test_responses_store_get_response_object(): input_list = [create_test_response_input("Test input content", "input-test-resp")] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Retrieve the response retrieved = await store.get_response_object("test-resp") assert retrieved.id == "test-resp" @@ -242,6 +257,9 @@ async def test_responses_store_input_items_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Verify all items are stored correctly with explicit IDs all_items = await store.list_response_input_items("test-resp", order=Order.desc) assert len(all_items.data) == 5 @@ -319,6 +337,9 @@ async def test_responses_store_input_items_before_pagination(): ] await store.store_response_object(response, input_list) + # Wait for all queued writes to complete + await store.flush() + # Test before pagination with descending order # In desc order: [Fifth, Fourth, Third, Second, First] # before="before-3" should return [Fifth, Fourth] From 7004ac27b51c7b59effe74fcfc1ec7a65db9573d Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 26 Sep 2025 14:44:17 -0700 Subject: [PATCH 13/33] chore: remove extra logging # What does this PR do? ## Test Plan --- .../providers/inline/telemetry/meta_reference/telemetry.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 9224c3792..2a4032543 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -224,10 +224,6 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): return _GLOBAL_STORAGE["gauges"][name] def _log_metric(self, event: MetricEvent) -> None: - # Always log to console if console sink is enabled (debug) - if TelemetrySink.CONSOLE in self.config.sinks: - logger.debug(f"METRIC: {event.metric}={event.value} {event.unit} {event.attributes}") - # Add metric as an event to the current span try: with self._lock: From b1cbfe99f96fae717593fe67d878804eea40d175 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 29 Sep 2025 15:52:44 -0700 Subject: [PATCH 14/33] fix: mcp tool with array type should include items # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 1 + .../meta_reference/responses/__init__.py | 5 + .../responses/test_streaming.py | 147 ++++++++++++++++++ 3 files changed, 153 insertions(+) create mode 100644 tests/unit/providers/agents/meta_reference/responses/__init__.py create mode 100644 tests/unit/providers/agents/meta_reference/responses/test_streaming.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..b6ffb1471 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -568,6 +568,7 @@ class StreamingResponseOrchestrator: description=param.description, required=param.required, default=param.default, + items=param.items, ) for param in t.parameters }, diff --git a/tests/unit/providers/agents/meta_reference/responses/__init__.py b/tests/unit/providers/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..6f3c1df03 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. \ No newline at end of file diff --git a/tests/unit/providers/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/agents/meta_reference/responses/test_streaming.py new file mode 100644 index 000000000..5807dd17e --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/responses/test_streaming.py @@ -0,0 +1,147 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Unit tests for MCP tool parameter conversion in streaming responses. + +This tests the fix for handling array-type parameters with 'items' field +when converting MCP tool definitions to OpenAI format. +""" + +from llama_stack.apis.tools import ToolDef, ToolParameter +from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition +from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + +def test_mcp_tool_conversion_with_array_items(): + """ + Test that MCP tool parameters with array type and items field are properly converted. + + This is a regression test for the bug where array parameters without 'items' + caused OpenAI API validation errors like: + "Invalid schema for function 'pods_exec': In context=('properties', 'command'), + array schema missing items." + """ + # Create a tool parameter with array type and items specification + # This mimics what kubernetes-mcp-server's pods_exec tool has + tool_param = ToolParameter( + name="command", + parameter_type="array", + description="Command to execute in the pod", + required=True, + items={"type": "string"}, # This is the crucial field + ) + + # Convert to ToolDefinition format (as done in streaming.py) + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with array parameter", + parameters={ + "command": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + default=tool_param.default, + items=tool_param.items, # The fix: ensure items is passed through + ) + }, + ) + + # Convert to OpenAI format + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify the conversion includes the items field + assert openai_tool["type"] == "function" + assert openai_tool["function"]["name"] == "test_tool" + assert "parameters" in openai_tool["function"] + + parameters = openai_tool["function"]["parameters"] + assert "properties" in parameters + assert "command" in parameters["properties"] + + command_param = parameters["properties"]["command"] + assert command_param["type"] == "array" + assert "items" in command_param, "Array parameter must have 'items' field for OpenAI API" + assert command_param["items"] == {"type": "string"} + + +def test_mcp_tool_conversion_without_array(): + """Test that non-array parameters work correctly without items field.""" + tool_param = ToolParameter( + name="name", + parameter_type="string", + description="Name parameter", + required=True, + ) + + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with string parameter", + parameters={ + "name": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + items=tool_param.items, # Will be None for non-array types + ) + }, + ) + + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify basic structure + assert openai_tool["type"] == "function" + parameters = openai_tool["function"]["parameters"] + assert "name" in parameters["properties"] + + name_param = parameters["properties"]["name"] + assert name_param["type"] == "string" + # items should not be present for non-array types + assert "items" not in name_param or name_param.get("items") is None + + +def test_mcp_tool_conversion_complex_array_items(): + """Test array parameter with complex items schema (object type).""" + tool_param = ToolParameter( + name="configs", + parameter_type="array", + description="Array of configuration objects", + required=False, + items={ + "type": "object", + "properties": { + "key": {"type": "string"}, + "value": {"type": "string"}, + }, + "required": ["key"], + }, + ) + + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with complex array parameter", + parameters={ + "configs": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + items=tool_param.items, + ) + }, + ) + + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify complex items schema is preserved + parameters = openai_tool["function"]["parameters"] + configs_param = parameters["properties"]["configs"] + + assert configs_param["type"] == "array" + assert "items" in configs_param + assert configs_param["items"]["type"] == "object" + assert "properties" in configs_param["items"] + assert "key" in configs_param["items"]["properties"] + assert "value" in configs_param["items"]["properties"] \ No newline at end of file From cd1f6410ceb1f90e80f490fffab46b710b5d74b9 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 29 Sep 2025 15:53:13 -0700 Subject: [PATCH 15/33] fix: mcp tool with array type should include items # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 1 + .../meta_reference/responses/__init__.py | 5 + .../responses/test_streaming.py | 147 ++++++++++++++++++ 3 files changed, 153 insertions(+) create mode 100644 tests/unit/providers/agents/meta_reference/responses/__init__.py create mode 100644 tests/unit/providers/agents/meta_reference/responses/test_streaming.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..b6ffb1471 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -568,6 +568,7 @@ class StreamingResponseOrchestrator: description=param.description, required=param.required, default=param.default, + items=param.items, ) for param in t.parameters }, diff --git a/tests/unit/providers/agents/meta_reference/responses/__init__.py b/tests/unit/providers/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..6f3c1df03 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. \ No newline at end of file diff --git a/tests/unit/providers/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/agents/meta_reference/responses/test_streaming.py new file mode 100644 index 000000000..f4bba613e --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/responses/test_streaming.py @@ -0,0 +1,147 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Unit tests for MCP tool parameter conversion in streaming responses. + +This tests the fix for handling array-type parameters with 'items' field +when converting MCP tool definitions to OpenAI format. +""" + +from llama_stack.apis.tools import ToolParameter +from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition +from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + +def test_mcp_tool_conversion_with_array_items(): + """ + Test that MCP tool parameters with array type and items field are properly converted. + + This is a regression test for the bug where array parameters without 'items' + caused OpenAI API validation errors like: + "Invalid schema for function 'pods_exec': In context=('properties', 'command'), + array schema missing items." + """ + # Create a tool parameter with array type and items specification + # This mimics what kubernetes-mcp-server's pods_exec tool has + tool_param = ToolParameter( + name="command", + parameter_type="array", + description="Command to execute in the pod", + required=True, + items={"type": "string"}, # This is the crucial field + ) + + # Convert to ToolDefinition format (as done in streaming.py) + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with array parameter", + parameters={ + "command": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + default=tool_param.default, + items=tool_param.items, # The fix: ensure items is passed through + ) + }, + ) + + # Convert to OpenAI format + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify the conversion includes the items field + assert openai_tool["type"] == "function" + assert openai_tool["function"]["name"] == "test_tool" + assert "parameters" in openai_tool["function"] + + parameters = openai_tool["function"]["parameters"] + assert "properties" in parameters + assert "command" in parameters["properties"] + + command_param = parameters["properties"]["command"] + assert command_param["type"] == "array" + assert "items" in command_param, "Array parameter must have 'items' field for OpenAI API" + assert command_param["items"] == {"type": "string"} + + +def test_mcp_tool_conversion_without_array(): + """Test that non-array parameters work correctly without items field.""" + tool_param = ToolParameter( + name="name", + parameter_type="string", + description="Name parameter", + required=True, + ) + + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with string parameter", + parameters={ + "name": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + items=tool_param.items, # Will be None for non-array types + ) + }, + ) + + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify basic structure + assert openai_tool["type"] == "function" + parameters = openai_tool["function"]["parameters"] + assert "name" in parameters["properties"] + + name_param = parameters["properties"]["name"] + assert name_param["type"] == "string" + # items should not be present for non-array types + assert "items" not in name_param or name_param.get("items") is None + + +def test_mcp_tool_conversion_complex_array_items(): + """Test array parameter with complex items schema (object type).""" + tool_param = ToolParameter( + name="configs", + parameter_type="array", + description="Array of configuration objects", + required=False, + items={ + "type": "object", + "properties": { + "key": {"type": "string"}, + "value": {"type": "string"}, + }, + "required": ["key"], + }, + ) + + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with complex array parameter", + parameters={ + "configs": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + items=tool_param.items, + ) + }, + ) + + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify complex items schema is preserved + parameters = openai_tool["function"]["parameters"] + configs_param = parameters["properties"]["configs"] + + assert configs_param["type"] == "array" + assert "items" in configs_param + assert configs_param["items"]["type"] == "object" + assert "properties" in configs_param["items"] + assert "key" in configs_param["items"]["properties"] + assert "value" in configs_param["items"]["properties"] From 1b308fd87237ee3bc45431c4f7d28d36551a0d95 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 29 Sep 2025 15:53:13 -0700 Subject: [PATCH 16/33] fix: mcp tool with array type should include items # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 1 + .../meta_reference/responses/__init__.py | 5 + .../responses/test_streaming.py | 147 ++++++++++++++++++ 3 files changed, 153 insertions(+) create mode 100644 tests/unit/providers/agents/meta_reference/responses/__init__.py create mode 100644 tests/unit/providers/agents/meta_reference/responses/test_streaming.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..b6ffb1471 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -568,6 +568,7 @@ class StreamingResponseOrchestrator: description=param.description, required=param.required, default=param.default, + items=param.items, ) for param in t.parameters }, diff --git a/tests/unit/providers/agents/meta_reference/responses/__init__.py b/tests/unit/providers/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/agents/meta_reference/responses/test_streaming.py new file mode 100644 index 000000000..f4bba613e --- /dev/null +++ b/tests/unit/providers/agents/meta_reference/responses/test_streaming.py @@ -0,0 +1,147 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Unit tests for MCP tool parameter conversion in streaming responses. + +This tests the fix for handling array-type parameters with 'items' field +when converting MCP tool definitions to OpenAI format. +""" + +from llama_stack.apis.tools import ToolParameter +from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition +from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + +def test_mcp_tool_conversion_with_array_items(): + """ + Test that MCP tool parameters with array type and items field are properly converted. + + This is a regression test for the bug where array parameters without 'items' + caused OpenAI API validation errors like: + "Invalid schema for function 'pods_exec': In context=('properties', 'command'), + array schema missing items." + """ + # Create a tool parameter with array type and items specification + # This mimics what kubernetes-mcp-server's pods_exec tool has + tool_param = ToolParameter( + name="command", + parameter_type="array", + description="Command to execute in the pod", + required=True, + items={"type": "string"}, # This is the crucial field + ) + + # Convert to ToolDefinition format (as done in streaming.py) + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with array parameter", + parameters={ + "command": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + default=tool_param.default, + items=tool_param.items, # The fix: ensure items is passed through + ) + }, + ) + + # Convert to OpenAI format + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify the conversion includes the items field + assert openai_tool["type"] == "function" + assert openai_tool["function"]["name"] == "test_tool" + assert "parameters" in openai_tool["function"] + + parameters = openai_tool["function"]["parameters"] + assert "properties" in parameters + assert "command" in parameters["properties"] + + command_param = parameters["properties"]["command"] + assert command_param["type"] == "array" + assert "items" in command_param, "Array parameter must have 'items' field for OpenAI API" + assert command_param["items"] == {"type": "string"} + + +def test_mcp_tool_conversion_without_array(): + """Test that non-array parameters work correctly without items field.""" + tool_param = ToolParameter( + name="name", + parameter_type="string", + description="Name parameter", + required=True, + ) + + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with string parameter", + parameters={ + "name": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + items=tool_param.items, # Will be None for non-array types + ) + }, + ) + + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify basic structure + assert openai_tool["type"] == "function" + parameters = openai_tool["function"]["parameters"] + assert "name" in parameters["properties"] + + name_param = parameters["properties"]["name"] + assert name_param["type"] == "string" + # items should not be present for non-array types + assert "items" not in name_param or name_param.get("items") is None + + +def test_mcp_tool_conversion_complex_array_items(): + """Test array parameter with complex items schema (object type).""" + tool_param = ToolParameter( + name="configs", + parameter_type="array", + description="Array of configuration objects", + required=False, + items={ + "type": "object", + "properties": { + "key": {"type": "string"}, + "value": {"type": "string"}, + }, + "required": ["key"], + }, + ) + + tool_def = ToolDefinition( + tool_name="test_tool", + description="Test tool with complex array parameter", + parameters={ + "configs": ToolParamDefinition( + param_type=tool_param.parameter_type, + description=tool_param.description, + required=tool_param.required, + items=tool_param.items, + ) + }, + ) + + openai_tool = convert_tooldef_to_openai_tool(tool_def) + + # Verify complex items schema is preserved + parameters = openai_tool["function"]["parameters"] + configs_param = parameters["properties"]["configs"] + + assert configs_param["type"] == "array" + assert "items" in configs_param + assert configs_param["items"]["type"] == "object" + assert "properties" in configs_param["items"] + assert "key" in configs_param["items"]["properties"] + assert "value" in configs_param["items"]["properties"] From fad9f6c4c9f2487a867c1434624ac37838dc6d84 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 29 Sep 2025 22:11:20 -0700 Subject: [PATCH 17/33] fix: mcp tool with array type should include items # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 49 ++++++++++++------- .../meta_reference/responses/__init__.py | 5 ++ .../responses/test_streaming.py | 42 ++++++++++++++++ 3 files changed, 79 insertions(+), 17 deletions(-) create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..059d240f1 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -50,6 +50,37 @@ from .utils import convert_chat_choice_to_response_message, is_function_tool_cal logger = get_logger(name=__name__, category="agents::meta_reference") +def convert_tooldef_to_chat_tool(tool_def): + """Convert a ToolDef to OpenAI ChatCompletionToolParam format. + + Args: + tool_def: ToolDef from the tools API + + Returns: + ChatCompletionToolParam suitable for OpenAI chat completion + """ + from openai.types.chat import ChatCompletionToolParam + + from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + internal_tool_def = ToolDefinition( + tool_name=tool_def.name, + description=tool_def.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + items=param.items, + ) + for param in tool_def.parameters + }, + ) + return convert_tooldef_to_openai_tool(internal_tool_def) + + class StreamingResponseOrchestrator: def __init__( self, @@ -556,23 +587,7 @@ class StreamingResponseOrchestrator: continue if not always_allowed or t.name in always_allowed: # Add to chat tools for inference - from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition - from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool - - tool_def = ToolDefinition( - tool_name=t.name, - description=t.description, - parameters={ - param.name: ToolParamDefinition( - param_type=param.parameter_type, - description=param.description, - required=param.required, - default=param.default, - ) - for param in t.parameters - }, - ) - openai_tool = convert_tooldef_to_openai_tool(tool_def) + openai_tool = convert_tooldef_to_chat_tool(t) if self.ctx.chat_tools is None: self.ctx.chat_tools = [] self.ctx.chat_tools.append(openai_tool) diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..6f3c1df03 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. \ No newline at end of file diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py new file mode 100644 index 000000000..6fda2b508 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py @@ -0,0 +1,42 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.tools import ToolDef, ToolParameter +from llama_stack.providers.inline.agents.meta_reference.responses.streaming import ( + convert_tooldef_to_chat_tool, +) + + +def test_convert_tooldef_to_chat_tool_preserves_items_field(): + """Test that array parameters preserve the items field during conversion. + + This test ensures that when converting ToolDef with array-type parameters + to OpenAI ChatCompletionToolParam format, the 'items' field is preserved. + Without this fix, array parameters would be missing schema information about their items. + """ + tool_def = ToolDef( + name="test_tool", + description="A test tool with array parameter", + parameters=[ + ToolParameter( + name="tags", + parameter_type="array", + description="List of tags", + required=True, + items={"type": "string"}, + ) + ], + ) + + result = convert_tooldef_to_chat_tool(tool_def) + + assert result["type"] == "function" + assert result["function"]["name"] == "test_tool" + + tags_param = result["function"]["parameters"]["properties"]["tags"] + assert tags_param["type"] == "array" + assert "items" in tags_param, "items field should be preserved for array parameters" + assert tags_param["items"] == {"type": "string"} From be97c9f9dfc184653cad974b3736da63af27ad24 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 29 Sep 2025 22:11:20 -0700 Subject: [PATCH 18/33] fix: mcp tool with array type should include items # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 48 ++++++++++++------- .../meta_reference/responses/__init__.py | 5 ++ .../responses/test_streaming.py | 42 ++++++++++++++++ 3 files changed, 78 insertions(+), 17 deletions(-) create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..2f45ad2a3 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -50,6 +50,36 @@ from .utils import convert_chat_choice_to_response_message, is_function_tool_cal logger = get_logger(name=__name__, category="agents::meta_reference") +def convert_tooldef_to_chat_tool(tool_def): + """Convert a ToolDef to OpenAI ChatCompletionToolParam format. + + Args: + tool_def: ToolDef from the tools API + + Returns: + ChatCompletionToolParam suitable for OpenAI chat completion + """ + + from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + internal_tool_def = ToolDefinition( + tool_name=tool_def.name, + description=tool_def.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + items=param.items, + ) + for param in tool_def.parameters + }, + ) + return convert_tooldef_to_openai_tool(internal_tool_def) + + class StreamingResponseOrchestrator: def __init__( self, @@ -556,23 +586,7 @@ class StreamingResponseOrchestrator: continue if not always_allowed or t.name in always_allowed: # Add to chat tools for inference - from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition - from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool - - tool_def = ToolDefinition( - tool_name=t.name, - description=t.description, - parameters={ - param.name: ToolParamDefinition( - param_type=param.parameter_type, - description=param.description, - required=param.required, - default=param.default, - ) - for param in t.parameters - }, - ) - openai_tool = convert_tooldef_to_openai_tool(tool_def) + openai_tool = convert_tooldef_to_chat_tool(t) if self.ctx.chat_tools is None: self.ctx.chat_tools = [] self.ctx.chat_tools.append(openai_tool) diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py new file mode 100644 index 000000000..6fda2b508 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py @@ -0,0 +1,42 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.tools import ToolDef, ToolParameter +from llama_stack.providers.inline.agents.meta_reference.responses.streaming import ( + convert_tooldef_to_chat_tool, +) + + +def test_convert_tooldef_to_chat_tool_preserves_items_field(): + """Test that array parameters preserve the items field during conversion. + + This test ensures that when converting ToolDef with array-type parameters + to OpenAI ChatCompletionToolParam format, the 'items' field is preserved. + Without this fix, array parameters would be missing schema information about their items. + """ + tool_def = ToolDef( + name="test_tool", + description="A test tool with array parameter", + parameters=[ + ToolParameter( + name="tags", + parameter_type="array", + description="List of tags", + required=True, + items={"type": "string"}, + ) + ], + ) + + result = convert_tooldef_to_chat_tool(tool_def) + + assert result["type"] == "function" + assert result["function"]["name"] == "test_tool" + + tags_param = result["function"]["parameters"]["properties"]["tags"] + assert tags_param["type"] == "array" + assert "items" in tags_param, "items field should be preserved for array parameters" + assert tags_param["items"] == {"type": "string"} From d87459790814f5cf87a4e2f33cb25e4dc73317d7 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 29 Sep 2025 22:39:06 -0700 Subject: [PATCH 19/33] fix: mcp tool with array type should include items # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 48 ++++++++++++------- tests/unit/providers/inline/__init__.py | 0 .../unit/providers/inline/agents/__init__.py | 0 .../inline/agents/meta_reference/__init__.py | 0 .../meta_reference/responses/__init__.py | 5 ++ .../responses/test_streaming.py | 42 ++++++++++++++++ 6 files changed, 78 insertions(+), 17 deletions(-) create mode 100644 tests/unit/providers/inline/__init__.py create mode 100644 tests/unit/providers/inline/agents/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..2f45ad2a3 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -50,6 +50,36 @@ from .utils import convert_chat_choice_to_response_message, is_function_tool_cal logger = get_logger(name=__name__, category="agents::meta_reference") +def convert_tooldef_to_chat_tool(tool_def): + """Convert a ToolDef to OpenAI ChatCompletionToolParam format. + + Args: + tool_def: ToolDef from the tools API + + Returns: + ChatCompletionToolParam suitable for OpenAI chat completion + """ + + from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + internal_tool_def = ToolDefinition( + tool_name=tool_def.name, + description=tool_def.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + items=param.items, + ) + for param in tool_def.parameters + }, + ) + return convert_tooldef_to_openai_tool(internal_tool_def) + + class StreamingResponseOrchestrator: def __init__( self, @@ -556,23 +586,7 @@ class StreamingResponseOrchestrator: continue if not always_allowed or t.name in always_allowed: # Add to chat tools for inference - from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition - from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool - - tool_def = ToolDefinition( - tool_name=t.name, - description=t.description, - parameters={ - param.name: ToolParamDefinition( - param_type=param.parameter_type, - description=param.description, - required=param.required, - default=param.default, - ) - for param in t.parameters - }, - ) - openai_tool = convert_tooldef_to_openai_tool(tool_def) + openai_tool = convert_tooldef_to_chat_tool(t) if self.ctx.chat_tools is None: self.ctx.chat_tools = [] self.ctx.chat_tools.append(openai_tool) diff --git a/tests/unit/providers/inline/__init__.py b/tests/unit/providers/inline/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/providers/inline/agents/__init__.py b/tests/unit/providers/inline/agents/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/providers/inline/agents/meta_reference/__init__.py b/tests/unit/providers/inline/agents/meta_reference/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py new file mode 100644 index 000000000..6fda2b508 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py @@ -0,0 +1,42 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.tools import ToolDef, ToolParameter +from llama_stack.providers.inline.agents.meta_reference.responses.streaming import ( + convert_tooldef_to_chat_tool, +) + + +def test_convert_tooldef_to_chat_tool_preserves_items_field(): + """Test that array parameters preserve the items field during conversion. + + This test ensures that when converting ToolDef with array-type parameters + to OpenAI ChatCompletionToolParam format, the 'items' field is preserved. + Without this fix, array parameters would be missing schema information about their items. + """ + tool_def = ToolDef( + name="test_tool", + description="A test tool with array parameter", + parameters=[ + ToolParameter( + name="tags", + parameter_type="array", + description="List of tags", + required=True, + items={"type": "string"}, + ) + ], + ) + + result = convert_tooldef_to_chat_tool(tool_def) + + assert result["type"] == "function" + assert result["function"]["name"] == "test_tool" + + tags_param = result["function"]["parameters"]["properties"]["tags"] + assert tags_param["type"] == "array" + assert "items" in tags_param, "items field should be preserved for array parameters" + assert tags_param["items"] == {"type": "string"} From b2694a362055ea79afc9db35bb42208fd34b6e52 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 29 Sep 2025 22:43:20 -0700 Subject: [PATCH 20/33] fix: mcp tool with array type should include items # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 48 ++++++++++++------- tests/unit/providers/inline/__init__.py | 6 +++ .../unit/providers/inline/agents/__init__.py | 6 +++ .../inline/agents/meta_reference/__init__.py | 6 +++ .../meta_reference/responses/__init__.py | 5 ++ .../responses/test_streaming.py | 42 ++++++++++++++++ 6 files changed, 96 insertions(+), 17 deletions(-) create mode 100644 tests/unit/providers/inline/__init__.py create mode 100644 tests/unit/providers/inline/agents/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..2f45ad2a3 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -50,6 +50,36 @@ from .utils import convert_chat_choice_to_response_message, is_function_tool_cal logger = get_logger(name=__name__, category="agents::meta_reference") +def convert_tooldef_to_chat_tool(tool_def): + """Convert a ToolDef to OpenAI ChatCompletionToolParam format. + + Args: + tool_def: ToolDef from the tools API + + Returns: + ChatCompletionToolParam suitable for OpenAI chat completion + """ + + from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + internal_tool_def = ToolDefinition( + tool_name=tool_def.name, + description=tool_def.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + items=param.items, + ) + for param in tool_def.parameters + }, + ) + return convert_tooldef_to_openai_tool(internal_tool_def) + + class StreamingResponseOrchestrator: def __init__( self, @@ -556,23 +586,7 @@ class StreamingResponseOrchestrator: continue if not always_allowed or t.name in always_allowed: # Add to chat tools for inference - from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition - from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool - - tool_def = ToolDefinition( - tool_name=t.name, - description=t.description, - parameters={ - param.name: ToolParamDefinition( - param_type=param.parameter_type, - description=param.description, - required=param.required, - default=param.default, - ) - for param in t.parameters - }, - ) - openai_tool = convert_tooldef_to_openai_tool(tool_def) + openai_tool = convert_tooldef_to_chat_tool(t) if self.ctx.chat_tools is None: self.ctx.chat_tools = [] self.ctx.chat_tools.append(openai_tool) diff --git a/tests/unit/providers/inline/__init__.py b/tests/unit/providers/inline/__init__.py new file mode 100644 index 000000000..d4a3e15c8 --- /dev/null +++ b/tests/unit/providers/inline/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + diff --git a/tests/unit/providers/inline/agents/__init__.py b/tests/unit/providers/inline/agents/__init__.py new file mode 100644 index 000000000..d4a3e15c8 --- /dev/null +++ b/tests/unit/providers/inline/agents/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + diff --git a/tests/unit/providers/inline/agents/meta_reference/__init__.py b/tests/unit/providers/inline/agents/meta_reference/__init__.py new file mode 100644 index 000000000..d4a3e15c8 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py new file mode 100644 index 000000000..6fda2b508 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py @@ -0,0 +1,42 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.tools import ToolDef, ToolParameter +from llama_stack.providers.inline.agents.meta_reference.responses.streaming import ( + convert_tooldef_to_chat_tool, +) + + +def test_convert_tooldef_to_chat_tool_preserves_items_field(): + """Test that array parameters preserve the items field during conversion. + + This test ensures that when converting ToolDef with array-type parameters + to OpenAI ChatCompletionToolParam format, the 'items' field is preserved. + Without this fix, array parameters would be missing schema information about their items. + """ + tool_def = ToolDef( + name="test_tool", + description="A test tool with array parameter", + parameters=[ + ToolParameter( + name="tags", + parameter_type="array", + description="List of tags", + required=True, + items={"type": "string"}, + ) + ], + ) + + result = convert_tooldef_to_chat_tool(tool_def) + + assert result["type"] == "function" + assert result["function"]["name"] == "test_tool" + + tags_param = result["function"]["parameters"]["properties"]["tags"] + assert tags_param["type"] == "array" + assert "items" in tags_param, "items field should be preserved for array parameters" + assert tags_param["items"] == {"type": "string"} From 19ca0d0d9c1d850e95a15f54ef9713f7c812fcc4 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 29 Sep 2025 22:58:30 -0700 Subject: [PATCH 21/33] fix: mcp tool with array type should include items # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 48 ++++++++++++------- tests/unit/providers/inline/__init__.py | 5 ++ .../unit/providers/inline/agents/__init__.py | 5 ++ .../inline/agents/meta_reference/__init__.py | 5 ++ .../meta_reference/responses/__init__.py | 5 ++ .../responses/test_streaming.py | 42 ++++++++++++++++ 6 files changed, 93 insertions(+), 17 deletions(-) create mode 100644 tests/unit/providers/inline/__init__.py create mode 100644 tests/unit/providers/inline/agents/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/__init__.py create mode 100644 tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..2f45ad2a3 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -50,6 +50,36 @@ from .utils import convert_chat_choice_to_response_message, is_function_tool_cal logger = get_logger(name=__name__, category="agents::meta_reference") +def convert_tooldef_to_chat_tool(tool_def): + """Convert a ToolDef to OpenAI ChatCompletionToolParam format. + + Args: + tool_def: ToolDef from the tools API + + Returns: + ChatCompletionToolParam suitable for OpenAI chat completion + """ + + from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool + + internal_tool_def = ToolDefinition( + tool_name=tool_def.name, + description=tool_def.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + items=param.items, + ) + for param in tool_def.parameters + }, + ) + return convert_tooldef_to_openai_tool(internal_tool_def) + + class StreamingResponseOrchestrator: def __init__( self, @@ -556,23 +586,7 @@ class StreamingResponseOrchestrator: continue if not always_allowed or t.name in always_allowed: # Add to chat tools for inference - from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition - from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool - - tool_def = ToolDefinition( - tool_name=t.name, - description=t.description, - parameters={ - param.name: ToolParamDefinition( - param_type=param.parameter_type, - description=param.description, - required=param.required, - default=param.default, - ) - for param in t.parameters - }, - ) - openai_tool = convert_tooldef_to_openai_tool(tool_def) + openai_tool = convert_tooldef_to_chat_tool(t) if self.ctx.chat_tools is None: self.ctx.chat_tools = [] self.ctx.chat_tools.append(openai_tool) diff --git a/tests/unit/providers/inline/__init__.py b/tests/unit/providers/inline/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/__init__.py b/tests/unit/providers/inline/agents/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/meta_reference/__init__.py b/tests/unit/providers/inline/agents/meta_reference/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py new file mode 100644 index 000000000..6fda2b508 --- /dev/null +++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py @@ -0,0 +1,42 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.tools import ToolDef, ToolParameter +from llama_stack.providers.inline.agents.meta_reference.responses.streaming import ( + convert_tooldef_to_chat_tool, +) + + +def test_convert_tooldef_to_chat_tool_preserves_items_field(): + """Test that array parameters preserve the items field during conversion. + + This test ensures that when converting ToolDef with array-type parameters + to OpenAI ChatCompletionToolParam format, the 'items' field is preserved. + Without this fix, array parameters would be missing schema information about their items. + """ + tool_def = ToolDef( + name="test_tool", + description="A test tool with array parameter", + parameters=[ + ToolParameter( + name="tags", + parameter_type="array", + description="List of tags", + required=True, + items={"type": "string"}, + ) + ], + ) + + result = convert_tooldef_to_chat_tool(tool_def) + + assert result["type"] == "function" + assert result["function"]["name"] == "test_tool" + + tags_param = result["function"]["parameters"]["properties"]["tags"] + assert tags_param["type"] == "array" + assert "items" in tags_param, "items field should be preserved for array parameters" + assert tags_param["items"] == {"type": "string"} From 0cc072dcafb3426894eb77231b4d90b3edd06196 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Tue, 30 Sep 2025 11:24:27 -0700 Subject: [PATCH 22/33] fix: don't pass default response format in Responses # What does this PR do? ## Test Plan --- .../inline/agents/meta_reference/responses/streaming.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 2f45ad2a3..179f7f023 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -127,13 +127,16 @@ class StreamingResponseOrchestrator: messages = self.ctx.messages.copy() while True: + # Text is the default response format for chat completion so don't need to pass it + # (some providers don't support non-empty response_format when tools are present) + response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format completion_result = await self.inference_api.openai_chat_completion( model=self.ctx.model, messages=messages, tools=self.ctx.chat_tools, stream=True, temperature=self.ctx.temperature, - response_format=self.ctx.response_format, + response_format=response_format, ) # Process streaming chunks and build complete response From a03f0cabfd1c17c4b30af84fcd337dda668414ee Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Tue, 30 Sep 2025 11:28:31 -0700 Subject: [PATCH 23/33] fix: don't pass default response format in Responses # What does this PR do? ## Test Plan --- .../inline/agents/meta_reference/responses/streaming.py | 5 ++++- .../agents/meta_reference/test_openai_responses.py | 7 +++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 2f45ad2a3..179f7f023 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -127,13 +127,16 @@ class StreamingResponseOrchestrator: messages = self.ctx.messages.copy() while True: + # Text is the default response format for chat completion so don't need to pass it + # (some providers don't support non-empty response_format when tools are present) + response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format completion_result = await self.inference_api.openai_chat_completion( model=self.ctx.model, messages=messages, tools=self.ctx.chat_tools, stream=True, temperature=self.ctx.temperature, - response_format=self.ctx.response_format, + response_format=response_format, ) # Process streaming chunks and build complete response diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 38ce365c1..eb77c2dbe 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -148,7 +148,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m mock_inference_api.openai_chat_completion.assert_called_once_with( model=model, messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)], - response_format=OpenAIResponseFormatText(), + response_format=None, tools=None, stream=True, temperature=0.1, @@ -831,8 +831,8 @@ async def test_store_response_uses_rehydrated_input_with_previous_response( (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()), # ensure text param with no format specified defaults to text (OpenAIResponseText(format=None), OpenAIResponseFormatText()), - # ensure text param of None defaults to text - (None, OpenAIResponseFormatText()), + # ensure text param of None defaults to None + (None, None), ], ) async def test_create_openai_response_with_text_format( @@ -855,7 +855,6 @@ async def test_create_openai_response_with_text_format( # Verify first_call = mock_inference_api.openai_chat_completion.call_args_list[0] assert first_call.kwargs["messages"][0].content == input_text - assert first_call.kwargs["response_format"] is not None assert first_call.kwargs["response_format"] == response_format From 28cc185cbbb49472f3c4b50c579e30559e5f475e Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Tue, 30 Sep 2025 11:28:31 -0700 Subject: [PATCH 24/33] fix: don't pass default response format in Responses # What does this PR do? ## Test Plan --- .../agents/meta_reference/responses/streaming.py | 5 ++++- .../agents/meta_reference/test_openai_responses.py | 13 ++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 2f45ad2a3..179f7f023 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -127,13 +127,16 @@ class StreamingResponseOrchestrator: messages = self.ctx.messages.copy() while True: + # Text is the default response format for chat completion so don't need to pass it + # (some providers don't support non-empty response_format when tools are present) + response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format completion_result = await self.inference_api.openai_chat_completion( model=self.ctx.model, messages=messages, tools=self.ctx.chat_tools, stream=True, temperature=self.ctx.temperature, - response_format=self.ctx.response_format, + response_format=response_format, ) # Process streaming chunks and build complete response diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 38ce365c1..ed60c5bdc 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -148,7 +148,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m mock_inference_api.openai_chat_completion.assert_called_once_with( model=model, messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)], - response_format=OpenAIResponseFormatText(), + response_format=None, tools=None, stream=True, temperature=0.1, @@ -823,16 +823,16 @@ async def test_store_response_uses_rehydrated_input_with_previous_response( @pytest.mark.parametrize( "text_format, response_format", [ - (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), OpenAIResponseFormatText()), + (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), None), ( OpenAIResponseText(format=OpenAIResponseTextFormat(name="Test", schema={"foo": "bar"}, type="json_schema")), OpenAIResponseFormatJSONSchema(json_schema=OpenAIJSONSchema(name="Test", schema={"foo": "bar"})), ), (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()), - # ensure text param with no format specified defaults to text - (OpenAIResponseText(format=None), OpenAIResponseFormatText()), - # ensure text param of None defaults to text - (None, OpenAIResponseFormatText()), + # ensure text param with no format specified defaults to None + (OpenAIResponseText(format=None), None), + # ensure text param of None defaults to None + (None, None), ], ) async def test_create_openai_response_with_text_format( @@ -855,7 +855,6 @@ async def test_create_openai_response_with_text_format( # Verify first_call = mock_inference_api.openai_chat_completion.call_args_list[0] assert first_call.kwargs["messages"][0].content == input_text - assert first_call.kwargs["response_format"] is not None assert first_call.kwargs["response_format"] == response_format From f387e4023f4e2be312fbde60f0c3855a58ad8640 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Tue, 30 Sep 2025 11:33:57 -0700 Subject: [PATCH 25/33] fix: don't pass default response format in Responses # What does this PR do? ## Test Plan --- .../agents/meta_reference/responses/streaming.py | 5 ++++- .../agents/meta_reference/test_openai_responses.py | 14 ++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 2f45ad2a3..179f7f023 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -127,13 +127,16 @@ class StreamingResponseOrchestrator: messages = self.ctx.messages.copy() while True: + # Text is the default response format for chat completion so don't need to pass it + # (some providers don't support non-empty response_format when tools are present) + response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format completion_result = await self.inference_api.openai_chat_completion( model=self.ctx.model, messages=messages, tools=self.ctx.chat_tools, stream=True, temperature=self.ctx.temperature, - response_format=self.ctx.response_format, + response_format=response_format, ) # Process streaming chunks and build complete response diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 38ce365c1..5e5914a03 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -37,7 +37,6 @@ from llama_stack.apis.inference import ( OpenAIJSONSchema, OpenAIResponseFormatJSONObject, OpenAIResponseFormatJSONSchema, - OpenAIResponseFormatText, OpenAIUserMessageParam, ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime @@ -148,7 +147,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m mock_inference_api.openai_chat_completion.assert_called_once_with( model=model, messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)], - response_format=OpenAIResponseFormatText(), + response_format=None, tools=None, stream=True, temperature=0.1, @@ -823,16 +822,16 @@ async def test_store_response_uses_rehydrated_input_with_previous_response( @pytest.mark.parametrize( "text_format, response_format", [ - (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), OpenAIResponseFormatText()), + (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), None), ( OpenAIResponseText(format=OpenAIResponseTextFormat(name="Test", schema={"foo": "bar"}, type="json_schema")), OpenAIResponseFormatJSONSchema(json_schema=OpenAIJSONSchema(name="Test", schema={"foo": "bar"})), ), (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()), - # ensure text param with no format specified defaults to text - (OpenAIResponseText(format=None), OpenAIResponseFormatText()), - # ensure text param of None defaults to text - (None, OpenAIResponseFormatText()), + # ensure text param with no format specified defaults to None + (OpenAIResponseText(format=None), None), + # ensure text param of None defaults to None + (None, None), ], ) async def test_create_openai_response_with_text_format( @@ -855,7 +854,6 @@ async def test_create_openai_response_with_text_format( # Verify first_call = mock_inference_api.openai_chat_completion.call_args_list[0] assert first_call.kwargs["messages"][0].content == input_text - assert first_call.kwargs["response_format"] is not None assert first_call.kwargs["response_format"] == response_format From f034004ae63480df0b672aabef323398c6f69841 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Tue, 30 Sep 2025 12:04:50 -0700 Subject: [PATCH 26/33] fix: don't pass default response format in Responses # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 5 +- .../recordings/responses/cf776b1aa432.json | 232 +++++ .../recordings/responses/d0ac68cbde69.json | 16 +- .../models-7d9446738fd7-d5d684a3.json | 144 ++-- .../models-bd032f995f2a-7467c0cf.json | 69 ++ .../models-bd032f995f2a-ebaa996d.json | 798 ++++++++++++++++++ .../meta_reference/test_openai_responses.py | 14 +- 7 files changed, 1189 insertions(+), 89 deletions(-) create mode 100644 tests/integration/recordings/responses/cf776b1aa432.json create mode 100644 tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json create mode 100644 tests/integration/recordings/responses/models-bd032f995f2a-ebaa996d.json diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 2f45ad2a3..179f7f023 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -127,13 +127,16 @@ class StreamingResponseOrchestrator: messages = self.ctx.messages.copy() while True: + # Text is the default response format for chat completion so don't need to pass it + # (some providers don't support non-empty response_format when tools are present) + response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format completion_result = await self.inference_api.openai_chat_completion( model=self.ctx.model, messages=messages, tools=self.ctx.chat_tools, stream=True, temperature=self.ctx.temperature, - response_format=self.ctx.response_format, + response_format=response_format, ) # Process streaming chunks and build complete response diff --git a/tests/integration/recordings/responses/cf776b1aa432.json b/tests/integration/recordings/responses/cf776b1aa432.json new file mode 100644 index 000000000..c7449427a --- /dev/null +++ b/tests/integration/recordings/responses/cf776b1aa432.json @@ -0,0 +1,232 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " France", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " Paris", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/d0ac68cbde69.json b/tests/integration/recordings/responses/d0ac68cbde69.json index 78784e0ca..a8c46b76b 100644 --- a/tests/integration/recordings/responses/d0ac68cbde69.json +++ b/tests/integration/recordings/responses/d0ac68cbde69.json @@ -13,12 +13,12 @@ "__data__": { "models": [ { - "model": "llama3.2:3b", - "name": "llama3.2:3b", - "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72", - "expires_at": "2025-09-27T11:54:56.718552-07:00", - "size": 3367856128, - "size_vram": 3367856128, + "model": "llama3.2:3b-instruct-fp16", + "name": "llama3.2:3b-instruct-fp16", + "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d", + "expires_at": "2025-09-30T12:07:39.189179-07:00", + "size": 8581748736, + "size_vram": 8581748736, "details": { "parent_model": "", "format": "gguf", @@ -27,9 +27,9 @@ "llama" ], "parameter_size": "3.2B", - "quantization_level": "Q4_K_M" + "quantization_level": "F16" }, - "context_length": 4096 + "context_length": null } ] } diff --git a/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json b/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json index a76f0ba8f..d9917b2ec 100644 --- a/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json +++ b/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json @@ -22,19 +22,6 @@ "supports_tools": false } }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2", - "created": 1743381121, - "object": "model", - "owned_by": "tvergho-87e44d", - "kind": "HF_PEFT_ADDON", - "supports_chat": true, - "supports_image_input": false, - "supports_tools": false - } - }, { "__type__": "openai.types.model.Model", "__data__": { @@ -75,20 +62,6 @@ "context_length": 131072 } }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/fireworks/models/deepseek-v3", - "created": 1735576668, - "object": "model", - "owned_by": "fireworks", - "kind": "HF_BASE_MODEL", - "supports_chat": true, - "supports_image_input": false, - "supports_tools": true, - "context_length": 131072 - } - }, { "__type__": "openai.types.model.Model", "__data__": { @@ -259,17 +232,45 @@ { "__type__": "openai.types.model.Model", "__data__": { - "id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "created": 1754063588, + "id": "accounts/fireworks/models/kimi-k2-instruct-0905", + "created": 1757018994, "object": "model", "owned_by": "fireworks", "kind": "HF_BASE_MODEL", "supports_chat": true, "supports_image_input": false, - "supports_tools": false, + "supports_tools": true, "context_length": 262144 } }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/glm-4p5", + "created": 1753809636, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3", + "created": 1735576668, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, { "__type__": "openai.types.model.Model", "__data__": { @@ -284,20 +285,6 @@ "context_length": 131072 } }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", - "created": 1743392739, - "object": "model", - "owned_by": "fireworks", - "kind": "HF_BASE_MODEL", - "supports_chat": true, - "supports_image_input": true, - "supports_tools": false, - "context_length": 128000 - } - }, { "__type__": "openai.types.model.Model", "__data__": { @@ -395,34 +382,6 @@ "supports_tools": false } }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/fireworks/models/glm-4p5", - "created": 1753809636, - "object": "model", - "owned_by": "fireworks", - "kind": "HF_BASE_MODEL", - "supports_chat": true, - "supports_image_input": false, - "supports_tools": true, - "context_length": 131072 - } - }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/fireworks/models/kimi-k2-instruct-0905", - "created": 1757018994, - "object": "model", - "owned_by": "fireworks", - "kind": "HF_BASE_MODEL", - "supports_chat": true, - "supports_image_input": false, - "supports_tools": true, - "context_length": 262144 - } - }, { "__type__": "openai.types.model.Model", "__data__": { @@ -520,6 +479,47 @@ "supports_tools": false, "context_length": 262144 } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", + "created": 1743392739, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": false, + "context_length": 128000 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "created": 1754063588, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2", + "created": 1743381121, + "object": "model", + "owned_by": "tvergho-87e44d", + "kind": "HF_PEFT_ADDON", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false + } } ], "is_streaming": false diff --git a/tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json b/tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json new file mode 100644 index 000000000..00c447dcc --- /dev/null +++ b/tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json @@ -0,0 +1,69 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nomic-embed-text:latest", + "created": 1754610899, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama-guard3:1b", + "created": 1754088388, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:l6-v2", + "created": 1753826826, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:latest", + "created": 1749064003, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.1:8b-instruct-fp16", + "created": 1739575404, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.2:3b-instruct-fp16", + "created": 1737496003, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/models-bd032f995f2a-ebaa996d.json b/tests/integration/recordings/responses/models-bd032f995f2a-ebaa996d.json new file mode 100644 index 000000000..c460d6977 --- /dev/null +++ b/tests/integration/recordings/responses/models-bd032f995f2a-ebaa996d.json @@ -0,0 +1,798 @@ +{ + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", + "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-mini-2024-09-12", + "created": 1725648979, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-mini", + "created": 1725649008, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + "created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 38ce365c1..5e5914a03 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -37,7 +37,6 @@ from llama_stack.apis.inference import ( OpenAIJSONSchema, OpenAIResponseFormatJSONObject, OpenAIResponseFormatJSONSchema, - OpenAIResponseFormatText, OpenAIUserMessageParam, ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime @@ -148,7 +147,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m mock_inference_api.openai_chat_completion.assert_called_once_with( model=model, messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)], - response_format=OpenAIResponseFormatText(), + response_format=None, tools=None, stream=True, temperature=0.1, @@ -823,16 +822,16 @@ async def test_store_response_uses_rehydrated_input_with_previous_response( @pytest.mark.parametrize( "text_format, response_format", [ - (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), OpenAIResponseFormatText()), + (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), None), ( OpenAIResponseText(format=OpenAIResponseTextFormat(name="Test", schema={"foo": "bar"}, type="json_schema")), OpenAIResponseFormatJSONSchema(json_schema=OpenAIJSONSchema(name="Test", schema={"foo": "bar"})), ), (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()), - # ensure text param with no format specified defaults to text - (OpenAIResponseText(format=None), OpenAIResponseFormatText()), - # ensure text param of None defaults to text - (None, OpenAIResponseFormatText()), + # ensure text param with no format specified defaults to None + (OpenAIResponseText(format=None), None), + # ensure text param of None defaults to None + (None, None), ], ) async def test_create_openai_response_with_text_format( @@ -855,7 +854,6 @@ async def test_create_openai_response_with_text_format( # Verify first_call = mock_inference_api.openai_chat_completion.call_args_list[0] assert first_call.kwargs["messages"][0].content == input_text - assert first_call.kwargs["response_format"] is not None assert first_call.kwargs["response_format"] == response_format From 4f7c177c6231b5ce0de2d897b432224a67ce4967 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Tue, 30 Sep 2025 12:04:50 -0700 Subject: [PATCH 27/33] fix: don't pass default response format in Responses # What does this PR do? ## Test Plan --- .../meta_reference/responses/streaming.py | 5 +- .../recordings/responses/4ebf08272d17.json | 6030 +++++++++++++++++ .../recordings/responses/73e97be515d9.json | 106 + .../recordings/responses/8aba89449cdc.json | 248 + .../recordings/responses/cf776b1aa432.json | 232 + .../recordings/responses/d0ac68cbde69.json | 16 +- .../models-7d9446738fd7-d5d684a3.json | 144 +- .../models-bd032f995f2a-7467c0cf.json | 69 + .../models-bd032f995f2a-ebaa996d.json | 798 +++ .../meta_reference/test_openai_responses.py | 14 +- 10 files changed, 7573 insertions(+), 89 deletions(-) create mode 100644 tests/integration/recordings/responses/4ebf08272d17.json create mode 100644 tests/integration/recordings/responses/73e97be515d9.json create mode 100644 tests/integration/recordings/responses/8aba89449cdc.json create mode 100644 tests/integration/recordings/responses/cf776b1aa432.json create mode 100644 tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json create mode 100644 tests/integration/recordings/responses/models-bd032f995f2a-ebaa996d.json diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 2f45ad2a3..179f7f023 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -127,13 +127,16 @@ class StreamingResponseOrchestrator: messages = self.ctx.messages.copy() while True: + # Text is the default response format for chat completion so don't need to pass it + # (some providers don't support non-empty response_format when tools are present) + response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format completion_result = await self.inference_api.openai_chat_completion( model=self.ctx.model, messages=messages, tools=self.ctx.chat_tools, stream=True, temperature=self.ctx.temperature, - response_format=self.ctx.response_format, + response_format=response_format, ) # Process streaming chunks and build complete response diff --git a/tests/integration/recordings/responses/4ebf08272d17.json b/tests/integration/recordings/responses/4ebf08272d17.json new file mode 100644 index 000000000..958d3ad9c --- /dev/null +++ b/tests/integration/recordings/responses/4ebf08272d17.json @@ -0,0 +1,6030 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What's the weather in Tokyo?" + } + ], + "stream": true + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "'m", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " able", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " provide", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " real", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "-time", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267476, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " However", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " suggest", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " some", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " ways", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " out", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267477, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ":\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "1", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " online", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " websites", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ":", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " You", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " websites", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Acc", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "u", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267478, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ".com", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Japan", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Meteor", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "ological", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Agency", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "J", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "MA", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ")", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267479, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " conditions", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " forecast", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "2", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Use", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " mobile", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " app", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ":", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " There", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " are", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267480, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " many", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " mobile", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " apps", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " available", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " provide", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " real", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "-time", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " such", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Dark", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Sky", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267481, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Underground", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Japan", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "-based", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " apps", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Japan", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Meteor", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "ological", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Corporation", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "'s", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "JM", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "Cor", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "ps", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267482, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ")", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " App", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "3", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " social", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " media", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ":", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Many", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " airlines", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " airports", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " tourist", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267483, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " attractions", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " share", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " conditions", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " their", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " social", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " media", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " accounts", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "Please", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " note", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267484, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "'s", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " climate", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " humid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " subt", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "ropical", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " four", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " distinct", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " seasons", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ":\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "-", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Winter", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "December", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " February", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267485, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "):", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Mild", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " temperatures", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " average", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " highs", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " around", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "9", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "48", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "\u00b0F", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ")", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " lows", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267486, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " around", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "2", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "28", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "\u00b0F", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ").\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "-", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Spring", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "March", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " May", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "):", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Cool", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " temperature", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267487, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " average", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " highs", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " around", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "18", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "64", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "\u00b0F", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ")", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " lows", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " around", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "8", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267488, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "46", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "\u00b0F", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ").\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "-", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Summer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "June", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " August", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "):", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " Hot", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " humid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267489, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " average", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267490, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": " highs", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267490, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-359", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759267490, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/73e97be515d9.json b/tests/integration/recordings/responses/73e97be515d9.json new file mode 100644 index 000000000..6df3dd956 --- /dev/null +++ b/tests/integration/recordings/responses/73e97be515d9.json @@ -0,0 +1,106 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What's the weather in Tokyo? YOU MUST USE THE get_weather function to get the weather." + } + ], + "stream": true, + "tools": [ + { + "type": "function", + "function": { + "type": "function", + "name": "get_weather", + "description": "Get the weather in a given city", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city to get the weather for" + } + } + }, + "strict": null + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-116", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_0c2qffvv", + "function": { + "arguments": "{\"city\":\"Tokyo\"}", + "name": "get_weather" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267492, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-116", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759267492, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/8aba89449cdc.json b/tests/integration/recordings/responses/8aba89449cdc.json new file mode 100644 index 000000000..6aa6cd2c5 --- /dev/null +++ b/tests/integration/recordings/responses/8aba89449cdc.json @@ -0,0 +1,248 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "Message A: What is the capital of France?" + }, + { + "role": "assistant", + "content": "The capital of France is Paris." + }, + { + "role": "user", + "content": "Message B: What about Spain?" + }, + { + "role": "assistant", + "content": "The capital of Spain is Madrid." + }, + { + "role": "user", + "content": "Message C: And Italy?" + } + ], + "stream": true + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-676", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267544, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-676", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267544, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-676", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267544, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-676", + "choices": [ + { + "delta": { + "content": " Italy", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267544, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-676", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267544, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-676", + "choices": [ + { + "delta": { + "content": " Rome", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267544, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-676", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759267544, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-676", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759267544, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/cf776b1aa432.json b/tests/integration/recordings/responses/cf776b1aa432.json new file mode 100644 index 000000000..c7449427a --- /dev/null +++ b/tests/integration/recordings/responses/cf776b1aa432.json @@ -0,0 +1,232 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " France", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": " Paris", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-78", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759259077, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/d0ac68cbde69.json b/tests/integration/recordings/responses/d0ac68cbde69.json index 78784e0ca..4dcc6a69b 100644 --- a/tests/integration/recordings/responses/d0ac68cbde69.json +++ b/tests/integration/recordings/responses/d0ac68cbde69.json @@ -13,12 +13,12 @@ "__data__": { "models": [ { - "model": "llama3.2:3b", - "name": "llama3.2:3b", - "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72", - "expires_at": "2025-09-27T11:54:56.718552-07:00", - "size": 3367856128, - "size_vram": 3367856128, + "model": "llama3.2:3b-instruct-fp16", + "name": "llama3.2:3b-instruct-fp16", + "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d", + "expires_at": "2025-09-30T14:29:52.682809-07:00", + "size": 8581748736, + "size_vram": 8581748736, "details": { "parent_model": "", "format": "gguf", @@ -27,9 +27,9 @@ "llama" ], "parameter_size": "3.2B", - "quantization_level": "Q4_K_M" + "quantization_level": "F16" }, - "context_length": 4096 + "context_length": null } ] } diff --git a/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json b/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json index a76f0ba8f..d9917b2ec 100644 --- a/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json +++ b/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json @@ -22,19 +22,6 @@ "supports_tools": false } }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2", - "created": 1743381121, - "object": "model", - "owned_by": "tvergho-87e44d", - "kind": "HF_PEFT_ADDON", - "supports_chat": true, - "supports_image_input": false, - "supports_tools": false - } - }, { "__type__": "openai.types.model.Model", "__data__": { @@ -75,20 +62,6 @@ "context_length": 131072 } }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/fireworks/models/deepseek-v3", - "created": 1735576668, - "object": "model", - "owned_by": "fireworks", - "kind": "HF_BASE_MODEL", - "supports_chat": true, - "supports_image_input": false, - "supports_tools": true, - "context_length": 131072 - } - }, { "__type__": "openai.types.model.Model", "__data__": { @@ -259,17 +232,45 @@ { "__type__": "openai.types.model.Model", "__data__": { - "id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "created": 1754063588, + "id": "accounts/fireworks/models/kimi-k2-instruct-0905", + "created": 1757018994, "object": "model", "owned_by": "fireworks", "kind": "HF_BASE_MODEL", "supports_chat": true, "supports_image_input": false, - "supports_tools": false, + "supports_tools": true, "context_length": 262144 } }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/glm-4p5", + "created": 1753809636, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3", + "created": 1735576668, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, { "__type__": "openai.types.model.Model", "__data__": { @@ -284,20 +285,6 @@ "context_length": 131072 } }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", - "created": 1743392739, - "object": "model", - "owned_by": "fireworks", - "kind": "HF_BASE_MODEL", - "supports_chat": true, - "supports_image_input": true, - "supports_tools": false, - "context_length": 128000 - } - }, { "__type__": "openai.types.model.Model", "__data__": { @@ -395,34 +382,6 @@ "supports_tools": false } }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/fireworks/models/glm-4p5", - "created": 1753809636, - "object": "model", - "owned_by": "fireworks", - "kind": "HF_BASE_MODEL", - "supports_chat": true, - "supports_image_input": false, - "supports_tools": true, - "context_length": 131072 - } - }, - { - "__type__": "openai.types.model.Model", - "__data__": { - "id": "accounts/fireworks/models/kimi-k2-instruct-0905", - "created": 1757018994, - "object": "model", - "owned_by": "fireworks", - "kind": "HF_BASE_MODEL", - "supports_chat": true, - "supports_image_input": false, - "supports_tools": true, - "context_length": 262144 - } - }, { "__type__": "openai.types.model.Model", "__data__": { @@ -520,6 +479,47 @@ "supports_tools": false, "context_length": 262144 } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", + "created": 1743392739, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": false, + "context_length": 128000 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "created": 1754063588, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2", + "created": 1743381121, + "object": "model", + "owned_by": "tvergho-87e44d", + "kind": "HF_PEFT_ADDON", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false + } } ], "is_streaming": false diff --git a/tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json b/tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json new file mode 100644 index 000000000..00c447dcc --- /dev/null +++ b/tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json @@ -0,0 +1,69 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nomic-embed-text:latest", + "created": 1754610899, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama-guard3:1b", + "created": 1754088388, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:l6-v2", + "created": 1753826826, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:latest", + "created": 1749064003, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.1:8b-instruct-fp16", + "created": 1739575404, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.2:3b-instruct-fp16", + "created": 1737496003, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/models-bd032f995f2a-ebaa996d.json b/tests/integration/recordings/responses/models-bd032f995f2a-ebaa996d.json new file mode 100644 index 000000000..c460d6977 --- /dev/null +++ b/tests/integration/recordings/responses/models-bd032f995f2a-ebaa996d.json @@ -0,0 +1,798 @@ +{ + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", + "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-mini-2024-09-12", + "created": 1725648979, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-mini", + "created": 1725649008, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + "created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 38ce365c1..5e5914a03 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -37,7 +37,6 @@ from llama_stack.apis.inference import ( OpenAIJSONSchema, OpenAIResponseFormatJSONObject, OpenAIResponseFormatJSONSchema, - OpenAIResponseFormatText, OpenAIUserMessageParam, ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime @@ -148,7 +147,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m mock_inference_api.openai_chat_completion.assert_called_once_with( model=model, messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)], - response_format=OpenAIResponseFormatText(), + response_format=None, tools=None, stream=True, temperature=0.1, @@ -823,16 +822,16 @@ async def test_store_response_uses_rehydrated_input_with_previous_response( @pytest.mark.parametrize( "text_format, response_format", [ - (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), OpenAIResponseFormatText()), + (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), None), ( OpenAIResponseText(format=OpenAIResponseTextFormat(name="Test", schema={"foo": "bar"}, type="json_schema")), OpenAIResponseFormatJSONSchema(json_schema=OpenAIJSONSchema(name="Test", schema={"foo": "bar"})), ), (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()), - # ensure text param with no format specified defaults to text - (OpenAIResponseText(format=None), OpenAIResponseFormatText()), - # ensure text param of None defaults to text - (None, OpenAIResponseFormatText()), + # ensure text param with no format specified defaults to None + (OpenAIResponseText(format=None), None), + # ensure text param of None defaults to None + (None, None), ], ) async def test_create_openai_response_with_text_format( @@ -855,7 +854,6 @@ async def test_create_openai_response_with_text_format( # Verify first_call = mock_inference_api.openai_chat_completion.call_args_list[0] assert first_call.kwargs["messages"][0].content == input_text - assert first_call.kwargs["response_format"] is not None assert first_call.kwargs["response_format"] == response_format From 168b42cd990add51575a5f83ca1c9afa33d5f91e Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Wed, 1 Oct 2025 09:28:58 -0700 Subject: [PATCH 28/33] fix: log level # What does this PR do? - categories like "core::server" is not recognized so it's level is not set by 'all=debug' - removed spammy telemetry debug logging ## Test Plan test server launched with LLAMA_STACK_LOGGING='all=debug' --- llama_stack/log.py | 11 ++++++++++- llama_stack/providers/utils/telemetry/tracing.py | 3 --- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llama_stack/log.py b/llama_stack/log.py index cc4c9d4cf..2a11516fa 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -247,7 +247,16 @@ def get_logger( _category_levels.update(parse_yaml_config(config)) logger = logging.getLogger(name) - logger.setLevel(_category_levels.get(category, DEFAULT_LOG_LEVEL)) + if category in _category_levels: + log_level = _category_levels[category] + else: + root_category = category.split("::")[0] + if root_category in _category_levels: + log_level = _category_levels[root_category] + else: + log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL) + logging.warning(f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}") + logger.setLevel(log_level) return logging.LoggerAdapter(logger, {"category": category}) diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 62cceb13e..58bf8603a 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -317,7 +317,6 @@ class SpanContextManager: global CURRENT_TRACE_CONTEXT context = CURRENT_TRACE_CONTEXT.get() if not context: - logger.debug("No trace context to pop span") return context.pop_span() @@ -332,7 +331,6 @@ class SpanContextManager: global CURRENT_TRACE_CONTEXT context = CURRENT_TRACE_CONTEXT.get() if not context: - logger.debug("No trace context to push span") return self self.span = context.push_span(self.name, self.attributes) @@ -342,7 +340,6 @@ class SpanContextManager: global CURRENT_TRACE_CONTEXT context = CURRENT_TRACE_CONTEXT.get() if not context: - logger.debug("No trace context to pop span") return context.pop_span() From f675b09b69233665a1a08f2722d7e56623f6014a Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Wed, 1 Oct 2025 09:39:50 -0700 Subject: [PATCH 29/33] fix: log level # What does this PR do? - categories like "core::server" is not recognized so it's level is not set by 'all=debug' - removed spammy telemetry debug logging ## Test Plan test server launched with LLAMA_STACK_LOGGING='all=debug' --- llama_stack/log.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/llama_stack/log.py b/llama_stack/log.py index cc4c9d4cf..2a11516fa 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -247,7 +247,16 @@ def get_logger( _category_levels.update(parse_yaml_config(config)) logger = logging.getLogger(name) - logger.setLevel(_category_levels.get(category, DEFAULT_LOG_LEVEL)) + if category in _category_levels: + log_level = _category_levels[category] + else: + root_category = category.split("::")[0] + if root_category in _category_levels: + log_level = _category_levels[root_category] + else: + log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL) + logging.warning(f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}") + logger.setLevel(log_level) return logging.LoggerAdapter(logger, {"category": category}) From ccaa9208f77926a3b562c13368b817277b733d1c Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 2 Oct 2025 11:01:57 -0700 Subject: [PATCH 30/33] chore: fix/add logging categories # What does this PR do? ## Test Plan --- llama_stack/log.py | 14 ++++++++++++-- .../meta_reference/responses/openai_responses.py | 2 +- .../providers/utils/inference/inference_store.py | 2 +- .../providers/utils/responses/responses_store.py | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/llama_stack/log.py b/llama_stack/log.py index 2a11516fa..729b2b8c5 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -31,7 +31,14 @@ CATEGORIES = [ "client", "telemetry", "openai_responses", + "testing", + "providers", + "models", + "files", + "vector_io", + "tool_runtime", ] +UNCATEGORIZED = "uncategorized" # Initialize category levels with default level _category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL) @@ -165,7 +172,7 @@ def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None def filter(self, record): if not hasattr(record, "category"): - record.category = "uncategorized" # Default to 'uncategorized' if no category found + record.category = UNCATEGORIZED # Default to 'uncategorized' if no category found return True # Determine the root logger's level (default to WARNING if not specified) @@ -255,7 +262,10 @@ def get_logger( log_level = _category_levels[root_category] else: log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL) - logging.warning(f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}") + if category != UNCATEGORIZED: + logging.warning( + f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}" + ) logger.setLevel(log_level) return logging.LoggerAdapter(logger, {"category": category}) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index c27dc8467..1a6d75710 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -41,7 +41,7 @@ from .utils import ( convert_response_text_to_chat_response_format, ) -logger = get_logger(name=__name__, category="openai::responses") +logger = get_logger(name=__name__, category="openai_responses") class OpenAIResponsePreviousResponseWithInputItems(BaseModel): diff --git a/llama_stack/providers/utils/inference/inference_store.py b/llama_stack/providers/utils/inference/inference_store.py index ffc9f3e11..901f77c67 100644 --- a/llama_stack/providers/utils/inference/inference_store.py +++ b/llama_stack/providers/utils/inference/inference_store.py @@ -22,7 +22,7 @@ from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore from ..sqlstore.sqlstore import SqlStoreConfig, SqlStoreType, sqlstore_impl -logger = get_logger(name=__name__, category="inference_store") +logger = get_logger(name=__name__, category="inference") class InferenceStore: diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index b9fceb1ab..cb665b88e 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -25,7 +25,7 @@ from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl -logger = get_logger(name=__name__, category="responses_store") +logger = get_logger(name=__name__, category="openai_responses") class ResponsesStore: From 631e9d7762966089abc133337e6a099f9c568f23 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 2 Oct 2025 14:50:38 -0700 Subject: [PATCH 31/33] chore: fix precommit # What does this PR do? ## Test Plan --- docs/docs/providers/agents/index.mdx | 4 ++-- docs/docs/providers/batches/index.mdx | 24 ++++++++++++------------ docs/docs/providers/inference/index.mdx | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 52b92734e..06eb104af 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -1,7 +1,7 @@ --- description: "Agents -APIs for creating and interacting with agentic systems." + APIs for creating and interacting with agentic systems." sidebar_label: Agents title: Agents --- @@ -12,6 +12,6 @@ title: Agents Agents -APIs for creating and interacting with agentic systems. + APIs for creating and interacting with agentic systems. This section contains documentation for all available providers for the **agents** API. diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 18e5e314d..2c64b277f 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,14 @@ --- description: "The Batches API enables efficient processing of multiple requests in a single operation, -particularly useful for processing large datasets, batch evaluation workflows, and -cost-effective inference at scale. + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. -The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. -This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation -Note: This API is currently under active development and may undergo changes." + Note: This API is currently under active development and may undergo changes." sidebar_label: Batches title: Batches --- @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, -particularly useful for processing large datasets, batch evaluation workflows, and -cost-effective inference at scale. + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. -The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. -This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation -Note: This API is currently under active development and may undergo changes. + Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 1dc479675..ebbaf1be1 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,9 +1,9 @@ --- description: "Llama Stack Inference API for generating completions, chat completions, and embeddings. -This API provides the raw interface to the underlying models. Two kinds of models are supported: -- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search." + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search." sidebar_label: Inference title: Inference --- @@ -14,8 +14,8 @@ title: Inference Llama Stack Inference API for generating completions, chat completions, and embeddings. -This API provides the raw interface to the underlying models. Two kinds of models are supported: -- LLM models: these models generate "raw" and "chat" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search. + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. This section contains documentation for all available providers for the **inference** API. From fdafbd6ec2b2f070b531f5fbc71a3d566c23dc2f Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 6 Oct 2025 14:30:20 -0700 Subject: [PATCH 32/33] chore: remove --env from `llama stack run` # What does this PR do? ## Test Plan --- docs/docs/building_applications/tools.mdx | 9 ++-- docs/docs/contributing/new_api_provider.mdx | 2 +- docs/docs/distributions/building_distro.mdx | 17 ++++--- docs/docs/distributions/configuration.mdx | 9 ++-- .../remote_hosted_distro/watsonx.md | 8 ++-- .../distributions/self_hosted_distro/dell.md | 44 +++++++++---------- .../self_hosted_distro/meta-reference-gpu.md | 20 ++++----- .../self_hosted_distro/nvidia.md | 10 ++--- .../getting_started/detailed_tutorial.mdx | 8 ++-- docs/getting_started_llama4.ipynb | 2 +- docs/zero_to_hero_guide/README.md | 8 ++-- llama_stack/cli/stack/run.py | 30 +------------ llama_stack/core/stack.py | 16 ------- llama_stack/core/start_stack.sh | 13 +----- .../distributions/dell/doc_template.md | 42 +++++++++--------- .../meta-reference-gpu/doc_template.md | 20 ++++----- .../distributions/nvidia/doc_template.md | 10 ++--- scripts/install.sh | 4 +- 18 files changed, 105 insertions(+), 167 deletions(-) diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx index e5d9c46f9..3b78ec57b 100644 --- a/docs/docs/building_applications/tools.mdx +++ b/docs/docs/building_applications/tools.mdx @@ -219,13 +219,10 @@ group_tools = client.tools.list_tools(toolgroup_id="search_tools") 1. Start by registering a Tavily API key at [Tavily](https://tavily.com/). -2. [Optional] Provide the API key directly to the Llama Stack server +2. [Optional] Set the API key in your environment before starting the Llama Stack server ```bash export TAVILY_SEARCH_API_KEY="your key" ``` -```bash ---env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY} -``` @@ -273,9 +270,9 @@ for log in EventLogger().log(response): 1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access). -2. Provide the API key either when starting the Llama Stack server: +2. Provide the API key either by setting it in your environment before starting the Llama Stack server: ```bash - --env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY} + export WOLFRAM_ALPHA_API_KEY="your key" ``` or from the client side: ```python diff --git a/docs/docs/contributing/new_api_provider.mdx b/docs/docs/contributing/new_api_provider.mdx index 4ae6d5e72..6f9744771 100644 --- a/docs/docs/contributing/new_api_provider.mdx +++ b/docs/docs/contributing/new_api_provider.mdx @@ -76,7 +76,7 @@ Integration tests are located in [tests/integration](https://github.com/meta-lla Consult [tests/integration/README.md](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more details on how to run the tests. Note that each provider's `sample_run_config()` method (in the configuration class for that provider) - typically references some environment variables for specifying API keys and the like. You can set these in the environment or pass these via the `--env` flag to the test command. + typically references some environment variables for specifying API keys and the like. You can set these in the environment before running the test command. ### 2. Unit Testing diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx index 5b65b7f16..5ffb623b5 100644 --- a/docs/docs/distributions/building_distro.mdx +++ b/docs/docs/distributions/building_distro.mdx @@ -289,10 +289,10 @@ After this step is successful, you should be able to find the built container im docker run -d \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e OLLAMA_URL=http://host.docker.internal:11434 \ localhost/distribution-ollama:dev \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 + --port $LLAMA_STACK_PORT ``` Here are the docker flags and their uses: @@ -305,12 +305,12 @@ Here are the docker flags and their uses: * `localhost/distribution-ollama:dev`: The name and tag of the container image to run +* `-e INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the INFERENCE_MODEL environment variable in the container + +* `-e OLLAMA_URL=http://host.docker.internal:11434`: Sets the OLLAMA_URL environment variable in the container + * `--port $LLAMA_STACK_PORT`: Port number for the server to listen on -* `--env INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the model to use for inference - -* `--env OLLAMA_URL=http://host.docker.internal:11434`: Configures the URL for the Ollama service - @@ -320,7 +320,7 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con ``` llama stack run -h -usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE] +usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--image-type {venv}] [--enable-ui] [config | template] @@ -334,7 +334,6 @@ options: --port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321) --image-name IMAGE_NAME Name of the image to run. Defaults to the current environment (default: None) - --env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None) --image-type {venv} Image Type used during the build. This should be venv. (default: None) --enable-ui Start the UI server (default: False) diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index dbf879024..81243c97b 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -101,7 +101,7 @@ A few things to note: - The id is a string you can choose freely. - You can instantiate any number of provider instances of the same type. - The configuration dictionary is provider-specific. -- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. +- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server, you can set environment variables in your shell before running `llama stack run` to override the default values. ### Environment Variable Substitution @@ -173,13 +173,10 @@ optional_token: ${env.OPTIONAL_TOKEN:+} #### Runtime Override -You can override environment variables at runtime when starting the server: +You can override environment variables at runtime by setting them in your shell before starting the server: ```bash -# Override specific environment variables -llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com - -# Or set them in your shell +# Set environment variables in your shell export API_KEY=sk-123 export BASE_URL=https://custom-api.com llama stack run --config run.yaml diff --git a/docs/docs/distributions/remote_hosted_distro/watsonx.md b/docs/docs/distributions/remote_hosted_distro/watsonx.md index 977af90dd..5add678f3 100644 --- a/docs/docs/distributions/remote_hosted_distro/watsonx.md +++ b/docs/docs/distributions/remote_hosted_distro/watsonx.md @@ -69,10 +69,10 @@ docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e WATSONX_API_KEY=$WATSONX_API_KEY \ + -e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ + -e WATSONX_BASE_URL=$WATSONX_BASE_URL \ llamastack/distribution-watsonx \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env WATSONX_API_KEY=$WATSONX_API_KEY \ - --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ - --env WATSONX_BASE_URL=$WATSONX_BASE_URL + --port $LLAMA_STACK_PORT ``` diff --git a/docs/docs/distributions/self_hosted_distro/dell.md b/docs/docs/distributions/self_hosted_distro/dell.md index 52d40cf9d..851eac3bf 100644 --- a/docs/docs/distributions/self_hosted_distro/dell.md +++ b/docs/docs/distributions/self_hosted_distro/dell.md @@ -129,11 +129,11 @@ docker run -it \ # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed -v $HOME/git/llama-stack:/app/llama-stack-source -v $HOME/git/llama-models:/app/llama-models-source \ # localhost/distribution-dell:dev if building / testing locally - llamastack/distribution-dell\ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e CHROMA_URL=$CHROMA_URL \ + llamastack/distribution-dell \ + --port $LLAMA_STACK_PORT ``` @@ -154,14 +154,14 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e SAFETY_MODEL=$SAFETY_MODEL \ + -e DEH_SAFETY_URL=$DEH_SAFETY_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-dell \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -170,21 +170,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a ```bash llama stack build --distro dell --image-type venv -llama stack run dell - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +CHROMA_URL=$CHROMA_URL \ +llama stack run dell \ + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +SAFETY_MODEL=$SAFETY_MODEL \ +DEH_SAFETY_URL=$DEH_SAFETY_URL \ +CHROMA_URL=$CHROMA_URL \ llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md index 84b85b91c..1c0ef5f6e 100644 --- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md @@ -84,9 +84,9 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llamastack/distribution-meta-reference-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: @@ -98,10 +98,10 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llamastack/distribution-meta-reference-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -110,16 +110,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL ```bash llama stack build --distro meta-reference-gpu --image-type venv +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llama stack run distributions/meta-reference-gpu/run.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port 8321 ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ +SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port 8321 ``` diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md index 1e52797db..a6e185442 100644 --- a/docs/docs/distributions/self_hosted_distro/nvidia.md +++ b/docs/docs/distributions/self_hosted_distro/nvidia.md @@ -129,10 +129,10 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e NVIDIA_API_KEY=$NVIDIA_API_KEY \ llamastack/distribution-nvidia \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -142,10 +142,10 @@ If you've set up your local development environment, you can also build the imag ```bash INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack build --distro nvidia --image-type venv +NVIDIA_API_KEY=$NVIDIA_API_KEY \ +INFERENCE_MODEL=$INFERENCE_MODEL \ llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL + --port 8321 ``` ## Example Notebooks diff --git a/docs/docs/getting_started/detailed_tutorial.mdx b/docs/docs/getting_started/detailed_tutorial.mdx index 33786ac0e..e6c22224d 100644 --- a/docs/docs/getting_started/detailed_tutorial.mdx +++ b/docs/docs/getting_started/detailed_tutorial.mdx @@ -86,9 +86,9 @@ docker run -it \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e OLLAMA_URL=http://host.docker.internal:11434 \ llamastack/distribution-starter \ - --port $LLAMA_STACK_PORT \ - --env OLLAMA_URL=http://host.docker.internal:11434 + --port $LLAMA_STACK_PORT ``` Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with `podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL` @@ -106,9 +106,9 @@ docker run -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ --network=host \ + -e OLLAMA_URL=http://localhost:11434 \ llamastack/distribution-starter \ - --port $LLAMA_STACK_PORT \ - --env OLLAMA_URL=http://localhost:11434 + --port $LLAMA_STACK_PORT ``` ::: You will see output like below: diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb index cd5f83517..b840117f1 100644 --- a/docs/getting_started_llama4.ipynb +++ b/docs/getting_started_llama4.ipynb @@ -238,7 +238,7 @@ "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " f\"uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv --env INFERENCE_MODEL={model_id}\",\n", + " f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index 183038a88..a899d3ebe 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -102,12 +102,12 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next 3. **Run the Llama Stack**: Run the stack using uv: ```bash + INFERENCE_MODEL=$INFERENCE_MODEL \ + SAFETY_MODEL=$SAFETY_MODEL \ + OLLAMA_URL=$OLLAMA_URL \ uv run --with llama-stack llama stack run starter \ --image-type venv \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env OLLAMA_URL=$OLLAMA_URL + --port $LLAMA_STACK_PORT ``` Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model. diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index cec101083..677f5e5fa 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -16,7 +16,7 @@ import yaml from llama_stack.cli.stack.utils import ImageType from llama_stack.cli.subcommand import Subcommand from llama_stack.core.datatypes import LoggingConfig, StackRunConfig -from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars, validate_env_pair +from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.log import get_logger @@ -57,12 +57,6 @@ class StackRun(Subcommand): default=None, help="Name of the image to run. Defaults to the current environment", ) - self.parser.add_argument( - "--env", - action="append", - help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", - metavar="KEY=VALUE", - ) self.parser.add_argument( "--image-type", type=str, @@ -162,34 +156,12 @@ class StackRun(Subcommand): if config_file: run_args.extend(["--config", str(config_file)]) - if args.env: - for env_var in args.env: - if "=" not in env_var: - self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format") - return - key, value = env_var.split("=", 1) # split on first = only - if not key: - self.parser.error(f"Environment variable '{env_var}' has empty key") - return - run_args.extend(["--env", f"{key}={value}"]) - run_command(run_args) def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None: if not config_file: self.parser.error("Config file is required") - # Set environment variables if provided - if args.env: - for env_pair in args.env: - try: - key, value = validate_env_pair(env_pair) - logger.info(f"Setting environment variable {key} => {value}") - os.environ[key] = value - except ValueError as e: - logger.error(f"Error: {str(e)}") - self.parser.error(f"Invalid environment variable format: {env_pair}") - config_file = resolve_config_or_distro(str(config_file), Mode.RUN) with open(config_file) as fp: config_contents = yaml.safe_load(fp) diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index d5d55319a..acc02eeff 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -274,22 +274,6 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]: return config_dict -def validate_env_pair(env_pair: str) -> tuple[str, str]: - """Validate and split an environment variable key-value pair.""" - try: - key, value = env_pair.split("=", 1) - key = key.strip() - if not key: - raise ValueError(f"Empty key in environment variable pair: {env_pair}") - if not all(c.isalnum() or c == "_" for c in key): - raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}") - return key, value - except ValueError as e: - raise ValueError( - f"Invalid environment variable format '{env_pair}': {str(e)}. Expected format: KEY=value" - ) from e - - def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None: """Add internal implementations (inspect and providers) to the implementations dictionary. diff --git a/llama_stack/core/start_stack.sh b/llama_stack/core/start_stack.sh index 02b1cd408..cc0ae68d8 100755 --- a/llama_stack/core/start_stack.sh +++ b/llama_stack/core/start_stack.sh @@ -25,7 +25,7 @@ error_handler() { trap 'error_handler ${LINENO}' ERR if [ $# -lt 3 ]; then - echo "Usage: $0 [--config ] [--env KEY=VALUE]..." + echo "Usage: $0 [--config ]" exit 1 fi @@ -43,7 +43,6 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")") # Initialize variables yaml_config="" -env_vars="" other_args="" # Process remaining arguments @@ -58,15 +57,6 @@ while [[ $# -gt 0 ]]; do exit 1 fi ;; - --env) - if [[ -n "$2" ]]; then - env_vars="$env_vars --env $2" - shift 2 - else - echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 - exit 1 - fi - ;; *) other_args="$other_args $1" shift @@ -119,7 +109,6 @@ if [[ "$env_type" == "venv" ]]; then llama stack run \ $yaml_config_arg \ --port "$port" \ - $env_vars \ $other_args elif [[ "$env_type" == "container" ]]; then echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}" diff --git a/llama_stack/distributions/dell/doc_template.md b/llama_stack/distributions/dell/doc_template.md index fcec3ea14..852e78d0e 100644 --- a/llama_stack/distributions/dell/doc_template.md +++ b/llama_stack/distributions/dell/doc_template.md @@ -117,11 +117,11 @@ docker run -it \ # NOTE: mount the llama-stack directory if testing local changes else not needed -v $HOME/git/llama-stack:/app/llama-stack-source \ # localhost/distribution-dell:dev if building / testing locally + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-{{ name }}\ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` @@ -142,14 +142,14 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e SAFETY_MODEL=$SAFETY_MODEL \ + -e DEH_SAFETY_URL=$DEH_SAFETY_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-{{ name }} \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` ### Via Conda @@ -158,21 +158,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a ```bash llama stack build --distro {{ name }} --image-type conda -llama stack run {{ name }} - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +CHROMA_URL=$CHROMA_URL \ +llama stack run {{ name }} \ + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +SAFETY_MODEL=$SAFETY_MODEL \ +DEH_SAFETY_URL=$DEH_SAFETY_URL \ +CHROMA_URL=$CHROMA_URL \ llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/llama_stack/distributions/meta-reference-gpu/doc_template.md index 602d053c4..92dcc6102 100644 --- a/llama_stack/distributions/meta-reference-gpu/doc_template.md +++ b/llama_stack/distributions/meta-reference-gpu/doc_template.md @@ -72,9 +72,9 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: @@ -86,10 +86,10 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -98,16 +98,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL ```bash llama stack build --distro {{ name }} --image-type venv +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llama stack run distributions/{{ name }}/run.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port 8321 ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ +SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llama stack run distributions/{{ name }}/run-with-safety.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port 8321 ``` diff --git a/llama_stack/distributions/nvidia/doc_template.md b/llama_stack/distributions/nvidia/doc_template.md index fbee17ef8..df2b68ef7 100644 --- a/llama_stack/distributions/nvidia/doc_template.md +++ b/llama_stack/distributions/nvidia/doc_template.md @@ -118,10 +118,10 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e NVIDIA_API_KEY=$NVIDIA_API_KEY \ llamastack/distribution-{{ name }} \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -131,10 +131,10 @@ If you've set up your local development environment, you can also build the imag ```bash INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack build --distro nvidia --image-type venv +NVIDIA_API_KEY=$NVIDIA_API_KEY \ +INFERENCE_MODEL=$INFERENCE_MODEL \ llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL + --port 8321 ``` ## Example Notebooks diff --git a/scripts/install.sh b/scripts/install.sh index f6fbc259c..571468dc5 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -221,8 +221,8 @@ fi cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \ --network llama-net \ -p "${PORT}:${PORT}" \ - "${SERVER_IMAGE}" --port "${PORT}" \ - --env OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}") + -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \ + "${SERVER_IMAGE}" --port "${PORT}") log "🦙 Starting Llama Stack..." if ! execute_with_log $ENGINE "${cmd[@]}"; then From 8cb14eb84c66ceaedf276918e75afa12db2d23bc Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 6 Oct 2025 14:42:34 -0700 Subject: [PATCH 33/33] chore: require valid logging category # What does this PR do? ## Test Plan --- llama_stack/core/conversations/conversations.py | 2 +- llama_stack/log.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py index bef138e69..612b2f68e 100644 --- a/llama_stack/core/conversations/conversations.py +++ b/llama_stack/core/conversations/conversations.py @@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import ( sqlstore_impl, ) -logger = get_logger(name=__name__, category="openai::conversations") +logger = get_logger(name=__name__, category="openai_conversations") class ConversationServiceConfig(BaseModel): diff --git a/llama_stack/log.py b/llama_stack/log.py index 6f751b21d..6ccc200fc 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -31,12 +31,17 @@ CATEGORIES = [ "client", "telemetry", "openai_responses", + "openai_conversations", "testing", "providers", "models", "files", "vector_io", "tool_runtime", + "cli", + "post_training", + "scoring", + "tests", ] UNCATEGORIZED = "uncategorized" @@ -261,11 +266,10 @@ def get_logger( if root_category in _category_levels: log_level = _category_levels[root_category] else: + assert category == UNCATEGORIZED, ( + "Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list or add it to the CATEGORIES list." + ) log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL) - if category != UNCATEGORIZED: - logging.warning( - f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}" - ) logger.setLevel(log_level) return logging.LoggerAdapter(logger, {"category": category})