From e7a812f5deb5610910c0678e9e8ceaebd3fddd36 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 19 Aug 2025 13:52:38 -0500 Subject: [PATCH 01/42] chore: Fixup main pre commit (#3204) --- .pre-commit-config.yaml | 4 ++-- .../distributions/k8s-benchmark/benchmark.py | 1 - .../test_response_conversion_utils.py | 18 ------------------ 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4309f289a..83ecdde58 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -148,14 +148,14 @@ repos: files: ^.github/workflows/.*$ - id: ui-prettier name: Format UI code with Prettier - entry: bash -c 'cd llama_stack/ui && npm run format' + entry: bash -c 'cd llama_stack/ui && npm ci && npm run format' language: system files: ^llama_stack/ui/.*\.(ts|tsx)$ pass_filenames: false require_serial: true - id: ui-eslint name: Lint UI code with ESLint - entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet' + entry: bash -c 'cd llama_stack/ui && npm ci && npm run lint -- --fix --quiet' language: system files: ^llama_stack/ui/.*\.(ts|tsx)$ pass_filenames: false diff --git a/docs/source/distributions/k8s-benchmark/benchmark.py b/docs/source/distributions/k8s-benchmark/benchmark.py index 0e7368431..3d0d18150 100644 --- a/docs/source/distributions/k8s-benchmark/benchmark.py +++ b/docs/source/distributions/k8s-benchmark/benchmark.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # diff --git a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py index b568ce135..1b9657484 100644 --- a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py +++ b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py @@ -45,7 +45,6 @@ from llama_stack.providers.inline.agents.meta_reference.responses.utils import ( class TestConvertChatChoiceToResponseMessage: - @pytest.mark.asyncio async def test_convert_string_content(self): choice = OpenAIChoice( message=OpenAIAssistantMessageParam(content="Test message"), @@ -61,7 +60,6 @@ class TestConvertChatChoiceToResponseMessage: assert isinstance(result.content[0], OpenAIResponseOutputMessageContentOutputText) assert result.content[0].text == "Test message" - @pytest.mark.asyncio async def test_convert_text_param_content(self): choice = OpenAIChoice( message=OpenAIAssistantMessageParam( @@ -78,12 +76,10 @@ class TestConvertChatChoiceToResponseMessage: class TestConvertResponseContentToChatContent: - @pytest.mark.asyncio async def test_convert_string_content(self): result = await convert_response_content_to_chat_content("Simple string") assert result == "Simple string" - @pytest.mark.asyncio async def test_convert_text_content_parts(self): content = [ OpenAIResponseInputMessageContentText(text="First part"), @@ -98,7 +94,6 @@ class TestConvertResponseContentToChatContent: assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam) assert result[1].text == "Second part" - @pytest.mark.asyncio async def test_convert_image_content(self): content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")] @@ -111,7 +106,6 @@ class TestConvertResponseContentToChatContent: class TestConvertResponseInputToChatMessages: - @pytest.mark.asyncio async def test_convert_string_input(self): result = await 
convert_response_input_to_chat_messages("User message") @@ -119,7 +113,6 @@ class TestConvertResponseInputToChatMessages: assert isinstance(result[0], OpenAIUserMessageParam) assert result[0].content == "User message" - @pytest.mark.asyncio async def test_convert_function_tool_call_output(self): input_items = [ OpenAIResponseInputFunctionToolCallOutput( @@ -135,7 +128,6 @@ class TestConvertResponseInputToChatMessages: assert result[0].content == "Tool output" assert result[0].tool_call_id == "call_123" - @pytest.mark.asyncio async def test_convert_function_tool_call(self): input_items = [ OpenAIResponseOutputMessageFunctionToolCall( @@ -154,7 +146,6 @@ class TestConvertResponseInputToChatMessages: assert result[0].tool_calls[0].function.name == "test_function" assert result[0].tool_calls[0].function.arguments == '{"param": "value"}' - @pytest.mark.asyncio async def test_convert_response_message(self): input_items = [ OpenAIResponseMessage( @@ -173,7 +164,6 @@ class TestConvertResponseInputToChatMessages: class TestConvertResponseTextToChatResponseFormat: - @pytest.mark.asyncio async def test_convert_text_format(self): text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) result = await convert_response_text_to_chat_response_format(text) @@ -181,14 +171,12 @@ class TestConvertResponseTextToChatResponseFormat: assert isinstance(result, OpenAIResponseFormatText) assert result.type == "text" - @pytest.mark.asyncio async def test_convert_json_object_format(self): text = OpenAIResponseText(format={"type": "json_object"}) result = await convert_response_text_to_chat_response_format(text) assert isinstance(result, OpenAIResponseFormatJSONObject) - @pytest.mark.asyncio async def test_convert_json_schema_format(self): schema_def = {"type": "object", "properties": {"test": {"type": "string"}}} text = OpenAIResponseText( @@ -204,7 +192,6 @@ class TestConvertResponseTextToChatResponseFormat: assert result.json_schema["name"] == "test_schema" assert result.json_schema["schema"] == schema_def - @pytest.mark.asyncio async def test_default_text_format(self): text = OpenAIResponseText() result = await convert_response_text_to_chat_response_format(text) @@ -214,27 +201,22 @@ class TestConvertResponseTextToChatResponseFormat: class TestGetMessageTypeByRole: - @pytest.mark.asyncio async def test_user_role(self): result = await get_message_type_by_role("user") assert result == OpenAIUserMessageParam - @pytest.mark.asyncio async def test_system_role(self): result = await get_message_type_by_role("system") assert result == OpenAISystemMessageParam - @pytest.mark.asyncio async def test_assistant_role(self): result = await get_message_type_by_role("assistant") assert result == OpenAIAssistantMessageParam - @pytest.mark.asyncio async def test_developer_role(self): result = await get_message_type_by_role("developer") assert result == OpenAIDeveloperMessageParam - @pytest.mark.asyncio async def test_unknown_role(self): result = await get_message_type_by_role("unknown") assert result is None From 7f0b2a876421a7b27e7ddbac55687fb93b0f1382 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 19 Aug 2025 22:38:23 +0000 Subject: [PATCH 02/42] build: Bump version to 0.2.18 --- llama_stack/ui/package.json | 2 +- pyproject.toml | 6 +++--- uv.lock | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json index fd6f6fbb7..226b06f59 100644 --- a/llama_stack/ui/package.json +++ b/llama_stack/ui/package.json @@ -23,7 
+23,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": "^11.18.2", - "llama-stack-client": "^0.2.17", + "llama-stack-client": "^0.2.18", "lucide-react": "^0.510.0", "next": "15.3.3", "next-auth": "^4.24.11", diff --git a/pyproject.toml b/pyproject.toml index a918c3e36..0cdfc6a37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ required-version = ">=0.7.0" [project] name = "llama_stack" -version = "0.2.17" +version = "0.2.18" authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }] description = "Llama Stack" readme = "README.md" @@ -31,7 +31,7 @@ dependencies = [ "huggingface-hub>=0.34.0,<1.0", "jinja2>=3.1.6", "jsonschema", - "llama-stack-client>=0.2.17", + "llama-stack-client>=0.2.18", "llama-api-client>=0.1.2", "openai>=1.99.6,<1.100.0", "prompt-toolkit", @@ -56,7 +56,7 @@ dependencies = [ ui = [ "streamlit", "pandas", - "llama-stack-client>=0.2.17", + "llama-stack-client>=0.2.18", "streamlit-option-menu", ] diff --git a/uv.lock b/uv.lock index 0cb2164db..635b2bdfe 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -1719,7 +1719,7 @@ wheels = [ [[package]] name = "llama-stack" -version = "0.2.17" +version = "0.2.18" source = { editable = "." } dependencies = [ { name = "aiohttp" }, @@ -1856,8 +1856,8 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "jsonschema" }, { name = "llama-api-client", specifier = ">=0.1.2" }, - { name = "llama-stack-client", specifier = ">=0.2.17" }, - { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.17" }, + { name = "llama-stack-client", specifier = ">=0.2.18" }, + { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.18" }, { name = "openai", specifier = ">=1.99.6,<1.100.0" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, @@ -1963,7 +1963,7 @@ unit = [ [[package]] name = "llama-stack-client" -version = "0.2.17" +version = "0.2.18" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1982,9 +1982,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c5/2a/bb2949d6a5c494d21da0c185d426e25eaa8016f8287b689249afc6c96fb5/llama_stack_client-0.2.17.tar.gz", hash = "sha256:1fe2070133c6356761e394fa346045e9b6b567d4c63157b9bc6be89b9a6e7a41", size = 257636, upload-time = "2025-08-05T01:42:55.911Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/da/5e5a745495f8a2b8ef24fc4d01fe9031aa2277c36447cb22192ec8c8cc1e/llama_stack_client-0.2.18.tar.gz", hash = "sha256:860c885c9e549445178ac55cc9422e6e2a91215ac7aff5aaccfb42f3ce07e79e", size = 277284, upload-time = "2025-08-19T22:12:09.106Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/fc/5eccc86b83c5ced3a3bca071d250a86ccafa4ff17546cf781deb7758ab74/llama_stack_client-0.2.17-py3-none-any.whl", hash = "sha256:336c32f8688700ff64717b8109f405dc87a990fbe310c2027ac9ed6d39d67d16", size = 350329, upload-time = "2025-08-05T01:42:54.381Z" }, + { url = "https://files.pythonhosted.org/packages/0a/e4/e97f8fdd8a07aa1efc7f7e37b5657d84357b664bf70dd1885a437edc0699/llama_stack_client-0.2.18-py3-none-any.whl", hash = 
"sha256:90f827d5476f7fc15fd993f1863af6a6e72bd064646bf6a99435eb43a1327f70", size = 367586, upload-time = "2025-08-19T22:12:07.899Z" }, ] [[package]] From 5f6d5072b696e9f94811e43ce0ff207dd1b5c8e4 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Tue, 19 Aug 2025 17:38:38 -0600 Subject: [PATCH 03/42] chore: Faster npm pre-commit (#3206) # What does this PR do? Adds npm to pre-commit.yml installation and caches ui Removes node installation during pre-commit. ## Test Plan Signed-off-by: Francisco Javier Arceo --- .github/workflows/pre-commit.yml | 11 +++++++++++ .pre-commit-config.yaml | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 4f1c143d2..00962a1ea 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -36,6 +36,17 @@ jobs: **/requirements*.txt .pre-commit-config.yaml + - name: Set up Node.js + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 + with: + node-version: '20' + cache: 'npm' + cache-dependency-path: 'llama_stack/ui/' + + - name: Install npm dependencies + run: npm ci + working-directory: llama_stack/ui + - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 continue-on-error: true env: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 83ecdde58..d21a7244f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -155,7 +155,7 @@ repos: require_serial: true - id: ui-eslint name: Lint UI code with ESLint - entry: bash -c 'cd llama_stack/ui && npm ci && npm run lint -- --fix --quiet' + entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet' language: system files: ^llama_stack/ui/.*\.(ts|tsx)$ pass_filenames: false From 5f151ddf4504308da43aa4bf17487cd10f573b8e Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 20 Aug 2025 05:42:43 -0500 Subject: [PATCH 04/42] fix: disable ui-prettier & ui-eslint (#3207) --- .github/workflows/pre-commit.yml | 22 ++++++++++-------- .pre-commit-config.yaml | 39 ++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 00962a1ea..99e0d0043 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -36,16 +36,20 @@ jobs: **/requirements*.txt .pre-commit-config.yaml - - name: Set up Node.js - uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 - with: - node-version: '20' - cache: 'npm' - cache-dependency-path: 'llama_stack/ui/' + # npm ci may fail - + # npm error `npm ci` can only install packages when your package.json and package-lock.json or npm-shrinkwrap.json are in sync. Please update your lock file with `npm install` before continuing. 
+ # npm error Invalid: lock file's llama-stack-client@0.2.17 does not satisfy llama-stack-client@0.2.18 - - name: Install npm dependencies - run: npm ci - working-directory: llama_stack/ui + # - name: Set up Node.js + # uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 + # with: + # node-version: '20' + # cache: 'npm' + # cache-dependency-path: 'llama_stack/ui/' + + # - name: Install npm dependencies + # run: npm ci + # working-directory: llama_stack/ui - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 continue-on-error: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d21a7244f..39278ab81 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -146,20 +146,31 @@ repos: pass_filenames: false require_serial: true files: ^.github/workflows/.*$ - - id: ui-prettier - name: Format UI code with Prettier - entry: bash -c 'cd llama_stack/ui && npm ci && npm run format' - language: system - files: ^llama_stack/ui/.*\.(ts|tsx)$ - pass_filenames: false - require_serial: true - - id: ui-eslint - name: Lint UI code with ESLint - entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet' - language: system - files: ^llama_stack/ui/.*\.(ts|tsx)$ - pass_filenames: false - require_serial: true + # ui-prettier and ui-eslint are disabled until we can avoid `npm ci`, which is slow and may fail - + # npm error `npm ci` can only install packages when your package.json and package-lock.json or npm-shrinkwrap.json are in sync. Please update your lock file with `npm install` before continuing. + # npm error Invalid: lock file's llama-stack-client@0.2.17 does not satisfy llama-stack-client@0.2.18 + # and until we have infra for installing prettier and next via npm - + # Lint UI code with ESLint.....................................................Failed + # - hook id: ui-eslint + # - exit code: 127 + # > ui@0.1.0 lint + # > next lint --fix --quiet + # sh: line 1: next: command not found + # + # - id: ui-prettier + # name: Format UI code with Prettier + # entry: bash -c 'cd llama_stack/ui && npm ci && npm run format' + # language: system + # files: ^llama_stack/ui/.*\.(ts|tsx)$ + # pass_filenames: false + # require_serial: true + # - id: ui-eslint + # name: Lint UI code with ESLint + # entry: bash -c 'cd llama_stack/ui && npm run lint -- --fix --quiet' + # language: system + # files: ^llama_stack/ui/.*\.(ts|tsx)$ + # pass_filenames: false + # require_serial: true ci: autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks From 3f8df167f3047572ec3b97851aeb262a20ba1527 Mon Sep 17 00:00:00 2001 From: Mustafa Elbehery Date: Wed, 20 Aug 2025 13:15:35 +0200 Subject: [PATCH 05/42] chore(pre-commit): add pre-commit hook to enforce llama_stack logger usage (#3061) # What does this PR do? This PR adds a step in pre-commit to enforce using `llama_stack` logger. Currently, various parts of the code base uses different loggers. As a custom `llama_stack` logger exist and used in the codebase, it is better to standardize its utilization. 
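
For reference, this is the pattern the hook expects (the `category` value below is illustrative; modules pick one matching their area, e.g. `core`, `inference`, `safety`):

```python
# Preferred: use the shared llama_stack logger instead of the stdlib logging module.
from llama_stack.log import get_logger

logger = get_logger(name=__name__, category="core")  # category name is illustrative
logger.info("standardized logging via llama_stack.log")

# If a module genuinely needs the stdlib logger (e.g. llama_stack/log.py itself),
# it can opt out of the check with the marker comment:
# import logging  # allow-direct-logging
```
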
Signed-off-by: Mustafa Elbehery Co-authored-by: Matthew Farrellee --- .pre-commit-config.yaml | 19 +++++++++++++++++++ llama_stack/core/build.py | 4 ++-- llama_stack/core/configure.py | 4 ++-- llama_stack/core/library_client.py | 5 +++-- llama_stack/core/request_headers.py | 4 ++-- llama_stack/core/server/server.py | 2 +- llama_stack/core/utils/exec.py | 6 +++--- llama_stack/core/utils/prompt_for_config.py | 5 +++-- llama_stack/log.py | 4 ++-- .../llama/llama3/multimodal/encoder_utils.py | 5 +++-- .../llama3/multimodal/image_transform.py | 5 +++-- .../models/llama/llama3/multimodal/model.py | 9 +++++---- llama_stack/models/llama/llama3/tokenizer.py | 8 ++++---- .../llama/llama4/quantization/loader.py | 5 +++-- llama_stack/models/llama/llama4/tokenizer.py | 7 +++---- llama_stack/models/llama/quantize_impls.py | 5 +++-- .../inline/agents/meta_reference/agents.py | 4 ++-- .../agents/meta_reference/persistence.py | 4 ++-- .../inline/agents/meta_reference/safety.py | 4 ++-- .../meta_reference/parallel_utils.py | 4 ++-- .../sentence_transformers.py | 4 ++-- .../recipes/finetune_single_device.py | 4 ++-- .../recipes/finetune_single_device_dpo.py | 4 ++-- .../inline/post_training/huggingface/utils.py | 4 ++-- .../recipes/lora_finetuning_single_device.py | 7 +++---- .../safety/code_scanner/code_scanner.py | 4 ++-- .../inline/safety/llama_guard/llama_guard.py | 6 ++++-- .../safety/prompt_guard/prompt_guard.py | 4 ++-- .../scoring/basic/utils/ifeval_utils.py | 5 +++-- .../telemetry/meta_reference/telemetry.py | 6 +++--- .../inline/tool_runtime/rag/memory.py | 4 ++-- .../providers/inline/vector_io/faiss/faiss.py | 4 ++-- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 4 ++-- .../inference/llama_openai_compat/llama.py | 5 ++--- .../remote/inference/nvidia/nvidia.py | 4 ++-- .../remote/inference/nvidia/utils.py | 6 +++--- .../remote/inference/openai/openai.py | 5 ++--- .../providers/remote/inference/tgi/tgi.py | 4 ++-- .../remote/post_training/nvidia/utils.py | 4 ++-- .../remote/safety/bedrock/bedrock.py | 4 ++-- .../providers/remote/safety/nvidia/nvidia.py | 4 ++-- .../remote/safety/sambanova/sambanova.py | 4 ++-- .../remote/vector_io/chroma/chroma.py | 4 ++-- .../remote/vector_io/milvus/milvus.py | 4 ++-- .../remote/vector_io/pgvector/pgvector.py | 4 ++-- .../remote/vector_io/qdrant/qdrant.py | 4 ++-- .../remote/vector_io/weaviate/weaviate.py | 4 ++-- .../utils/inference/embedding_mixin.py | 5 +++-- .../utils/inference/openai_compat.py | 4 ++-- .../utils/kvstore/mongodb/mongodb.py | 4 ++-- .../utils/kvstore/postgres/postgres.py | 5 +++-- .../utils/memory/openai_vector_store_mixin.py | 2 +- .../providers/utils/memory/vector_store.py | 4 ++-- .../providers/utils/telemetry/tracing.py | 2 +- .../post_training/test_post_training.py | 5 ++--- .../vector_io/test_openai_vector_stores.py | 4 ++-- .../providers/inference/test_remote_vllm.py | 2 +- 57 files changed, 148 insertions(+), 122 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 39278ab81..d25455cf0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -172,6 +172,25 @@ repos: # pass_filenames: false # require_serial: true + - id: check-log-usage + name: Ensure 'llama_stack.log' usage for logging + entry: bash + language: system + types: [python] + pass_filenames: true + args: + - -c + - | + matches=$(grep -EnH '^[^#]*\b(import\s+logging|from\s+logging\b)' "$@" | grep -v -e '#\s*allow-direct-logging' || true) + if [ -n "$matches" ]; then + # GitHub Actions annotation format + while IFS=: read -r file 
line_num rest; do + echo "::error file=$file,line=$line_num::Do not use 'import logging' or 'from logging import' in $file. Use the custom log instead: from llama_stack.log import get_logger; logger = get_logger(). If direct logging is truly needed, add: # allow-direct-logging" + done <<< "$matches" + exit 1 + fi + exit 0 + ci: autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate diff --git a/llama_stack/core/build.py b/llama_stack/core/build.py index 4b20588fd..fa1fe632b 100644 --- a/llama_stack/core/build.py +++ b/llama_stack/core/build.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import importlib.resources -import logging import sys from pydantic import BaseModel @@ -17,9 +16,10 @@ from llama_stack.core.external import load_external_apis from llama_stack.core.utils.exec import run_command from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.distributions.template import DistributionTemplate +from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="core") # These are the dependencies needed by the distribution server. # `llama-stack` is automatically installed by the installation script. diff --git a/llama_stack/core/configure.py b/llama_stack/core/configure.py index 9e18b438c..64473c053 100644 --- a/llama_stack/core/configure.py +++ b/llama_stack/core/configure.py @@ -3,7 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import textwrap from typing import Any @@ -21,9 +20,10 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.prompt_for_config import prompt_for_config +from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, ProviderSpec -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="core") def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provider) -> Provider: diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index a93fe509e..dd1fc8a50 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -7,7 +7,7 @@ import asyncio import inspect import json -import logging +import logging # allow-direct-logging import os import sys from concurrent.futures import ThreadPoolExecutor @@ -48,6 +48,7 @@ from llama_stack.core.stack import ( from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.core.utils.exec import in_notebook +from llama_stack.log import get_logger from llama_stack.providers.utils.telemetry.tracing import ( CURRENT_TRACE_CONTEXT, end_trace, @@ -55,7 +56,7 @@ from llama_stack.providers.utils.telemetry.tracing import ( start_trace, ) -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="core") T = TypeVar("T") diff --git a/llama_stack/core/request_headers.py b/llama_stack/core/request_headers.py index 35ac72775..f1ce8281f 100644 --- a/llama_stack/core/request_headers.py +++ b/llama_stack/core/request_headers.py @@ -6,15 +6,15 @@ import contextvars import json -import 
logging from contextlib import AbstractContextManager from typing import Any from llama_stack.core.datatypes import User +from llama_stack.log import get_logger from .utils.dynamic import instantiate_class_type -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="core") # Context variable for request provider data and auth attributes PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None) diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py index cbef8ef88..3d94b6e81 100644 --- a/llama_stack/core/server/server.py +++ b/llama_stack/core/server/server.py @@ -9,7 +9,7 @@ import asyncio import functools import inspect import json -import logging +import logging # allow-direct-logging import os import ssl import sys diff --git a/llama_stack/core/utils/exec.py b/llama_stack/core/utils/exec.py index 1b2b782fe..12fb82d01 100644 --- a/llama_stack/core/utils/exec.py +++ b/llama_stack/core/utils/exec.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging +import importlib import os import signal import subprocess @@ -12,9 +12,9 @@ import sys from termcolor import cprint -log = logging.getLogger(__name__) +from llama_stack.log import get_logger -import importlib +log = get_logger(name=__name__, category="core") def formulate_run_args(image_type: str, image_name: str) -> list: diff --git a/llama_stack/core/utils/prompt_for_config.py b/llama_stack/core/utils/prompt_for_config.py index 26f6920e0..bac0531ed 100644 --- a/llama_stack/core/utils/prompt_for_config.py +++ b/llama_stack/core/utils/prompt_for_config.py @@ -6,7 +6,6 @@ import inspect import json -import logging from enum import Enum from typing import Annotated, Any, Literal, Union, get_args, get_origin @@ -14,7 +13,9 @@ from pydantic import BaseModel from pydantic.fields import FieldInfo from pydantic_core import PydanticUndefinedType -log = logging.getLogger(__name__) +from llama_stack.log import get_logger + +log = get_logger(name=__name__, category="core") def is_list_of_primitives(field_type): diff --git a/llama_stack/log.py b/llama_stack/log.py index d67bd1b61..cc4c9d4cf 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging +import logging # allow-direct-logging import os import re -from logging.config import dictConfig +from logging.config import dictConfig # allow-direct-logging from rich.console import Console from rich.errors import MarkupError diff --git a/llama_stack/models/llama/llama3/multimodal/encoder_utils.py b/llama_stack/models/llama/llama3/multimodal/encoder_utils.py index 5b5969d89..90ced13b2 100644 --- a/llama_stack/models/llama/llama3/multimodal/encoder_utils.py +++ b/llama_stack/models/llama/llama3/multimodal/encoder_utils.py @@ -13,14 +13,15 @@ # Copyright (c) Meta Platforms, Inc. and its affiliates. 
import math -from logging import getLogger import torch import torch.nn.functional as F +from llama_stack.log import get_logger + from .utils import get_negative_inf_value, to_2tuple -logger = getLogger() +logger = get_logger(name=__name__, category="models::llama") def resize_local_position_embedding(orig_pos_embed, grid_size): diff --git a/llama_stack/models/llama/llama3/multimodal/image_transform.py b/llama_stack/models/llama/llama3/multimodal/image_transform.py index f2761ee47..7b20a31fa 100644 --- a/llama_stack/models/llama/llama3/multimodal/image_transform.py +++ b/llama_stack/models/llama/llama3/multimodal/image_transform.py @@ -13,7 +13,6 @@ import math from collections import defaultdict -from logging import getLogger from typing import Any import torch @@ -21,9 +20,11 @@ import torchvision.transforms as tv from PIL import Image from torchvision.transforms import functional as F +from llama_stack.log import get_logger + IMAGE_RES = 224 -logger = getLogger() +logger = get_logger(name=__name__, category="models::llama") class VariableSizeImageTransform: diff --git a/llama_stack/models/llama/llama3/multimodal/model.py b/llama_stack/models/llama/llama3/multimodal/model.py index 5f1c3605c..096156a5f 100644 --- a/llama_stack/models/llama/llama3/multimodal/model.py +++ b/llama_stack/models/llama/llama3/multimodal/model.py @@ -3,8 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. - -import logging import math from collections.abc import Callable from functools import partial @@ -22,6 +20,8 @@ from PIL import Image as PIL_Image from torch import Tensor, nn from torch.distributed import _functional_collectives as funcol +from llama_stack.log import get_logger + from ..model import ModelArgs, RMSNorm, apply_rotary_emb, precompute_freqs_cis from .encoder_utils import ( build_encoder_attention_mask, @@ -34,9 +34,10 @@ from .encoder_utils import ( from .image_transform import VariableSizeImageTransform from .utils import get_negative_inf_value, to_2tuple -logger = logging.getLogger(__name__) MP_SCALE = 8 +logger = get_logger(name=__name__, category="models") + def reduce_from_tensor_model_parallel_region(input_): """All-reduce the input tensor across model parallel group.""" @@ -771,7 +772,7 @@ class TilePositionEmbedding(nn.Module): if embed is not None: # reshape the weights to the correct shape nt_old, nt_old, _, w = embed.shape - logging.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}") + logger.info(f"Resizing tile embedding from {nt_old}x{nt_old} to {self.num_tiles}x{self.num_tiles}") embed_new = TilePositionEmbedding._dynamic_resize(embed, self.num_tiles) # assign the weights to the module state_dict[prefix + "embedding"] = embed_new diff --git a/llama_stack/models/llama/llama3/tokenizer.py b/llama_stack/models/llama/llama3/tokenizer.py index e47b579e3..ad7ced1c5 100644 --- a/llama_stack/models/llama/llama3/tokenizer.py +++ b/llama_stack/models/llama/llama3/tokenizer.py @@ -4,8 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+ from collections.abc import Collection, Iterator, Sequence, Set -from logging import getLogger from pathlib import Path from typing import ( Literal, @@ -14,11 +14,9 @@ from typing import ( import tiktoken +from llama_stack.log import get_logger from llama_stack.models.llama.tokenizer_utils import load_bpe_file -logger = getLogger(__name__) - - # The tiktoken tokenizer can handle <=400k chars without # pyo3_runtime.PanicException. TIKTOKEN_MAX_ENCODE_CHARS = 400_000 @@ -31,6 +29,8 @@ MAX_NO_WHITESPACES_CHARS = 25_000 _INSTANCE = None +logger = get_logger(name=__name__, category="models::llama") + class Tokenizer: """ diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/llama_stack/models/llama/llama4/quantization/loader.py index 223744a5f..8220a9040 100644 --- a/llama_stack/models/llama/llama4/quantization/loader.py +++ b/llama_stack/models/llama/llama4/quantization/loader.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import os from collections.abc import Callable @@ -13,11 +12,13 @@ from fairscale.nn.model_parallel.initialize import get_model_parallel_rank from torch import Tensor, nn from torch.nn import functional as F +from llama_stack.log import get_logger + from ...datatypes import QuantizationMode from ..model import Transformer, TransformerBlock from ..moe import MoE -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="models") def swiglu_wrapper_no_reduce( diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/llama_stack/models/llama/llama4/tokenizer.py index e12b2cae0..bfbace8f9 100644 --- a/llama_stack/models/llama/llama4/tokenizer.py +++ b/llama_stack/models/llama/llama4/tokenizer.py @@ -5,7 +5,6 @@ # the root directory of this source tree. from collections.abc import Collection, Iterator, Sequence, Set -from logging import getLogger from pathlib import Path from typing import ( Literal, @@ -14,11 +13,9 @@ from typing import ( import tiktoken +from llama_stack.log import get_logger from llama_stack.models.llama.tokenizer_utils import load_bpe_file -logger = getLogger(__name__) - - # The tiktoken tokenizer can handle <=400k chars without # pyo3_runtime.PanicException. TIKTOKEN_MAX_ENCODE_CHARS = 400_000 @@ -101,6 +98,8 @@ BASIC_SPECIAL_TOKENS = [ "<|fim_suffix|>", ] +logger = get_logger(name=__name__, category="models::llama") + class Tokenizer: """ diff --git a/llama_stack/models/llama/quantize_impls.py b/llama_stack/models/llama/quantize_impls.py index a6400c5c9..7fab2d3a6 100644 --- a/llama_stack/models/llama/quantize_impls.py +++ b/llama_stack/models/llama/quantize_impls.py @@ -6,9 +6,10 @@ # type: ignore import collections -import logging -log = logging.getLogger(__name__) +from llama_stack.log import get_logger + +log = get_logger(name=__name__, category="llama") try: import fbgemm_gpu.experimental.gen_ai # noqa: F401 diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 30196c429..5794ad2c0 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging import uuid from collections.abc import AsyncGenerator from datetime import UTC, datetime @@ -42,6 +41,7 @@ from llama_stack.apis.safety import Safety from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.core.datatypes import AccessRule +from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl from llama_stack.providers.utils.pagination import paginate_records from llama_stack.providers.utils.responses.responses_store import ResponsesStore @@ -51,7 +51,7 @@ from .config import MetaReferenceAgentsImplConfig from .persistence import AgentInfo from .responses.openai_responses import OpenAIResponsesImpl -logger = logging.getLogger() +logger = get_logger(name=__name__, category="agents") class MetaReferenceAgentsImpl(Agents): diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py index 0b234d96c..c19051f86 100644 --- a/llama_stack/providers/inline/agents/meta_reference/persistence.py +++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import json -import logging import uuid from datetime import UTC, datetime @@ -15,9 +14,10 @@ from llama_stack.core.access_control.access_control import AccessDeniedError, is from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.datatypes import User from llama_stack.core.request_headers import get_authenticated_user +from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="agents") class AgentSessionInfo(Session): diff --git a/llama_stack/providers/inline/agents/meta_reference/safety.py b/llama_stack/providers/inline/agents/meta_reference/safety.py index 605f387b7..b8a5d8a95 100644 --- a/llama_stack/providers/inline/agents/meta_reference/safety.py +++ b/llama_stack/providers/inline/agents/meta_reference/safety.py @@ -5,13 +5,13 @@ # the root directory of this source tree. 
import asyncio -import logging from llama_stack.apis.inference import Message from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel +from llama_stack.log import get_logger from llama_stack.providers.utils.telemetry import tracing -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="agents") class SafetyException(Exception): # noqa: N818 diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index 7ade75032..bb6a1bd03 100644 --- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -12,7 +12,6 @@ import copy import json -import logging import multiprocessing import os import tempfile @@ -32,13 +31,14 @@ from fairscale.nn.model_parallel.initialize import ( from pydantic import BaseModel, Field from torch.distributed.launcher.api import LaunchConfig, elastic_launch +from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import GenerationResult from llama_stack.providers.utils.inference.prompt_adapter import ( ChatCompletionRequestWithRawContent, CompletionRequestWithRawContent, ) -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="inference") class ProcessingMessageName(str, Enum): diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index fea8a8189..600a5bd37 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging from collections.abc import AsyncGenerator from llama_stack.apis.inference import ( @@ -21,6 +20,7 @@ from llama_stack.apis.inference import ( ToolPromptFormat, ) from llama_stack.apis.models import ModelType +from llama_stack.log import get_logger from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, @@ -32,7 +32,7 @@ from llama_stack.providers.utils.inference.openai_compat import ( from .config import SentenceTransformersInferenceConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="inference") class SentenceTransformersInferenceImpl( diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py index 2574b995b..d9ee3d2a8 100644 --- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py @@ -6,7 +6,6 @@ import gc import json -import logging import multiprocessing from pathlib import Path from typing import Any @@ -28,6 +27,7 @@ from llama_stack.apis.post_training import ( LoraFinetuningConfig, TrainingConfig, ) +from llama_stack.log import get_logger from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from ..config import HuggingFacePostTrainingConfig @@ -44,7 +44,7 @@ from ..utils import ( split_dataset, ) -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="post_training") class HFFinetuningSingleDevice: diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py index a7c19faac..b39a24c66 100644 --- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import gc -import logging import multiprocessing from pathlib import Path from typing import Any @@ -24,6 +23,7 @@ from llama_stack.apis.post_training import ( DPOAlignmentConfig, TrainingConfig, ) +from llama_stack.log import get_logger from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from ..config import HuggingFacePostTrainingConfig @@ -40,7 +40,7 @@ from ..utils import ( split_dataset, ) -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="post_training") class HFDPOAlignmentSingleDevice: diff --git a/llama_stack/providers/inline/post_training/huggingface/utils.py b/llama_stack/providers/inline/post_training/huggingface/utils.py index 3147c19ab..f229c87dd 100644 --- a/llama_stack/providers/inline/post_training/huggingface/utils.py +++ b/llama_stack/providers/inline/post_training/huggingface/utils.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging import os import signal import sys @@ -19,10 +18,11 @@ from transformers import AutoConfig, AutoModelForCausalLM from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.post_training import Checkpoint, TrainingConfig +from llama_stack.log import get_logger from .config import HuggingFacePostTrainingConfig -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="post_training") def setup_environment(): diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index 49e1c95b8..8b1462862 100644 --- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import os import time from datetime import UTC, datetime @@ -19,6 +18,7 @@ from torch.utils.data import DataLoader, DistributedSampler from torchtune import modules, training from torchtune import utils as torchtune_utils from torchtune.data import padded_collate_sft +from torchtune.models.llama3._tokenizer import Llama3Tokenizer from torchtune.modules.loss import CEWithChunkedOutputLoss from torchtune.modules.peft import ( get_adapter_params, @@ -45,6 +45,7 @@ from llama_stack.apis.post_training import ( ) from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR from llama_stack.core.utils.model_utils import model_local_dir +from llama_stack.log import get_logger from llama_stack.models.llama.sku_list import resolve_model from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from llama_stack.providers.inline.post_training.torchtune.common import utils @@ -56,9 +57,7 @@ from llama_stack.providers.inline.post_training.torchtune.config import ( ) from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset -log = logging.getLogger(__name__) - -from torchtune.models.llama3._tokenizer import Llama3Tokenizer +log = get_logger(name=__name__, category="post_training") class LoraFinetuningSingleDevice: diff --git a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/llama_stack/providers/inline/safety/code_scanner/code_scanner.py index 6e05d5b83..5e25c559f 100644 --- a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py +++ b/llama_stack/providers/inline/safety/code_scanner/code_scanner.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging import uuid from typing import TYPE_CHECKING, Any @@ -20,13 +19,14 @@ from llama_stack.apis.safety import ( ) from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults from llama_stack.apis.shields import Shield +from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) from .config import CodeScannerConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="safety") ALLOWED_CODE_SCANNER_MODEL_IDS = [ "code-scanner", diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py index 5d52c5d89..5c7f30aa7 100644 --- a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py +++ b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import re import uuid from string import Template @@ -21,6 +20,7 @@ from llama_stack.apis.safety import ( from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults from llama_stack.apis.shields import Shield from llama_stack.core.datatypes import Api +from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import Role from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.datatypes import ShieldsProtocolPrivate @@ -132,6 +132,8 @@ Provide your safety assessment for ONLY THE LAST $agent_type message in the abov PROMPT_TEMPLATE = Template(f"{PROMPT_TASK}{SAFETY_CATEGORIES}{PROMPT_CONVERSATION}{PROMPT_INSTRUCTIONS}") +logger = get_logger(name=__name__, category="safety") + class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate): def __init__(self, config: LlamaGuardConfig, deps) -> None: @@ -407,7 +409,7 @@ class LlamaGuardShield: unsafe_code_list = [code.strip() for code in unsafe_code.split(",")] invalid_codes = [code for code in unsafe_code_list if code not in SAFETY_CODE_TO_CATEGORIES_MAP] if invalid_codes: - logging.warning(f"Invalid safety codes returned: {invalid_codes}") + logger.warning(f"Invalid safety codes returned: {invalid_codes}") # just returning safe object, as we don't know what the invalid codes can map to return ModerationObject( id=f"modr-{uuid.uuid4()}", diff --git a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py index c760f0fd1..6fb6c4407 100644 --- a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +++ b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging from typing import Any import torch @@ -21,6 +20,7 @@ from llama_stack.apis.safety import ( from llama_stack.apis.safety.safety import ModerationObject from llama_stack.apis.shields import Shield from llama_stack.core.utils.model_utils import model_local_dir +from llama_stack.log import get_logger from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, @@ -28,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import PromptGuardConfig, PromptGuardType -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="safety") PROMPT_GUARD_MODEL = "Prompt-Guard-86M" diff --git a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py b/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py index b74c3826e..c9358101d 100644 --- a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +++ b/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py @@ -7,7 +7,6 @@ import collections import functools import json -import logging import random import re import string @@ -20,7 +19,9 @@ import nltk from pythainlp.tokenize import sent_tokenize as sent_tokenize_thai from pythainlp.tokenize import word_tokenize as word_tokenize_thai -logger = logging.getLogger() +from llama_stack.log import get_logger + +logger = get_logger(name=__name__, category="scoring") WORD_LIST = [ "western", diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index d99255c79..30710ec2a 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -4,13 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import threading from typing import Any from opentelemetry import metrics, trace - -logger = logging.getLogger(__name__) from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.metrics import MeterProvider @@ -40,6 +37,7 @@ from llama_stack.apis.telemetry import ( UnstructuredLogEvent, ) from llama_stack.core.datatypes import Api +from llama_stack.log import get_logger from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import ( ConsoleSpanProcessor, ) @@ -61,6 +59,8 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = { _global_lock = threading.Lock() _TRACER_PROVIDER = None +logger = get_logger(name=__name__, category="telemetry") + def is_tracing_enabled(tracer): with tracer.start_as_current_span("check_tracing") as span: diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 6a7c7885c..a1543457b 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
import asyncio -import logging import secrets import string from typing import Any @@ -32,6 +31,7 @@ from llama_stack.apis.tools import ( ToolRuntime, ) from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO +from llama_stack.log import get_logger from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.memory.vector_store import ( @@ -42,7 +42,7 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import RagToolRuntimeConfig from .context_retriever import generate_rag_query -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="tool_runtime") def make_random_string(length: int = 8): diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index af61da59b..258c6e7aa 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -8,7 +8,6 @@ import asyncio import base64 import io import json -import logging from typing import Any import faiss @@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.log import get_logger from llama_stack.providers.datatypes import ( HealthResponse, HealthStatus, @@ -40,7 +40,7 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import FaissVectorIOConfig -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="vector_io") VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index cc1982f3b..7cf163960 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import asyncio -import logging import re import sqlite3 import struct @@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore @@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorDBWithIndex, ) -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="vector_io") # Specifying search mode is dependent on the VectorIO provider. VECTOR_SEARCH = "vector" diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index 4857c6723..cfcfcbf90 100644 --- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -3,15 +3,14 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging - +from llama_stack.log import get_logger from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .models import MODEL_ENTRIES -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="inference") class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin): diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 7bc3fd0c9..297fb5762 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import warnings from collections.abc import AsyncIterator @@ -33,6 +32,7 @@ from llama_stack.apis.inference import ( ToolChoice, ToolConfig, ) +from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, @@ -54,7 +54,7 @@ from .openai_utils import ( ) from .utils import _is_nvidia_hosted -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="inference") class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper): diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/llama_stack/providers/remote/inference/nvidia/utils.py index 74019999e..790bbafd1 100644 --- a/llama_stack/providers/remote/inference/nvidia/utils.py +++ b/llama_stack/providers/remote/inference/nvidia/utils.py @@ -4,13 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging - import httpx +from llama_stack.log import get_logger + from . import NVIDIAConfig -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="inference") def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index 865258559..1c72fa0bc 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -4,15 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging - +from llama_stack.log import get_logger from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import OpenAIConfig from .models import MODEL_ENTRIES -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="inference") # diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 323831845..9da961438 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
-import logging from collections.abc import AsyncGenerator from huggingface_hub import AsyncInferenceClient, HfApi @@ -34,6 +33,7 @@ from llama_stack.apis.inference import ( ToolPromptFormat, ) from llama_stack.apis.models import Model +from llama_stack.log import get_logger from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( @@ -58,7 +58,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="inference") def build_hf_repo_model_entries(): diff --git a/llama_stack/providers/remote/post_training/nvidia/utils.py b/llama_stack/providers/remote/post_training/nvidia/utils.py index d6e1016b2..9a6c3b53c 100644 --- a/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -4,18 +4,18 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import warnings from typing import Any from pydantic import BaseModel from llama_stack.apis.post_training import TrainingConfig +from llama_stack.log import get_logger from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig from .config import NvidiaPostTrainingConfig -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="integration") def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None: diff --git a/llama_stack/providers/remote/safety/bedrock/bedrock.py b/llama_stack/providers/remote/safety/bedrock/bedrock.py index 1895e7507..1ca87ae3d 100644 --- a/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ b/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import json -import logging from typing import Any from llama_stack.apis.inference import Message @@ -16,12 +15,13 @@ from llama_stack.apis.safety import ( ViolationLevel, ) from llama_stack.apis.shields import Shield +from llama_stack.log import get_logger from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.bedrock.client import create_bedrock_client from .config import BedrockSafetyConfig -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="safety") class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate): diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py index 7f17b1cb6..0d8d8ba7a 100644 --- a/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging from typing import Any import requests @@ -12,12 +11,13 @@ import requests from llama_stack.apis.inference import Message from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel from llama_stack.apis.shields import Shield +from llama_stack.log import get_logger from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new from .config import NVIDIASafetyConfig -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="safety") class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py index 6c7190afe..676ee7185 100644 --- a/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import json -import logging from typing import Any import litellm @@ -20,12 +19,13 @@ from llama_stack.apis.safety import ( ) from llama_stack.apis.shields import Shield from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new from .config import SambaNovaSafetyConfig -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="safety") CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 8f252711b..0047e6055 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import asyncio import json -import logging from typing import Any from urllib.parse import urlparse @@ -20,6 +19,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl @@ -33,7 +33,7 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="vector_io") ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index c659bdf6c..034ec331c 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
import asyncio -import logging import os from typing import Any @@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl @@ -36,7 +36,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="vector_io") VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::" diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index d2a5d910b..e829c9e72 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging from typing import Any import psycopg2 @@ -22,6 +21,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore @@ -34,7 +34,7 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import PGVectorVectorIOConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="vector_io") VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::" diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 018015780..8499ff997 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import asyncio -import logging import uuid from typing import Any @@ -24,6 +23,7 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategy, VectorStoreFileObject, ) +from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl @@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="vector_io") CHUNK_ID_KEY = "_chunk_id" # KV store prefixes for vector databases diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 966724848..ddf95317b 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
import json -import logging from typing import Any import weaviate @@ -19,6 +18,7 @@ from llama_stack.apis.files.files import Files from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore @@ -34,7 +34,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti from .config import WeaviateVectorIOConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="vector_io") VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::" diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py index 32e89f987..05886cdc8 100644 --- a/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/llama_stack/providers/utils/inference/embedding_mixin.py @@ -5,10 +5,11 @@ # the root directory of this source tree. import base64 -import logging import struct from typing import TYPE_CHECKING +from llama_stack.log import get_logger + if TYPE_CHECKING: from sentence_transformers import SentenceTransformer @@ -27,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import interleaved_con EMBEDDING_MODELS = {} -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="inference") class SentenceTransformerEmbeddingMixin: diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 5e6c26884..eb32d2de9 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import base64 import json -import logging import struct import time import uuid @@ -122,6 +121,7 @@ from llama_stack.apis.inference import ( from llama_stack.apis.inference import ( OpenAIChoice as OpenAIChatCompletionChoice, ) +from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, @@ -134,7 +134,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( decode_assistant_message, ) -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="inference") class OpenAICompatCompletionChoiceDelta(BaseModel): diff --git a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py b/llama_stack/providers/utils/kvstore/mongodb/mongodb.py index 3842773d9..af52f3708 100644 --- a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py +++ b/llama_stack/providers/utils/kvstore/mongodb/mongodb.py @@ -4,16 +4,16 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging from datetime import datetime from pymongo import AsyncMongoClient +from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore from ..config import MongoDBKVStoreConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="kvstore") class MongoDBKVStoreImpl(KVStore): diff --git a/llama_stack/providers/utils/kvstore/postgres/postgres.py b/llama_stack/providers/utils/kvstore/postgres/postgres.py index cabb4c512..021e90774 100644 --- a/llama_stack/providers/utils/kvstore/postgres/postgres.py +++ b/llama_stack/providers/utils/kvstore/postgres/postgres.py @@ -4,16 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging from datetime import datetime import psycopg2 from psycopg2.extras import DictCursor +from llama_stack.log import get_logger + from ..api import KVStore from ..config import PostgresKVStoreConfig -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="kvstore") class PostgresKVStoreImpl(KVStore): diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 120d0d4fc..0775b31d1 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -44,7 +44,7 @@ from llama_stack.providers.utils.memory.vector_store import ( make_overlapped_chunks, ) -logger = get_logger(__name__, category="vector_io") +logger = get_logger(name=__name__, category="memory") # Constants for OpenAI vector stores CHUNK_MULTIPLIER = 5 diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 6ae5bb521..b5d82432d 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
import base64 import io -import logging import re import time from abc import ABC, abstractmethod @@ -26,6 +25,7 @@ from llama_stack.apis.common.content_types import ( from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( @@ -33,7 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id -log = logging.getLogger(__name__) +log = get_logger(name=__name__, category="memory") class ChunkForDeletion(BaseModel): diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 7080e774a..7694003b5 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -6,7 +6,7 @@ import asyncio import contextvars -import logging +import logging # allow-direct-logging import queue import random import sys diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index f9c797593..b5be71c7c 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import sys import time import uuid @@ -19,10 +18,10 @@ from llama_stack.apis.post_training import ( LoraFinetuningConfig, TrainingConfig, ) +from llama_stack.log import get_logger # Configure logging -logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", force=True) -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="post_training") skip_because_resource_intensive = pytest.mark.skip( diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index bead95c26..82868164f 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import logging import time from io import BytesIO @@ -14,8 +13,9 @@ from openai import BadRequestError as OpenAIBadRequestError from llama_stack.apis.vector_io import Chunk from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack.log import get_logger -logger = logging.getLogger(__name__) +logger = get_logger(name=__name__, category="vector_io") def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models): diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index 5c2ad03ab..ce0e930b1 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -6,7 +6,7 @@ import asyncio import json -import logging +import logging # allow-direct-logging import threading import time from http.server import BaseHTTPRequestHandler, HTTPServer From 55e9959f62bd69b97a5805fe7f9790a461e6c332 Mon Sep 17 00:00:00 2001 From: Jiayi Ni Date: Wed, 20 Aug 2025 05:06:25 -0700 Subject: [PATCH 06/42] fix: fix ```openai_embeddings``` for asymmetric embedding NIMs (#3205) # What does this PR do? NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. This PR adds the `input_type="query"` as default and updates the documentation to suggest using the `embedding` API for passage embeddings. Resolves #2892 ## Test Plan ``` pytest -s -v tests/integration/inference/test_openai_embeddings.py --stack-config="inference=nvidia" --embedding-model="nvidia/llama-3.2-nv-embedqa-1b-v2" --env NVIDIA_API_KEY={nvidia_api_key} --env NVIDIA_BASE_URL="https://integrate.api.nvidia.com" ``` --- .../remote/inference/nvidia/NVIDIA.md | 4 ++ .../remote/inference/nvidia/nvidia.py | 56 ++++++++++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 4a072215c..35d26fd0b 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -77,6 +77,10 @@ print(f"Response: {response.completion_message.content}") ``` ### Create Embeddings +> Note on OpenAI embeddings compatibility +> +> NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`. 
+ ```python response = client.inference.embeddings( model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 297fb5762..7052cfb57 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -7,7 +7,7 @@ import warnings from collections.abc import AsyncIterator -from openai import APIConnectionError, BadRequestError +from openai import NOT_GIVEN, APIConnectionError, BadRequestError from llama_stack.apis.common.content_types import ( InterleavedContent, @@ -26,6 +26,9 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIEmbeddingData, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, ResponseFormat, SamplingParams, TextTruncation, @@ -210,6 +213,57 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper): # return EmbeddingsResponse(embeddings=[embedding.embedding for embedding in response.data]) + async def openai_embeddings( + self, + model: str, + input: str | list[str], + encoding_format: str | None = "float", + dimensions: int | None = None, + user: str | None = None, + ) -> OpenAIEmbeddingsResponse: + """ + OpenAI-compatible embeddings for NVIDIA NIM. + + Note: NVIDIA NIM asymmetric embedding models require an "input_type" field not present in the standard OpenAI embeddings API. + We default this to "query" to ensure requests succeed when using the + OpenAI-compatible endpoint. For passage embeddings, use the embeddings API with + `task_type='document'`. + """ + extra_body: dict[str, object] = {"input_type": "query"} + logger.warning( + "NVIDIA OpenAI-compatible embeddings: defaulting to input_type='query'. " + "For passage embeddings, use the embeddings API with task_type='document'." + ) + + response = await self.client.embeddings.create( + model=await self._get_provider_model_id(model), + input=input, + encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN, + dimensions=dimensions if dimensions is not None else NOT_GIVEN, + user=user if user is not None else NOT_GIVEN, + extra_body=extra_body, + ) + + data = [] + for i, embedding_data in enumerate(response.data): + data.append( + OpenAIEmbeddingData( + embedding=embedding_data.embedding, + index=i, + ) + ) + + usage = OpenAIEmbeddingUsage( + prompt_tokens=response.usage.prompt_tokens, + total_tokens=response.usage.total_tokens, + ) + + return OpenAIEmbeddingsResponse( + data=data, + model=response.model, + usage=usage, + ) + async def chat_completion( self, model_id: str, From c2c859a6b03d922b07b377c1367eb1522912b80e Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 20 Aug 2025 13:22:40 -0500 Subject: [PATCH 07/42] chore(files tests): update files integration tests and fix inline::localfs (#3195) - update files=inline::localfs to raise ResourceNotFoundError instead of ValueError - only skip tests when no files provider is available - directly use openai_client and llama_stack_client where appropriate - check for correct behavior of non-existent file - xfail the isolation test, no implementation supports it test plan - ``` $ uv run ./scripts/integration-tests.sh --stack-config server:ci-tests --provider ollama --test-subdirs files ... 
tests/integration/files/test_files.py::test_openai_client_basic_operations PASSED [ 25%] tests/integration/files/test_files.py::test_files_authentication_isolation XFAIL [ 50%] tests/integration/files/test_files.py::test_files_authentication_shared_attributes PASSED [ 75%] tests/integration/files/test_files.py::test_files_authentication_anonymous_access PASSED [100%] ==================================== 3 passed, 1 xfailed in 1.03s ===================================== ``` previously - ``` $ uv run llama stack build --image-type venv --providers files=inline::localfs --run & ... $ ./scripts/integration-tests.sh --stack-config http://localhost:8321 --provider ollama --test-subdirs files ... tests/integration/files/test_files.py::test_openai_client_basic_operations[openai_client-ollama/llama3.2:3b-instruct-fp16-None-sentence-transformers/all-MiniLM-L6-v2-None-384] PASSED [ 12%] tests/integration/files/test_files.py::test_files_authentication_isolation[openai_client-ollama/llama3.2:3b-instruct-fp16-None-sentence-transformers/all-MiniLM-L6-v2-None-384] SKIPPED [ 25%] tests/integration/files/test_files.py::test_files_authentication_shared_attributes[openai_client-ollama/llama3.2:3b-instruct-fp16-None-sentence-transformers/all-MiniLM-L6-v2-None-384] SKIPPED [ 37%] tests/integration/files/test_files.py::test_files_authentication_anonymous_access[openai_client-ollama/llama3.2:3b-instruct-fp16-None-sentence-transformers/all-MiniLM-L6-v2-None-384] SKIPPED [ 50%] tests/integration/files/test_files.py::test_openai_client_basic_operations[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-sentence-transformers/all-MiniLM-L6-v2-None-384] PASSED [ 62%] tests/integration/files/test_files.py::test_files_authentication_isolation[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-sentence-transformers/all-MiniLM-L6-v2-None-384] SKIPPED [ 75%] tests/integration/files/test_files.py::test_files_authentication_shared_attributes[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-sentence-transformers/all-MiniLM-L6-v2-None-384] SKIPPED [ 87%] tests/integration/files/test_files.py::test_files_authentication_anonymous_access[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-sentence-transformers/all-MiniLM-L6-v2-None-384] SKIPPED [100%] ========================================================= 2 passed, 6 skipped in 1.31s ========================================================== ``` --- .../providers/inline/files/localfs/files.py | 64 ++++++------- tests/integration/files/test_files.py | 89 ++++++++++--------- tests/integration/fixtures/common.py | 17 +++- tests/unit/files/test_files.py | 9 +- 4 files changed, 92 insertions(+), 87 deletions(-) diff --git a/llama_stack/providers/inline/files/localfs/files.py b/llama_stack/providers/inline/files/localfs/files.py index 1e9dca3b5..4f6d571a4 100644 --- a/llama_stack/providers/inline/files/localfs/files.py +++ b/llama_stack/providers/inline/files/localfs/files.py @@ -11,6 +11,7 @@ from typing import Annotated from fastapi import File, Form, Response, UploadFile +from llama_stack.apis.common.errors import ResourceNotFoundError from llama_stack.apis.common.responses import Order from llama_stack.apis.files import ( Files, @@ -20,12 +21,15 @@ from llama_stack.apis.files import ( OpenAIFilePurpose, ) from llama_stack.core.datatypes import AccessRule +from llama_stack.log import get_logger from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import 
AuthorizedSqlStore from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl from .config import LocalfsFilesImplConfig +logger = get_logger(name=__name__, category="files") + class LocalfsFilesImpl(Files): def __init__(self, config: LocalfsFilesImplConfig, policy: list[AccessRule]) -> None: @@ -65,6 +69,18 @@ class LocalfsFilesImpl(Files): """Get the filesystem path for a file ID.""" return Path(self.config.storage_dir) / file_id + async def _lookup_file_id(self, file_id: str) -> tuple[OpenAIFileObject, Path]: + """Look up a OpenAIFileObject and filesystem path from its ID.""" + if not self.sql_store: + raise RuntimeError("Files provider not initialized") + + row = await self.sql_store.fetch_one("openai_files", policy=self.policy, where={"id": file_id}) + if not row: + raise ResourceNotFoundError(file_id, "File", "client.files.list()") + + file_path = Path(row.pop("file_path")) + return OpenAIFileObject(**row), file_path + # OpenAI Files API Implementation async def openai_upload_file( self, @@ -157,37 +173,19 @@ class LocalfsFilesImpl(Files): async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject: """Returns information about a specific file.""" - if not self.sql_store: - raise RuntimeError("Files provider not initialized") + file_obj, _ = await self._lookup_file_id(file_id) - row = await self.sql_store.fetch_one("openai_files", policy=self.policy, where={"id": file_id}) - if not row: - raise ValueError(f"File with id {file_id} not found") - - return OpenAIFileObject( - id=row["id"], - filename=row["filename"], - purpose=OpenAIFilePurpose(row["purpose"]), - bytes=row["bytes"], - created_at=row["created_at"], - expires_at=row["expires_at"], - ) + return file_obj async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: """Delete a file.""" - if not self.sql_store: - raise RuntimeError("Files provider not initialized") - - row = await self.sql_store.fetch_one("openai_files", policy=self.policy, where={"id": file_id}) - if not row: - raise ValueError(f"File with id {file_id} not found") - # Delete physical file - file_path = Path(row["file_path"]) + _, file_path = await self._lookup_file_id(file_id) if file_path.exists(): file_path.unlink() # Delete metadata from database + assert self.sql_store is not None, "Files provider not initialized" await self.sql_store.delete("openai_files", where={"id": file_id}) return OpenAIFileDeleteResponse( @@ -197,25 +195,17 @@ class LocalfsFilesImpl(Files): async def openai_retrieve_file_content(self, file_id: str) -> Response: """Returns the contents of the specified file.""" - if not self.sql_store: - raise RuntimeError("Files provider not initialized") - - # Get file metadata - row = await self.sql_store.fetch_one("openai_files", policy=self.policy, where={"id": file_id}) - if not row: - raise ValueError(f"File with id {file_id} not found") - # Read file content - file_path = Path(row["file_path"]) - if not file_path.exists(): - raise ValueError(f"File content not found on disk: {file_path}") + file_obj, file_path = await self._lookup_file_id(file_id) - with open(file_path, "rb") as f: - content = f.read() + if not file_path.exists(): + logger.warning(f"File '{file_id}'s underlying '{file_path}' is missing, deleting metadata.") + await self.openai_delete_file(file_id) + raise ResourceNotFoundError(file_id, "File", "client.files.list()") # Return as binary response with appropriate content type return Response( - content=content, + content=file_path.read_bytes(), media_type="application/octet-stream", - 
headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'}, + headers={"Content-Disposition": f'attachment; filename="{file_obj.filename}"'}, ) diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index b17c7db83..67351d4f7 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -8,20 +8,27 @@ from io import BytesIO from unittest.mock import patch import pytest -from openai import OpenAI from llama_stack.core.datatypes import User -from llama_stack.core.library_client import LlamaStackAsLibraryClient -def test_openai_client_basic_operations(compat_client, client_with_models): +# a fixture to skip all these tests if a files provider is not available +@pytest.fixture(autouse=True) +def skip_if_no_files_provider(llama_stack_client): + if not [provider for provider in llama_stack_client.providers.list() if provider.api == "files"]: + pytest.skip("No files providers found") + + +def test_openai_client_basic_operations(openai_client): """Test basic file operations through OpenAI client.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient) and isinstance(compat_client, OpenAI): - pytest.skip("OpenAI files are not supported when testing with LlamaStackAsLibraryClient") - client = compat_client + from openai import NotFoundError + + client = openai_client test_content = b"files test content" + uploaded_file = None + try: # Upload file using OpenAI client with BytesIO(test_content) as file_buffer: @@ -31,6 +38,7 @@ def test_openai_client_basic_operations(compat_client, client_with_models): # Verify basic response structure assert uploaded_file.id.startswith("file-") assert hasattr(uploaded_file, "filename") + assert uploaded_file.filename == "openai_test.txt" # List files files_list = client.files.list() @@ -43,37 +51,41 @@ def test_openai_client_basic_operations(compat_client, client_with_models): # Retrieve file content - OpenAI client returns httpx Response object content_response = client.files.content(uploaded_file.id) - # The response is an httpx Response object with .content attribute containing bytes - if isinstance(content_response, str): - # Llama Stack Client returns a str - # TODO: fix Llama Stack Client - content = bytes(content_response, "utf-8") - else: - content = content_response.content - assert content == test_content + assert content_response.content == test_content # Delete file delete_response = client.files.delete(uploaded_file.id) assert delete_response.deleted is True - except Exception as e: - # Cleanup in case of failure - try: + # Retrieve file should fail + with pytest.raises(NotFoundError, match="not found"): + client.files.retrieve(uploaded_file.id) + + # File should not be found in listing + files_list = client.files.list() + file_ids = [f.id for f in files_list.data] + assert uploaded_file.id not in file_ids + + # Double delete should fail + with pytest.raises(NotFoundError, match="not found"): client.files.delete(uploaded_file.id) - except Exception: - pass - raise e + + finally: + # Cleanup in case of failure + if uploaded_file is not None: + try: + client.files.delete(uploaded_file.id) + except NotFoundError: + pass # ignore 404 +@pytest.mark.xfail(message="User isolation broken for current providers, must be fixed.") @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") -def test_files_authentication_isolation(mock_get_authenticated_user, compat_client, client_with_models): +def 
test_files_authentication_isolation(mock_get_authenticated_user, llama_stack_client): """Test that users can only access their own files.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient) and isinstance(compat_client, OpenAI): - pytest.skip("OpenAI files are not supported when testing with LlamaStackAsLibraryClient") - if not isinstance(client_with_models, LlamaStackAsLibraryClient): - pytest.skip("Authentication tests require LlamaStackAsLibraryClient (library mode)") + from llama_stack_client import NotFoundError - client = compat_client + client = llama_stack_client # Create two test users user1 = User("user1", {"roles": ["user"], "teams": ["team-a"]}) @@ -117,7 +129,7 @@ def test_files_authentication_isolation(mock_get_authenticated_user, compat_clie # User 1 cannot retrieve user2's file mock_get_authenticated_user.return_value = user1 - with pytest.raises(ValueError, match="not found"): + with pytest.raises(NotFoundError, match="not found"): client.files.retrieve(user2_file.id) # User 1 can access their file content @@ -131,7 +143,7 @@ def test_files_authentication_isolation(mock_get_authenticated_user, compat_clie # User 1 cannot access user2's file content mock_get_authenticated_user.return_value = user1 - with pytest.raises(ValueError, match="not found"): + with pytest.raises(NotFoundError, match="not found"): client.files.content(user2_file.id) # User 1 can delete their own file @@ -141,7 +153,7 @@ def test_files_authentication_isolation(mock_get_authenticated_user, compat_clie # User 1 cannot delete user2's file mock_get_authenticated_user.return_value = user1 - with pytest.raises(ValueError, match="not found"): + with pytest.raises(NotFoundError, match="not found"): client.files.delete(user2_file.id) # User 2 can still access their file after user1's file is deleted @@ -169,14 +181,9 @@ def test_files_authentication_isolation(mock_get_authenticated_user, compat_clie @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") -def test_files_authentication_shared_attributes(mock_get_authenticated_user, compat_client, client_with_models): +def test_files_authentication_shared_attributes(mock_get_authenticated_user, llama_stack_client): """Test access control with users having identical attributes.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient) and isinstance(compat_client, OpenAI): - pytest.skip("OpenAI files are not supported when testing with LlamaStackAsLibraryClient") - if not isinstance(client_with_models, LlamaStackAsLibraryClient): - pytest.skip("Authentication tests require LlamaStackAsLibraryClient (library mode)") - - client = compat_client + client = llama_stack_client # Create users with identical attributes (required for default policy) user_a = User("user-a", {"roles": ["user"], "teams": ["shared-team"]}) @@ -231,14 +238,8 @@ def test_files_authentication_shared_attributes(mock_get_authenticated_user, com @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") -def test_files_authentication_anonymous_access(mock_get_authenticated_user, compat_client, client_with_models): - """Test anonymous user behavior when no authentication is present.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient) and isinstance(compat_client, OpenAI): - pytest.skip("OpenAI files are not supported when testing with LlamaStackAsLibraryClient") - if not isinstance(client_with_models, LlamaStackAsLibraryClient): - pytest.skip("Authentication tests require LlamaStackAsLibraryClient 
(library mode)") - - client = compat_client +def test_files_authentication_anonymous_access(mock_get_authenticated_user, llama_stack_client): + client = llama_stack_client # Simulate anonymous user (no authentication) mock_get_authenticated_user.return_value = None diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 0b7132d71..9cf56f6f5 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -263,8 +263,21 @@ def instantiate_llama_stack_client(session): @pytest.fixture(scope="session") -def openai_client(client_with_models): - base_url = f"{client_with_models.base_url}/v1/openai/v1" +def require_server(llama_stack_client): + """ + Skip test if no server is running. + + We use the llama_stack_client to tell if a server was started or not. + + We use this with openai_client because it relies on a running server. + """ + if isinstance(llama_stack_client, LlamaStackAsLibraryClient): + pytest.skip("No server running") + + +@pytest.fixture(scope="session") +def openai_client(llama_stack_client, require_server): + base_url = f"{llama_stack_client.base_url}/v1/openai/v1" return OpenAI(base_url=base_url, api_key="fake") diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index 04f33e97d..e14e033b9 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -7,6 +7,7 @@ import pytest +from llama_stack.apis.common.errors import ResourceNotFoundError from llama_stack.apis.common.responses import Order from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.access_control.access_control import default_policy @@ -190,7 +191,7 @@ class TestOpenAIFilesAPI: async def test_retrieve_file_not_found(self, files_provider): """Test retrieving a non-existent file.""" - with pytest.raises(ValueError, match="File with id file-nonexistent not found"): + with pytest.raises(ResourceNotFoundError, match="not found"): await files_provider.openai_retrieve_file("file-nonexistent") async def test_retrieve_file_content_success(self, files_provider, sample_text_file): @@ -208,7 +209,7 @@ class TestOpenAIFilesAPI: async def test_retrieve_file_content_not_found(self, files_provider): """Test retrieving content of a non-existent file.""" - with pytest.raises(ValueError, match="File with id file-nonexistent not found"): + with pytest.raises(ResourceNotFoundError, match="not found"): await files_provider.openai_retrieve_file_content("file-nonexistent") async def test_delete_file_success(self, files_provider, sample_text_file): @@ -229,12 +230,12 @@ class TestOpenAIFilesAPI: assert delete_response.deleted is True # Verify file no longer exists - with pytest.raises(ValueError, match=f"File with id {uploaded_file.id} not found"): + with pytest.raises(ResourceNotFoundError, match="not found"): await files_provider.openai_retrieve_file(uploaded_file.id) async def test_delete_file_not_found(self, files_provider): """Test deleting a non-existent file.""" - with pytest.raises(ValueError, match="File with id file-nonexistent not found"): + with pytest.raises(ResourceNotFoundError, match="not found"): await files_provider.openai_delete_file("file-nonexistent") async def test_file_persistence_across_operations(self, files_provider, sample_text_file): From e195ee3091da2aefe87ba668e8643813d7441a20 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Wed, 20 Aug 2025 14:11:44 -0600 Subject: [PATCH 08/42] fix: Fix broken package-lock.json (#3209) # What does this PR do? 
Fix broken `package-lock.json` not caught by [github bot in this commit](https://github.com/llamastack/llama-stack/commit/7f0b2a876421a7b27e7ddbac55687fb93b0f1382). ## Test Plan Signed-off-by: Francisco Javier Arceo --- llama_stack/ui/package-lock.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index bc6263732..2df1cceb3 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -18,7 +18,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": "^11.18.2", - "llama-stack-client": "0.2.17", + "llama-stack-client": "^0.2.18", "lucide-react": "^0.510.0", "next": "15.3.3", "next-auth": "^4.24.11", @@ -9926,9 +9926,9 @@ "license": "MIT" }, "node_modules/llama-stack-client": { - "version": "0.2.17", - "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.17.tgz", - "integrity": "sha512-+/fEO8M7XPiVLjhH7ge18i1ijKp4+h3dOkE0C8g2cvGuDUtDYIJlf8NSyr9OMByjiWpCibWU7VOKL50LwGLS3Q==", + "version": "0.2.18", + "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.18.tgz", + "integrity": "sha512-k+xQOz/TIU0cINP4Aih8q6xs7f/6qs0fLDMXTTKQr5C0F1jtCjRiwsas7bTsDfpKfYhg/7Xy/wPw/uZgi6aIVg==", "license": "MIT", "dependencies": { "@types/node": "^18.11.18", From 00a67da449e8a38ce141de5feb359f0bf710d36a Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Wed, 20 Aug 2025 22:52:05 +0200 Subject: [PATCH 09/42] fix: Use `pool_pre_ping=True` in SQLAlchemy engine creation (#3208) # What does this PR do? We noticed that when llama-stack is running for a long time, we would run into database errors when trying to run messages through the agent (which we configured to persist against postgres), seemingly due to the database connections being stale or disconnected. This commit adds `pool_pre_ping=True` to the SQLAlchemy engine creation to help mitigate this issue by checking the connection before using it, and re-establishing it if necessary. More information in: https://docs.sqlalchemy.org/en/20/core/pooling.html#dealing-with-disconnects We're also open to other suggestions on how to handle this issue, this PR is just a suggestion. ## Test Plan We have not tested it yet (we're in the process of doing that) and we're hoping it's going to resolve our issue. 
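For reference, a minimal standalone sketch of the engine setup this change adopts. The connection string below is an assumed example for illustration only; in the stack the DSN comes from `config.engine_str`.

```python
# Minimal sketch: enable pre-ping on the async engine, as this PR does.
# The DSN is an assumed example, not part of this repository's config.
from sqlalchemy.ext.asyncio import create_async_engine

engine = create_async_engine(
    "postgresql+asyncpg://user:secret@localhost:5432/llamastack",  # assumed example DSN
    pool_pre_ping=True,  # test each pooled connection before handing it out; reconnect if it has gone stale
)
```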
--- .../providers/utils/sqlstore/sqlalchemy_sqlstore.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 6414929db..7fa0cc755 100644 --- a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -22,6 +22,7 @@ from sqlalchemy import ( text, ) from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine +from sqlalchemy.ext.asyncio.engine import AsyncEngine from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.log import get_logger @@ -45,9 +46,12 @@ TYPE_MAPPING: dict[ColumnType, Any] = { class SqlAlchemySqlStoreImpl(SqlStore): def __init__(self, config: SqlAlchemySqlStoreConfig): self.config = config - self.async_session = async_sessionmaker(create_async_engine(config.engine_str)) + self.async_session = async_sessionmaker(self.create_engine()) self.metadata = MetaData() + def create_engine(self) -> AsyncEngine: + return create_async_engine(self.config.engine_str, pool_pre_ping=True) + async def create_table( self, table: str, @@ -83,7 +87,7 @@ class SqlAlchemySqlStoreImpl(SqlStore): else: sqlalchemy_table = self.metadata.tables[table] - engine = create_async_engine(self.config.engine_str) + engine = self.create_engine() async with engine.begin() as conn: await conn.run_sync(self.metadata.create_all, tables=[sqlalchemy_table], checkfirst=True) @@ -241,7 +245,7 @@ class SqlAlchemySqlStoreImpl(SqlStore): nullable: bool = True, ) -> None: """Add a column to an existing table if the column doesn't already exist.""" - engine = create_async_engine(self.config.engine_str) + engine = self.create_engine() try: async with engine.begin() as conn: From 14082b22af35ba3561ddccff7b5d2d6bbdebceaf Mon Sep 17 00:00:00 2001 From: grs Date: Wed, 20 Aug 2025 22:12:15 +0100 Subject: [PATCH 10/42] fix: handle mcp tool calls in previous response correctly (#3155) # What does this PR do? Handles MCP tool calls in a previous response Closes #3105 ## Test Plan Made call to create response with tool call, then made second call with the first linked through previous_response_id. Did not get error. Also added unit test. 
Signed-off-by: Gordon Sim --- .../agents/meta_reference/responses/utils.py | 21 ++++++ .../non_ci/responses/test_basic_responses.py | 5 +- .../non_ci/responses/test_tool_responses.py | 65 +++++++++++++++++-- .../meta_reference/test_openai_responses.py | 48 ++++++++++++++ 4 files changed, 130 insertions(+), 9 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py index 1507a55c8..486ac9351 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py @@ -17,6 +17,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageContent, OpenAIResponseOutputMessageContentOutputText, OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPCall, + OpenAIResponseOutputMessageMCPListTools, OpenAIResponseText, ) from llama_stack.apis.inference import ( @@ -117,6 +119,25 @@ async def convert_response_input_to_chat_messages( ), ) messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call])) + elif isinstance(input_item, OpenAIResponseOutputMessageMCPCall): + tool_call = OpenAIChatCompletionToolCall( + index=0, + id=input_item.id, + function=OpenAIChatCompletionToolCallFunction( + name=input_item.name, + arguments=input_item.arguments, + ), + ) + messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call])) + messages.append( + OpenAIToolMessageParam( + content=input_item.output, + tool_call_id=input_item.id, + ) + ) + elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools): + # the tool list will be handled separately + pass else: content = await convert_response_content_to_chat_content(input_item.content) message_type = await get_message_type_by_role(input_item.role) diff --git a/tests/integration/non_ci/responses/test_basic_responses.py b/tests/integration/non_ci/responses/test_basic_responses.py index a8106e593..17d50d348 100644 --- a/tests/integration/non_ci/responses/test_basic_responses.py +++ b/tests/integration/non_ci/responses/test_basic_responses.py @@ -7,8 +7,9 @@ import time import pytest -from fixtures.test_cases import basic_test_cases, image_test_cases, multi_turn_image_test_cases, multi_turn_test_cases -from streaming_assertions import StreamingValidator + +from .fixtures.test_cases import basic_test_cases, image_test_cases, multi_turn_image_test_cases, multi_turn_test_cases +from .streaming_assertions import StreamingValidator @pytest.mark.parametrize("case", basic_test_cases) diff --git a/tests/integration/non_ci/responses/test_tool_responses.py b/tests/integration/non_ci/responses/test_tool_responses.py index 33d109863..494b89226 100644 --- a/tests/integration/non_ci/responses/test_tool_responses.py +++ b/tests/integration/non_ci/responses/test_tool_responses.py @@ -10,7 +10,12 @@ import os import httpx import openai import pytest -from fixtures.test_cases import ( + +from llama_stack import LlamaStackAsLibraryClient +from llama_stack.core.datatypes import AuthenticationRequiredError +from tests.common.mcp import dependency_tools, make_mcp_server + +from .fixtures.test_cases import ( custom_tool_test_cases, file_search_test_cases, mcp_tool_test_cases, @@ -18,12 +23,8 @@ from fixtures.test_cases import ( multi_turn_tool_execution_test_cases, web_search_test_cases, ) -from helpers import new_vector_store, setup_mcp_tools, upload_file, wait_for_file_attachment -from streaming_assertions import 
StreamingValidator - -from llama_stack import LlamaStackAsLibraryClient -from llama_stack.core.datatypes import AuthenticationRequiredError -from tests.common.mcp import dependency_tools, make_mcp_server +from .helpers import new_vector_store, setup_mcp_tools, upload_file, wait_for_file_attachment +from .streaming_assertions import StreamingValidator @pytest.mark.parametrize("case", web_search_test_cases) @@ -195,6 +196,56 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case): assert len(response.output) >= 3 +@pytest.mark.parametrize("case", mcp_tool_test_cases) +def test_response_sequential_mcp_tool(compat_client, text_model_id, case): + if not isinstance(compat_client, LlamaStackAsLibraryClient): + pytest.skip("in-process MCP server is only supported in library client") + + with make_mcp_server() as mcp_server_info: + tools = setup_mcp_tools(case.tools, mcp_server_info) + + response = compat_client.responses.create( + model=text_model_id, + input=case.input, + tools=tools, + stream=False, + ) + + assert len(response.output) >= 3 + list_tools = response.output[0] + assert list_tools.type == "mcp_list_tools" + assert list_tools.server_label == "localmcp" + assert len(list_tools.tools) == 2 + assert {t.name for t in list_tools.tools} == { + "get_boiling_point", + "greet_everyone", + } + + call = response.output[1] + assert call.type == "mcp_call" + assert call.name == "get_boiling_point" + assert json.loads(call.arguments) == { + "liquid_name": "myawesomeliquid", + "celsius": True, + } + assert call.error is None + assert "-100" in call.output + + # sometimes the model will call the tool again, so we need to get the last message + message = response.output[-1] + text_content = message.content[0].text + assert "boiling point" in text_content.lower() + + response2 = compat_client.responses.create( + model=text_model_id, input=case.input, tools=tools, stream=False, previous_response_id=response.id + ) + + assert len(response2.output) >= 1 + message = response2.output[-1] + text_content = message.content[0].text + assert "boiling point" in text_content.lower() + + @pytest.mark.parametrize("case", custom_tool_test_cases) def test_response_non_streaming_custom_tool(compat_client, text_model_id, case): response = compat_client.responses.create( diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 5ea14d7c7..a964bc219 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -24,6 +24,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseMessage, OpenAIResponseObjectWithInput, OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, @@ -461,6 +462,53 @@ async def test_prepend_previous_response_web_search(openai_responses_impl, mock_ assert input[3].content == "fake_input" +async def test_prepend_previous_response_mcp_tool_call(openai_responses_impl, mock_responses_store): + """Test prepending a previous response which included an mcp tool call to a new response.""" + input_item_message = OpenAIResponseMessage( + id="123", + content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")], + role="user", + ) + output_tool_call = OpenAIResponseOutputMessageMCPCall( + id="ws_123", + name="fake-tool", + 
arguments="fake-arguments", + server_label="fake-label", + ) + output_message = OpenAIResponseMessage( + id="123", + content=[OpenAIResponseOutputMessageContentOutputText(text="fake_tool_call_response")], + status="completed", + role="assistant", + ) + response = OpenAIResponseObjectWithInput( + created_at=1, + id="resp_123", + model="fake_model", + output=[output_tool_call, output_message], + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[input_item_message], + ) + mock_responses_store.get_response_object.return_value = response + + input_messages = [OpenAIResponseMessage(content="fake_input", role="user")] + input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123") + + assert len(input) == 4 + # Check for previous input + assert isinstance(input[0], OpenAIResponseMessage) + assert input[0].content[0].text == "fake_previous_input" + # Check for previous output MCP tool call + assert isinstance(input[1], OpenAIResponseOutputMessageMCPCall) + # Check for previous output web search response + assert isinstance(input[2], OpenAIResponseMessage) + assert input[2].content[0].text == "fake_tool_call_response" + # Check for new input + assert isinstance(input[3], OpenAIResponseMessage) + assert input[3].content == "fake_input" + + async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api): # Setup input_text = "What is the capital of Ireland?" From 49060c3020991c05f530a30358e2d6f601f36b4a Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Wed, 20 Aug 2025 16:05:12 -0600 Subject: [PATCH 11/42] chore: Update dependabot to capture package-lock.json (#3212) # What does this PR do? This should fix dependabot based on this thread: https://stackoverflow.com/questions/60201543/dependabot-only-updates-lock-file ## Test Plan Signed-off-by: Francisco Javier Arceo --- .github/dependabot.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 134efd93b..01a2464a9 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -19,3 +19,15 @@ updates: - python commit-message: prefix: chore(python-deps) + + - package-ecosystem: npm + directory: "/llama_stack/ui" + schedule: + interval: "weekly" + day: "saturday" + labels: + - type/dependencies + - javascript + commit-message: + prefix: chore(ui-deps) + versioning-strategy: increase From f328ff6e983c1f48686d1f271a122d1b652be31d Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 20 Aug 2025 16:34:50 -0700 Subject: [PATCH 12/42] fix(ci): dependabot update had a bug --- .github/dependabot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 01a2464a9..f88402a7a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,6 +9,7 @@ updates: day: "saturday" commit-message: prefix: chore(github-deps) + - package-ecosystem: "uv" directory: "/" schedule: @@ -30,4 +31,3 @@ updates: - javascript commit-message: prefix: chore(ui-deps) - versioning-strategy: increase From eff97f122bf9f00b7e90ea86bbb0e4bad7c3ce24 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:47:33 -0700 Subject: [PATCH 13/42] chore(python-deps): bump weaviate-client from 4.16.5 to 4.16.9 (#3219) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps 
[weaviate-client](https://github.com/weaviate/weaviate-python-client) from 4.16.5 to 4.16.9.
Release notes (sourced from weaviate-client's releases):
- v4.16.9: Full Changelog: https://github.com/weaviate/weaviate-python-client/compare/v4.16.8...v4.16.9
- v4.16.8: Full Changelog: https://github.com/weaviate/weaviate-python-client/compare/v4.16.7...v4.16.8
- v4.16.6: Full Changelog: https://github.com/weaviate/weaviate-python-client/compare/v4.16.5...v4.16.6

Changelog (sourced from weaviate-client's changelog):
- Version 4.16.9: explicitly depend on the protobuf package
- Version 4.16.8: further attempted fixes for protobuf compatibility issues; introduction of the backups.list() method
- Version 4.16.7: fixes compatibility issues between the built gRPC stubs and differing protobuf versions depending on the version of grpcio used to build the stubs; add text2vec-model2vec module to Configure.NamedVectors; deprecate min_occurrences in Metrics.text in favour of limit
- Version 4.16.6: add dimensions property to text2vec-transformers vectorizers in Configure.Vectors; add text2vec-model2vec vectorizer in Configure.Vectors; deprecate text2vec-contextionary vectorizer
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=weaviate-client&package-manager=uv&previous-version=4.16.5&new-version=4.16.9)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- uv.lock | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/uv.lock b/uv.lock index 635b2bdfe..d3ea888b6 100644 --- a/uv.lock +++ b/uv.lock @@ -1235,19 +1235,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/96/44759eca966720d0f3e1b105c43f8ad4590c97bf8eb3cd489656e9590baa/grpcio-1.67.1-cp313-cp313-win_amd64.whl", hash = "sha256:fa0c739ad8b1996bd24823950e3cb5152ae91fca1c09cc791190bf1627ffefba", size = 4346042, upload-time = "2024-10-29T06:25:21.939Z" }, ] -[[package]] -name = "grpcio-health-checking" -version = "1.67.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "grpcio" }, - { name = "protobuf" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/64/dd/e3b339fa44dc75b501a1a22cb88f1af5b1f8c964488f19c4de4cfbbf05ba/grpcio_health_checking-1.67.1.tar.gz", hash = "sha256:ca90fa76a6afbb4fda71d734cb9767819bba14928b91e308cffbb0c311eb941e", size = 16775, upload-time = "2024-10-29T06:30:16.487Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/8d/7a9878dca6616b48093d71c52d0bc79cb2dd1a2698ff6f5ce7406306de12/grpcio_health_checking-1.67.1-py3-none-any.whl", hash = "sha256:93753da5062152660aef2286c9b261e07dd87124a65e4dc9fbd47d1ce966b39d", size = 18924, upload-time = "2024-10-29T06:26:25.535Z" }, -] - [[package]] name = "h11" version = "0.16.0" @@ -5039,20 +5026,20 @@ wheels = [ [[package]] name = "weaviate-client" -version = "4.16.5" +version = "4.16.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "authlib" }, { name = "deprecation" }, { name = "grpcio" }, - { name = "grpcio-health-checking" }, { name = "httpx" }, + { name = "protobuf" }, { name = "pydantic" }, { name = "validators" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ad/d1/9f51e3bfea67ec8afaaed175b4d8d22a8bbba0622f9bcd8b064d53a57f91/weaviate_client-4.16.5.tar.gz", hash = "sha256:3359d7bc77aa4a27e6ecfed82017fc32ddfdda6299a6ffd4cf1f09c33023b147", size = 779506, upload-time = "2025-08-01T09:29:06.183Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f4/e4/6a0b1501645f17a851067fc7bd0d5b53dc9777f2818be9c43debe06eda19/weaviate_client-4.16.9.tar.gz", hash = "sha256:d461071f1ff5ebddd0fc697959628a1d8caa12af1da071401ef25583c3084eba", size = 766390, upload-time = "2025-08-20T15:00:03.924Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/4c/e5b3c67fa2b735a572d06095f524f6e2e0f9b47bb99f3c91b9fe3e291a88/weaviate_client-4.16.5-py3-none-any.whl", hash = "sha256:1c5002ea72ba285c3c000a01d498267f8c3da51acf19d0f321f3f8ecbb58411a", size = 597199, upload-time = "2025-08-01T09:29:04.385Z" }, + { url = "https://files.pythonhosted.org/packages/10/1a/fc66f5f33961351c759d56453d18176849da8f64186c941183bb574b808b/weaviate_client-4.16.9-py3-none-any.whl", hash = "sha256:8b4adabaec0d513edef94c8c1de61c89a86eba3b63a4dc1acdfc9580e80199f4", size = 579098, upload-time = "2025-08-20T15:00:01.882Z" }, ] [[package]] From 09bee51d6b7beff99e3c9a79101ac058020c681e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:47:46 -0700 Subject: [PATCH 14/42] chore(python-deps): bump locust from 2.38.0 to 2.39.0 (#3221) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [locust](https://github.com/locustio/locust) from 2.38.0 to 2.39.0.
Release notes

Sourced from locust's releases.

2.39.0

What's Changed

New Contributors

Full Changelog: https://github.com/locustio/locust/compare/2.38.1...2.39.0

2.38.1

What's Changed

Full Changelog: https://github.com/locustio/locust/compare/2.38.0...2.38.1

Changelog

Sourced from locust's changelog.

Detailed changelog

The most important changes can also be found in the documentation.

Commits
  • 1810fef Tiny doc fixes
  • 48b4dfc Link SocketIOUser from main docs.
  • 6e4fd7f Merge pull request #3189 from locustio/Add-SocketioUser
  • 95eca45 better documentation of on_message
  • a56ef66 SocketIOUser docs: Link to example on GH
  • adaa71b SocketIOUser, add method docstrings and link to python-socketio's readthedocs
  • 9fb3ff0 Add testcase for SocketIOUser
  • 7047247 SocketIOUser: Fix use of environment object. Remove SocketIOClient.
  • f8ddc9c rename socketio echo_server
  • ae28acf add contrib dependencies to docs build
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=locust&package-manager=uv&previous-version=2.38.0&new-version=2.39.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
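For context on the lock-file change below: locust 2.39.0 pulls in python-socketio[client] for its new SocketIOUser. The sketch that follows is a minimal locustfile using locust's long-standing HttpUser API rather than the new SocketIOUser; the endpoint path is a hypothetical placeholder, and none of this comes from the repository itself.

```python
# Minimal illustrative locustfile (assumptions: the target host and the
# "/v1/models" path are placeholders, not endpoints defined in this repo).
from locust import HttpUser, between, task


class QuickstartUser(HttpUser):
    wait_time = between(1, 3)  # think time between tasks, in seconds

    @task
    def get_models(self):
        self.client.get("/v1/models")  # simple GET against the configured host
```

Run it with something like `locust -f locustfile.py --host http://localhost:8080` and drive the load from the web UI.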
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- uv.lock | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index d3ea888b6..5d37bb0d2 100644 --- a/uv.lock +++ b/uv.lock @@ -1976,7 +1976,7 @@ wheels = [ [[package]] name = "locust" -version = "2.38.0" +version = "2.39.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "configargparse" }, @@ -1988,15 +1988,16 @@ dependencies = [ { name = "locust-cloud" }, { name = "msgpack" }, { name = "psutil" }, + { name = "python-socketio", extra = ["client"] }, { name = "pywin32", marker = "sys_platform == 'win32'" }, { name = "pyzmq" }, { name = "requests" }, { name = "setuptools" }, { name = "werkzeug" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/93/ecd79dde28e24bdc99488d4e2c0ad4117252257d5cbdd61e3b14d1f03786/locust-2.38.0.tar.gz", hash = "sha256:5bd6c29d8423733cb5d9a265548c9fef7b731f2254aa91885d6c98d0d39f90f0", size = 1406518, upload-time = "2025-08-07T10:18:52.584Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/6f/d6ca4483f4795747fbbd610d28e798ca4f5d4358e03f309343eb5bab128f/locust-2.39.0.tar.gz", hash = "sha256:71e82a68324f9d63d4b800035288488c08eab12811fa4c24ff07f031643b7b39", size = 1409879, upload-time = "2025-08-20T13:39:55.233Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/be/57ca67b95c45e69c173e86fe5c934d789effc2ec203d3e3ec2a0b32aa707/locust-2.38.0-py3-none-any.whl", hash = "sha256:b92c937e8659e9ffd6d6d1cab2f63f70aa98c87975911938d1f473534f46fd78", size = 1424083, upload-time = "2025-08-07T10:18:50.499Z" }, + { url = "https://files.pythonhosted.org/packages/7c/94/7dc9a2b4ccb18a5b0c4be4bfadfa79b6c0fd860267a7114641402627e7db/locust-2.39.0-py3-none-any.whl", hash = "sha256:3817c4d7cca387b4b871da779c9e145c2a95fbb0b5602be5833976902b967a8f", size = 1428138, upload-time = "2025-08-20T13:39:52.549Z" }, ] [[package]] From 0473a326193eb90eba9aae891980307dd5601fb4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:48:05 -0700 Subject: [PATCH 15/42] chore(ui-deps): bump tailwind-merge from 3.3.0 to 3.3.1 in /llama_stack/ui (#3223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [tailwind-merge](https://github.com/dcastil/tailwind-merge) from 3.3.0 to 3.3.1.
Release notes

Sourced from tailwind-merge's releases.

v3.3.1

Bug Fixes

Full Changelog: https://github.com/dcastil/tailwind-merge/compare/v3.3.0...v3.3.1

Thanks to @​brandonmcconnell, @​manavm1990, @​langy, @​roboflow, @​syntaxfm, @​getsentry, @​codecov, @​sourcegraph, a private sponsor, @​block and @​shawt3000 for sponsoring tailwind-merge! ❤️

Commits
  • 40d8fee v3.3.1
  • 429ea54 add changelog for v3.3.1
  • d3df877 Merge pull request #591 from dcastil/bugfix/590/fix-arbitrary-value-using-col...
  • fdd9cdf add color-mix() to colorFunctionRegex
  • d49e03a add test case for border colors being merged incorrectly
  • 47155f0 Merge pull request #585 from dcastil/renovate/all-minor-patch
  • 2d29675 Update all non-major dependencies
  • c3d7208 Merge pull request #578 from dcastil/dependabot/npm_and_yarn/dot-github/actio...
  • 527214b Bump undici from 5.28.5 to 5.29.0 in /.github/actions/metrics-report
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=tailwind-merge&package-manager=npm_and_yarn&previous-version=3.3.0&new-version=3.3.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- llama_stack/ui/package-lock.json | 8 ++++---- llama_stack/ui/package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index 2df1cceb3..d4118bc11 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -30,7 +30,7 @@ "remeda": "^2.26.1", "shiki": "^1.29.2", "sonner": "^2.0.6", - "tailwind-merge": "^3.3.0" + "tailwind-merge": "^3.3.1" }, "devDependencies": { "@eslint/eslintrc": "^3", @@ -13489,9 +13489,9 @@ } }, "node_modules/tailwind-merge": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.3.0.tgz", - "integrity": "sha512-fyW/pEfcQSiigd5SNn0nApUOxx0zB/dm6UDU/rEwc2c3sX2smWUNbapHv+QRqLGVp9GWX3THIa7MUGPo+YkDzQ==", + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.3.1.tgz", + "integrity": "sha512-gBXpgUm/3rp1lMZZrM/w7D8GKqshif0zAymAhbCyIt8KMe+0v9DQ7cdYLR4FHH/cKpdTXb+A/tKKU3eolfsI+g==", "license": "MIT", "funding": { "type": "github", diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json index 226b06f59..958885119 100644 --- a/llama_stack/ui/package.json +++ b/llama_stack/ui/package.json @@ -35,7 +35,7 @@ "remeda": "^2.26.1", "shiki": "^1.29.2", "sonner": "^2.0.6", - "tailwind-merge": "^3.3.0" + "tailwind-merge": "^3.3.1" }, "devDependencies": { "@eslint/eslintrc": "^3", From 90b7c2317e2bb72a6c3b5be0bd3a5a7edbab41b1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:48:20 -0700 Subject: [PATCH 16/42] chore(ui-deps): bump @radix-ui/react-separator from 1.1.6 to 1.1.7 in /llama_stack/ui (#3222) Bumps [@radix-ui/react-separator](https://github.com/radix-ui/primitives) from 1.1.6 to 1.1.7.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@radix-ui/react-separator&package-manager=npm_and_yarn&previous-version=1.1.6&new-version=1.1.7)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- llama_stack/ui/package-lock.json | 33 +++++++++++++++++++++++++++----- llama_stack/ui/package.json | 2 +- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index d4118bc11..f9ee44792 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -12,7 +12,7 @@ "@radix-ui/react-dialog": "^1.1.13", "@radix-ui/react-dropdown-menu": "^2.1.14", "@radix-ui/react-select": "^2.2.5", - "@radix-ui/react-separator": "^1.1.6", + "@radix-ui/react-separator": "^1.1.7", "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-tooltip": "^1.2.6", "class-variance-authority": "^0.7.1", @@ -2855,12 +2855,35 @@ } }, "node_modules/@radix-ui/react-separator": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.6.tgz", - "integrity": "sha512-Izof3lPpbCfTM7WDta+LRkz31jem890VjEvpVRoWQNKpDUMMVffuyq854XPGP1KYGWWmjmYvHvPFeocWhFCy1w==", + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz", + "integrity": "sha512-0HEb8R9E8A+jZjvmFCy/J4xhbXy3TV+9XSnGJ3KvTtjlIUy/YQ/p6UYZvi7YbeoeXdyU9+Y3scizK6hkY37baA==", "license": "MIT", "dependencies": { - "@radix-ui/react-primitive": "2.1.2" + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-separator/node_modules/@radix-ui/react-primitive": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", + "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-slot": "1.2.3" }, "peerDependencies": { "@types/react": "*", diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json index 958885119..824e604a4 100644 --- a/llama_stack/ui/package.json +++ b/llama_stack/ui/package.json @@ -17,7 +17,7 @@ "@radix-ui/react-dialog": "^1.1.13", "@radix-ui/react-dropdown-menu": "^2.1.14", "@radix-ui/react-select": "^2.2.5", - "@radix-ui/react-separator": "^1.1.6", + "@radix-ui/react-separator": "^1.1.7", "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-tooltip": "^1.2.6", "class-variance-authority": "^0.7.1", From 65d09c442d71e28ea5c3b02af777b7a28d4daa77 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:48:35 -0700 Subject: [PATCH 17/42] chore(ui-deps): bump eslint-config-prettier from 10.1.5 to 10.1.8 in /llama_stack/ui (#3220) Bumps [eslint-config-prettier](https://github.com/prettier/eslint-config-prettier) from 10.1.5 to 10.1.8.
Release notes

Sourced from eslint-config-prettier's releases.

v10.1.8

republish latest version

Full Changelog: https://github.com/prettier/eslint-config-prettier/compare/v10.1.5...v10.1.8

Changelog

Sourced from eslint-config-prettier's changelog.

eslint-config-prettier

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=eslint-config-prettier&package-manager=npm_and_yarn&previous-version=10.1.5&new-version=10.1.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- llama_stack/ui/package-lock.json | 8 ++++---- llama_stack/ui/package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index f9ee44792..ffcbdfba4 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -44,7 +44,7 @@ "@types/react-dom": "^19", "eslint": "^9", "eslint-config-next": "15.3.2", - "eslint-config-prettier": "^10.1.5", + "eslint-config-prettier": "^10.1.8", "eslint-plugin-prettier": "^5.4.0", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", @@ -6404,9 +6404,9 @@ } }, "node_modules/eslint-config-prettier": { - "version": "10.1.5", - "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-10.1.5.tgz", - "integrity": "sha512-zc1UmCpNltmVY34vuLRV61r1K27sWuX39E+uyUnY8xS2Bex88VV9cugG+UZbRSRGtGyFboj+D8JODyme1plMpw==", + "version": "10.1.8", + "resolved": "https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-10.1.8.tgz", + "integrity": "sha512-82GZUjRS0p/jganf6q1rEO25VSoHH0hKPCTrgillPjdI/3bgBhAE1QzHrHTizjpRvy6pGAvKjDJtk2pF9NDq8w==", "dev": true, "license": "MIT", "bin": { diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json index 824e604a4..8ba9b47fc 100644 --- a/llama_stack/ui/package.json +++ b/llama_stack/ui/package.json @@ -49,7 +49,7 @@ "@types/react-dom": "^19", "eslint": "^9", "eslint-config-next": "15.3.2", - "eslint-config-prettier": "^10.1.5", + "eslint-config-prettier": "^10.1.8", "eslint-plugin-prettier": "^5.4.0", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", From 620212e92063d62b66a59481c3e757e3ae018420 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:48:53 -0700 Subject: [PATCH 18/42] chore(ui-deps): bump @radix-ui/react-collapsible from 1.1.11 to 1.1.12 in /llama_stack/ui (#3218) Bumps [@radix-ui/react-collapsible](https://github.com/radix-ui/primitives) from 1.1.11 to 1.1.12.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@radix-ui/react-collapsible&package-manager=npm_and_yarn&previous-version=1.1.11&new-version=1.1.12)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- llama_stack/ui/package-lock.json | 42 +++++++++++++++++++++++++++----- llama_stack/ui/package.json | 2 +- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index ffcbdfba4..970b78894 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -8,7 +8,7 @@ "name": "ui", "version": "0.1.0", "dependencies": { - "@radix-ui/react-collapsible": "^1.1.11", + "@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-dialog": "^1.1.13", "@radix-ui/react-dropdown-menu": "^2.1.14", "@radix-ui/react-select": "^2.2.5", @@ -2089,16 +2089,16 @@ } }, "node_modules/@radix-ui/react-collapsible": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.11.tgz", - "integrity": "sha512-2qrRsVGSCYasSz1RFOorXwl0H7g7J1frQtgpQgYrt+MOidtPAINHn9CPovQXb83r8ahapdx3Tu0fa/pdFFSdPg==", + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz", + "integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==", "license": "MIT", "dependencies": { - "@radix-ui/primitive": "1.1.2", + "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-presence": "1.1.4", + "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-use-controllable-state": "1.2.2", "@radix-ui/react-use-layout-effect": "1.1.1" @@ -2118,6 +2118,36 @@ } } }, + "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/primitive": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", + "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", + "license": "MIT" + }, + "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-presence": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", + "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-primitive": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json index 8ba9b47fc..7b4208aff 100644 --- a/llama_stack/ui/package.json +++ b/llama_stack/ui/package.json @@ -13,7 +13,7 @@ "test:e2e": "playwright test" }, "dependencies": { - "@radix-ui/react-collapsible": "^1.1.11", + "@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-dialog": "^1.1.13", "@radix-ui/react-dropdown-menu": "^2.1.14", "@radix-ui/react-select": "^2.2.5", From 
bf3b201d6196f27fdf712479ddd023c3feb4e7aa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:49:11 -0700 Subject: [PATCH 19/42] chore(python-deps): bump chromadb from 1.0.16 to 1.0.20 (#3217) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [chromadb](https://github.com/chroma-core/chroma) from 1.0.16 to 1.0.20.
Release notes

Sourced from chromadb's releases.

1.0.20

Version: 1.0.20 Git ref: refs/tags/1.0.20 Build Date: 2025-08-18T17:04 PIP Package: chroma-1.0.20.tar.gz Github Container Registry Image: :1.0.20 DockerHub Image: :1.0.20

What's Changed

Full Changelog: https://github.com/chroma-core/chroma/compare/1.0.19...1.0.20

1.0.18

Version: 1.0.18 Git ref: refs/tags/1.0.18 Build Date: 2025-08-18T08:09 PIP Package: chroma-1.0.18.tar.gz Github Container Registry Image: :1.0.18 DockerHub Image: :1.0.18

What's Changed

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=chromadb&package-manager=uv&previous-version=1.0.16&new-version=1.0.20)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
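Since this bump crosses several chromadb patch releases, a quick local sanity check can help. The snippet below is an illustrative sketch (not from this repository) that uses an in-memory client, so nothing is persisted; note that the default embedding function may download a small local model on first use.

```python
# Illustrative chromadb smoke test: in-memory client, toy documents.
import chromadb

client = chromadb.EphemeralClient()
collection = client.get_or_create_collection(name="smoke_test")
collection.add(
    ids=["doc-1", "doc-2"],
    documents=["llama stack ships providers", "dependabot bumps dependencies"],
)
results = collection.query(query_texts=["providers"], n_results=1)
print(results["ids"])  # expect doc-1 to rank first for this toy query
```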
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- uv.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/uv.lock b/uv.lock index 5d37bb0d2..d8b7318f1 100644 --- a/uv.lock +++ b/uv.lock @@ -523,7 +523,7 @@ wheels = [ [[package]] name = "chromadb" -version = "1.0.16" +version = "1.0.20" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "bcrypt" }, @@ -554,13 +554,13 @@ dependencies = [ { name = "typing-extensions" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/15/2a/5b7e793d2a27c425e9f1813e9cb965b70e9bda08b69ee15a10e07dc3e59a/chromadb-1.0.16.tar.gz", hash = "sha256:3c864b5beb5e131bdc1f83c0b63a01ec481c6ee52028f088563ffba8478478e1", size = 1241545, upload-time = "2025-08-08T00:25:41.414Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/5d/430c4780738ed8385afb2031c619c71e4d354b435f1523fd628562d42377/chromadb-1.0.20.tar.gz", hash = "sha256:9ca88516f1eefa26e4c308ec9bdae9d209c0ba5fe1fae3f16b250e52246944db", size = 1244999, upload-time = "2025-08-18T17:03:31.195Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/9d/bffcc814272c9b7982551803b2d45b77f39eeea1b9e965c00c05ee81c649/chromadb-1.0.16-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:144163ce7ca4f4448684d5d0c13ebb37c4d68490ecb60967a95d05cea30e0d2d", size = 18942157, upload-time = "2025-08-08T00:25:38.459Z" }, - { url = "https://files.pythonhosted.org/packages/58/4e/de0086f3cbcfd667d75d112bb546386803ab5335599bf7099272a675e98b/chromadb-1.0.16-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:4ebcc5894e6fbb6b576452bbf4659746bfe58d9daf99a18363364e9497434bd2", size = 18147831, upload-time = "2025-08-08T00:25:35.546Z" }, - { url = "https://files.pythonhosted.org/packages/0e/7f/a8aff4ce96281bcb9731d10b2554f41963dd0b47acb4f90a78b2b7c4f199/chromadb-1.0.16-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:937051fc3aae94f7c171503d8f1f7662820aacc75acf45f28d3656c75c5ff1f8", size = 18682195, upload-time = "2025-08-08T00:25:29.654Z" }, - { url = "https://files.pythonhosted.org/packages/a3/9c/2a97d0257176aae472dff6f1ef1b7050449f384e420120e0f31d2d8f532f/chromadb-1.0.16-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0f5c5ad0c59154a9cab1506b857bab8487b588352e668cf1222c54bb9d52daa", size = 19635695, upload-time = "2025-08-08T00:25:32.68Z" }, - { url = "https://files.pythonhosted.org/packages/96/8a/f7e810f3cbdc9186ba4e649dc32711b7ab2c23aba37cf61175f731d22293/chromadb-1.0.16-cp39-abi3-win_amd64.whl", hash = "sha256:2528c01bd8b3facca9d0e1ffac866767c386b94604df484fc792ee891c86e09a", size = 19641144, upload-time = "2025-08-08T00:25:43.446Z" }, + { url = "https://files.pythonhosted.org/packages/59/2f/d40a4aedd9298a012fb9f455a1e334fc875e12c9c667aab8a956a9dff559/chromadb-1.0.20-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0955b9cbd0dfe23ecfd8d911254ff9e57750acbe9c5ff723e2975290092d9d29", size = 19069234, upload-time = "2025-08-18T17:03:28.714Z" }, + { url = "https://files.pythonhosted.org/packages/6a/2e/fcc80bb635719d3cf0705be89e2510bd191d5f544d1c5e9e4392ba95cff4/chromadb-1.0.20-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:52819408a48f0209a0ce4e6655eaaa683cce03f8081f297f88699f00bc8281aa", size = 18264273, upload-time = "2025-08-18T17:03:25.614Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/de/e93edfcebf863d652bb0c03c23ae5a4e9e448b6e01fdac8a8624aa7dd2a4/chromadb-1.0.20-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68dbe15270e743077d47360695e0af918d17b225011e00d491afefbee017097f", size = 18835560, upload-time = "2025-08-18T17:03:18.783Z" }, + { url = "https://files.pythonhosted.org/packages/61/4f/c88ead80ae78c839152cca5dc6edae65b8a1da090b7220739b54c75549eb/chromadb-1.0.20-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2044e1400f67588271ebd2fa654dd5333e9ad108f800aa57a6fa09237afb6142", size = 19755334, upload-time = "2025-08-18T17:03:22.386Z" }, + { url = "https://files.pythonhosted.org/packages/6f/81/6decbd21c67572d67707f7e168851f10404e2857897456c6ba220e9b09be/chromadb-1.0.20-cp39-abi3-win_amd64.whl", hash = "sha256:b81be370b7c34138c01a41d11304498a13598cf9b21ecde31bba932492071301", size = 19778671, upload-time = "2025-08-18T17:03:33.206Z" }, ] [[package]] From 2cc0051ae57b05c94425aece262b404575754d9c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:49:28 -0700 Subject: [PATCH 20/42] chore(ui-deps): bump typescript from 5.8.3 to 5.9.2 in /llama_stack/ui (#3216) Bumps [typescript](https://github.com/microsoft/TypeScript) from 5.8.3 to 5.9.2.
Release notes

Sourced from typescript's releases.

TypeScript 5.9

For release notes, check out the release announcement

Downloads are available on:

TypeScript 5.9 RC

For release notes, check out the release announcement

Downloads are available on:

TypeScript 5.9 Beta

For release notes, check out the release announcement.

Downloads are available on:

Commits
  • be86783 Give more specific errors for verbatimModuleSyntax (#62113)
  • 22ef577 LEGO: Pull request from lego/hb_5378966c-b857-470a-8675-daebef4a6da1_20250714...
  • d5a414c Don't use noErrorTruncation when printing types with maximumLength set (#...
  • f14b5c8 Remove unused and confusing dom.iterable.d.ts file (#62037)
  • 2778e84 Restore AbortSignal.abort (#62086)
  • 65cb4bd LEGO: Pull request from lego/hb_5378966c-b857-470a-8675-daebef4a6da1_20250710...
  • 9e20e03 Clear out checker-level stacks on pop (#62016)
  • 87740bc Fix for Issue 61081 (#61221)
  • 833a8d4 Fix Symbol completion priority and cursor positioning (#61945)
  • 0018c9f LEGO: Pull request from lego/hb_5378966c-b857-470a-8675-daebef4a6da1_20250702...
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=typescript&package-manager=npm_and_yarn&previous-version=5.8.3&new-version=5.9.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- llama_stack/ui/package-lock.json | 166 +++++++++++++++++++------------ 1 file changed, 104 insertions(+), 62 deletions(-) diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index 970b78894..190809533 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -4000,17 +4000,17 @@ "license": "MIT" }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "8.32.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.32.1.tgz", - "integrity": "sha512-6u6Plg9nP/J1GRpe/vcjjabo6Uc5YQPAMxsgQyGC/I0RuukiG1wIe3+Vtg3IrSCVJDmqK3j8adrtzXSENRtFgg==", + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.40.0.tgz", + "integrity": "sha512-w/EboPlBwnmOBtRbiOvzjD+wdiZdgFeo17lkltrtn7X37vagKKWJABvyfsJXTlHe6XBzugmYgd4A4nW+k8Mixw==", "dev": true, "license": "MIT", "dependencies": { "@eslint-community/regexpp": "^4.10.0", - "@typescript-eslint/scope-manager": "8.32.1", - "@typescript-eslint/type-utils": "8.32.1", - "@typescript-eslint/utils": "8.32.1", - "@typescript-eslint/visitor-keys": "8.32.1", + "@typescript-eslint/scope-manager": "8.40.0", + "@typescript-eslint/type-utils": "8.40.0", + "@typescript-eslint/utils": "8.40.0", + "@typescript-eslint/visitor-keys": "8.40.0", "graphemer": "^1.4.0", "ignore": "^7.0.0", "natural-compare": "^1.4.0", @@ -4024,15 +4024,15 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "@typescript-eslint/parser": "^8.0.0 || ^8.0.0-alpha.0", + "@typescript-eslint/parser": "^8.40.0", "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/eslint-plugin/node_modules/ignore": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.4.tgz", - "integrity": "sha512-gJzzk+PQNznz8ysRrC0aOkBNVRBDtE1n53IqyqEf3PXrYwomFs5q4pGMizBMJF+ykh03insJ27hB8gSrD2Hn8A==", + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", + "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", "dev": true, "license": "MIT", "engines": { @@ -4040,16 +4040,16 @@ } }, "node_modules/@typescript-eslint/parser": { - "version": "8.32.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.32.1.tgz", - "integrity": "sha512-LKMrmwCPoLhM45Z00O1ulb6jwyVr2kr3XJp+G+tSEZcbauNnScewcQwtJqXDhXeYPDEjZ8C1SjXm015CirEmGg==", + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.40.0.tgz", + "integrity": "sha512-jCNyAuXx8dr5KJMkecGmZ8KI61KBUhkCob+SD+C+I5+Y1FWI2Y3QmY4/cxMCC5WAsZqoEtEETVhUiUMIGCf6Bw==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/scope-manager": "8.32.1", - "@typescript-eslint/types": "8.32.1", - "@typescript-eslint/typescript-estree": "8.32.1", - "@typescript-eslint/visitor-keys": "8.32.1", + "@typescript-eslint/scope-manager": "8.40.0", + "@typescript-eslint/types": "8.40.0", + "@typescript-eslint/typescript-estree": "8.40.0", + "@typescript-eslint/visitor-keys": "8.40.0", "debug": "^4.3.4" }, "engines": { @@ -4061,18 +4061,40 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, - "node_modules/@typescript-eslint/scope-manager": { - 
"version": "8.32.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.32.1.tgz", - "integrity": "sha512-7IsIaIDeZn7kffk7qXC3o6Z4UblZJKV3UBpkvRNpr5NSyLji7tvTcvmnMNYuYLyh26mN8W723xpo3i4MlD33vA==", + "node_modules/@typescript-eslint/project-service": { + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.40.0.tgz", + "integrity": "sha512-/A89vz7Wf5DEXsGVvcGdYKbVM9F7DyFXj52lNYUDS1L9yJfqjW/fIp5PgMuEJL/KeqVTe2QSbXAGUZljDUpArw==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.32.1", - "@typescript-eslint/visitor-keys": "8.32.1" + "@typescript-eslint/tsconfig-utils": "^8.40.0", + "@typescript-eslint/types": "^8.40.0", + "debug": "^4.3.4" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/@typescript-eslint/scope-manager": { + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.40.0.tgz", + "integrity": "sha512-y9ObStCcdCiZKzwqsE8CcpyuVMwRouJbbSrNuThDpv16dFAj429IkM6LNb1dZ2m7hK5fHyzNcErZf7CEeKXR4w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.40.0", + "@typescript-eslint/visitor-keys": "8.40.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -4082,15 +4104,33 @@ "url": "https://opencollective.com/typescript-eslint" } }, + "node_modules/@typescript-eslint/tsconfig-utils": { + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.40.0.tgz", + "integrity": "sha512-jtMytmUaG9d/9kqSl/W3E3xaWESo4hFDxAIHGVW/WKKtQhesnRIJSAJO6XckluuJ6KDB5woD1EiqknriCtAmcw==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <6.0.0" + } + }, "node_modules/@typescript-eslint/type-utils": { - "version": "8.32.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.32.1.tgz", - "integrity": "sha512-mv9YpQGA8iIsl5KyUPi+FGLm7+bA4fgXaeRcFKRDRwDMu4iwrSHeDPipwueNXhdIIZltwCJv+NkxftECbIZWfA==", + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.40.0.tgz", + "integrity": "sha512-eE60cK4KzAc6ZrzlJnflXdrMqOBaugeukWICO2rB0KNvwdIMaEaYiywwHMzA1qFpTxrLhN9Lp4E/00EgWcD3Ow==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/typescript-estree": "8.32.1", - "@typescript-eslint/utils": "8.32.1", + "@typescript-eslint/types": "8.40.0", + "@typescript-eslint/typescript-estree": "8.40.0", + "@typescript-eslint/utils": "8.40.0", "debug": "^4.3.4", "ts-api-utils": "^2.1.0" }, @@ -4103,13 +4143,13 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/types": { - "version": "8.32.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.32.1.tgz", - "integrity": "sha512-YmybwXUJcgGqgAp6bEsgpPXEg6dcCyPyCSr0CAAueacR/CCBi25G3V8gGQ2kRzQRBNol7VQknxMs9HvVa9Rvfg==", + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.40.0.tgz", + "integrity": 
"sha512-ETdbFlgbAmXHyFPwqUIYrfc12ArvpBhEVgGAxVYSwli26dn8Ko+lIo4Su9vI9ykTZdJn+vJprs/0eZU0YMAEQg==", "dev": true, "license": "MIT", "engines": { @@ -4121,14 +4161,16 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.32.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.32.1.tgz", - "integrity": "sha512-Y3AP9EIfYwBb4kWGb+simvPaqQoT5oJuzzj9m0i6FCY6SPvlomY2Ei4UEMm7+FXtlNJbor80ximyslzaQF6xhg==", + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.40.0.tgz", + "integrity": "sha512-k1z9+GJReVVOkc1WfVKs1vBrR5MIKKbdAjDTPvIK3L8De6KbFfPFt6BKpdkdk7rZS2GtC/m6yI5MYX+UsuvVYQ==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.32.1", - "@typescript-eslint/visitor-keys": "8.32.1", + "@typescript-eslint/project-service": "8.40.0", + "@typescript-eslint/tsconfig-utils": "8.40.0", + "@typescript-eslint/types": "8.40.0", + "@typescript-eslint/visitor-keys": "8.40.0", "debug": "^4.3.4", "fast-glob": "^3.3.2", "is-glob": "^4.0.3", @@ -4144,13 +4186,13 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", - "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", "dev": true, "license": "MIT", "dependencies": { @@ -4204,16 +4246,16 @@ } }, "node_modules/@typescript-eslint/utils": { - "version": "8.32.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.32.1.tgz", - "integrity": "sha512-DsSFNIgLSrc89gpq1LJB7Hm1YpuhK086DRDJSNrewcGvYloWW1vZLHBTIvarKZDcAORIy/uWNx8Gad+4oMpkSA==", + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.40.0.tgz", + "integrity": "sha512-Cgzi2MXSZyAUOY+BFwGs17s7ad/7L+gKt6Y8rAVVWS+7o6wrjeFN4nVfTpbE25MNcxyJ+iYUXflbs2xR9h4UBg==", "dev": true, "license": "MIT", "dependencies": { "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.32.1", - "@typescript-eslint/types": "8.32.1", - "@typescript-eslint/typescript-estree": "8.32.1" + "@typescript-eslint/scope-manager": "8.40.0", + "@typescript-eslint/types": "8.40.0", + "@typescript-eslint/typescript-estree": "8.40.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -4224,18 +4266,18 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.32.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.32.1.tgz", - "integrity": "sha512-ar0tjQfObzhSaW3C3QNmTc5ofj0hDoNQ5XWrCy6zDyabdr0TWhCkClp+rywGNj/odAFBVzzJrK4tEq5M4Hmu4w==", + "version": "8.40.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.40.0.tgz", + "integrity": "sha512-8CZ47QwalyRjsypfwnbI3hKy5gJDPmrkLjkgMxhi0+DZZ2QNx2naS6/hWoVYUHU7LU2zleF68V9miaVZvhFfTA==", "dev": true, "license": "MIT", "dependencies": { - 
"@typescript-eslint/types": "8.32.1", - "eslint-visitor-keys": "^4.2.0" + "@typescript-eslint/types": "8.40.0", + "eslint-visitor-keys": "^4.2.1" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -6741,9 +6783,9 @@ } }, "node_modules/eslint-visitor-keys": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.0.tgz", - "integrity": "sha512-UyLnSehNt62FFhSwjZlHmeokpRK59rcz29j+F1/aDgbkbRTk7wIc9XzdoasMUbRNKDM0qQt/+BJ4BrpFeABemw==", + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz", + "integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==", "dev": true, "license": "Apache-2.0", "engines": { @@ -13934,9 +13976,9 @@ } }, "node_modules/typescript": { - "version": "5.8.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", - "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", + "version": "5.9.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz", + "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==", "dev": true, "license": "Apache-2.0", "bin": { From 2fa189fe04baf7c8af347e47e0cd3059dff4a026 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:49:43 -0700 Subject: [PATCH 21/42] chore(github-deps): bump actions/setup-node from 4.1.0 to 4.4.0 (#3214) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/setup-node](https://github.com/actions/setup-node) from 4.1.0 to 4.4.0.
Release notes

Sourced from actions/setup-node's releases.

v4.4.0

What's Changed

Bug fixes:

Enhancement:

Dependency update:

New Contributors

Full Changelog: https://github.com/actions/setup-node/compare/v4...v4.4.0

v4.3.0

What's Changed

Dependency updates

New Contributors

Full Changelog: https://github.com/actions/setup-node/compare/v4...v4.3.0

v4.2.0

What's Changed

New Contributors

Full Changelog: https://github.com/actions/setup-node/compare/v4...v4.2.0

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-node&package-manager=github_actions&previous-version=4.1.0&new-version=4.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ui-unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml index 00c539c58..09bac8c7e 100644 --- a/.github/workflows/ui-unit-tests.yml +++ b/.github/workflows/ui-unit-tests.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Node.js - uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 + uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0 with: node-version: ${{ matrix.node-version }} cache: 'npm' From 886af85e0cffa9436ba9126f228a03047c6ebb95 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:50:00 -0700 Subject: [PATCH 22/42] chore(github-deps): bump amannn/action-semantic-pull-request from 5.5.3 to 6.1.0 (#3215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [amannn/action-semantic-pull-request](https://github.com/amannn/action-semantic-pull-request) from 5.5.3 to 6.1.0.
Release notes

Sourced from amannn/action-semantic-pull-request's releases.

v6.1.0

6.1.0 (2025-08-19)

Features

Bug Fixes

  • Remove trailing whitespace from "unknown release type" error message (#291) (afa4edb)

v6.0.1

6.0.1 (2025-08-13)

Bug Fixes

v6.0.0

6.0.0 (2025-08-13)

⚠ BREAKING CHANGES

  • Upgrade action to use Node.js 24 and ESM (#287)

Features

  • Upgrade action to use Node.js 24 and ESM (#287) (bc0c9a7)
Changelog

Sourced from amannn/action-semantic-pull-request's changelog.

Changelog

6.1.0 (2025-08-19)

Features

Bug Fixes

  • Remove trailing whitespace from "unknown release type" error message (#291) (afa4edb)

6.0.1 (2025-08-13)

Bug Fixes

6.0.0 (2025-08-13)

⚠ BREAKING CHANGES

  • Upgrade action to use Node.js 24 and ESM (#287)

Features

  • Upgrade action to use Node.js 24 and ESM (#287) (bc0c9a7)

5.5.3 (2024-06-28)

Bug Fixes

5.5.2 (2024-04-24)

Bug Fixes

5.5.1 (2024-04-24)

Bug Fixes

5.5.0 (2024-04-23)

... (truncated)

Commits
  • 7f33ba7 chore: Release 6.1.0 [skip ci]
  • afa4edb fix: Remove trailing whitespace from "unknown release type" error message (#291)
  • a30288b feat: Support providing regexps for types (#292)
  • a46a7c8 build: Move Vitest to devDependencies (#290)
  • fdd4d3d chore: Release 6.0.1 [skip ci]
  • 58e4ab4 fix: Actually execute action (#289)
  • 04a8d17 chore: Release 6.0.0 [skip ci]
  • bc0c9a7 feat!: Upgrade action to use Node.js 24 and ESM (#287)
  • 631ffdc build(deps): bump the github-action-workflows group with 2 updates (#286)
  • c1807ce build: configure Dependabot (#231)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=amannn/action-semantic-pull-request&package-manager=github_actions&previous-version=5.5.3&new-version=6.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/semantic-pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/semantic-pr.yml b/.github/workflows/semantic-pr.yml index 57a4df646..4adaca84d 100644 --- a/.github/workflows/semantic-pr.yml +++ b/.github/workflows/semantic-pr.yml @@ -22,6 +22,6 @@ jobs: runs-on: ubuntu-latest steps: - name: Check PR Title's semantic conformance - uses: amannn/action-semantic-pull-request@0723387faaf9b38adef4775cd42cfd5155ed6017 # v5.5.3 + uses: amannn/action-semantic-pull-request@7f33ba792281b034f64e96f4c0b5496782dd3b37 # v6.1.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From bd1a794add8ab151e20247825585c02d536b31a5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:50:34 -0700 Subject: [PATCH 23/42] chore(python-deps): bump llama-api-client from 0.1.2 to 0.2.0 (#3173) Bumps [llama-api-client](https://github.com/meta-llama/llama-api-python) from 0.1.2 to 0.2.0.
Release notes

Sourced from llama-api-client's releases.

v0.2.0

0.2.0 (2025-08-07)

Full Changelog: v0.1.2...v0.2.0

Features

  • clean up environment call outs (4afbd01)
  • client: support file upload requests (ec42e80)

Bug Fixes

  • api: remove chat completion request model (94c4e9f)
  • client: don't send Content-Type header on GET requests (efec88a)
  • parsing: correctly handle nested discriminated unions (b627686)
  • parsing: ignore empty metadata (d6ee851)
  • parsing: parse extra field types (f03ca22)

Chores

  • add examples (abfa065)
  • internal: bump pinned h11 dep (d40e1b1)
  • internal: fix ruff target version (c900ebc)
  • package: mark python 3.13 as supported (ef5bc36)
  • project: add settings file for vscode (e310380)
  • readme: fix version rendering on pypi (786f9fb)
  • sync repo (7e697f6)
  • update SDK settings (de22c0e)

Commits
  • 7a8c583 release: 0.2.0
  • 4f1a04e chore(internal): fix ruff target version
  • 06485e9 feat(client): support file upload requests
  • 131b474 chore(project): add settings file for vscode
  • ef4cee6 fix(parsing): parse extra field types
  • fcbc699 fix(parsing): ignore empty metadata
  • b6656cd fix(api): remove chat completion request model
  • 0deda55 feat: clean up environment call outs
  • ecf9102 fix(client): don't send Content-Type header on GET requests
  • 0ac6285 chore(readme): fix version rendering on pypi
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=llama-api-client&package-manager=uv&previous-version=0.1.2&new-version=0.2.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index d8b7318f1..5d30ad304 100644 --- a/uv.lock +++ b/uv.lock @@ -1689,7 +1689,7 @@ wheels = [ [[package]] name = "llama-api-client" -version = "0.1.2" +version = "0.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1699,9 +1699,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/78/875de3a16efd0442718ac47cc27319cd80cc5f38e12298e454e08611acc4/llama_api_client-0.1.2.tar.gz", hash = "sha256:709011f2d506009b1b3b3bceea1c84f2a3a7600df1420fb256e680fcd7251387", size = 113695, upload-time = "2025-06-27T19:56:14.057Z" } +sdist = { url = "https://files.pythonhosted.org/packages/59/41/fa8521a0faff96bf5f810e2ab5b78c638f5ba44afd09aa86f94b6a1226ad/llama_api_client-0.2.0.tar.gz", hash = "sha256:b9bd5f5ad332b9133f0775a105f0940f057cbb311891f1d4487247d001c31f17", size = 117108, upload-time = "2025-08-12T17:07:07.734Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/99/08/5d7e6e7e6af5353391376288c200acacebb8e6b156d3636eae598a451673/llama_api_client-0.1.2-py3-none-any.whl", hash = "sha256:8ad6e10726f74b2302bfd766c61c41355a9ecf60f57cde2961882d22af998941", size = 84091, upload-time = "2025-06-27T19:56:12.8Z" }, + { url = "https://files.pythonhosted.org/packages/1d/11/198e65c1a50d9e839b4e3d346b4bd0f624e532446e468d1aba6c74ed7484/llama_api_client-0.2.0-py3-none-any.whl", hash = "sha256:50614ed991e1a72439e6a624a97e6000615ada1b9e2046ecc026fe62f107663c", size = 85002, upload-time = "2025-08-12T17:07:06.293Z" }, ] [[package]] From 6a719716f23551740f6355a2cf7e82ec4775220c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:51:40 -0700 Subject: [PATCH 24/42] chore(github-deps): bump actions/checkout from 4.2.2 to 5.0.0 (#3178) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [//]: # (dependabot-start) ⚠️ **Dependabot is rebasing this PR** ⚠️ Rebasing might not happen immediately, so don't worry if this takes some time. Note: if you make any changes to this PR yourself, they will take precedence over the rebase. --- [//]: # (dependabot-end) Bumps [actions/checkout](https://github.com/actions/checkout) from 4.2.2 to 5.0.0.
Release notes

Sourced from actions/checkout's releases.

v5.0.0

What's Changed

⚠️ Minimum Compatible Runner Version: v2.327.1 (see the runner's Release Notes)

Make sure your runner is updated to this version or newer to use this release.

Full Changelog: https://github.com/actions/checkout/compare/v4...v5.0.0

v4.3.0

Full Changelog: https://github.com/actions/checkout/compare/v4...v4.3.0

Changelog

Sourced from actions/checkout's changelog.

Changelog

V5.0.0

V4.3.0

v4.2.2

v4.2.1

v4.2.0

v4.1.7

v4.1.6

v4.1.5

v4.1.4

v4.1.3

... (truncated)

Commits

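For reference, a minimal sketch of a workflow step using the new major version when pinning by tag; this is illustrative only, since this repository pins the exact v5.0.0 commit SHA instead, as the diff below shows:

```yaml
# Illustrative sketch: reference actions/checkout by its v5 major tag.
# Repositories that pin by commit SHA (as this one does) would use the
# v5.0.0 SHA with a trailing "# v5.0.0" comment instead.
steps:
  - name: Checkout repository
    uses: actions/checkout@v5
```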
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=4.2.2&new-version=5.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/changelog.yml | 2 +- .github/workflows/install-script-ci.yml | 4 ++-- .github/workflows/integration-auth-tests.yml | 2 +- .github/workflows/integration-sql-store-tests.yml | 2 +- .github/workflows/integration-tests.yml | 2 +- .github/workflows/integration-vector-io-tests.yml | 2 +- .github/workflows/pre-commit.yml | 2 +- .github/workflows/providers-build.yml | 10 +++++----- .github/workflows/python-build-test.yml | 2 +- .github/workflows/record-integration-tests.yml | 2 +- .github/workflows/test-external-provider-module.yml | 2 +- .github/workflows/test-external.yml | 2 +- .github/workflows/ui-unit-tests.yml | 2 +- .github/workflows/unit-tests.yml | 2 +- .github/workflows/update-readthedocs.yml | 2 +- 15 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index e406d99ee..7a75d85f6 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -17,7 +17,7 @@ jobs: pull-requests: write # for peter-evans/create-pull-request to create a PR runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: ref: main fetch-depth: 0 diff --git a/.github/workflows/install-script-ci.yml b/.github/workflows/install-script-ci.yml index 1ecda6d51..a37919f56 100644 --- a/.github/workflows/install-script-ci.yml +++ b/.github/workflows/install-script-ci.yml @@ -16,14 +16,14 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 - name: Run ShellCheck on install.sh run: shellcheck scripts/install.sh smoke-test-on-dev: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index c328e3b6c..6e84d94e0 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/integration-sql-store-tests.yml b/.github/workflows/integration-sql-store-tests.yml index 4e5b64963..485e546fa 100644 --- a/.github/workflows/integration-sql-store-tests.yml +++ b/.github/workflows/integration-sql-store-tests.yml @@ -44,7 +44,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index ba18c27c8..57e582b20 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -65,7 +65,7 @@ jobs: steps: - name: Checkout repository - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Setup test environment uses: ./.github/actions/setup-test-environment diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml index 61b8e004e..de5701073 100644 --- a/.github/workflows/integration-vector-io-tests.yml +++ b/.github/workflows/integration-vector-io-tests.yml @@ -33,7 +33,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 99e0d0043..194c362c4 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: # For dependabot PRs, we need to checkout with a token that can push changes token: ${{ github.actor == 'dependabot[bot]' && secrets.GITHUB_TOKEN || github.token }} diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 929d76760..461c25148 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -36,7 +36,7 @@ jobs: distros: ${{ steps.set-matrix.outputs.distros }} steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Generate Distribution List id: set-matrix @@ -55,7 +55,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -79,7 +79,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -92,7 +92,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -117,7 +117,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index fe1dfd58a..9eef7e9ba 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install uv uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 diff --git 
a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml index 22636f209..d4f5586e2 100644 --- a/.github/workflows/record-integration-tests.yml +++ b/.github/workflows/record-integration-tests.yml @@ -46,7 +46,7 @@ jobs: echo "::endgroup::" - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml index d61b0dfe9..8a757b068 100644 --- a/.github/workflows/test-external-provider-module.yml +++ b/.github/workflows/test-external-provider-module.yml @@ -27,7 +27,7 @@ jobs: # container and point 'uv pip install' to the correct path... steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml index b9db0ad51..7ee467451 100644 --- a/.github/workflows/test-external.yml +++ b/.github/workflows/test-external.yml @@ -27,7 +27,7 @@ jobs: # container and point 'uv pip install' to the correct path... steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml index 09bac8c7e..4b0d62e90 100644 --- a/.github/workflows/ui-unit-tests.yml +++ b/.github/workflows/ui-unit-tests.yml @@ -26,7 +26,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Setup Node.js uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index f2a6c7754..cce8d9ff6 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -32,7 +32,7 @@ jobs: - "3.13" steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/update-readthedocs.yml b/.github/workflows/update-readthedocs.yml index 1dcfdeca5..9ed89a271 100644 --- a/.github/workflows/update-readthedocs.yml +++ b/.github/workflows/update-readthedocs.yml @@ -37,7 +37,7 @@ jobs: TOKEN: ${{ secrets.READTHEDOCS_TOKEN }} steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner From 58e164b8bcbe6821b5a735ce52883ff7f27ff426 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:51:53 -0700 Subject: [PATCH 25/42] chore(github-deps): bump astral-sh/setup-uv from 6.4.3 to 6.5.0 (#3179) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps 
[astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 6.4.3 to 6.5.0.
Release notes

Sourced from astral-sh/setup-uv's releases.

v6.5.0 🌈 Better error messages, bug fixes and copilot agent settings

Changes

This release brings better error messages in case the GitHub API is impacted, fixes a few bugs, and allows disabling problem matchers for better use in Copilot Agent workspaces.

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=astral-sh/setup-uv&package-manager=github_actions&previous-version=6.4.3&new-version=6.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/python-build-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index 9eef7e9ba..9de53f7fb 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -24,7 +24,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install uv - uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 + uses: astral-sh/setup-uv@d9e0f98d3fc6adb07d1e3d37f3043649ddad06a1 # v6.5.0 with: python-version: ${{ matrix.python-version }} activate-environment: true From ac25e35124df747a11de0315a25854ee7bb34dc4 Mon Sep 17 00:00:00 2001 From: Sumanth Kamenani Date: Thu, 21 Aug 2025 17:23:27 -0400 Subject: [PATCH 26/42] feat: Add CORS configuration support for server (#3201) Adds flexible CORS (Cross-Origin Resource Sharing) configuration support to the FastAPI server with both local development and explicit configuration modes: - **Local development mode**: `cors: true` enables localhost-only access with regex pattern `https?://localhost:\d+` - **Explicit configuration mode**: Specific origins configuration with credential support and validation - Prevents insecure combinations (wildcards with credentials) - FastAPI CORSMiddleware integration via `model_dump()` Addresses the need for configurable CORS policies to support web frontends and cross-origin API access while maintaining security. Closes #2119 ## Test Plan 1. Ran Unit Tests. 2. Manual tests: FastAPI middleware integration with actual HTTP requests - Local development mode localhost access validation - Explicit configuration mode origins validation - Preflight OPTIONS request handling Some screenshots of manual tests. image image cc: @leseb @rhuss @franciscojavierarceo --- docs/source/distributions/configuration.md | 72 ++++++++++++++ llama_stack/core/datatypes.py | 41 ++++++++ llama_stack/core/server/server.py | 8 ++ tests/unit/server/test_cors.py | 105 +++++++++++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 tests/unit/server/test_cors.py diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 335fa3a68..c9677b3b6 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -225,8 +225,32 @@ server: port: 8321 # Port to listen on (default: 8321) tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS + cors: true # Optional: Enable CORS (dev mode) or full config object ``` +### CORS Configuration + +CORS (Cross-Origin Resource Sharing) can be configured in two ways: + +**Local development** (allows localhost origins only): +```yaml +server: + cors: true +``` + +**Explicit configuration** (custom origins and settings): +```yaml +server: + cors: + allow_origins: ["https://myapp.com", "https://app.example.com"] + allow_methods: ["GET", "POST", "PUT", "DELETE"] + allow_headers: ["Content-Type", "Authorization"] + allow_credentials: true + max_age: 3600 +``` + +When `cors: true`, the server enables secure localhost-only access for local development. For production, specify exact origins to maintain security. 
+ ### Authentication Configuration > **Breaking Change (v0.2.14)**: The authentication configuration structure has changed. The previous format with `provider_type` and `config` fields has been replaced with a unified `provider_config` field that includes the `type` field. Update your configuration files accordingly. @@ -618,6 +642,54 @@ Content-Type: application/json } ``` +### CORS Configuration + +Configure CORS to allow web browsers to make requests from different domains. Disabled by default. + +#### Quick Setup + +For development, use the simple boolean flag: + +```yaml +server: + cors: true # Auto-enables localhost with any port +``` + +This automatically allows `http://localhost:*` and `https://localhost:*` with secure defaults. + +#### Custom Configuration + +For specific origins and full control: + +```yaml +server: + cors: + allow_origins: ["https://myapp.com", "https://staging.myapp.com"] + allow_credentials: true + allow_methods: ["GET", "POST", "PUT", "DELETE"] + allow_headers: ["Content-Type", "Authorization"] + allow_origin_regex: "https://.*\\.example\\.com" # Optional regex pattern + expose_headers: ["X-Total-Count"] + max_age: 86400 +``` + +#### Configuration Options + +| Field | Description | Default | +| -------------------- | ---------------------------------------------- | ------- | +| `allow_origins` | List of allowed origins. Use `["*"]` for any. | `["*"]` | +| `allow_origin_regex` | Regex pattern for allowed origins (optional). | `None` | +| `allow_methods` | Allowed HTTP methods. | `["*"]` | +| `allow_headers` | Allowed headers. | `["*"]` | +| `allow_credentials` | Allow credentials (cookies, auth headers). | `false` | +| `expose_headers` | Headers exposed to browser. | `[]` | +| `max_age` | Preflight cache time (seconds). | `600` | + +**Security Notes**: +- `allow_credentials: true` requires explicit origins (no wildcards) +- `cors: true` enables localhost access only (secure for development) +- For public APIs, always specify exact allowed origins + ## Extending to handle Safety Configuring Safety can be a little involved so it is instructive to go through an example. 
diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index a1b6ad32b..c3940fcbd 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -318,6 +318,41 @@ class QuotaConfig(BaseModel): period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to set") +class CORSConfig(BaseModel): + allow_origins: list[str] = Field(default_factory=list) + allow_origin_regex: str | None = Field(default=None) + allow_methods: list[str] = Field(default=["OPTIONS"]) + allow_headers: list[str] = Field(default_factory=list) + allow_credentials: bool = Field(default=False) + expose_headers: list[str] = Field(default_factory=list) + max_age: int = Field(default=600, ge=0) + + @model_validator(mode="after") + def validate_credentials_config(self) -> Self: + if self.allow_credentials and (self.allow_origins == ["*"] or "*" in self.allow_origins): + raise ValueError("Cannot use wildcard origins with credentials enabled") + return self + + +def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | None: + if cors_config is False or cors_config is None: + return None + + if cors_config is True: + # dev mode: allow localhost on any port + return CORSConfig( + allow_origins=[], + allow_origin_regex=r"https?://localhost:\d+", + allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], + allow_headers=["Content-Type", "Authorization", "X-Requested-With"], + ) + + if isinstance(cors_config, CORSConfig): + return cors_config + + raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}") + + class ServerConfig(BaseModel): port: int = Field( default=8321, @@ -349,6 +384,12 @@ class ServerConfig(BaseModel): default=None, description="Per client quota request configuration", ) + cors: bool | CORSConfig | None = Field( + default=None, + description="CORS configuration for cross-origin requests. 
Can be:\n" + "- true: Enable localhost CORS for development\n" + "- {allow_origins: [...], allow_methods: [...], ...}: Full configuration", + ) class StackRunConfig(BaseModel): diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py index 3d94b6e81..350ce0052 100644 --- a/llama_stack/core/server/server.py +++ b/llama_stack/core/server/server.py @@ -28,6 +28,7 @@ from aiohttp import hdrs from fastapi import Body, FastAPI, HTTPException, Request, Response from fastapi import Path as FastapiPath from fastapi.exceptions import RequestValidationError +from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse from openai import BadRequestError from pydantic import BaseModel, ValidationError @@ -40,6 +41,7 @@ from llama_stack.core.datatypes import ( AuthenticationRequiredError, LoggingConfig, StackRunConfig, + process_cors_config, ) from llama_stack.core.distribution import builtin_automatically_routed_apis from llama_stack.core.external import ExternalApiSpec, load_external_apis @@ -483,6 +485,12 @@ def main(args: argparse.Namespace | None = None): window_seconds=window_seconds, ) + if config.server.cors: + logger.info("Enabling CORS") + cors_config = process_cors_config(config.server.cors) + if cors_config: + app.add_middleware(CORSMiddleware, **cors_config.model_dump()) + if Api.telemetry in impls: setup_logger(impls[Api.telemetry]) else: diff --git a/tests/unit/server/test_cors.py b/tests/unit/server/test_cors.py new file mode 100644 index 000000000..8fd2515ba --- /dev/null +++ b/tests/unit/server/test_cors.py @@ -0,0 +1,105 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import pytest + +from llama_stack.core.datatypes import CORSConfig, process_cors_config + + +def test_cors_config_defaults(): + config = CORSConfig() + + assert config.allow_origins == [] + assert config.allow_origin_regex is None + assert config.allow_methods == ["OPTIONS"] + assert config.allow_headers == [] + assert config.allow_credentials is False + assert config.expose_headers == [] + assert config.max_age == 600 + + +def test_cors_config_explicit_config(): + config = CORSConfig( + allow_origins=["https://example.com"], allow_credentials=True, max_age=3600, allow_methods=["GET", "POST"] + ) + + assert config.allow_origins == ["https://example.com"] + assert config.allow_credentials is True + assert config.max_age == 3600 + assert config.allow_methods == ["GET", "POST"] + + +def test_cors_config_regex(): + config = CORSConfig(allow_origins=[], allow_origin_regex=r"https?://localhost:\d+") + + assert config.allow_origins == [] + assert config.allow_origin_regex == r"https?://localhost:\d+" + + +def test_cors_config_wildcard_credentials_error(): + with pytest.raises(ValueError, match="Cannot use wildcard origins with credentials enabled"): + CORSConfig(allow_origins=["*"], allow_credentials=True) + + with pytest.raises(ValueError, match="Cannot use wildcard origins with credentials enabled"): + CORSConfig(allow_origins=["https://example.com", "*"], allow_credentials=True) + + +def test_process_cors_config_false(): + result = process_cors_config(False) + assert result is None + + +def test_process_cors_config_true(): + result = process_cors_config(True) + + assert isinstance(result, CORSConfig) + assert result.allow_origins == [] + assert result.allow_origin_regex == r"https?://localhost:\d+" + assert result.allow_credentials is False + expected_methods = ["GET", "POST", "PUT", "DELETE", "OPTIONS"] + for method in expected_methods: + assert method in result.allow_methods + + +def test_process_cors_config_passthrough(): + original = CORSConfig(allow_origins=["https://example.com"], allow_methods=["GET"]) + result = process_cors_config(original) + + assert result is original + + +def test_process_cors_config_invalid_type(): + with pytest.raises(ValueError, match="Expected bool or CORSConfig, got str"): + process_cors_config("invalid") + + +def test_cors_config_model_dump(): + cors_config = CORSConfig( + allow_origins=["https://example.com"], + allow_methods=["GET", "POST"], + allow_headers=["Content-Type"], + allow_credentials=True, + max_age=3600, + ) + + config_dict = cors_config.model_dump() + + assert config_dict["allow_origins"] == ["https://example.com"] + assert config_dict["allow_methods"] == ["GET", "POST"] + assert config_dict["allow_headers"] == ["Content-Type"] + assert config_dict["allow_credentials"] is True + assert config_dict["max_age"] == 3600 + + expected_keys = { + "allow_origins", + "allow_origin_regex", + "allow_methods", + "allow_headers", + "allow_credentials", + "expose_headers", + "max_age", + } + assert set(config_dict.keys()) == expected_keys From 1790fc0f250a8ec2e3ab9f06257bd24024ebeba2 Mon Sep 17 00:00:00 2001 From: Mustafa Elbehery Date: Fri, 22 Aug 2025 00:59:04 +0200 Subject: [PATCH 27/42] feat: Remove initialize() Method from LlamaStackAsLibrary (#2979) # What does this PR do? This PR removes `init()` from `LlamaStackAsLibrary` Currently client.initialize() had to be invoked by user. To improve dev experience and to avoid runtime errors, this PR init LlamaStackAsLibrary implicitly upon using the client. 
It prevents also multiple init of the same client, while maintaining backward ccompatibility. This PR does the following - Automatic Initialization: Constructor calls initialize_impl() automatically. - Client is fully initialized after __init__ completes. - Prevents consecutive initialization after the client has been successfully initialized. - initialize() method still exists but is now a no-op. fixes https://github.com/meta-llama/llama-stack/issues/2946 --------- Signed-off-by: Mustafa Elbehery --- .../distributions/importing_as_library.md | 2 - llama_stack/core/library_client.py | 48 ++++-- tests/integration/fixtures/common.py | 3 - .../non_ci/responses/fixtures/fixtures.py | 2 - .../test_library_client_initialization.py | 161 +++++++++++------- 5 files changed, 128 insertions(+), 88 deletions(-) diff --git a/docs/source/distributions/importing_as_library.md b/docs/source/distributions/importing_as_library.md index fbc48dd95..b9b4b065a 100644 --- a/docs/source/distributions/importing_as_library.md +++ b/docs/source/distributions/importing_as_library.md @@ -17,7 +17,6 @@ client = LlamaStackAsLibraryClient( # provider_data is optional, but if you need to pass in any provider specific data, you can do so here. provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]}, ) -client.initialize() ``` This will parse your config and set up any inline implementations and remote clients needed for your implementation. @@ -32,5 +31,4 @@ If you've created a [custom distribution](https://llama-stack.readthedocs.io/en/ ```python client = LlamaStackAsLibraryClient(config_path) -client.initialize() ``` diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index dd1fc8a50..9e7a8006c 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -146,39 +146,26 @@ class LlamaStackAsLibraryClient(LlamaStackClient): ): super().__init__() self.async_client = AsyncLlamaStackAsLibraryClient( - config_path_or_distro_name, custom_provider_registry, provider_data + config_path_or_distro_name, custom_provider_registry, provider_data, skip_logger_removal ) self.pool_executor = ThreadPoolExecutor(max_workers=4) - self.skip_logger_removal = skip_logger_removal self.provider_data = provider_data self.loop = asyncio.new_event_loop() - def initialize(self): - if in_notebook(): - import nest_asyncio - - nest_asyncio.apply() - if not self.skip_logger_removal: - self._remove_root_logger_handlers() - # use a new event loop to avoid interfering with the main event loop loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - return loop.run_until_complete(self.async_client.initialize()) + loop.run_until_complete(self.async_client.initialize()) finally: asyncio.set_event_loop(None) - def _remove_root_logger_handlers(self): + def initialize(self): """ - Remove all handlers from the root logger. Needed to avoid polluting the console with logs. + Deprecated method for backward compatibility. 
""" - root_logger = logging.getLogger() - - for handler in root_logger.handlers[:]: - root_logger.removeHandler(handler) - logger.info(f"Removed handler {handler.__class__.__name__} from root logger") + pass def request(self, *args, **kwargs): loop = self.loop @@ -216,6 +203,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): config_path_or_distro_name: str, custom_provider_registry: ProviderRegistry | None = None, provider_data: dict[str, Any] | None = None, + skip_logger_removal: bool = False, ): super().__init__() # when using the library client, we should not log to console since many @@ -223,6 +211,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): current_sinks = os.environ.get("TELEMETRY_SINKS", "sqlite").split(",") os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console") + if in_notebook(): + import nest_asyncio + + nest_asyncio.apply() + if not skip_logger_removal: + self._remove_root_logger_handlers() + if config_path_or_distro_name.endswith(".yaml"): config_path = Path(config_path_or_distro_name) if not config_path.exists(): @@ -239,7 +234,24 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): self.provider_data = provider_data self.route_impls: RouteImpls | None = None # Initialize to None to prevent AttributeError + def _remove_root_logger_handlers(self): + """ + Remove all handlers from the root logger. Needed to avoid polluting the console with logs. + """ + root_logger = logging.getLogger() + + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + logger.info(f"Removed handler {handler.__class__.__name__} from root logger") + async def initialize(self) -> bool: + """ + Initialize the async client. + + Returns: + bool: True if initialization was successful + """ + try: self.route_impls = None self.impls = await construct_stack(self.config, self.custom_provider_registry) diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 9cf56f6f5..ee4c5755a 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -256,9 +256,6 @@ def instantiate_llama_stack_client(session): provider_data=get_provider_data(), skip_logger_removal=True, ) - if not client.initialize(): - raise RuntimeError("Initialization failed") - return client diff --git a/tests/integration/non_ci/responses/fixtures/fixtures.py b/tests/integration/non_ci/responses/fixtures/fixtures.py index 62c4ae086..1783a5622 100644 --- a/tests/integration/non_ci/responses/fixtures/fixtures.py +++ b/tests/integration/non_ci/responses/fixtures/fixtures.py @@ -113,8 +113,6 @@ def openai_client(base_url, api_key, provider): raise ValueError(f"Invalid config for Llama Stack: {provider}, it must be of the form 'stack:'") config = parts[1] client = LlamaStackAsLibraryClient(config, skip_logger_removal=True) - if not client.initialize(): - raise RuntimeError("Initialization failed") return client return OpenAI( diff --git a/tests/unit/distribution/test_library_client_initialization.py b/tests/unit/distribution/test_library_client_initialization.py index e510d513d..b7e7a1857 100644 --- a/tests/unit/distribution/test_library_client_initialization.py +++ b/tests/unit/distribution/test_library_client_initialization.py @@ -5,86 +5,121 @@ # the root directory of this source tree. """ -Unit tests for LlamaStackAsLibraryClient initialization error handling. +Unit tests for LlamaStackAsLibraryClient automatic initialization. 
-These tests ensure that users get proper error messages when they forget to call -initialize() on the library client, preventing AttributeError regressions. +These tests ensure that the library client is automatically initialized +and ready to use immediately after construction. """ -import pytest - from llama_stack.core.library_client import ( AsyncLlamaStackAsLibraryClient, LlamaStackAsLibraryClient, ) +from llama_stack.core.server.routes import RouteImpls -class TestLlamaStackAsLibraryClientInitialization: - """Test proper error handling for uninitialized library clients.""" +class TestLlamaStackAsLibraryClientAutoInitialization: + """Test automatic initialization of library clients.""" - @pytest.mark.parametrize( - "api_call", - [ - lambda client: client.models.list(), - lambda client: client.chat.completions.create(model="test", messages=[{"role": "user", "content": "test"}]), - lambda client: next( - client.chat.completions.create( - model="test", messages=[{"role": "user", "content": "test"}], stream=True - ) - ), - ], - ids=["models.list", "chat.completions.create", "chat.completions.create_stream"], - ) - def test_sync_client_proper_error_without_initialization(self, api_call): - """Test that sync client raises ValueError with helpful message when not initialized.""" - client = LlamaStackAsLibraryClient("nvidia") + def test_sync_client_auto_initialization(self, monkeypatch): + """Test that sync client is automatically initialized after construction.""" + # Mock the stack construction to avoid dependency issues + mock_impls = {} + mock_route_impls = RouteImpls({}) - with pytest.raises(ValueError) as exc_info: - api_call(client) + async def mock_construct_stack(config, custom_provider_registry): + return mock_impls - error_msg = str(exc_info.value) - assert "Client not initialized" in error_msg - assert "Please call initialize() first" in error_msg + def mock_initialize_route_impls(impls): + return mock_route_impls - @pytest.mark.parametrize( - "api_call", - [ - lambda client: client.models.list(), - lambda client: client.chat.completions.create(model="test", messages=[{"role": "user", "content": "test"}]), - ], - ids=["models.list", "chat.completions.create"], - ) - async def test_async_client_proper_error_without_initialization(self, api_call): - """Test that async client raises ValueError with helpful message when not initialized.""" - client = AsyncLlamaStackAsLibraryClient("nvidia") + monkeypatch.setattr("llama_stack.core.library_client.construct_stack", mock_construct_stack) + monkeypatch.setattr("llama_stack.core.library_client.initialize_route_impls", mock_initialize_route_impls) - with pytest.raises(ValueError) as exc_info: - await api_call(client) + client = LlamaStackAsLibraryClient("ci-tests") - error_msg = str(exc_info.value) - assert "Client not initialized" in error_msg - assert "Please call initialize() first" in error_msg + assert client.async_client.route_impls is not None - async def test_async_client_streaming_error_without_initialization(self): - """Test that async client streaming raises ValueError with helpful message when not initialized.""" - client = AsyncLlamaStackAsLibraryClient("nvidia") + async def test_async_client_auto_initialization(self, monkeypatch): + """Test that async client can be initialized and works properly.""" + # Mock the stack construction to avoid dependency issues + mock_impls = {} + mock_route_impls = RouteImpls({}) - with pytest.raises(ValueError) as exc_info: - stream = await client.chat.completions.create( - model="test", 
messages=[{"role": "user", "content": "test"}], stream=True - ) - await anext(stream) + async def mock_construct_stack(config, custom_provider_registry): + return mock_impls - error_msg = str(exc_info.value) - assert "Client not initialized" in error_msg - assert "Please call initialize() first" in error_msg + def mock_initialize_route_impls(impls): + return mock_route_impls - def test_route_impls_initialized_to_none(self): - """Test that route_impls is initialized to None to prevent AttributeError.""" - # Test sync client - sync_client = LlamaStackAsLibraryClient("nvidia") - assert sync_client.async_client.route_impls is None + monkeypatch.setattr("llama_stack.core.library_client.construct_stack", mock_construct_stack) + monkeypatch.setattr("llama_stack.core.library_client.initialize_route_impls", mock_initialize_route_impls) - # Test async client directly - async_client = AsyncLlamaStackAsLibraryClient("nvidia") - assert async_client.route_impls is None + client = AsyncLlamaStackAsLibraryClient("ci-tests") + + # Initialize the client + result = await client.initialize() + assert result is True + assert client.route_impls is not None + + def test_initialize_method_backward_compatibility(self, monkeypatch): + """Test that initialize() method still works for backward compatibility.""" + # Mock the stack construction to avoid dependency issues + mock_impls = {} + mock_route_impls = RouteImpls({}) + + async def mock_construct_stack(config, custom_provider_registry): + return mock_impls + + def mock_initialize_route_impls(impls): + return mock_route_impls + + monkeypatch.setattr("llama_stack.core.library_client.construct_stack", mock_construct_stack) + monkeypatch.setattr("llama_stack.core.library_client.initialize_route_impls", mock_initialize_route_impls) + + client = LlamaStackAsLibraryClient("ci-tests") + + result = client.initialize() + assert result is None + + result2 = client.initialize() + assert result2 is None + + async def test_async_initialize_method_idempotent(self, monkeypatch): + """Test that async initialize() method can be called multiple times safely.""" + mock_impls = {} + mock_route_impls = RouteImpls({}) + + async def mock_construct_stack(config, custom_provider_registry): + return mock_impls + + def mock_initialize_route_impls(impls): + return mock_route_impls + + monkeypatch.setattr("llama_stack.core.library_client.construct_stack", mock_construct_stack) + monkeypatch.setattr("llama_stack.core.library_client.initialize_route_impls", mock_initialize_route_impls) + + client = AsyncLlamaStackAsLibraryClient("ci-tests") + + result1 = await client.initialize() + assert result1 is True + + result2 = await client.initialize() + assert result2 is True + + def test_route_impls_automatically_set(self, monkeypatch): + """Test that route_impls is automatically set during construction.""" + mock_impls = {} + mock_route_impls = RouteImpls({}) + + async def mock_construct_stack(config, custom_provider_registry): + return mock_impls + + def mock_initialize_route_impls(impls): + return mock_route_impls + + monkeypatch.setattr("llama_stack.core.library_client.construct_stack", mock_construct_stack) + monkeypatch.setattr("llama_stack.core.library_client.initialize_route_impls", mock_initialize_route_impls) + + sync_client = LlamaStackAsLibraryClient("ci-tests") + assert sync_client.async_client.route_impls is not None From b72169ca47a3a586024fd20a72c2357e146cbb8e Mon Sep 17 00:00:00 2001 From: Jiayi Ni Date: Thu, 21 Aug 2025 15:59:39 -0700 Subject: [PATCH 28/42] docs: update the docs for 
NVIDIA Inference provider (#3227) # What does this PR do? - Documentation update and fix for the NVIDIA Inference provider. - Update the `run_moderation` for safety API with a `NotImplementedError` placeholder. Otherwise initialization NVIDIA inference client will raise an error. ## Test Plan N/A --- .../remote/inference/nvidia/NVIDIA.md | 72 +++++++++++++++++++ .../providers/remote/safety/nvidia/nvidia.py | 5 +- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 35d26fd0b..d96b29fef 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -41,6 +41,11 @@ client.initialize() ### Create Completion +> Note on Completion API +> +> The hosted NVIDIA Llama NIMs (e.g., `meta-llama/Llama-3.1-8B-Instruct`) with ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` does not support the ```completion``` method, while the locally deployed NIM does. + + ```python response = client.inference.completion( model_id="meta-llama/Llama-3.1-8B-Instruct", @@ -76,6 +81,73 @@ response = client.inference.chat_completion( print(f"Response: {response.completion_message.content}") ``` +### Tool Calling Example ### +```python +from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + +tool_definition = ToolDefinition( + tool_name="get_weather", + description="Get current weather information for a location", + parameters={ + "location": ToolParamDefinition( + param_type="string", + description="The city and state, e.g. San Francisco, CA", + required=True, + ), + "unit": ToolParamDefinition( + param_type="string", + description="Temperature unit (celsius or fahrenheit)", + required=False, + default="celsius", + ), + }, +) + +tool_response = client.inference.chat_completion( + model_id="meta-llama/Llama-3.1-8B-Instruct", + messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}], + tools=[tool_definition], +) + +print(f"Tool Response: {tool_response.completion_message.content}") +if tool_response.completion_message.tool_calls: + for tool_call in tool_response.completion_message.tool_calls: + print(f"Tool Called: {tool_call.tool_name}") + print(f"Arguments: {tool_call.arguments}") +``` + +### Structured Output Example +```python +from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType + +person_schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "occupation": {"type": "string"}, + }, + "required": ["name", "age", "occupation"], +} + +response_format = JsonSchemaResponseFormat( + type=ResponseFormatType.json_schema, json_schema=person_schema +) + +structured_response = client.inference.chat_completion( + model_id="meta-llama/Llama-3.1-8B-Instruct", + messages=[ + { + "role": "user", + "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. 
", + } + ], + response_format=response_format, +) + +print(f"Structured Response: {structured_response.completion_message.content}") +``` + ### Create Embeddings > Note on OpenAI embeddings compatibility > diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py index 0d8d8ba7a..787e924a0 100644 --- a/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -9,7 +9,7 @@ from typing import Any import requests from llama_stack.apis.inference import Message -from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel +from llama_stack.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel from llama_stack.apis.shields import Shield from llama_stack.log import get_logger from llama_stack.providers.datatypes import ShieldsProtocolPrivate @@ -67,6 +67,9 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): self.shield = NeMoGuardrails(self.config, shield.shield_id) return await self.shield.run(messages) + async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: + raise NotImplementedError("NVIDIA safety provider currently does not implement run_moderation") + class NeMoGuardrails: """ From 864610ca5c16b6c2507a4ae9031a482af2cfdb4f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 21 Aug 2025 16:05:25 -0700 Subject: [PATCH 29/42] fix(ci): make all CI workflows have the correct concurrency defn --- .github/workflows/integration-auth-tests.yml | 2 +- .github/workflows/integration-sql-store-tests.yml | 2 +- .github/workflows/pre-commit.yml | 2 +- .github/workflows/providers-build.yml | 2 +- .github/workflows/ui-unit-tests.yml | 2 +- .github/workflows/unit-tests.yml | 2 +- .github/workflows/update-readthedocs.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 6e84d94e0..6787806e9 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -18,7 +18,7 @@ on: - '.github/workflows/integration-auth-tests.yml' # This workflow concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/integration-sql-store-tests.yml b/.github/workflows/integration-sql-store-tests.yml index 485e546fa..3efd970e1 100644 --- a/.github/workflows/integration-sql-store-tests.yml +++ b/.github/workflows/integration-sql-store-tests.yml @@ -16,7 +16,7 @@ on: - '.github/workflows/integration-sql-store-tests.yml' # This workflow concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 194c362c4..4eeab1089 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -8,7 +8,7 @@ on: branches: [main] concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 461c25148..685dcdc82 100644 
--- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -26,7 +26,7 @@ on: - 'pyproject.toml' concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml index 4b0d62e90..2afb92bee 100644 --- a/.github/workflows/ui-unit-tests.yml +++ b/.github/workflows/ui-unit-tests.yml @@ -13,7 +13,7 @@ on: workflow_dispatch: concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index cce8d9ff6..dd2097a45 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -18,7 +18,7 @@ on: workflow_dispatch: concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }} cancel-in-progress: true jobs: diff --git a/.github/workflows/update-readthedocs.yml b/.github/workflows/update-readthedocs.yml index 9ed89a271..e12f0adf8 100644 --- a/.github/workflows/update-readthedocs.yml +++ b/.github/workflows/update-readthedocs.yml @@ -27,7 +27,7 @@ on: - '.github/workflows/update-readthedocs.yml' concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }} cancel-in-progress: true jobs: From deffaa9e4ef610bf666a88562ca102e3eb0c6f1f Mon Sep 17 00:00:00 2001 From: Jiayi Ni Date: Thu, 21 Aug 2025 16:19:51 -0700 Subject: [PATCH 30/42] fix: fix the error type in embedding test case (#3197) # What does this PR do? Currently the embedding integration test cases fail due to a misalignment in the error type. This PR fixes the embedding integration test by fixing the error type. 
## Test Plan ``` pytest -s -v tests/integration/inference/test_embedding.py --stack-config="inference=nvidia" --embedding-model="nvidia/llama-3.2-nv-embedqa-1b-v2" --env NVIDIA_API_KEY={nvidia_api_key} --env NVIDIA_BASE_URL="https://integrate.api.nvidia.com" ``` --- .../providers/remote/inference/nvidia/nvidia.py | 16 ++++++---------- tests/integration/inference/test_embedding.py | 17 ++++++++++++++--- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 7052cfb57..ec4cba742 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -7,7 +7,7 @@ import warnings from collections.abc import AsyncIterator -from openai import NOT_GIVEN, APIConnectionError, BadRequestError +from openai import NOT_GIVEN, APIConnectionError from llama_stack.apis.common.content_types import ( InterleavedContent, @@ -197,15 +197,11 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper): } extra_body["input_type"] = task_type_options[task_type] - try: - response = await self.client.embeddings.create( - model=provider_model_id, - input=input, - extra_body=extra_body, - ) - except BadRequestError as e: - raise ValueError(f"Failed to get embeddings: {e}") from e - + response = await self.client.embeddings.create( + model=provider_model_id, + input=input, + extra_body=extra_body, + ) # # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=list[float], ...)], ...) # -> diff --git a/tests/integration/inference/test_embedding.py b/tests/integration/inference/test_embedding.py index 075f927f7..e592a6b14 100644 --- a/tests/integration/inference/test_embedding.py +++ b/tests/integration/inference/test_embedding.py @@ -55,7 +55,7 @@ # import pytest -from llama_stack_client import BadRequestError +from llama_stack_client import BadRequestError as LlamaStackBadRequestError from llama_stack_client.types import EmbeddingsResponse from llama_stack_client.types.shared.interleaved_content import ( ImageContentItem, @@ -63,6 +63,9 @@ from llama_stack_client.types.shared.interleaved_content import ( ImageContentItemImageURL, TextContentItem, ) +from openai import BadRequestError as OpenAIBadRequestError + +from llama_stack.core.library_client import LlamaStackAsLibraryClient DUMMY_STRING = "hello" DUMMY_STRING2 = "world" @@ -203,7 +206,14 @@ def test_embedding_truncation_error( ): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - with pytest.raises(BadRequestError): + # Using LlamaStackClient from llama_stack_client will raise llama_stack_client.BadRequestError + # While using LlamaStackAsLibraryClient from llama_stack.distribution.library_client will raise the error that the backend raises + error_type = ( + OpenAIBadRequestError + if isinstance(llama_stack_client, LlamaStackAsLibraryClient) + else LlamaStackBadRequestError + ) + with pytest.raises(error_type): llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], @@ -283,7 +293,8 @@ def test_embedding_text_truncation_error( ): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - with pytest.raises(BadRequestError): + error_type = ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError + with 
pytest.raises(error_type): llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], From 4434fcc2c36ef2c8bc9bf21e6daf3a32fcfaa548 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 21 Aug 2025 16:37:05 -0700 Subject: [PATCH 31/42] fix(ci): small fixes to the provider build workflow --- .github/workflows/providers-build.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 685dcdc82..391acbcf8 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -106,6 +106,10 @@ jobs: - name: Inspect the container image entrypoint run: | IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1) + if [ -z "$IMAGE_ID" ]; then + echo "No image found" + exit 1 + fi entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID) echo "Entrypoint: $entrypoint" if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then @@ -140,6 +144,10 @@ jobs: - name: Inspect UBI9 image run: | IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1) + if [ -z "$IMAGE_ID" ]; then + echo "No image found" + exit 1 + fi entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID) echo "Entrypoint: $entrypoint" if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then From c3b2b069745b5947a98d986224fa9b9702addc9a Mon Sep 17 00:00:00 2001 From: Mustafa Elbehery Date: Fri, 22 Aug 2025 02:31:04 +0200 Subject: [PATCH 32/42] refactor(logging): rename llama_stack logger categories (#3065) # What does this PR do? This PR renames categories of llama_stack loggers. This PR aligns logging categories as per the package name, as well as reviews from initial https://github.com/meta-llama/llama-stack/pull/2868. This is a follow up to #3061. 
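As a usage sketch, module-level loggers keep the same `get_logger` call and only the `category` string changes to the package-aligned, `::`-separated names (the call signature and category values are taken from the diff below):

```python
# Minimal sketch of the renamed logger categories; the signature is the one
# already used throughout the codebase.
from llama_stack.log import get_logger

# Before this patch a router module used a broad category such as "core";
# after it, categories follow the package path, e.g. "core::routers".
logger = get_logger(name=__name__, category="core::routers")
logger.debug("router initialized")
```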
Replaces https://github.com/meta-llama/llama-stack/pull/2868 Part of https://github.com/meta-llama/llama-stack/issues/2865 cc @leseb @rhuss Signed-off-by: Mustafa Elbehery --- llama_stack/cli/stack/run.py | 2 +- llama_stack/core/routers/datasets.py | 2 +- llama_stack/core/routers/eval_scoring.py | 2 +- llama_stack/core/routers/inference.py | 2 +- llama_stack/core/routers/safety.py | 2 +- llama_stack/core/routers/tool_runtime.py | 2 +- llama_stack/core/routers/vector_io.py | 2 +- llama_stack/core/routing_tables/benchmarks.py | 2 +- llama_stack/core/routing_tables/common.py | 2 +- llama_stack/core/routing_tables/datasets.py | 2 +- llama_stack/core/routing_tables/models.py | 2 +- llama_stack/core/routing_tables/scoring_functions.py | 2 +- llama_stack/core/routing_tables/shields.py | 2 +- llama_stack/core/routing_tables/toolgroups.py | 2 +- llama_stack/core/routing_tables/vector_dbs.py | 2 +- llama_stack/core/server/auth.py | 2 +- llama_stack/core/server/auth_providers.py | 2 +- llama_stack/core/server/quota.py | 2 +- llama_stack/core/server/server.py | 4 ++-- llama_stack/core/store/registry.py | 2 +- llama_stack/core/utils/config_resolution.py | 2 +- llama_stack/models/llama/llama3/multimodal/model.py | 2 +- llama_stack/models/llama/llama3/tool_utils.py | 2 +- llama_stack/models/llama/llama4/quantization/loader.py | 2 +- llama_stack/models/llama/quantize_impls.py | 2 +- .../providers/inline/agents/meta_reference/agent_instance.py | 2 +- llama_stack/providers/inline/agents/meta_reference/agents.py | 2 +- .../providers/inline/agents/meta_reference/persistence.py | 2 +- .../agents/meta_reference/responses/openai_responses.py | 2 +- .../inline/agents/meta_reference/responses/streaming.py | 2 +- .../inline/agents/meta_reference/responses/tool_executor.py | 2 +- llama_stack/providers/inline/agents/meta_reference/safety.py | 2 +- llama_stack/providers/remote/inference/fireworks/fireworks.py | 2 +- .../providers/remote/inference/llama_openai_compat/llama.py | 2 +- llama_stack/providers/remote/inference/nvidia/nvidia.py | 2 +- llama_stack/providers/remote/inference/nvidia/utils.py | 2 +- llama_stack/providers/remote/inference/ollama/ollama.py | 2 +- llama_stack/providers/remote/inference/openai/openai.py | 2 +- llama_stack/providers/remote/inference/tgi/tgi.py | 2 +- llama_stack/providers/remote/inference/together/together.py | 2 +- llama_stack/providers/remote/inference/vllm/vllm.py | 2 +- llama_stack/providers/remote/post_training/nvidia/utils.py | 2 +- llama_stack/providers/remote/safety/bedrock/bedrock.py | 2 +- llama_stack/providers/remote/safety/nvidia/nvidia.py | 2 +- llama_stack/providers/remote/safety/sambanova/sambanova.py | 2 +- llama_stack/providers/remote/vector_io/chroma/chroma.py | 2 +- llama_stack/providers/remote/vector_io/milvus/milvus.py | 2 +- llama_stack/providers/remote/vector_io/pgvector/pgvector.py | 2 +- llama_stack/providers/remote/vector_io/qdrant/qdrant.py | 2 +- llama_stack/providers/remote/vector_io/weaviate/weaviate.py | 2 +- llama_stack/providers/utils/inference/embedding_mixin.py | 2 +- llama_stack/providers/utils/inference/litellm_openai_mixin.py | 2 +- llama_stack/providers/utils/inference/model_registry.py | 2 +- llama_stack/providers/utils/inference/openai_compat.py | 2 +- llama_stack/providers/utils/inference/openai_mixin.py | 2 +- llama_stack/providers/utils/inference/prompt_adapter.py | 2 +- llama_stack/providers/utils/kvstore/mongodb/mongodb.py | 2 +- llama_stack/providers/utils/kvstore/postgres/postgres.py | 2 +- 
.../providers/utils/memory/openai_vector_store_mixin.py | 2 +- llama_stack/providers/utils/memory/vector_store.py | 2 +- llama_stack/providers/utils/scheduler.py | 2 +- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py | 2 +- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py | 2 +- 63 files changed, 64 insertions(+), 64 deletions(-) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index c8ffce034..b32b8b3ae 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -15,7 +15,7 @@ from llama_stack.log import get_logger REPO_ROOT = Path(__file__).parent.parent.parent.parent -logger = get_logger(name=__name__, category="server") +logger = get_logger(name=__name__, category="cli") class StackRun(Subcommand): diff --git a/llama_stack/core/routers/datasets.py b/llama_stack/core/routers/datasets.py index d7984f729..2f1d5f78e 100644 --- a/llama_stack/core/routers/datasets.py +++ b/llama_stack/core/routers/datasets.py @@ -12,7 +12,7 @@ from llama_stack.apis.datasets import DatasetPurpose, DataSource from llama_stack.log import get_logger from llama_stack.providers.datatypes import RoutingTable -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routers") class DatasetIORouter(DatasetIO): diff --git a/llama_stack/core/routers/eval_scoring.py b/llama_stack/core/routers/eval_scoring.py index f7a17eecf..ffca81bf0 100644 --- a/llama_stack/core/routers/eval_scoring.py +++ b/llama_stack/core/routers/eval_scoring.py @@ -16,7 +16,7 @@ from llama_stack.apis.scoring import ( from llama_stack.log import get_logger from llama_stack.providers.datatypes import RoutingTable -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routers") class ScoringRouter(Scoring): diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index 6a3f07247..4b66601bb 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -65,7 +65,7 @@ from llama_stack.providers.datatypes import HealthResponse, HealthStatus, Routin from llama_stack.providers.utils.inference.inference_store import InferenceStore from llama_stack.providers.utils.telemetry.tracing import get_current_span -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="core::routers") class InferenceRouter(Inference): diff --git a/llama_stack/core/routers/safety.py b/llama_stack/core/routers/safety.py index 738ecded3..9ba3327f1 100644 --- a/llama_stack/core/routers/safety.py +++ b/llama_stack/core/routers/safety.py @@ -13,7 +13,7 @@ from llama_stack.apis.shields import Shield from llama_stack.log import get_logger from llama_stack.providers.datatypes import RoutingTable -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routers") class SafetyRouter(Safety): diff --git a/llama_stack/core/routers/tool_runtime.py b/llama_stack/core/routers/tool_runtime.py index 5a40bc0c5..fd606f33b 100644 --- a/llama_stack/core/routers/tool_runtime.py +++ b/llama_stack/core/routers/tool_runtime.py @@ -22,7 +22,7 @@ from llama_stack.log import get_logger from ..routing_tables.toolgroups import ToolGroupsRoutingTable -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routers") class ToolRuntimeRouter(ToolRuntime): diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py 
index 3d0996c49..786b0e391 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import ( from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routers") class VectorIORouter(VectorIO): diff --git a/llama_stack/core/routing_tables/benchmarks.py b/llama_stack/core/routing_tables/benchmarks.py index 74bee8040..c875dee5b 100644 --- a/llama_stack/core/routing_tables/benchmarks.py +++ b/llama_stack/core/routing_tables/benchmarks.py @@ -14,7 +14,7 @@ from llama_stack.log import get_logger from .common import CommonRoutingTableImpl -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routing_tables") class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py index 339ff6da4..e523746d8 100644 --- a/llama_stack/core/routing_tables/common.py +++ b/llama_stack/core/routing_tables/common.py @@ -23,7 +23,7 @@ from llama_stack.core.store import DistributionRegistry from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, RoutingTable -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routing_tables") def get_impl_api(p: Any) -> Api: diff --git a/llama_stack/core/routing_tables/datasets.py b/llama_stack/core/routing_tables/datasets.py index fc6a75df4..b129c9ec5 100644 --- a/llama_stack/core/routing_tables/datasets.py +++ b/llama_stack/core/routing_tables/datasets.py @@ -26,7 +26,7 @@ from llama_stack.log import get_logger from .common import CommonRoutingTableImpl -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routing_tables") class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): diff --git a/llama_stack/core/routing_tables/models.py b/llama_stack/core/routing_tables/models.py index 34c431e00..b6141efa9 100644 --- a/llama_stack/core/routing_tables/models.py +++ b/llama_stack/core/routing_tables/models.py @@ -17,7 +17,7 @@ from llama_stack.log import get_logger from .common import CommonRoutingTableImpl, lookup_model -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routing_tables") class ModelsRoutingTable(CommonRoutingTableImpl, Models): diff --git a/llama_stack/core/routing_tables/scoring_functions.py b/llama_stack/core/routing_tables/scoring_functions.py index 5874ba941..71e5bed63 100644 --- a/llama_stack/core/routing_tables/scoring_functions.py +++ b/llama_stack/core/routing_tables/scoring_functions.py @@ -19,7 +19,7 @@ from llama_stack.log import get_logger from .common import CommonRoutingTableImpl -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routing_tables") class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): diff --git a/llama_stack/core/routing_tables/shields.py b/llama_stack/core/routing_tables/shields.py index e08f35bfc..b1918d20a 100644 --- a/llama_stack/core/routing_tables/shields.py +++ b/llama_stack/core/routing_tables/shields.py @@ -15,7 +15,7 @@ from llama_stack.log import get_logger from .common import CommonRoutingTableImpl -logger = get_logger(name=__name__, category="core") 
+logger = get_logger(name=__name__, category="core::routing_tables") class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): diff --git a/llama_stack/core/routing_tables/toolgroups.py b/llama_stack/core/routing_tables/toolgroups.py index 6910b3906..eeea406c1 100644 --- a/llama_stack/core/routing_tables/toolgroups.py +++ b/llama_stack/core/routing_tables/toolgroups.py @@ -14,7 +14,7 @@ from llama_stack.log import get_logger from .common import CommonRoutingTableImpl -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routing_tables") def parse_toolgroup_from_toolgroup_name_pair(toolgroup_name_with_maybe_tool_name: str) -> str | None: diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_dbs.py index e8dc46997..00f71b4fe 100644 --- a/llama_stack/core/routing_tables/vector_dbs.py +++ b/llama_stack/core/routing_tables/vector_dbs.py @@ -30,7 +30,7 @@ from llama_stack.log import get_logger from .common import CommonRoutingTableImpl, lookup_model -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="core::routing_tables") class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): diff --git a/llama_stack/core/server/auth.py b/llama_stack/core/server/auth.py index e4fb4ff2b..c98d3bec0 100644 --- a/llama_stack/core/server/auth.py +++ b/llama_stack/core/server/auth.py @@ -15,7 +15,7 @@ from llama_stack.core.server.auth_providers import create_auth_provider from llama_stack.core.server.routes import find_matching_route, initialize_route_impls from llama_stack.log import get_logger -logger = get_logger(name=__name__, category="auth") +logger = get_logger(name=__name__, category="core::auth") class AuthenticationMiddleware: diff --git a/llama_stack/core/server/auth_providers.py b/llama_stack/core/server/auth_providers.py index 73d5581c2..a8af6f75a 100644 --- a/llama_stack/core/server/auth_providers.py +++ b/llama_stack/core/server/auth_providers.py @@ -23,7 +23,7 @@ from llama_stack.core.datatypes import ( ) from llama_stack.log import get_logger -logger = get_logger(name=__name__, category="auth") +logger = get_logger(name=__name__, category="core::auth") class AuthResponse(BaseModel): diff --git a/llama_stack/core/server/quota.py b/llama_stack/core/server/quota.py index 1cb850cde..693f224c3 100644 --- a/llama_stack/core/server/quota.py +++ b/llama_stack/core/server/quota.py @@ -15,7 +15,7 @@ from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl -logger = get_logger(name=__name__, category="quota") +logger = get_logger(name=__name__, category="core::server") class QuotaMiddleware: diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py index 350ce0052..d6dfc3435 100644 --- a/llama_stack/core/server/server.py +++ b/llama_stack/core/server/server.py @@ -84,7 +84,7 @@ from .quota import QuotaMiddleware REPO_ROOT = Path(__file__).parent.parent.parent.parent -logger = get_logger(name=__name__, category="server") +logger = get_logger(name=__name__, category="core::server") def warn_with_traceback(message, category, filename, lineno, file=None, line=None): @@ -415,7 +415,7 @@ def main(args: argparse.Namespace | None = None): config_contents = yaml.safe_load(fp) if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")): logger_config = 
LoggingConfig(**cfg) - logger = get_logger(name=__name__, category="server", config=logger_config) + logger = get_logger(name=__name__, category="core::server", config=logger_config) if args.env: for env_pair in args.env: try: diff --git a/llama_stack/core/store/registry.py b/llama_stack/core/store/registry.py index 4b60e1001..5f4abe9aa 100644 --- a/llama_stack/core/store/registry.py +++ b/llama_stack/core/store/registry.py @@ -16,7 +16,7 @@ from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig -logger = get_logger(__name__, category="core") +logger = get_logger(__name__, category="core::registry") class DistributionRegistry(Protocol): diff --git a/llama_stack/core/utils/config_resolution.py b/llama_stack/core/utils/config_resolution.py index 30cd71e15..182a571ee 100644 --- a/llama_stack/core/utils/config_resolution.py +++ b/llama_stack/core/utils/config_resolution.py @@ -10,7 +10,7 @@ from pathlib import Path from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.log import get_logger -logger = get_logger(name=__name__, category="config_resolution") +logger = get_logger(name=__name__, category="core") DISTRO_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "distributions" diff --git a/llama_stack/models/llama/llama3/multimodal/model.py b/llama_stack/models/llama/llama3/multimodal/model.py index 096156a5f..7b501eb0e 100644 --- a/llama_stack/models/llama/llama3/multimodal/model.py +++ b/llama_stack/models/llama/llama3/multimodal/model.py @@ -36,7 +36,7 @@ from .utils import get_negative_inf_value, to_2tuple MP_SCALE = 8 -logger = get_logger(name=__name__, category="models") +logger = get_logger(name=__name__, category="models::llama") def reduce_from_tensor_model_parallel_region(input_): diff --git a/llama_stack/models/llama/llama3/tool_utils.py b/llama_stack/models/llama/llama3/tool_utils.py index 574080184..d0e3e7671 100644 --- a/llama_stack/models/llama/llama3/tool_utils.py +++ b/llama_stack/models/llama/llama3/tool_utils.py @@ -11,7 +11,7 @@ from llama_stack.log import get_logger from ..datatypes import BuiltinTool, RecursiveType, ToolCall, ToolPromptFormat -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="models::llama") BUILTIN_TOOL_PATTERN = r'\b(?P\w+)\.call\(query="(?P[^"]*)"\)' CUSTOM_TOOL_CALL_PATTERN = re.compile(r"[^}]+)>(?P{.*?})") diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/llama_stack/models/llama/llama4/quantization/loader.py index 8220a9040..7557a8a64 100644 --- a/llama_stack/models/llama/llama4/quantization/loader.py +++ b/llama_stack/models/llama/llama4/quantization/loader.py @@ -18,7 +18,7 @@ from ...datatypes import QuantizationMode from ..model import Transformer, TransformerBlock from ..moe import MoE -log = get_logger(name=__name__, category="models") +log = get_logger(name=__name__, category="models::llama") def swiglu_wrapper_no_reduce( diff --git a/llama_stack/models/llama/quantize_impls.py b/llama_stack/models/llama/quantize_impls.py index 7fab2d3a6..0a205601f 100644 --- a/llama_stack/models/llama/quantize_impls.py +++ b/llama_stack/models/llama/quantize_impls.py @@ -9,7 +9,7 @@ import collections from llama_stack.log import get_logger -log = get_logger(name=__name__, category="llama") +log = get_logger(name=__name__, category="models::llama") try: import fbgemm_gpu.experimental.gen_ai # noqa: 
F401 diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 5f7c90879..fde38515b 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -84,7 +84,7 @@ MEMORY_QUERY_TOOL = "knowledge_search" WEB_SEARCH_TOOL = "web_search" RAG_TOOL_GROUP = "builtin::rag" -logger = get_logger(name=__name__, category="agents") +logger = get_logger(name=__name__, category="agents::meta_reference") class ChatAgent(ShieldRunnerMixin): diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 5794ad2c0..8bdde86b0 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -51,7 +51,7 @@ from .config import MetaReferenceAgentsImplConfig from .persistence import AgentInfo from .responses.openai_responses import OpenAIResponsesImpl -logger = get_logger(name=__name__, category="agents") +logger = get_logger(name=__name__, category="agents::meta_reference") class MetaReferenceAgentsImpl(Agents): diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py index c19051f86..3b7b4729c 100644 --- a/llama_stack/providers/inline/agents/meta_reference/persistence.py +++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py @@ -17,7 +17,7 @@ from llama_stack.core.request_headers import get_authenticated_user from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore -log = get_logger(name=__name__, category="agents") +log = get_logger(name=__name__, category="agents::meta_reference") class AgentSessionInfo(Session): diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index e528a4005..c632e61aa 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -41,7 +41,7 @@ from .utils import ( convert_response_text_to_chat_response_format, ) -logger = get_logger(name=__name__, category="responses") +logger = get_logger(name=__name__, category="openai::responses") class OpenAIResponsePreviousResponseWithInputItems(BaseModel): diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 0879e978a..3e69fa5cd 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -47,7 +47,7 @@ from llama_stack.log import get_logger from .types import ChatCompletionContext, ChatCompletionResult from .utils import convert_chat_choice_to_response_message, is_function_tool_call -logger = get_logger(name=__name__, category="responses") +logger = get_logger(name=__name__, category="agents::meta_reference") class StreamingResponseOrchestrator: diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py index 5b98b4f51..b028c018b 100644 --- 
a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -38,7 +38,7 @@ from llama_stack.log import get_logger from .types import ChatCompletionContext, ToolExecutionResult -logger = get_logger(name=__name__, category="responses") +logger = get_logger(name=__name__, category="agents::meta_reference") class ToolExecutor: diff --git a/llama_stack/providers/inline/agents/meta_reference/safety.py b/llama_stack/providers/inline/agents/meta_reference/safety.py index b8a5d8a95..8f3ecf5c9 100644 --- a/llama_stack/providers/inline/agents/meta_reference/safety.py +++ b/llama_stack/providers/inline/agents/meta_reference/safety.py @@ -11,7 +11,7 @@ from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel from llama_stack.log import get_logger from llama_stack.providers.utils.telemetry import tracing -log = get_logger(name=__name__, category="agents") +log = get_logger(name=__name__, category="agents::meta_reference") class SafetyException(Exception): # noqa: N818 diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index bd86f7238..e907e8ec6 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -65,7 +65,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import FireworksImplConfig from .models import MODEL_ENTRIES -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="inference::fireworks") class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData): diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index cfcfcbf90..f2069b5e5 100644 --- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -10,7 +10,7 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .models import MODEL_ENTRIES -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="inference::llama_openai_compat") class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin): diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index ec4cba742..a5475bc92 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -57,7 +57,7 @@ from .openai_utils import ( ) from .utils import _is_nvidia_hosted -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="inference::nvidia") class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper): diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/llama_stack/providers/remote/inference/nvidia/utils.py index 790bbafd1..b8431e859 100644 --- a/llama_stack/providers/remote/inference/nvidia/utils.py +++ b/llama_stack/providers/remote/inference/nvidia/utils.py @@ -10,7 +10,7 @@ from llama_stack.log import get_logger from . 
import NVIDIAConfig -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="inference::nvidia") def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index a93421536..d8b331ef7 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -85,7 +85,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .models import MODEL_ENTRIES -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="inference::ollama") class OllamaInferenceAdapter( diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index 1c72fa0bc..0f73c9321 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import OpenAIConfig from .models import MODEL_ENTRIES -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="inference::openai") # diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 9da961438..97c72d14c 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -58,7 +58,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig -log = get_logger(name=__name__, category="inference") +log = get_logger(name=__name__, category="inference::tgi") def build_hf_repo_model_entries(): diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index a06e4173b..54c76607f 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -61,7 +61,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import TogetherImplConfig from .models import MODEL_ENTRIES -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="inference::together") class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData): diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index ac626874c..234bec62c 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -85,7 +85,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import VLLMInferenceAdapterConfig -log = get_logger(name=__name__, category="inference") +log = get_logger(name=__name__, category="inference::vllm") def build_hf_repo_model_entries(): diff --git a/llama_stack/providers/remote/post_training/nvidia/utils.py b/llama_stack/providers/remote/post_training/nvidia/utils.py index 9a6c3b53c..162951ff3 100644 --- a/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -15,7 +15,7 @@ from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefa from .config import 
NvidiaPostTrainingConfig -logger = get_logger(name=__name__, category="integration") +logger = get_logger(name=__name__, category="post_training::nvidia") def warn_unsupported_params(config_dict: Any, supported_keys: set[str], config_name: str) -> None: diff --git a/llama_stack/providers/remote/safety/bedrock/bedrock.py b/llama_stack/providers/remote/safety/bedrock/bedrock.py index 1ca87ae3d..8855e02a4 100644 --- a/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ b/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -21,7 +21,7 @@ from llama_stack.providers.utils.bedrock.client import create_bedrock_client from .config import BedrockSafetyConfig -logger = get_logger(name=__name__, category="safety") +logger = get_logger(name=__name__, category="safety::bedrock") class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate): diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py index 787e924a0..65f901da2 100644 --- a/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -17,7 +17,7 @@ from llama_stack.providers.utils.inference.openai_compat import convert_message_ from .config import NVIDIASafetyConfig -logger = get_logger(name=__name__, category="safety") +logger = get_logger(name=__name__, category="safety::nvidia") class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py index 676ee7185..2beb5e0ea 100644 --- a/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -25,7 +25,7 @@ from llama_stack.providers.utils.inference.openai_compat import convert_message_ from .config import SambaNovaSafetyConfig -logger = get_logger(name=__name__, category="safety") +logger = get_logger(name=__name__, category="safety::sambanova") CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" 
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 0047e6055..a9ec644ef 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -33,7 +33,7 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig -log = get_logger(name=__name__, category="vector_io") +log = get_logger(name=__name__, category="vector_io::chroma") ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index 034ec331c..e07e8ff12 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -36,7 +36,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig -logger = get_logger(name=__name__, category="vector_io") +logger = get_logger(name=__name__, category="vector_io::milvus") VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::" diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index e829c9e72..1c8d361c2 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -34,7 +34,7 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import PGVectorVectorIOConfig -log = get_logger(name=__name__, category="vector_io") +log = get_logger(name=__name__, category="vector_io::pgvector") VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::" diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 8499ff997..0a0faa23a 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -36,7 +36,7 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig -log = get_logger(name=__name__, category="vector_io") +log = get_logger(name=__name__, category="vector_io::qdrant") CHUNK_ID_KEY = "_chunk_id" # KV store prefixes for vector databases diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index ddf95317b..59b6bf124 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -34,7 +34,7 @@ from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collecti from .config import WeaviateVectorIOConfig -log = get_logger(name=__name__, category="vector_io") +log = get_logger(name=__name__, category="vector_io::weaviate") VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::" diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py index 05886cdc8..65ba2854b 100644 --- a/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/llama_stack/providers/utils/inference/embedding_mixin.py @@ -28,7 +28,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import interleaved_con EMBEDDING_MODELS = {} 
-log = get_logger(name=__name__, category="inference") +log = get_logger(name=__name__, category="providers::utils") class SentenceTransformerEmbeddingMixin: diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index da2e634f6..880348805 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -54,7 +54,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="providers::utils") class LiteLLMOpenAIMixin( diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index ddb3bda8c..44add8f9e 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -17,7 +17,7 @@ from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="providers::utils") class RemoteInferenceProviderConfig(BaseModel): diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index eb32d2de9..55c2ac0ad 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -134,7 +134,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( decode_assistant_message, ) -logger = get_logger(name=__name__, category="inference") +logger = get_logger(name=__name__, category="providers::utils") class OpenAICompatCompletionChoiceDelta(BaseModel): diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py index 72286dffb..f60deee6e 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/llama_stack/providers/utils/inference/openai_mixin.py @@ -25,7 +25,7 @@ from llama_stack.apis.inference import ( from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params -logger = get_logger(name=__name__, category="core") +logger = get_logger(name=__name__, category="providers::utils") class OpenAIMixin(ABC): diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index bb9a91b97..a93326e41 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -58,7 +58,7 @@ from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal from llama_stack.providers.utils.inference import supported_inference_models -log = get_logger(name=__name__, category="inference") +log = get_logger(name=__name__, category="providers::utils") class ChatCompletionRequestWithRawContent(ChatCompletionRequest): diff --git a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py b/llama_stack/providers/utils/kvstore/mongodb/mongodb.py index af52f3708..bab87a4aa 100644 --- a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py +++ b/llama_stack/providers/utils/kvstore/mongodb/mongodb.py @@ -13,7 +13,7 @@ from llama_stack.providers.utils.kvstore import KVStore from ..config import 
MongoDBKVStoreConfig -log = get_logger(name=__name__, category="kvstore") +log = get_logger(name=__name__, category="providers::utils") class MongoDBKVStoreImpl(KVStore): diff --git a/llama_stack/providers/utils/kvstore/postgres/postgres.py b/llama_stack/providers/utils/kvstore/postgres/postgres.py index 021e90774..56d6dbb48 100644 --- a/llama_stack/providers/utils/kvstore/postgres/postgres.py +++ b/llama_stack/providers/utils/kvstore/postgres/postgres.py @@ -14,7 +14,7 @@ from llama_stack.log import get_logger from ..api import KVStore from ..config import PostgresKVStoreConfig -log = get_logger(name=__name__, category="kvstore") +log = get_logger(name=__name__, category="providers::utils") class PostgresKVStoreImpl(KVStore): diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 0775b31d1..3acdcf293 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -44,7 +44,7 @@ from llama_stack.providers.utils.memory.vector_store import ( make_overlapped_chunks, ) -logger = get_logger(name=__name__, category="memory") +logger = get_logger(name=__name__, category="providers::utils") # Constants for OpenAI vector stores CHUNK_MULTIPLIER = 5 diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index b5d82432d..b74080384 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -33,7 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id -log = get_logger(name=__name__, category="memory") +log = get_logger(name=__name__, category="providers::utils") class ChunkForDeletion(BaseModel): diff --git a/llama_stack/providers/utils/scheduler.py b/llama_stack/providers/utils/scheduler.py index 65c3d2898..146591b2f 100644 --- a/llama_stack/providers/utils/scheduler.py +++ b/llama_stack/providers/utils/scheduler.py @@ -17,7 +17,7 @@ from pydantic import BaseModel from llama_stack.log import get_logger -logger = get_logger(name=__name__, category="scheduler") +logger = get_logger(name=__name__, category="providers::utils") # TODO: revisit the list of possible statuses when defining a more coherent diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index ccc835768..867ba2f55 100644 --- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -17,7 +17,7 @@ from llama_stack.log import get_logger from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore from .sqlstore import SqlStoreType -logger = get_logger(name=__name__, category="authorized_sqlstore") +logger = get_logger(name=__name__, category="providers::utils") # Hardcoded copy of the default policy that our SQL filtering implements # WARNING: If default_policy() changes, this constant must be updated accordingly diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 7fa0cc755..f75c35314 100644 --- a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -30,7 +30,7 @@ from llama_stack.log import get_logger from 
.api import ColumnDefinition, ColumnType, SqlStore from .sqlstore import SqlAlchemySqlStoreConfig -logger = get_logger(name=__name__, category="sqlstore") +logger = get_logger(name=__name__, category="providers::utils") TYPE_MAPPING: dict[ColumnType, Any] = { ColumnType.INTEGER: Integer, From d78ac434bd8f4edc25ac2a64ed8a4e172c27ef6f Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 21 Aug 2025 19:11:03 -0600 Subject: [PATCH 33/42] feat(UI): Adding a session manager (#3203) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? - Introduces the Agent Session creation for the Playground and allows users to set tools - note tools are actually not usable yet and this is marked explicitly - this also caches sessions locally for faster loading on the UI and deletes them appropriately - allows users to easily create new sessions as well - Moved Model Configuration settings and "System Message" / Prompt to the left component - Added new logo and favicon - Added new typing animation when LLM is generating ### Create New Session Screenshot 2025-08-21 at 4 18
08 PM ### List of Sessions Screenshot 2025-08-21 at 4 18
56 PM ## Test Plan Unit tests added --------- Signed-off-by: Francisco Javier Arceo --- .../ui/app/chat-playground/page.test.tsx | 587 ++++++++ llama_stack/ui/app/chat-playground/page.tsx | 1229 +++++++++++++++-- llama_stack/ui/app/favicon.ico | Bin 25931 -> 0 bytes llama_stack/ui/app/globals.css | 41 + llama_stack/ui/app/layout.tsx | 3 + .../chat-playground/chat-message.tsx | 16 +- .../chat-playground/conversations.test.tsx | 345 +++++ .../chat-playground/conversations.tsx | 568 ++++++++ .../chat-playground/typing-indicator.tsx | 6 +- .../ui/components/layout/app-sidebar.tsx | 12 +- llama_stack/ui/public/favicon.ico | Bin 0 -> 4286 bytes llama_stack/ui/public/logo.webp | Bin 0 -> 19618 bytes 12 files changed, 2677 insertions(+), 130 deletions(-) create mode 100644 llama_stack/ui/app/chat-playground/page.test.tsx delete mode 100644 llama_stack/ui/app/favicon.ico create mode 100644 llama_stack/ui/components/chat-playground/conversations.test.tsx create mode 100644 llama_stack/ui/components/chat-playground/conversations.tsx create mode 100644 llama_stack/ui/public/favicon.ico create mode 100644 llama_stack/ui/public/logo.webp diff --git a/llama_stack/ui/app/chat-playground/page.test.tsx b/llama_stack/ui/app/chat-playground/page.test.tsx new file mode 100644 index 000000000..54c15f95a --- /dev/null +++ b/llama_stack/ui/app/chat-playground/page.test.tsx @@ -0,0 +1,587 @@ +import React from "react"; +import { + render, + screen, + fireEvent, + waitFor, + act, +} from "@testing-library/react"; +import "@testing-library/jest-dom"; +import ChatPlaygroundPage from "./page"; + +const mockClient = { + agents: { + list: jest.fn(), + create: jest.fn(), + retrieve: jest.fn(), + delete: jest.fn(), + session: { + list: jest.fn(), + create: jest.fn(), + delete: jest.fn(), + retrieve: jest.fn(), + }, + turn: { + create: jest.fn(), + }, + }, + models: { + list: jest.fn(), + }, + toolgroups: { + list: jest.fn(), + }, +}; + +jest.mock("@/hooks/use-auth-client", () => ({ + useAuthClient: jest.fn(() => mockClient), +})); + +jest.mock("@/components/chat-playground/chat", () => ({ + Chat: jest.fn( + ({ + className, + messages, + handleSubmit, + input, + handleInputChange, + isGenerating, + append, + suggestions, + }) => ( +
+
{messages.length}
+ + + {suggestions?.map((suggestion: string, index: number) => ( + + ))} +
+ ) + ), +})); + +jest.mock("@/components/chat-playground/conversations", () => ({ + SessionManager: jest.fn(({ selectedAgentId, onNewSession }) => ( +
+ {selectedAgentId && ( + <> +
{selectedAgentId}
+ + + )} +
+ )), + SessionUtils: { + saveCurrentSessionId: jest.fn(), + loadCurrentSessionId: jest.fn(), + loadCurrentAgentId: jest.fn(), + saveCurrentAgentId: jest.fn(), + clearCurrentSession: jest.fn(), + saveSessionData: jest.fn(), + loadSessionData: jest.fn(), + saveAgentConfig: jest.fn(), + loadAgentConfig: jest.fn(), + clearAgentCache: jest.fn(), + createDefaultSession: jest.fn(() => ({ + id: "test-session-123", + name: "Default Session", + messages: [], + selectedModel: "", + systemMessage: "You are a helpful assistant.", + agentId: "test-agent-123", + createdAt: Date.now(), + updatedAt: Date.now(), + })), + }, +})); + +const mockAgents = [ + { + agent_id: "agent_123", + agent_config: { + name: "Test Agent", + instructions: "You are a test assistant.", + }, + }, + { + agent_id: "agent_456", + agent_config: { + agent_name: "Another Agent", + instructions: "You are another assistant.", + }, + }, +]; + +const mockModels = [ + { + identifier: "test-model-1", + model_type: "llm", + }, + { + identifier: "test-model-2", + model_type: "llm", + }, +]; + +const mockToolgroups = [ + { + identifier: "builtin::rag", + provider_id: "test-provider", + type: "tool_group", + provider_resource_id: "test-resource", + }, +]; + +describe("ChatPlaygroundPage", () => { + beforeEach(() => { + jest.clearAllMocks(); + Element.prototype.scrollIntoView = jest.fn(); + mockClient.agents.list.mockResolvedValue({ data: mockAgents }); + mockClient.models.list.mockResolvedValue(mockModels); + mockClient.toolgroups.list.mockResolvedValue(mockToolgroups); + mockClient.agents.session.create.mockResolvedValue({ + session_id: "new-session-123", + }); + mockClient.agents.session.list.mockResolvedValue({ data: [] }); + mockClient.agents.session.retrieve.mockResolvedValue({ + session_id: "test-session", + session_name: "Test Session", + started_at: new Date().toISOString(), + turns: [], + }); // No turns by default + mockClient.agents.retrieve.mockResolvedValue({ + agent_id: "test-agent", + agent_config: { + toolgroups: ["builtin::rag"], + instructions: "Test instructions", + model: "test-model", + }, + }); + mockClient.agents.delete.mockResolvedValue(undefined); + }); + + describe("Agent Selector Rendering", () => { + test("shows agent selector when agents are available", async () => { + await act(async () => { + render(); + }); + + await waitFor(() => { + expect(screen.getByText("Agent Session:")).toBeInTheDocument(); + expect(screen.getAllByRole("combobox")).toHaveLength(2); + expect(screen.getByText("+ New Agent")).toBeInTheDocument(); + expect(screen.getByText("Clear Chat")).toBeInTheDocument(); + }); + }); + + test("does not show agent selector when no agents are available", async () => { + mockClient.agents.list.mockResolvedValue({ data: [] }); + + await act(async () => { + render(); + }); + + await waitFor(() => { + expect(screen.queryByText("Agent Session:")).not.toBeInTheDocument(); + expect(screen.getAllByRole("combobox")).toHaveLength(1); + expect(screen.getByText("+ New Agent")).toBeInTheDocument(); + expect(screen.queryByText("Clear Chat")).not.toBeInTheDocument(); + }); + }); + + test("does not show agent selector while loading", async () => { + mockClient.agents.list.mockImplementation(() => new Promise(() => {})); + + await act(async () => { + render(); + }); + + expect(screen.queryByText("Agent Session:")).not.toBeInTheDocument(); + expect(screen.getAllByRole("combobox")).toHaveLength(1); + expect(screen.getByText("+ New Agent")).toBeInTheDocument(); + expect(screen.queryByText("Clear Chat")).not.toBeInTheDocument(); 
+ }); + + test("shows agent options in selector", async () => { + await act(async () => { + render(); + }); + + await waitFor(() => { + const agentCombobox = screen.getAllByRole("combobox").find(element => { + return ( + element.textContent?.includes("Test Agent") || + element.textContent?.includes("Select Agent") + ); + }); + expect(agentCombobox).toBeDefined(); + fireEvent.click(agentCombobox!); + }); + + await waitFor(() => { + expect(screen.getAllByText("Test Agent")).toHaveLength(2); + expect(screen.getByText("Another Agent")).toBeInTheDocument(); + }); + }); + + test("displays agent ID when no name is available", async () => { + const agentWithoutName = { + agent_id: "agent_789", + agent_config: { + instructions: "You are an agent without a name.", + }, + }; + + mockClient.agents.list.mockResolvedValue({ data: [agentWithoutName] }); + + await act(async () => { + render(); + }); + + await waitFor(() => { + const agentCombobox = screen.getAllByRole("combobox").find(element => { + return ( + element.textContent?.includes("Agent agent_78") || + element.textContent?.includes("Select Agent") + ); + }); + expect(agentCombobox).toBeDefined(); + fireEvent.click(agentCombobox!); + }); + + await waitFor(() => { + expect(screen.getAllByText("Agent agent_78...")).toHaveLength(2); + }); + }); + }); + + describe("Agent Creation Modal", () => { + test("opens agent creation modal when + New Agent is clicked", async () => { + await act(async () => { + render(); + }); + + const newAgentButton = screen.getByText("+ New Agent"); + fireEvent.click(newAgentButton); + + expect(screen.getByText("Create New Agent")).toBeInTheDocument(); + expect(screen.getByText("Agent Name (optional)")).toBeInTheDocument(); + expect(screen.getAllByText("Model")).toHaveLength(2); + expect(screen.getByText("System Instructions")).toBeInTheDocument(); + expect(screen.getByText("Tools (optional)")).toBeInTheDocument(); + }); + + test("closes modal when Cancel is clicked", async () => { + await act(async () => { + render(); + }); + + const newAgentButton = screen.getByText("+ New Agent"); + fireEvent.click(newAgentButton); + + const cancelButton = screen.getByText("Cancel"); + fireEvent.click(cancelButton); + + expect(screen.queryByText("Create New Agent")).not.toBeInTheDocument(); + }); + + test("creates agent when Create Agent is clicked", async () => { + mockClient.agents.create.mockResolvedValue({ agent_id: "new-agent-123" }); + mockClient.agents.list + .mockResolvedValueOnce({ data: mockAgents }) + .mockResolvedValueOnce({ + data: [ + ...mockAgents, + { agent_id: "new-agent-123", agent_config: { name: "New Agent" } }, + ], + }); + + await act(async () => { + render(); + }); + + const newAgentButton = screen.getByText("+ New Agent"); + await act(async () => { + fireEvent.click(newAgentButton); + }); + + await waitFor(() => { + expect(screen.getByText("Create New Agent")).toBeInTheDocument(); + }); + + const nameInput = screen.getByPlaceholderText("My Custom Agent"); + await act(async () => { + fireEvent.change(nameInput, { target: { value: "Test Agent Name" } }); + }); + + const instructionsTextarea = screen.getByDisplayValue( + "You are a helpful assistant." 
+ ); + await act(async () => { + fireEvent.change(instructionsTextarea, { + target: { value: "Custom instructions" }, + }); + }); + + await waitFor(() => { + const modalModelSelectors = screen + .getAllByRole("combobox") + .filter(el => { + return ( + el.textContent?.includes("Select Model") || + el.closest('[class*="modal"]') || + el.closest('[class*="card"]') + ); + }); + expect(modalModelSelectors.length).toBeGreaterThan(0); + }); + + const modalModelSelectors = screen.getAllByRole("combobox").filter(el => { + return ( + el.textContent?.includes("Select Model") || + el.closest('[class*="modal"]') || + el.closest('[class*="card"]') + ); + }); + + await act(async () => { + fireEvent.click(modalModelSelectors[0]); + }); + + await waitFor(() => { + const modelOptions = screen.getAllByText("test-model-1"); + expect(modelOptions.length).toBeGreaterThan(0); + }); + + const modelOptions = screen.getAllByText("test-model-1"); + const dropdownOption = modelOptions.find( + option => + option.closest('[role="option"]') || + option.id?.includes("radix") || + option.getAttribute("aria-selected") !== null + ); + + await act(async () => { + fireEvent.click( + dropdownOption || modelOptions[modelOptions.length - 1] + ); + }); + + await waitFor(() => { + const createButton = screen.getByText("Create Agent"); + expect(createButton).not.toBeDisabled(); + }); + + const createButton = screen.getByText("Create Agent"); + await act(async () => { + fireEvent.click(createButton); + }); + + await waitFor(() => { + expect(mockClient.agents.create).toHaveBeenCalledWith({ + agent_config: { + model: expect.any(String), + instructions: "Custom instructions", + name: "Test Agent Name", + enable_session_persistence: true, + }, + }); + }); + + await waitFor(() => { + expect(screen.queryByText("Create New Agent")).not.toBeInTheDocument(); + }); + }); + }); + + describe("Agent Selection", () => { + test("creates default session when agent is selected", async () => { + await act(async () => { + render(); + }); + + await waitFor(() => { + // first agent should be auto-selected + expect(mockClient.agents.session.create).toHaveBeenCalledWith( + "agent_123", + { session_name: "Default Session" } + ); + }); + }); + + test("switches agent when different agent is selected", async () => { + await act(async () => { + render(); + }); + + await waitFor(() => { + const agentCombobox = screen.getAllByRole("combobox").find(element => { + return ( + element.textContent?.includes("Test Agent") || + element.textContent?.includes("Select Agent") + ); + }); + expect(agentCombobox).toBeDefined(); + fireEvent.click(agentCombobox!); + }); + + await waitFor(() => { + const anotherAgentOption = screen.getByText("Another Agent"); + fireEvent.click(anotherAgentOption); + }); + + expect(mockClient.agents.session.create).toHaveBeenCalledWith( + "agent_456", + { session_name: "Default Session" } + ); + }); + }); + + describe("Agent Deletion", () => { + test("shows delete button when multiple agents exist", async () => { + await act(async () => { + render(); + }); + + await waitFor(() => { + expect(screen.getByTitle("Delete current agent")).toBeInTheDocument(); + }); + }); + + test("hides delete button when only one agent exists", async () => { + mockClient.agents.list.mockResolvedValue({ + data: [mockAgents[0]], + }); + + await act(async () => { + render(); + }); + + await waitFor(() => { + expect( + screen.queryByTitle("Delete current agent") + ).not.toBeInTheDocument(); + }); + }); + + test("deletes agent and switches to another when confirmed", 
async () => { + global.confirm = jest.fn(() => true); + + await act(async () => { + render(); + }); + + await waitFor(() => { + expect(screen.getByTitle("Delete current agent")).toBeInTheDocument(); + }); + + mockClient.agents.delete.mockResolvedValue(undefined); + mockClient.agents.list.mockResolvedValueOnce({ data: mockAgents }); + mockClient.agents.list.mockResolvedValueOnce({ + data: [mockAgents[1]], + }); + + const deleteButton = screen.getByTitle("Delete current agent"); + await act(async () => { + deleteButton.click(); + }); + + await waitFor(() => { + expect(mockClient.agents.delete).toHaveBeenCalledWith("agent_123"); + expect(global.confirm).toHaveBeenCalledWith( + "Are you sure you want to delete this agent? This action cannot be undone and will delete all associated sessions." + ); + }); + + (global.confirm as jest.Mock).mockRestore(); + }); + + test("does not delete agent when cancelled", async () => { + global.confirm = jest.fn(() => false); + + await act(async () => { + render(); + }); + + await waitFor(() => { + expect(screen.getByTitle("Delete current agent")).toBeInTheDocument(); + }); + + const deleteButton = screen.getByTitle("Delete current agent"); + await act(async () => { + deleteButton.click(); + }); + + await waitFor(() => { + expect(global.confirm).toHaveBeenCalled(); + expect(mockClient.agents.delete).not.toHaveBeenCalled(); + }); + + (global.confirm as jest.Mock).mockRestore(); + }); + }); + + describe("Error Handling", () => { + test("handles agent loading errors gracefully", async () => { + mockClient.agents.list.mockRejectedValue( + new Error("Failed to load agents") + ); + const consoleSpy = jest + .spyOn(console, "error") + .mockImplementation(() => {}); + + await act(async () => { + render(); + }); + + await waitFor(() => { + expect(consoleSpy).toHaveBeenCalledWith( + "Error fetching agents:", + expect.any(Error) + ); + }); + + expect(screen.getByText("+ New Agent")).toBeInTheDocument(); + + consoleSpy.mockRestore(); + }); + + test("handles model loading errors gracefully", async () => { + mockClient.models.list.mockRejectedValue( + new Error("Failed to load models") + ); + const consoleSpy = jest + .spyOn(console, "error") + .mockImplementation(() => {}); + + await act(async () => { + render(); + }); + + await waitFor(() => { + expect(consoleSpy).toHaveBeenCalledWith( + "Error fetching models:", + expect.any(Error) + ); + }); + + consoleSpy.mockRestore(); + }); + }); +}); diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/llama_stack/ui/app/chat-playground/page.tsx index b8651aca0..f26791a41 100644 --- a/llama_stack/ui/app/chat-playground/page.tsx +++ b/llama_stack/ui/app/chat-playground/page.tsx @@ -1,6 +1,6 @@ "use client"; -import { useState, useEffect } from "react"; +import { useState, useEffect, useCallback, useRef } from "react"; import { flushSync } from "react-dom"; import { Button } from "@/components/ui/button"; import { @@ -10,14 +10,22 @@ import { SelectTrigger, SelectValue, } from "@/components/ui/select"; +import { Card } from "@/components/ui/card"; +import { Input } from "@/components/ui/input"; +import { Trash2 } from "lucide-react"; import { Chat } from "@/components/chat-playground/chat"; import { type Message } from "@/components/chat-playground/chat-message"; import { useAuthClient } from "@/hooks/use-auth-client"; -import type { CompletionCreateParams } from "llama-stack-client/resources/chat/completions"; import type { Model } from "llama-stack-client/resources/models"; - +import type { TurnCreateParams } from 
"llama-stack-client/resources/agents/turn"; +import { + SessionUtils, + type ChatSession, +} from "@/components/chat-playground/conversations"; export default function ChatPlaygroundPage() { - const [messages, setMessages] = useState([]); + const [currentSession, setCurrentSession] = useState( + null + ); const [input, setInput] = useState(""); const [isGenerating, setIsGenerating] = useState(false); const [error, setError] = useState(null); @@ -25,10 +33,523 @@ export default function ChatPlaygroundPage() { const [selectedModel, setSelectedModel] = useState(""); const [modelsLoading, setModelsLoading] = useState(true); const [modelsError, setModelsError] = useState(null); + const [agents, setAgents] = useState< + Array<{ + agent_id: string; + agent_config?: { + agent_name?: string; + name?: string; + instructions?: string; + }; + [key: string]: unknown; + }> + >([]); + const [selectedAgentConfig, setSelectedAgentConfig] = useState<{ + toolgroups?: Array< + string | { name: string; args: Record } + >; + } | null>(null); + const [selectedAgentId, setSelectedAgentId] = useState(""); + const [agentsLoading, setAgentsLoading] = useState(true); + const [showCreateAgent, setShowCreateAgent] = useState(false); + const [newAgentName, setNewAgentName] = useState(""); + const [newAgentInstructions, setNewAgentInstructions] = useState( + "You are a helpful assistant." + ); + const [selectedToolgroups, setSelectedToolgroups] = useState([]); + const [availableToolgroups, setAvailableToolgroups] = useState< + Array<{ + identifier: string; + provider_id: string; + type: string; + provider_resource_id?: string; + }> + >([]); const client = useAuthClient(); + const abortControllerRef = useRef(null); const isModelsLoading = modelsLoading ?? true; + const loadAgentConfig = useCallback( + async (agentId: string) => { + try { + console.log("Loading agent config for:", agentId); + + // try to load from cache first + const cachedConfig = SessionUtils.loadAgentConfig(agentId); + if (cachedConfig) { + console.log("✅ Loaded agent config from cache:", cachedConfig); + setSelectedAgentConfig({ + toolgroups: cachedConfig.toolgroups, + }); + return; + } + + console.log("📡 Fetching agent config from API..."); + const agentDetails = await client.agents.retrieve(agentId); + console.log("Agent details retrieved:", agentDetails); + console.log("Agent config:", agentDetails.agent_config); + console.log("Agent toolgroups:", agentDetails.agent_config?.toolgroups); + + // cache the config + SessionUtils.saveAgentConfig(agentId, agentDetails.agent_config); + + setSelectedAgentConfig({ + toolgroups: agentDetails.agent_config?.toolgroups, + }); + } catch (error) { + console.error("Error loading agent config:", error); + setSelectedAgentConfig(null); + } + }, + [client] + ); + + const createDefaultSession = useCallback( + async (agentId: string) => { + try { + const response = await client.agents.session.create(agentId, { + session_name: "Default Session", + }); + + const defaultSession: ChatSession = { + id: response.session_id, + name: "Default Session", + messages: [], + selectedModel: selectedModel, // Use current selected model + systemMessage: "You are a helpful assistant.", + agentId, + createdAt: Date.now(), + updatedAt: Date.now(), + }; + + setCurrentSession(defaultSession); + console.log( + `💾 Saving default session ID for agent ${agentId}:`, + defaultSession.id + ); + SessionUtils.saveCurrentSessionId(defaultSession.id, agentId); + // cache entire session data + SessionUtils.saveSessionData(agentId, defaultSession); + } 
catch (error) { + console.error("Error creating default session:", error); + } + }, + [client, selectedModel] + ); + + const loadSessionMessages = useCallback( + async (agentId: string, sessionId: string): Promise => { + try { + const session = await client.agents.session.retrieve( + agentId, + sessionId + ); + + if (!session || !session.turns || !Array.isArray(session.turns)) { + return []; + } + + const messages: Message[] = []; + for (const turn of session.turns) { + // add user messages + if (turn.input_messages && Array.isArray(turn.input_messages)) { + for (const input of turn.input_messages) { + if (input.role === "user" && input.content) { + messages.push({ + id: `${turn.turn_id}-user-${messages.length}`, + role: "user", + content: + typeof input.content === "string" + ? input.content + : JSON.stringify(input.content), + createdAt: new Date(turn.started_at || Date.now()), + }); + } + } + } + + // add assistant message from output_message + if (turn.output_message && turn.output_message.content) { + messages.push({ + id: `${turn.turn_id}-assistant-${messages.length}`, + role: "assistant", + content: + typeof turn.output_message.content === "string" + ? turn.output_message.content + : JSON.stringify(turn.output_message.content), + createdAt: new Date( + turn.completed_at || turn.started_at || Date.now() + ), + }); + } + } + + return messages; + } catch (error) { + console.error("Error loading session messages:", error); + return []; + } + }, + [client] + ); + + const loadAgentSessions = useCallback( + async (agentId: string) => { + try { + console.log("Loading sessions for agent:", agentId); + const response = await client.agents.session.list(agentId); + console.log("Available sessions:", response.data); + + if ( + response.data && + Array.isArray(response.data) && + response.data.length > 0 + ) { + // check for a previously saved session ID for this specific agent + const savedSessionId = SessionUtils.loadCurrentSessionId(agentId); + console.log(`Saved session ID for agent ${agentId}:`, savedSessionId); + + // try to load cached session data first + if (savedSessionId) { + const cachedSession = SessionUtils.loadSessionData( + agentId, + savedSessionId + ); + if (cachedSession) { + console.log("✅ Loaded session from cache:", cachedSession.id); + setCurrentSession(cachedSession); + SessionUtils.saveCurrentSessionId(cachedSession.id, agentId); + return; + } + console.log("📡 Cache miss, fetching session from API..."); + } + + let sessionToLoad = response.data[0] as { + session_id: string; + session_name?: string; + started_at?: string; + }; + console.log( + "Default session to load (first in list):", + sessionToLoad.session_id + ); + + // try to find saved session id in available sessions + if (savedSessionId) { + const foundSession = response.data.find( + (s: { session_id: string }) => s.session_id === savedSessionId + ); + console.log("Found saved session in list:", foundSession); + if (foundSession) { + sessionToLoad = foundSession as { + session_id: string; + session_name?: string; + started_at?: string; + }; + console.log( + "✅ Restored previously selected session:", + savedSessionId + ); + } else { + console.log( + "❌ Previously selected session not found, using latest session" + ); + } + } else { + console.log("❌ No saved session ID found, using latest session"); + } + + const messages = await loadSessionMessages( + agentId, + sessionToLoad.session_id + ); + + const session: ChatSession = { + id: sessionToLoad.session_id, + name: sessionToLoad.session_name || "Session", + 
messages, + selectedModel: selectedModel || "", // Preserve current model or use empty + systemMessage: "You are a helpful assistant.", + agentId, + createdAt: sessionToLoad.started_at + ? new Date(sessionToLoad.started_at).getTime() + : Date.now(), + updatedAt: Date.now(), + }; + + setCurrentSession(session); + console.log(`💾 Saving session ID for agent ${agentId}:`, session.id); + SessionUtils.saveCurrentSessionId(session.id, agentId); + // cache session data + SessionUtils.saveSessionData(agentId, session); + } else { + // no sessions, create a new one + await createDefaultSession(agentId); + } + } catch (error) { + console.error("Error loading agent sessions:", error); + // fallback to creating a new session + await createDefaultSession(agentId); + } + }, + [client, loadSessionMessages, createDefaultSession, selectedModel] + ); + + useEffect(() => { + const fetchAgents = async () => { + try { + setAgentsLoading(true); + const agentList = await client.agents.list(); + setAgents( + (agentList.data as Array<{ + agent_id: string; + agent_config?: { + agent_name?: string; + name?: string; + instructions?: string; + }; + [key: string]: unknown; + }>) || [] + ); + + if (agentList.data && agentList.data.length > 0) { + // check if there's a previously selected agent + const savedAgentId = SessionUtils.loadCurrentAgentId(); + + let agentToSelect = agentList.data[0] as { + agent_id: string; + agent_config?: { + agent_name?: string; + name?: string; + instructions?: string; + }; + [key: string]: unknown; + }; + + // if we have a saved agent ID, find it in the available agents + if (savedAgentId) { + const foundAgent = agentList.data.find( + (a: { agent_id: string }) => a.agent_id === savedAgentId + ); + if (foundAgent) { + agentToSelect = foundAgent as typeof agentToSelect; + } else { + console.log("Previously slelected agent not found:"); + } + } + setSelectedAgentId(agentToSelect.agent_id); + SessionUtils.saveCurrentAgentId(agentToSelect.agent_id); + // load agent config immediately + await loadAgentConfig(agentToSelect.agent_id); + // Note: loadAgentSessions will be called after models are loaded + } + } catch (error) { + console.error("Error fetching agents:", error); + } finally { + setAgentsLoading(false); + } + }; + + fetchAgents(); + + // fetch available toolgroups + const fetchToolgroups = async () => { + try { + console.log("Fetching toolgroups..."); + const toolgroups = await client.toolgroups.list(); + console.log("Toolgroups response:", toolgroups); + + // The client returns data directly, not wrapped in .data + const toolGroupsArray = Array.isArray(toolgroups) + ? toolgroups + : toolgroups && + typeof toolgroups === "object" && + "data" in toolgroups && + Array.isArray((toolgroups as { data: unknown }).data) + ? 
( + toolgroups as { + data: Array<{ + identifier: string; + provider_id: string; + type: string; + provider_resource_id?: string; + }>; + } + ).data + : []; + + if (toolGroupsArray && Array.isArray(toolGroupsArray)) { + setAvailableToolgroups(toolGroupsArray); + console.log("Set toolgroups:", toolGroupsArray); + } else { + console.error("Invalid toolgroups data format:", toolgroups); + } + } catch (error) { + console.error("Error fetching toolgroups:", error); + if (error instanceof Error) { + console.error("Error details:", { + name: error.name, + message: error.message, + stack: error.stack, + }); + } + } + }; + + fetchToolgroups(); + }, [client, loadAgentSessions, loadAgentConfig]); + + const createNewAgent = useCallback( + async ( + name: string, + instructions: string, + model: string, + toolgroups: string[] = [] + ) => { + try { + console.log("Creating agent with toolgroups:", toolgroups); + const agentConfig = { + model, + instructions, + name: name || undefined, + enable_session_persistence: true, + toolgroups: toolgroups.length > 0 ? toolgroups : undefined, + }; + console.log("Agent config being sent:", agentConfig); + + const response = await client.agents.create({ + agent_config: agentConfig, + }); + + // refresh agents list + const agentList = await client.agents.list(); + setAgents( + (agentList.data as Array<{ + agent_id: string; + agent_config?: { + agent_name?: string; + name?: string; + instructions?: string; + }; + [key: string]: unknown; + }>) || [] + ); + + // set the new agent as selected + setSelectedAgentId(response.agent_id); + await loadAgentConfig(response.agent_id); + await loadAgentSessions(response.agent_id); + + return response.agent_id; + } catch (error) { + console.error("Error creating agent:", error); + throw error; + } + }, + [client, loadAgentSessions, loadAgentConfig] + ); + + const deleteAgent = useCallback( + async (agentId: string) => { + if (agents.length <= 1) { + return; + } + + if ( + confirm( + "Are you sure you want to delete this agent? This action cannot be undone and will delete all associated sessions." + ) + ) { + try { + await client.agents.delete(agentId); + + // clear cached data for agent + SessionUtils.clearAgentCache(agentId); + + // Refresh agents list + const agentList = await client.agents.list(); + setAgents( + (agentList.data as Array<{ + agent_id: string; + agent_config?: { + agent_name?: string; + name?: string; + instructions?: string; + }; + [key: string]: unknown; + }>) || [] + ); + + // if we deleted the current agent, switch to another one + if (selectedAgentId === agentId) { + const remainingAgents = agentList.data?.filter( + (a: { agent_id: string }) => a.agent_id !== agentId + ); + if (remainingAgents && remainingAgents.length > 0) { + const newAgent = remainingAgents[0] as { + agent_id: string; + agent_config?: { + agent_name?: string; + name?: string; + instructions?: string; + }; + [key: string]: unknown; + }; + setSelectedAgentId(newAgent.agent_id); + SessionUtils.saveCurrentAgentId(newAgent.agent_id); + await loadAgentConfig(newAgent.agent_id); + await loadAgentSessions(newAgent.agent_id); + } else { + // No agents left + setSelectedAgentId(""); + setCurrentSession(null); + setSelectedAgentConfig(null); + } + } + } catch (error) { + console.error("Error deleting agent:", error); + } + } + }, + [agents.length, client, selectedAgentId, loadAgentConfig, loadAgentSessions] + ); + + const handleModelChange = useCallback((newModel: string) => { + setSelectedModel(newModel); + setCurrentSession(prev => + prev + ? 
{ + ...prev, + selectedModel: newModel, + updatedAt: Date.now(), + } + : prev + ); + }, []); + + useEffect(() => { + if (currentSession) { + console.log( + `💾 Auto-saving session ID for agent ${currentSession.agentId}:`, + currentSession.id + ); + SessionUtils.saveCurrentSessionId( + currentSession.id, + currentSession.agentId + ); + // cache session data + SessionUtils.saveSessionData(currentSession.agentId, currentSession); + // only update selectedModel if the session has a valid model and it's different from current + if ( + currentSession.selectedModel && + currentSession.selectedModel !== selectedModel + ) { + setSelectedModel(currentSession.selectedModel); + } + } + }, [currentSession, selectedModel]); + useEffect(() => { const fetchModels = async () => { try { @@ -38,7 +559,7 @@ export default function ChatPlaygroundPage() { const llmModels = modelList.filter(model => model.model_type === "llm"); setModels(llmModels); if (llmModels.length > 0) { - setSelectedModel(llmModels[0].identifier); + handleModelChange(llmModels[0].identifier); } } catch (err) { console.error("Error fetching models:", err); @@ -49,39 +570,27 @@ export default function ChatPlaygroundPage() { }; fetchModels(); - }, [client]); + }, [client, handleModelChange]); - const extractTextContent = (content: unknown): string => { - if (typeof content === "string") { - return content; - } - if (Array.isArray(content)) { - return content - .filter( - item => - item && - typeof item === "object" && - "type" in item && - item.type === "text" - ) - .map(item => - item && typeof item === "object" && "text" in item - ? String(item.text) - : "" - ) - .join(""); - } + // load agent sessions after both agents and models are ready + useEffect(() => { if ( - content && - typeof content === "object" && - "type" in content && - content.type === "text" && - "text" in content + selectedAgentId && + !agentsLoading && + !modelsLoading && + selectedModel && + !currentSession ) { - return String(content.text) || ""; + loadAgentSessions(selectedAgentId); } - return ""; - }; + }, [ + selectedAgentId, + agentsLoading, + modelsLoading, + selectedModel, + currentSession, + loadAgentSessions, + ]); const handleInputChange = (e: React.ChangeEvent) => { setInput(e.target.value); @@ -91,7 +600,6 @@ export default function ChatPlaygroundPage() { event?.preventDefault?.(); if (!input.trim()) return; - // Add user message to chat const userMessage: Message = { id: Date.now().toString(), role: "user", @@ -99,40 +607,54 @@ export default function ChatPlaygroundPage() { createdAt: new Date(), }; - setMessages(prev => [...prev, userMessage]); + setCurrentSession(prev => { + if (!prev) return prev; + const updatedSession = { + ...prev, + messages: [...prev.messages, userMessage], + updatedAt: Date.now(), + }; + // Update cache with new message + SessionUtils.saveSessionData(prev.agentId, updatedSession); + return updatedSession; + }); setInput(""); - // Use the helper function with the content await handleSubmitWithContent(userMessage.content); }; const handleSubmitWithContent = async (content: string) => { + if (!currentSession || !selectedAgentId) return; + setIsGenerating(true); setError(null); - try { - const messageParams: CompletionCreateParams["messages"] = [ - ...messages.map(msg => { - const msgContent = - typeof msg.content === "string" - ? 
msg.content - : extractTextContent(msg.content); - if (msg.role === "user") { - return { role: "user" as const, content: msgContent }; - } else if (msg.role === "assistant") { - return { role: "assistant" as const, content: msgContent }; - } else { - return { role: "system" as const, content: msgContent }; - } - }), - { role: "user" as const, content }, - ]; + if (abortControllerRef.current) { + abortControllerRef.current.abort(); + } - const response = await client.chat.completions.create({ - model: selectedModel, - messages: messageParams, + const abortController = new AbortController(); + abortControllerRef.current = abortController; + + try { + const userMessage = { + role: "user" as const, + content, + }; + + const turnParams: TurnCreateParams = { + messages: [userMessage], stream: true, - }); + }; + + const response = await client.agents.turn.create( + selectedAgentId, + currentSession.id, + turnParams, + { + signal: abortController.signal, + } as { signal: AbortSignal } + ); const assistantMessage: Message = { id: (Date.now() + 1).toString(), @@ -141,31 +663,112 @@ export default function ChatPlaygroundPage() { createdAt: new Date(), }; - setMessages(prev => [...prev, assistantMessage]); + const extractDeltaText = (chunk: unknown): string | null => { + // this is an awful way to handle different chunk formats, but i'm not sure if there's much of a better way + if (chunk?.delta?.text && typeof chunk.delta.text === "string") { + return chunk.delta.text; + } + + if ( + chunk?.event?.delta?.text && + typeof chunk.event.delta.text === "string" + ) { + return chunk.event.delta.text; + } + + if ( + chunk?.choices?.[0]?.delta?.content && + typeof chunk.choices[0].delta.content === "string" + ) { + return chunk.choices[0].delta.content; + } + + if (typeof chunk === "string") { + return chunk; + } + + if ( + chunk?.event?.payload?.delta?.text && + typeof chunk.event.payload.delta.text === "string" + ) { + return chunk.event.payload.delta.text; + } + + if (process.env.NODE_ENV !== "production") { + console.debug("Unrecognized chunk format:", chunk); + } + + return null; + }; + setCurrentSession(prev => { + if (!prev) return null; + const updatedSession = { + ...prev, + messages: [...prev.messages, assistantMessage], + updatedAt: Date.now(), + }; + // update cache with assistant message + SessionUtils.saveSessionData(prev.agentId, updatedSession); + return updatedSession; + }); + let fullContent = ""; for await (const chunk of response) { - if (chunk.choices && chunk.choices[0]?.delta?.content) { - const deltaContent = chunk.choices[0].delta.content; - fullContent += deltaContent; + const deltaText = extractDeltaText(chunk); + + if (deltaText) { + fullContent += deltaText; flushSync(() => { - setMessages(prev => { - const newMessages = [...prev]; - const lastMessage = newMessages[newMessages.length - 1]; - if (lastMessage.role === "assistant") { - lastMessage.content = fullContent; + setCurrentSession(prev => { + if (!prev) return null; + const newMessages = [...prev.messages]; + const last = newMessages[newMessages.length - 1]; + if (last.role === "assistant") { + last.content = fullContent; } - return newMessages; + const updatedSession = { + ...prev, + messages: newMessages, + updatedAt: Date.now(), + }; + // update cache with streaming content (throttled) + if (fullContent.length % 100 === 0) { + // Only cache every 100 characters to avoid spam + SessionUtils.saveSessionData(prev.agentId, updatedSession); + } + return updatedSession; }); }); } } } catch (err) { + if (err instanceof Error && 
err.name === "AbortError") { + console.log("Request aborted"); + return; + } + console.error("Error sending message:", err); setError("Failed to send message. Please try again."); - setMessages(prev => prev.slice(0, -1)); + setCurrentSession(prev => + prev + ? { + ...prev, + messages: prev.messages.slice(0, -1), + updatedAt: Date.now(), + } + : prev + ); } finally { setIsGenerating(false); + abortControllerRef.current = null; + // cache final session state after streaming completes + setCurrentSession(prev => { + if (prev) { + SessionUtils.saveSessionData(prev.agentId, prev); + } + return prev; + }); } }; const suggestions = [ @@ -181,69 +784,457 @@ export default function ChatPlaygroundPage() { content: message.content, createdAt: new Date(), }; - setMessages(prev => [...prev, newMessage]); + setCurrentSession(prev => + prev + ? { + ...prev, + messages: [...prev.messages, newMessage], + updatedAt: Date.now(), + } + : prev + ); handleSubmitWithContent(newMessage.content); }; const clearChat = () => { - setMessages([]); + if (abortControllerRef.current) { + abortControllerRef.current.abort(); + abortControllerRef.current = null; + setIsGenerating(false); + } + + setCurrentSession(prev => + prev ? { ...prev, messages: [], updatedAt: Date.now() } : prev + ); setError(null); }; return ( -
-      [old render tree removed: the single-column layout with the "Chat Playground (Completions)" heading, the
-       model Select bound to selectedModel, and the old modelsError banner]
+      [new render tree added: a header bar titled "Agent Session" with an agent Select, a "Delete current agent"
+       button shown only while more than one agent exists, and a "+ New Agent" button; a two-column layout whose
+       left "Settings" Card holds the Model Configuration Select (with the modelsError banner), a read-only agent
+       Instructions panel ("Instructions are set when creating an agent and cannot be changed."), and an Agent
+       Tools list that renders each configured toolgroup name with a 🔍 RAG / 🌐 Search / 🔧 Tool badge and its
+       args, or "No tools configured / This agent only has text generation capabilities" when none are set; a
+       right column with the error banner and the Chat component bound to currentSession.messages via
+       onMessagesChange; and a "Create New Agent" modal, shown while showCreateAgent is true, with an Agent Name
+       Input, a system-instructions textarea, a model Select, toolgroup selection, and a "Create Agent" button]
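The page above leans on the SessionUtils helpers imported from "@/components/chat-playground/conversations", whose implementation is not part of this hunk. As rough orientation only, a minimal localStorage-backed sketch with the same call signatures used above could look like the following; the storage keys and the plain JSON serialization are assumptions, not the actual conversations.tsx code:

// Hypothetical sketch only -- mirrors the SessionUtils calls made in page.tsx above.
// Storage keys and JSON serialization are assumptions; the real conversations.tsx may differ.
export interface ChatSession {
  id: string;
  name: string;
  messages: unknown[]; // Message[] in the real component
  selectedModel: string;
  systemMessage: string;
  agentId: string;
  createdAt: number;
  updatedAt: number;
}

const k = (...parts: string[]) => ["chat-playground", ...parts].join(":");

export const SessionUtils = {
  saveCurrentAgentId(agentId: string): void {
    localStorage.setItem(k("current-agent"), agentId);
  },
  loadCurrentAgentId(): string | null {
    return localStorage.getItem(k("current-agent"));
  },
  saveCurrentSessionId(sessionId: string, agentId: string): void {
    localStorage.setItem(k("current-session", agentId), sessionId);
  },
  loadCurrentSessionId(agentId: string): string | null {
    return localStorage.getItem(k("current-session", agentId));
  },
  saveSessionData(agentId: string, session: ChatSession): void {
    localStorage.setItem(k("session", agentId, session.id), JSON.stringify(session));
  },
  loadSessionData(agentId: string, sessionId: string): ChatSession | null {
    const raw = localStorage.getItem(k("session", agentId, sessionId));
    return raw ? (JSON.parse(raw) as ChatSession) : null;
  },
  saveAgentConfig(agentId: string, config: unknown): void {
    localStorage.setItem(k("agent-config", agentId), JSON.stringify(config));
  },
  loadAgentConfig(agentId: string): { toolgroups?: unknown[] } | null {
    const raw = localStorage.getItem(k("agent-config", agentId));
    return raw ? JSON.parse(raw) : null;
  },
  clearAgentCache(agentId: string): void {
    // drop every cached key that belongs to this agent
    Object.keys(localStorage)
      .filter(key => key.startsWith("chat-playground") && key.includes(agentId))
      .forEach(key => localStorage.removeItem(key));
  },
};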