diff --git a/pyproject.toml b/pyproject.toml
index 79e433633..f5c0a39c5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,8 +67,8 @@ dev = [
     "pytest-cov",
     "pytest-html",
     "pytest-json-report",
-    "pytest-socket", # For blocking network access in unit tests
-    "nbval", # For notebook testing
+    "pytest-socket",  # For blocking network access in unit tests
+    "nbval",  # For notebook testing
     "black",
     "ruff",
     "mypy",
@@ -78,9 +78,9 @@ dev = [
     "pandas-stubs",
     "types-psutil",
     "types-tqdm",
-    "boto3-stubs",
+    "boto3-stubs[s3]",
     "pre-commit",
-    "ruamel.yaml", # needed for openapi generator
+    "ruamel.yaml",  # needed for openapi generator
 ]
 # These are the dependencies required for running unit tests.
 unit = [
diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py
index 728d06ca6..2882500ce 100644
--- a/src/llama_stack/cli/stack/run.py
+++ b/src/llama_stack/cli/stack/run.py
@@ -168,7 +168,7 @@ class StackRun(Subcommand):
         # Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
         # signal handling but this is quite intrusive and not worth the effort.
         try:
-            uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
+            uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)  # type: ignore[arg-type]
         except (KeyboardInterrupt, SystemExit):
             logger.info("Received interrupt signal, shutting down gracefully...")
diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
index c0e9f81d6..76261bdf4 100644
--- a/src/llama_stack/providers/remote/files/s3/files.py
+++ b/src/llama_stack/providers/remote/files/s3/files.py
@@ -4,14 +4,19 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from __future__ import annotations
+
 import uuid
 from datetime import UTC, datetime
-from typing import Annotated, Any
+from typing import TYPE_CHECKING, Annotated, Any, cast
 
 import boto3
 from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
 from fastapi import Depends, File, Form, Response, UploadFile
 
+if TYPE_CHECKING:
+    from mypy_boto3_s3.client import S3Client
+
 from llama_stack.apis.common.errors import ResourceNotFoundError
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.files import (
@@ -34,7 +39,7 @@
 from .config import S3FilesImplConfig
 
 # TODO: provider data for S3 credentials
-def _create_s3_client(config: S3FilesImplConfig) -> boto3.client:
+def _create_s3_client(config: S3FilesImplConfig) -> S3Client:
     try:
         s3_config = {
             "region_name": config.region,
@@ -52,13 +57,16 @@ def _create_s3_client(config: S3FilesImplConfig) -> S3Client:
                 }
             )
 
-        return boto3.client("s3", **s3_config)
+        # Both cast and type:ignore are needed here:
+        # - cast tells mypy the return type for downstream usage (S3Client vs generic client)
+        # - type:ignore suppresses the call-overload error from boto3's complex overloaded signatures
+        return cast("S3Client", boto3.client("s3", **s3_config))  # type: ignore[call-overload]
     except (BotoCoreError, NoCredentialsError) as e:
         raise RuntimeError(f"Failed to initialize S3 client: {e}") from e
 
 
-async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImplConfig) -> None:
+async def _create_bucket_if_not_exists(client: S3Client, config: S3FilesImplConfig) -> None:
     try:
         client.head_bucket(Bucket=config.bucket_name)
     except ClientError as e:
@@ -76,7 +84,7 @@ async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImpl
         else:
             client.create_bucket(
                 Bucket=config.bucket_name,
-                CreateBucketConfiguration={"LocationConstraint": config.region},
+                CreateBucketConfiguration=cast(Any, {"LocationConstraint": config.region}),
             )
     except ClientError as create_error:
         raise RuntimeError(
@@ -128,7 +136,7 @@ class S3FilesImpl(Files):
     def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None:
         self._config = config
         self.policy = policy
-        self._client: boto3.client | None = None
+        self._client: S3Client | None = None
         self._sql_store: AuthorizedSqlStore | None = None
 
     def _now(self) -> int:
@@ -184,7 +192,7 @@ class S3FilesImpl(Files):
             pass
 
     @property
-    def client(self) -> boto3.client:
+    def client(self) -> S3Client:
         assert self._client is not None, "Provider not initialized"
         return self._client
diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
index 27fea8b32..ee960d13b 100644
--- a/src/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from openai import NOT_GIVEN
+from typing import Any
 
 from llama_stack.apis.inference import (
     OpenAIEmbeddingData,
@@ -37,21 +37,20 @@ class GeminiInferenceAdapter(OpenAIMixin):
         Override embeddings method to handle Gemini's missing usage statistics.
         Gemini's embedding API doesn't return usage information, so we provide default values.
""" - # Prepare request parameters - request_params = { + # Build request params conditionally to avoid NotGiven/Omit type mismatch + request_params: dict[str, Any] = { "model": await self._get_provider_model_id(params.model), "input": params.input, - "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN, - "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN, - "user": params.user if params.user is not None else NOT_GIVEN, } + if params.encoding_format is not None: + request_params["encoding_format"] = params.encoding_format + if params.dimensions is not None: + request_params["dimensions"] = params.dimensions + if params.user is not None: + request_params["user"] = params.user + if params.model_extra: + request_params["extra_body"] = params.model_extra - # Add extra_body if present - extra_body = params.model_extra - if extra_body: - request_params["extra_body"] = extra_body - - # Call OpenAI embeddings API with properly typed parameters response = await self.client.embeddings.create(**request_params) data = [] diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index 74a18f3de..fa350ec48 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -7,13 +7,11 @@ from collections.abc import AsyncIterator from urllib.parse import urljoin import httpx -from openai.types.chat.chat_completion_chunk import ( - ChatCompletionChunk as OpenAIChatCompletionChunk, -) from pydantic import ConfigDict from llama_stack.apis.inference import ( OpenAIChatCompletion, + OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, ToolChoice, ) diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index bbd3d2e10..558ce2bfe 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -10,7 +10,7 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterable from typing import Any -from openai import NOT_GIVEN, AsyncOpenAI +from openai import AsyncOpenAI from pydantic import BaseModel, ConfigDict from llama_stack.apis.inference import ( @@ -351,21 +351,21 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ Direct OpenAI embeddings API call. 
""" - # Prepare request parameters - request_params = { + # Build request params conditionally to avoid NotGiven/Omit type mismatch + # The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven + request_params: dict[str, Any] = { "model": await self._get_provider_model_id(params.model), "input": params.input, - "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN, - "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN, - "user": params.user if params.user is not None else NOT_GIVEN, } + if params.encoding_format is not None: + request_params["encoding_format"] = params.encoding_format + if params.dimensions is not None: + request_params["dimensions"] = params.dimensions + if params.user is not None: + request_params["user"] = params.user + if params.model_extra: + request_params["extra_body"] = params.model_extra - # Add extra_body if present - extra_body = params.model_extra - if extra_body: - request_params["extra_body"] = extra_body - - # Call OpenAI embeddings API with properly typed parameters response = await self.client.embeddings.create(**request_params) data = [] diff --git a/uv.lock b/uv.lock index 7d162e3e6..a38a712ae 100644 --- a/uv.lock +++ b/uv.lock @@ -410,6 +410,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/8a/d14e63701c4e869f1d37ba5657f9821961616b98a30074f20b559c071fb6/boto3_stubs-1.40.60-py3-none-any.whl", hash = "sha256:1ea7f9dbabc7f9ac8477646c12cc51ef49af6b24d53cc2ae8cf6fa6bed6a995a", size = 69746, upload-time = "2025-10-27T19:49:05.619Z" }, ] +[package.optional-dependencies] +s3 = [ + { name = "mypy-boto3-s3" }, +] + [[package]] name = "botocore" version = "1.40.12" @@ -1871,7 +1876,7 @@ codegen = [ ] dev = [ { name = "black" }, - { name = "boto3-stubs" }, + { name = "boto3-stubs", extra = ["s3"] }, { name = "mypy" }, { name = "nbval" }, { name = "pandas-stubs" }, @@ -1995,7 +2000,7 @@ codegen = [ ] dev = [ { name = "black" }, - { name = "boto3-stubs" }, + { name = "boto3-stubs", extras = ["s3"] }, { name = "mypy" }, { name = "nbval" }, { name = "pandas-stubs" }, @@ -2568,6 +2573,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" }, ] +[[package]] +name = "mypy-boto3-s3" +version = "1.40.26" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/b8/55d21ed9ca479df66d9892212ba7d7977850ef17aa80a83e3f11f31190fd/mypy_boto3_s3-1.40.26.tar.gz", hash = "sha256:8d2bfd1052894d0e84c9fb9358d838ba0eed0265076c7dd7f45622c770275c99", size = 75948, upload-time = "2025-09-08T20:12:21.405Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/a5/dba3384423834009bdd41c7021de5c663468a0e7bc4071cb301721e52a99/mypy_boto3_s3-1.40.26-py3-none-any.whl", hash = "sha256:6d055d16ef89a0133ade92f6b4f09603e4acc31a0f5e8f846edf4eb48f17b5a7", size = 82762, upload-time = "2025-09-08T20:12:19.338Z" }, +] + [[package]] name = "mypy-extensions" version = "1.1.0"