chore: indicate to mypy that InferenceProvider.batch_completion/batch_chat_completion is concrete (#3239)

# What does this PR do?

closes https://github.com/llamastack/llama-stack/issues/3236

mypy considered our default implementations (`raise NotImplementedError`)
to be trivial, and therefore not concrete. As a result, we had to
implement the same stubs again in the providers.

This change puts enough into the default implementations that mypy
considers them non-trivial, which allows us to remove the duplicate
implementations from the providers.
This commit is contained in:
Matthew Farrellee 2025-08-22 16:17:30 -05:00 committed by GitHub
parent 2ee898cc4c
commit 3d119a86d4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 2 additions and 89 deletions

View file

@ -9,7 +9,6 @@ from collections.abc import AsyncGenerator
from llama_stack.apis.inference import (
CompletionResponse,
InferenceProvider,
InterleavedContent,
LogProbConfig,
Message,
ResponseFormat,
@ -100,25 +99,3 @@ class SentenceTransformersInferenceImpl(
tool_config: ToolConfig | None = None,
) -> AsyncGenerator:
raise ValueError("Sentence transformers don't support chat completion")
async def batch_completion(
    self,
    model_id: str,
    content_batch: list[InterleavedContent],
    sampling_params: SamplingParams | None = None,
    response_format: ResponseFormat | None = None,
    logprobs: LogProbConfig | None = None,
):
    """Reject batch completion requests for Sentence Transformers.

    None of the arguments are inspected; they exist only so the signature
    lines up with the other inference methods.

    Raises:
        NotImplementedError: Always.
    """
    unsupported = "Batch completion is not supported for Sentence Transformers"
    raise NotImplementedError(unsupported)
async def batch_chat_completion(
    self,
    model_id: str,
    messages_batch: list[list[Message]],
    sampling_params: SamplingParams | None = None,
    tools: list[ToolDefinition] | None = None,
    tool_config: ToolConfig | None = None,
    response_format: ResponseFormat | None = None,
    logprobs: LogProbConfig | None = None,
):
    """Reject batch chat completion requests for Sentence Transformers.

    None of the arguments are inspected; they exist only so the signature
    lines up with the other inference methods.

    Raises:
        NotImplementedError: Always.
    """
    unsupported = "Batch chat completion is not supported for Sentence Transformers"
    raise NotImplementedError(unsupported)