feat(api): remove List* response types and nils for get/list

TODO:
- make sure docstrings are refreshed as needed.
- make sure this passes tests.
- address a TODO in code (obsolete comment?)
- make sure client side still works.
- analyze if any providers need adjustments.

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
Ihar Hrachyshka 2025-03-14 10:25:59 -04:00
parent bfc79217a8
commit 90ed785fbd
21 changed files with 222 additions and 935 deletions
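The "make sure client side still works" TODO above mostly comes down to call sites no longer unwrapping a .batch field: batch_completion and batch_chat_completion now return the list of responses directly. A minimal caller-side sketch under that assumption; the impl object, the example model id, the content_batch parameter name, and the content field on CompletionResponse are illustrative assumptions, not taken from the hunks below.

from llama_stack.apis.inference import SamplingParams


async def collect_completions(impl, prompts: list[str]) -> list[str]:
    # impl is any hypothetical object implementing the BatchInference protocol.
    completions = await impl.batch_completion(
        model="example-model",       # hypothetical model id
        content_batch=prompts,       # parameter name assumed, not shown in the hunks below
        sampling_params=SamplingParams(),
    )
    # Old shape: completions.batch; new shape: the list itself.
    return [c.content for c in completions]  # content field on CompletionResponse is assumed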

@@ -6,8 +6,6 @@
 
 from typing import List, Optional, Protocol, runtime_checkable
 
-from pydantic import BaseModel
-
 from llama_stack.apis.inference import (
     ChatCompletionResponse,
     CompletionResponse,
@@ -20,17 +18,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.schema_utils import json_schema_type, webmethod
-
-
-@json_schema_type
-class BatchCompletionResponse(BaseModel):
-    batch: List[CompletionResponse]
-
-
-@json_schema_type
-class BatchChatCompletionResponse(BaseModel):
-    batch: List[ChatCompletionResponse]
+from llama_stack.schema_utils import webmethod
 
 
 @runtime_checkable
@@ -43,7 +31,7 @@ class BatchInference(Protocol):
         sampling_params: Optional[SamplingParams] = None,
         response_format: Optional[ResponseFormat] = None,
         logprobs: Optional[LogProbConfig] = None,
-    ) -> BatchCompletionResponse: ...
+    ) -> list[CompletionResponse]: ...
 
     @webmethod(route="/batch-inference/chat-completion", method="POST")
     async def batch_chat_completion(
@@ -57,4 +45,4 @@ class BatchInference(Protocol):
         tool_prompt_format: Optional[ToolPromptFormat] = None,
         response_format: Optional[ResponseFormat] = None,
         logprobs: Optional[LogProbConfig] = None,
-    ) -> BatchChatCompletionResponse: ...
+    ) -> list[ChatCompletionResponse]: ...
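
The "analyze if any providers need adjustments" TODO translates to: every provider implementing this protocol must now build and return the plain list instead of wrapping it in the removed Batch*Response models. A rough provider-side sketch under those assumptions; the class name, the _complete_one helper, and the model/content_batch parameters are hypothetical, and the import path mirrors the file above.

from typing import List, Optional

from llama_stack.apis.inference import (
    CompletionResponse,
    LogProbConfig,
    ResponseFormat,
    SamplingParams,
)


class SketchBatchInferenceImpl:
    """Hypothetical provider stub illustrating the new return shape."""

    async def batch_completion(
        self,
        model: str,
        content_batch: List[str],
        sampling_params: Optional[SamplingParams] = None,
        response_format: Optional[ResponseFormat] = None,
        logprobs: Optional[LogProbConfig] = None,
    ) -> list[CompletionResponse]:
        results: list[CompletionResponse] = []
        for content in content_batch:
            results.append(await self._complete_one(model, content, sampling_params))
        # Before this commit a provider would have returned
        # BatchCompletionResponse(batch=results); now the list goes back as-is.
        return results

    async def _complete_one(
        self, model: str, content: str, sampling_params: Optional[SamplingParams]
    ) -> CompletionResponse:
        # Placeholder for the backend call a real provider would make.
        raise NotImplementedError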