mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
fix: annotations list and web_search_preview in Responses (#2520)
# What does this PR do? These are a couple of fixes to get an example LangChain app working with our OpenAI Responses API implementation. The Responses API spec requires an annotations array in `output[*].content[*].annotations` and we were not providing one. So, this adds that as an empty list, even though we don't do anything to populate it yet. This prevents an error from client libraries like Langchain that expect this field to always exist, even if an empty list. The other fix is `web_search_preview` is a valid name for the web search tool in the Responses API, but we only responded to `web_search` or `web_search_preview_2025_03_11`. ## Test Plan The existing Responses unit tests were expanded to test these cases, via: ``` pytest -sv tests/unit/providers/agents/meta_reference/test_openai_responses.py ``` The existing test_openai_responses.py integration tests still pass with this change, tested as below with Fireworks: ``` uv run llama stack run llama_stack/templates/starter/run.yaml LLAMA_STACK_CONFIG=http://localhost:8321 \ uv run pytest -sv tests/integration/agents/test_openai_responses.py \ --text-model accounts/fireworks/models/llama4-scout-instruct-basic ``` Lastly, this example LangChain app now works with Llama stack (tested with Ollama in the starter template in this case). This LangChain code is using the example snippets for using Responses API at https://python.langchain.com/docs/integrations/chat/openai/#responses-api ```python from langchain_openai import ChatOpenAI llm = ChatOpenAI( base_url="http://localhost:8321/v1/openai/v1", api_key="fake", model="ollama/meta-llama/Llama-3.2-3B-Instruct", ) tool = {"type": "web_search_preview"} llm_with_tools = llm.bind_tools([tool]) response = llm_with_tools.invoke("What was a positive news story from today?") print(response.content) ``` Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
1d3f27fe5b
commit
2d9fd041eb
5 changed files with 355 additions and 36 deletions
|
@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
|
|||
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseAnnotationFileCitation(BaseModel):
|
||||
type: Literal["file_citation"] = "file_citation"
|
||||
file_id: str
|
||||
filename: str
|
||||
index: int
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseAnnotationCitation(BaseModel):
|
||||
type: Literal["url_citation"] = "url_citation"
|
||||
end_index: int
|
||||
start_index: int
|
||||
title: str
|
||||
url: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
|
||||
type: Literal["container_file_citation"] = "container_file_citation"
|
||||
container_id: str
|
||||
end_index: int
|
||||
file_id: str
|
||||
filename: str
|
||||
start_index: int
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseAnnotationFilePath(BaseModel):
|
||||
type: Literal["file_path"] = "file_path"
|
||||
file_id: str
|
||||
index: int
|
||||
|
||||
|
||||
OpenAIResponseAnnotations = Annotated[
|
||||
OpenAIResponseAnnotationFileCitation
|
||||
| OpenAIResponseAnnotationCitation
|
||||
| OpenAIResponseAnnotationContainerFileCitation
|
||||
| OpenAIResponseAnnotationFilePath,
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseOutputMessageContentOutputText(BaseModel):
|
||||
text: str
|
||||
type: Literal["output_text"] = "output_text"
|
||||
annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
|
||||
|
||||
|
||||
OpenAIResponseOutputMessageContent = Annotated[
|
||||
|
@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
|
|||
register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
|
||||
|
||||
|
||||
# Must match type Literals of OpenAIResponseInputToolWebSearch below
|
||||
WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIResponseInputToolWebSearch(BaseModel):
|
||||
type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
|
||||
# Must match values of WebSearchToolTypes above
|
||||
type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
|
||||
"web_search"
|
||||
)
|
||||
# TODO: actually use search_context_size somewhere...
|
||||
search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
|
||||
# TODO: add user_location
|
||||
|
|
|
@ -42,6 +42,7 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
OpenAIResponseText,
|
||||
OpenAIResponseTextFormat,
|
||||
WebSearchToolTypes,
|
||||
)
|
||||
from llama_stack.apis.common.content_types import TextContentItem
|
||||
from llama_stack.apis.inference.inference import (
|
||||
|
@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
|
|||
# TODO: Handle other tool types
|
||||
if input_tool.type == "function":
|
||||
chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
|
||||
elif input_tool.type == "web_search":
|
||||
elif input_tool.type in WebSearchToolTypes:
|
||||
tool_name = "web_search"
|
||||
tool = await self.tool_groups_api.get_tool(tool_name)
|
||||
if not tool:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue