Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)
feat: Add temperature support to responses API (#2065)
# What does this PR do?
Add support for the `temperature` parameter to the responses API.

## Test Plan
Manually tested the simple case; unit tests added for the simple case and for tool calls.

Signed-off-by: Derek Higgins <derekh@redhat.com>
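As a usage illustration, here is a minimal sketch of calling the updated endpoint with the new parameter. Only the parameter names come from the signature changes in the diff below; the `agents` handle and the model id are illustrative assumptions.

```python
# `agents` is a hypothetical handle to an Agents implementation
# (e.g. the meta-reference provider); it is not part of this PR.
response = await agents.create_openai_response(
    input="Write a haiku about sampling temperature.",
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    temperature=0.2,  # new in this PR; None keeps the provider's default
    stream=False,
    store=True,
)
print(response)
```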
This commit is contained in:
parent f36f68c590
commit 64829947d0

6 changed files with 220 additions and 3 deletions
```diff
@@ -628,6 +628,7 @@ class Agents(Protocol):
         previous_response_id: Optional[str] = None,
         store: Optional[bool] = True,
         stream: Optional[bool] = False,
+        temperature: Optional[float] = None,
         tools: Optional[List[OpenAIResponseInputTool]] = None,
     ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
         """Create a new OpenAI response.
```
```diff
@@ -270,8 +270,9 @@ class MetaReferenceAgentsImpl(Agents):
         previous_response_id: Optional[str] = None,
         store: Optional[bool] = True,
         stream: Optional[bool] = False,
+        temperature: Optional[float] = None,
         tools: Optional[List[OpenAIResponseInputTool]] = None,
     ) -> OpenAIResponseObject:
         return await self.openai_responses_impl.create_openai_response(
-            input, model, previous_response_id, store, stream, tools
+            input, model, previous_response_id, store, stream, temperature, tools
         )
```
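A note on the hunk above: the arguments are forwarded positionally, so inserting `temperature` between `stream` and `tools` relies on this call staying in sync with the parameter order of `create_openai_response`. For comparison, a keyword-argument form of the same call, built only from the signature shown above, would be order-independent (an alternative sketch, not what the PR does):

```python
# Same call with keywords; names are taken from the signature in the hunk.
return await self.openai_responses_impl.create_openai_response(
    input=input,
    model=model,
    previous_response_id=previous_response_id,
    store=store,
    stream=stream,
    temperature=temperature,
    tools=tools,
)
```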
```diff
@@ -106,6 +106,7 @@ class OpenAIResponsesImpl:
         previous_response_id: Optional[str] = None,
         store: Optional[bool] = True,
         stream: Optional[bool] = False,
+        temperature: Optional[float] = None,
         tools: Optional[List[OpenAIResponseInputTool]] = None,
     ):
         stream = False if stream is None else stream
```
```diff
@@ -141,6 +142,7 @@ class OpenAIResponsesImpl:
             messages=messages,
             tools=chat_tools,
             stream=stream,
+            temperature=temperature,
         )
 
         if stream:
```
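The kwargs in this hunk are forwarded to the inference provider's OpenAI-compatible chat-completions call. A sketch of the resulting call shape; the `self.inference_api.openai_chat_completion` receiver is an assumption about surrounding code that this hunk does not show:

```python
# Sketch only: the receiver of these kwargs is not visible in the hunk.
chat_response = await self.inference_api.openai_chat_completion(
    model=model,
    messages=messages,
    tools=chat_tools,
    stream=stream,
    temperature=temperature,  # None defers to the provider's default
)
```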
```diff
@@ -180,7 +182,7 @@ class OpenAIResponsesImpl:
         output_messages: List[OpenAIResponseOutput] = []
         if chat_response.choices[0].message.tool_calls:
             output_messages.extend(
-                await self._execute_tool_and_return_final_output(model, stream, chat_response, messages)
+                await self._execute_tool_and_return_final_output(model, stream, chat_response, messages, temperature)
             )
         else:
             output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
```
```diff
@@ -241,7 +243,12 @@ class OpenAIResponsesImpl:
         return chat_tools
 
     async def _execute_tool_and_return_final_output(
-        self, model_id: str, stream: bool, chat_response: OpenAIChatCompletion, messages: List[OpenAIMessageParam]
+        self,
+        model_id: str,
+        stream: bool,
+        chat_response: OpenAIChatCompletion,
+        messages: List[OpenAIMessageParam],
+        temperature: float,
     ) -> List[OpenAIResponseOutput]:
         output_messages: List[OpenAIResponseOutput] = []
         choice = chat_response.choices[0]
```
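A small typing nit in this hunk: the helper annotates the new parameter as `temperature: float`, while its caller (the -180 hunk above) passes the public `Optional[float]` value straight through, so `None` can reach a `float`-typed parameter. An annotation matching the actual flow would be:

```python
temperature: Optional[float],  # sketch; the PR uses `temperature: float`
```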
```diff
@@ -295,6 +302,7 @@ class OpenAIResponsesImpl:
             model=model_id,
             messages=messages,
             stream=stream,
+            temperature=temperature,
         )
         # type cast to appease mypy
         tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
```
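The unit tests mentioned in the test plan are among the six changed files but are not shown above. A minimal sketch of what the "simple case" test could look like; everything except `create_openai_response`'s parameters is an assumption (import path, constructor shape, mock wiring, pytest-asyncio):

```python
from types import SimpleNamespace
from unittest.mock import AsyncMock

import pytest

# Assumed import; the diff does not show file paths:
# from ...openai_responses import OpenAIResponsesImpl


@pytest.mark.asyncio  # requires pytest-asyncio
async def test_temperature_reaches_chat_completion():
    inference_api = AsyncMock()
    # Minimal completion shape: one choice with no tool calls, so the
    # simple (non-tool) path from the hunks above is exercised.
    inference_api.openai_chat_completion.return_value = SimpleNamespace(
        choices=[SimpleNamespace(message=SimpleNamespace(content="hi", tool_calls=None))]
    )
    impl = OpenAIResponsesImpl(inference_api=inference_api)  # assumed ctor

    await impl.create_openai_response(input="hello", model="test-model", temperature=0.5)

    kwargs = inference_api.openai_chat_completion.call_args.kwargs
    assert kwargs["temperature"] == 0.5
```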