mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 01:48:05 +00:00
Merge branch 'main' into parallel-tool-calls-impl
This commit is contained in:
commit
0c2b82b30a
15 changed files with 958 additions and 82 deletions
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
|
|
@ -2,4 +2,4 @@
|
|||
|
||||
# These owners will be the default owners for everything in
|
||||
# the repo. Unless a later match takes precedence,
|
||||
* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
|
||||
* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo
|
||||
|
|
|
|||
88
.github/workflows/stainless-builds.yml
vendored
88
.github/workflows/stainless-builds.yml
vendored
|
|
@ -43,7 +43,41 @@ env:
|
|||
# Stainless organization dashboard
|
||||
|
||||
jobs:
|
||||
compute-branch:
  # Derives the Stainless branch names once so that later jobs can consume
  # them through `needs.compute-branch.outputs.*`.
  runs-on: ubuntu-latest
  outputs:
    preview_branch: ${{ steps.compute.outputs.preview_branch }}
    base_branch: ${{ steps.compute.outputs.base_branch }}
    merge_branch: ${{ steps.compute.outputs.merge_branch }}
  steps:
    - name: Compute branch names
      id: compute
      # SECURITY: branch names, repository names and owner logins come from
      # the pull request and are attacker-controlled. They are handed to the
      # script as environment variables instead of being expanded with
      # `${{ }}` inside `run:`, so a crafted branch name cannot inject shell
      # commands into the generated script.
      env:
        HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
        BASE_REPO: ${{ github.repository }}
        BRANCH_NAME: ${{ github.event.pull_request.head.ref }}
        FORK_OWNER: ${{ github.event.pull_request.head.repo.owner.login }}
      run: |
        if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
          # Fork PR: prefix with fork owner for isolation
          if [ -z "$FORK_OWNER" ]; then
            echo "Error: Fork PR detected but fork owner is empty" >&2
            exit 1
          fi
          PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
          BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"
        else
          # Same-repo PR
          PREVIEW_BRANCH="preview/${BRANCH_NAME}"
          BASE_BRANCH="preview/base/${BRANCH_NAME}"
        fi

        # The merge branch is the same branch the preview builds were pushed to.
        {
          echo "preview_branch=${PREVIEW_BRANCH}"
          echo "base_branch=${BASE_BRANCH}"
          echo "merge_branch=${PREVIEW_BRANCH}"
        } >> "$GITHUB_OUTPUT"
|
||||
|
||||
preview:
|
||||
needs: compute-branch
|
||||
if: github.event.action != 'closed'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
|
|
@ -59,32 +93,6 @@ jobs:
|
|||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
fetch-depth: 2
|
||||
|
||||
# Compute the Stainless branch name, prefixing with fork owner if PR is from a fork.
|
||||
# For fork PRs like "contributor:fix/issue-123", this creates "preview/contributor/fix/issue-123"
|
||||
# For same-repo PRs, this creates "preview/fix/issue-123"
|
||||
- name: Compute branch names
|
||||
id: branch-names
|
||||
run: |
|
||||
HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
|
||||
BASE_REPO="${{ github.repository }}"
|
||||
BRANCH_NAME="${{ github.event.pull_request.head.ref }}"
|
||||
|
||||
if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
|
||||
# Fork PR: prefix with fork owner for isolation
|
||||
FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}"
|
||||
PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
|
||||
BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"
|
||||
else
|
||||
# Same-repo PR
|
||||
PREVIEW_BRANCH="preview/${BRANCH_NAME}"
|
||||
BASE_BRANCH="preview/base/${BRANCH_NAME}"
|
||||
fi
|
||||
|
||||
echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
|
||||
echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT
|
||||
|
||||
# This action builds preview SDKs from the OpenAPI spec changes and
|
||||
# posts/updates a comment on the PR with build results and links to the preview.
|
||||
- name: Run preview builds
|
||||
uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
|
||||
with:
|
||||
|
|
@ -97,10 +105,11 @@ jobs:
|
|||
base_sha: ${{ github.event.pull_request.base.sha }}
|
||||
base_ref: ${{ github.event.pull_request.base.ref }}
|
||||
head_sha: ${{ github.event.pull_request.head.sha }}
|
||||
branch: ${{ steps.branch-names.outputs.preview_branch }}
|
||||
base_branch: ${{ steps.branch-names.outputs.base_branch }}
|
||||
branch: ${{ needs.compute-branch.outputs.preview_branch }}
|
||||
base_branch: ${{ needs.compute-branch.outputs.base_branch }}
|
||||
|
||||
merge:
|
||||
needs: compute-branch
|
||||
if: github.event.action == 'closed' && github.event.pull_request.merged == true
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
|
|
@ -116,27 +125,6 @@ jobs:
|
|||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
fetch-depth: 2
|
||||
|
||||
# Compute the Stainless branch name, prefixing with fork owner if PR is from a fork.
|
||||
# For fork PRs like "contributor:fix/issue-123", this creates "preview/contributor/fix/issue-123"
|
||||
# For same-repo PRs, this creates "preview/fix/issue-123"
|
||||
- name: Compute branch names
|
||||
id: branch-names
|
||||
run: |
|
||||
HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
|
||||
BASE_REPO="${{ github.repository }}"
|
||||
BRANCH_NAME="${{ github.event.pull_request.head.ref }}"
|
||||
|
||||
if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
|
||||
# Fork PR: prefix with fork owner for isolation
|
||||
FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}"
|
||||
MERGE_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
|
||||
else
|
||||
# Same-repo PR
|
||||
MERGE_BRANCH="preview/${BRANCH_NAME}"
|
||||
fi
|
||||
|
||||
echo "merge_branch=${MERGE_BRANCH}" >> $GITHUB_OUTPUT
|
||||
|
||||
# Note that this only merges in changes that happened on the last build on
|
||||
# the computed preview branch. It's possible that there are OAS/config
|
||||
# changes that haven't been built, if the preview job didn't finish
|
||||
|
|
@ -155,4 +143,4 @@ jobs:
|
|||
base_sha: ${{ github.event.pull_request.base.sha }}
|
||||
base_ref: ${{ github.event.pull_request.base.ref }}
|
||||
head_sha: ${{ github.event.pull_request.head.sha }}
|
||||
merge_branch: ${{ steps.branch-names.outputs.merge_branch }}
|
||||
merge_branch: ${{ needs.compute-branch.outputs.merge_branch }}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ TEST_PATTERN=""
|
|||
INFERENCE_MODE="replay"
|
||||
EXTRA_PARAMS=""
|
||||
COLLECT_ONLY=false
|
||||
TYPESCRIPT_ONLY=false
|
||||
|
||||
# Function to display usage
|
||||
usage() {
|
||||
|
|
@ -34,6 +35,7 @@ Options:
|
|||
--subdirs STRING Comma-separated list of test subdirectories to run (overrides suite)
|
||||
--pattern STRING Regex pattern to pass to pytest -k
|
||||
--collect-only Collect tests only without running them (skips server startup)
|
||||
--typescript-only Skip Python tests and run only TypeScript client tests
|
||||
--help Show this help message
|
||||
|
||||
Suites are defined in tests/integration/suites.py and define which tests to run.
|
||||
|
|
@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do
|
|||
COLLECT_ONLY=true
|
||||
shift
|
||||
;;
|
||||
--typescript-only)
|
||||
TYPESCRIPT_ONLY=true
|
||||
shift
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit 0
|
||||
|
|
@ -544,6 +550,8 @@ if [[ -n "$STACK_CONFIG" ]]; then
|
|||
STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG"
|
||||
fi
|
||||
|
||||
# Run Python tests unless typescript-only mode
|
||||
if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then
|
||||
pytest -s -v $PYTEST_TARGET \
|
||||
$STACK_CONFIG_ARG \
|
||||
--inference-mode="$INFERENCE_MODE" \
|
||||
|
|
@ -554,6 +562,11 @@ pytest -s -v $PYTEST_TARGET \
|
|||
--color=yes $EXTRA_PARAMS \
|
||||
--capture=tee-sys
|
||||
exit_code=$?
|
||||
else
|
||||
echo "Skipping Python tests (--typescript-only mode)"
|
||||
exit_code=0
|
||||
fi
|
||||
|
||||
set +x
|
||||
set -e
|
||||
|
||||
|
|
|
|||
|
|
@ -27,8 +27,10 @@ async def get_provider_impl(
|
|||
deps[Api.tool_runtime],
|
||||
deps[Api.tool_groups],
|
||||
deps[Api.conversations],
|
||||
policy,
|
||||
deps[Api.prompts],
|
||||
deps[Api.files],
|
||||
telemetry_enabled,
|
||||
policy,
|
||||
)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore
|
|||
from llama_stack_api import (
|
||||
Agents,
|
||||
Conversations,
|
||||
Files,
|
||||
Inference,
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
|
|
@ -22,6 +23,7 @@ from llama_stack_api import (
|
|||
OpenAIResponsePrompt,
|
||||
OpenAIResponseText,
|
||||
Order,
|
||||
Prompts,
|
||||
ResponseGuardrail,
|
||||
Safety,
|
||||
ToolGroups,
|
||||
|
|
@ -45,6 +47,8 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
tool_runtime_api: ToolRuntime,
|
||||
tool_groups_api: ToolGroups,
|
||||
conversations_api: Conversations,
|
||||
prompts_api: Prompts,
|
||||
files_api: Files,
|
||||
policy: list[AccessRule],
|
||||
telemetry_enabled: bool = False,
|
||||
):
|
||||
|
|
@ -56,7 +60,8 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
self.tool_groups_api = tool_groups_api
|
||||
self.conversations_api = conversations_api
|
||||
self.telemetry_enabled = telemetry_enabled
|
||||
|
||||
self.prompts_api = prompts_api
|
||||
self.files_api = files_api
|
||||
self.in_memory_store = InmemoryKVStoreImpl()
|
||||
self.openai_responses_impl: OpenAIResponsesImpl | None = None
|
||||
self.policy = policy
|
||||
|
|
@ -73,6 +78,8 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
vector_io_api=self.vector_io_api,
|
||||
safety_api=self.safety_api,
|
||||
conversations_api=self.conversations_api,
|
||||
prompts_api=self.prompts_api,
|
||||
files_api=self.files_api,
|
||||
)
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from collections.abc import AsyncIterator
|
||||
|
|
@ -18,13 +19,17 @@ from llama_stack.providers.utils.responses.responses_store import (
|
|||
from llama_stack_api import (
|
||||
ConversationItem,
|
||||
Conversations,
|
||||
Files,
|
||||
Inference,
|
||||
InvalidConversationIdError,
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
OpenAIChatCompletionContentPartParam,
|
||||
OpenAIDeleteResponseObject,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseInput,
|
||||
OpenAIResponseInputMessageContentFile,
|
||||
OpenAIResponseInputMessageContentImage,
|
||||
OpenAIResponseInputMessageContentText,
|
||||
OpenAIResponseInputTool,
|
||||
OpenAIResponseMessage,
|
||||
|
|
@ -34,7 +39,9 @@ from llama_stack_api import (
|
|||
OpenAIResponseText,
|
||||
OpenAIResponseTextFormat,
|
||||
OpenAISystemMessageParam,
|
||||
OpenAIUserMessageParam,
|
||||
Order,
|
||||
Prompts,
|
||||
ResponseGuardrailSpec,
|
||||
Safety,
|
||||
ToolGroups,
|
||||
|
|
@ -46,6 +53,7 @@ from .streaming import StreamingResponseOrchestrator
|
|||
from .tool_executor import ToolExecutor
|
||||
from .types import ChatCompletionContext, ToolContext
|
||||
from .utils import (
|
||||
convert_response_content_to_chat_content,
|
||||
convert_response_input_to_chat_messages,
|
||||
convert_response_text_to_chat_response_format,
|
||||
extract_guardrail_ids,
|
||||
|
|
@ -69,6 +77,8 @@ class OpenAIResponsesImpl:
|
|||
vector_io_api: VectorIO, # VectorIO
|
||||
safety_api: Safety | None,
|
||||
conversations_api: Conversations,
|
||||
prompts_api: Prompts,
|
||||
files_api: Files,
|
||||
):
|
||||
self.inference_api = inference_api
|
||||
self.tool_groups_api = tool_groups_api
|
||||
|
|
@ -82,6 +92,8 @@ class OpenAIResponsesImpl:
|
|||
tool_runtime_api=tool_runtime_api,
|
||||
vector_io_api=vector_io_api,
|
||||
)
|
||||
self.prompts_api = prompts_api
|
||||
self.files_api = files_api
|
||||
|
||||
async def _prepend_previous_response(
|
||||
self,
|
||||
|
|
@ -122,11 +134,13 @@ class OpenAIResponsesImpl:
|
|||
# Use stored messages directly and convert only new input
|
||||
message_adapter = TypeAdapter(list[OpenAIMessageParam])
|
||||
messages = message_adapter.validate_python(previous_response.messages)
|
||||
new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
|
||||
new_messages = await convert_response_input_to_chat_messages(
|
||||
input, previous_messages=messages, files_api=self.files_api
|
||||
)
|
||||
messages.extend(new_messages)
|
||||
else:
|
||||
# Backward compatibility: reconstruct from inputs
|
||||
messages = await convert_response_input_to_chat_messages(all_input)
|
||||
messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
|
||||
|
||||
tool_context.recover_tools_from_previous_response(previous_response)
|
||||
elif conversation is not None:
|
||||
|
|
@ -138,7 +152,7 @@ class OpenAIResponsesImpl:
|
|||
all_input = input
|
||||
if not conversation_items.data:
|
||||
# First turn - just convert the new input
|
||||
messages = await convert_response_input_to_chat_messages(input)
|
||||
messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
|
||||
else:
|
||||
if not stored_messages:
|
||||
all_input = conversation_items.data
|
||||
|
|
@ -154,14 +168,82 @@ class OpenAIResponsesImpl:
|
|||
all_input = input
|
||||
|
||||
messages = stored_messages or []
|
||||
new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
|
||||
new_messages = await convert_response_input_to_chat_messages(
|
||||
all_input, previous_messages=messages, files_api=self.files_api
|
||||
)
|
||||
messages.extend(new_messages)
|
||||
else:
|
||||
all_input = input
|
||||
messages = await convert_response_input_to_chat_messages(all_input)
|
||||
messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
|
||||
|
||||
return all_input, messages, tool_context
|
||||
|
||||
async def _prepend_prompt(
    self,
    messages: list[OpenAIMessageParam],
    openai_response_prompt: OpenAIResponsePrompt | None,
) -> None:
    """Resolve a stored prompt template and add it to ``messages`` in place.

    The prompt is fetched through ``self.prompts_api``. Text variables are
    substituted into ``{{ name }}`` placeholders; image/file variables are
    converted to chat content parts and represented in the text by a
    ``[Type: name]`` marker. The resolved text is inserted as the first
    (system) message, and any media parts are appended as one user message.

    :param messages: List of OpenAIMessageParam objects, mutated in place
    :param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables
    :raises ValueError: If a supplied variable is not declared by the stored prompt
    """
    # Nothing to do without a prompt reference.
    if not openai_response_prompt or not openai_response_prompt.id:
        return

    requested_version = None
    if openai_response_prompt.version:
        requested_version = int(openai_response_prompt.version)

    stored_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, requested_version)
    if not stored_prompt or not stored_prompt.prompt:
        return

    template_text = stored_prompt.prompt
    declared_variables = stored_prompt.variables
    supplied_variables = openai_response_prompt.variables

    # No variables supplied: the template is used verbatim.
    if not supplied_variables:
        messages.insert(0, OpenAISystemMessageParam(content=template_text))
        return

    # Reject the first supplied variable that the stored prompt does not declare.
    for name in supplied_variables.keys():
        if name in declared_variables:
            continue
        raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")

    replacements = {}
    media_parts: list[OpenAIChatCompletionContentPartParam] = []
    media_types = (OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentFile)

    for name, value in supplied_variables.items():
        if isinstance(value, OpenAIResponseInputMessageContentText):
            # Plain text is substituted directly into the template.
            replacements[name] = value.text
            continue

        if not isinstance(value, media_types):
            # Other value kinds are left untouched, so their placeholder stays as written.
            continue

        converted = await convert_response_content_to_chat_content([value], files_api=self.files_api)
        if isinstance(converted, list):
            media_parts.extend(converted)

        # E.g. {{product_photo}} becomes "[Image: product_photo]", giving the
        # model a textual hint about which media accompanies the prompt.
        kind_label = value.type.replace("input_", "").replace("_", " ").title()
        replacements[name] = f"[{kind_label}: {name}]"

    placeholder = re.compile(r"\{\{\s*(\w+)\s*\}\}")

    def _substitute(found: re.Match[str]) -> str:
        key = found.group(1).strip()
        # Unknown placeholders are kept exactly as they appear in the template.
        return str(replacements.get(key, found.group(0)))

    resolved_text = placeholder.sub(_substitute, template_text)

    # The resolved template becomes the leading system message.
    messages.insert(0, OpenAISystemMessageParam(content=resolved_text))

    # Media goes into a separate user message, since user messages can carry images and files.
    if media_parts:
        messages.append(OpenAIUserMessageParam(content=media_parts))
|
||||
|
||||
async def get_openai_response(
|
||||
self,
|
||||
response_id: str,
|
||||
|
|
@ -297,6 +379,7 @@ class OpenAIResponsesImpl:
|
|||
input=input,
|
||||
conversation=conversation,
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
instructions=instructions,
|
||||
previous_response_id=previous_response_id,
|
||||
store=store,
|
||||
|
|
@ -350,6 +433,7 @@ class OpenAIResponsesImpl:
|
|||
instructions: str | None = None,
|
||||
previous_response_id: str | None = None,
|
||||
conversation: str | None = None,
|
||||
prompt: OpenAIResponsePrompt | None = None,
|
||||
store: bool | None = True,
|
||||
temperature: float | None = None,
|
||||
text: OpenAIResponseText | None = None,
|
||||
|
|
@ -372,6 +456,9 @@ class OpenAIResponsesImpl:
|
|||
if instructions:
|
||||
messages.insert(0, OpenAISystemMessageParam(content=instructions))
|
||||
|
||||
# Prepend reusable prompt (if provided)
|
||||
await self._prepend_prompt(messages, prompt)
|
||||
|
||||
# Structured outputs
|
||||
response_format = await convert_response_text_to_chat_response_format(text)
|
||||
|
||||
|
|
@ -394,6 +481,7 @@ class OpenAIResponsesImpl:
|
|||
ctx=ctx,
|
||||
response_id=response_id,
|
||||
created_at=created_at,
|
||||
prompt=prompt,
|
||||
text=text,
|
||||
max_infer_iters=max_infer_iters,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
|
|
|
|||
|
|
@ -5,11 +5,14 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import mimetypes
|
||||
import re
|
||||
import uuid
|
||||
from collections.abc import Sequence
|
||||
|
||||
from llama_stack_api import (
|
||||
Files,
|
||||
OpenAIAssistantMessageParam,
|
||||
OpenAIChatCompletionContentPartImageParam,
|
||||
OpenAIChatCompletionContentPartParam,
|
||||
|
|
@ -18,6 +21,8 @@ from llama_stack_api import (
|
|||
OpenAIChatCompletionToolCallFunction,
|
||||
OpenAIChoice,
|
||||
OpenAIDeveloperMessageParam,
|
||||
OpenAIFile,
|
||||
OpenAIFileFile,
|
||||
OpenAIImageURL,
|
||||
OpenAIJSONSchema,
|
||||
OpenAIMessageParam,
|
||||
|
|
@ -29,6 +34,7 @@ from llama_stack_api import (
|
|||
OpenAIResponseInput,
|
||||
OpenAIResponseInputFunctionToolCallOutput,
|
||||
OpenAIResponseInputMessageContent,
|
||||
OpenAIResponseInputMessageContentFile,
|
||||
OpenAIResponseInputMessageContentImage,
|
||||
OpenAIResponseInputMessageContentText,
|
||||
OpenAIResponseInputTool,
|
||||
|
|
@ -37,9 +43,11 @@ from llama_stack_api import (
|
|||
OpenAIResponseMessage,
|
||||
OpenAIResponseOutputMessageContent,
|
||||
OpenAIResponseOutputMessageContentOutputText,
|
||||
OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
OpenAIResponseOutputMessageFunctionToolCall,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageMCPListTools,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
OpenAIResponseText,
|
||||
OpenAISystemMessageParam,
|
||||
OpenAIToolMessageParam,
|
||||
|
|
@ -49,6 +57,46 @@ from llama_stack_api import (
|
|||
)
|
||||
|
||||
|
||||
async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes:
    """
    Fetch a file's content through the Files API and return it as bytes.

    :param file_id: The file identifier (e.g., "file-abc123")
    :param files_api: Files API instance used for the lookup
    :returns: The ``body`` of the retrieved content, converted to ``bytes``
    :raises ValueError: If retrieval or the bytes conversion fails; the
        original exception is chained as the cause
    """
    try:
        content_response = await files_api.openai_retrieve_file_content(file_id)
        payload = bytes(content_response.body)
    except Exception as exc:
        # Normalise every failure into a ValueError that names the file.
        raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(exc)}") from exc
    return payload
|
||||
|
||||
|
||||
def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
    """
    Base64-encode raw bytes and return the result as text.

    :param raw_bytes: the bytes that make up the file content
    :returns: the standard base64 encoding of ``raw_bytes`` as a ``str``
    """
    encoded = base64.b64encode(raw_bytes)
    # b64encode yields bytes; convert them to text for embedding in a string.
    return str(encoded, "utf-8")
|
||||
|
||||
|
||||
def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
|
||||
"""
|
||||
Construct data url with decoded data inside
|
||||
|
||||
:param ascii_text: ASCII content
|
||||
:param mime_type: MIME type of file
|
||||
:returns: data url string (eg. data:image/png,base64,%3Ch1%3EHello%2C%20World%21%3C%2Fh1%3E)
|
||||
"""
|
||||
if not mime_type:
|
||||
mime_type = "application/octet-stream"
|
||||
|
||||
return f"data:{mime_type};base64,{ascii_text}"
|
||||
|
||||
|
||||
async def convert_chat_choice_to_response_message(
|
||||
choice: OpenAIChoice,
|
||||
citation_files: dict[str, str] | None = None,
|
||||
|
|
@ -78,11 +126,15 @@ async def convert_chat_choice_to_response_message(
|
|||
|
||||
async def convert_response_content_to_chat_content(
|
||||
content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
|
||||
files_api: Files | None,
|
||||
) -> str | list[OpenAIChatCompletionContentPartParam]:
|
||||
"""
|
||||
Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
|
||||
|
||||
The content schemas of each API look similar, but are not exactly the same.
|
||||
|
||||
:param content: The content to convert
|
||||
:param files_api: Files API for resolving file_id to raw file content (required if content contains files/images)
|
||||
"""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
|
|
@ -95,9 +147,68 @@ async def convert_response_content_to_chat_content(
|
|||
elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
|
||||
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
|
||||
elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
|
||||
detail = content_part.detail
|
||||
image_mime_type = None
|
||||
if content_part.image_url:
|
||||
image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail)
|
||||
image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
|
||||
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
|
||||
elif content_part.file_id:
|
||||
if files_api is None:
|
||||
raise ValueError("file_ids are not supported by this implementation of the Stack")
|
||||
image_file_response = await files_api.openai_retrieve_file(content_part.file_id)
|
||||
if image_file_response.filename:
|
||||
image_mime_type, _ = mimetypes.guess_type(image_file_response.filename)
|
||||
raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api)
|
||||
ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes)
|
||||
image_data_url = construct_data_url(ascii_text, image_mime_type)
|
||||
image_url = OpenAIImageURL(url=image_data_url, detail=detail)
|
||||
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Image content must have either 'image_url' or 'file_id'. "
|
||||
f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
|
||||
)
|
||||
elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
|
||||
resolved_file_data = None
|
||||
file_data = content_part.file_data
|
||||
file_id = content_part.file_id
|
||||
file_url = content_part.file_url
|
||||
filename = content_part.filename
|
||||
file_mime_type = None
|
||||
if not any([file_data, file_id, file_url]):
|
||||
raise ValueError(
|
||||
f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. "
|
||||
f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}"
|
||||
)
|
||||
if file_id:
|
||||
if files_api is None:
|
||||
raise ValueError("file_ids are not supported by this implementation of the Stack")
|
||||
|
||||
file_response = await files_api.openai_retrieve_file(file_id)
|
||||
if not filename:
|
||||
filename = file_response.filename
|
||||
file_mime_type, _ = mimetypes.guess_type(file_response.filename)
|
||||
raw_file_bytes = await extract_bytes_from_file(file_id, files_api)
|
||||
ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes)
|
||||
resolved_file_data = construct_data_url(ascii_text, file_mime_type)
|
||||
elif file_data:
|
||||
if file_data.startswith("data:"):
|
||||
resolved_file_data = file_data
|
||||
else:
|
||||
# Raw base64 data, wrap in data URL format
|
||||
if filename:
|
||||
file_mime_type, _ = mimetypes.guess_type(filename)
|
||||
resolved_file_data = construct_data_url(file_data, file_mime_type)
|
||||
elif file_url:
|
||||
resolved_file_data = file_url
|
||||
converted_parts.append(
|
||||
OpenAIFile(
|
||||
file=OpenAIFileFile(
|
||||
file_data=resolved_file_data,
|
||||
filename=filename,
|
||||
)
|
||||
)
|
||||
)
|
||||
elif isinstance(content_part, str):
|
||||
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
|
||||
else:
|
||||
|
|
@ -110,12 +221,14 @@ async def convert_response_content_to_chat_content(
|
|||
async def convert_response_input_to_chat_messages(
|
||||
input: str | list[OpenAIResponseInput],
|
||||
previous_messages: list[OpenAIMessageParam] | None = None,
|
||||
files_api: Files | None = None,
|
||||
) -> list[OpenAIMessageParam]:
|
||||
"""
|
||||
Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
|
||||
|
||||
:param input: The input to convert
|
||||
:param previous_messages: Optional previous messages to check for function_call references
|
||||
:param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
|
||||
"""
|
||||
messages: list[OpenAIMessageParam] = []
|
||||
if isinstance(input, list):
|
||||
|
|
@ -169,6 +282,12 @@ async def convert_response_input_to_chat_messages(
|
|||
elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
|
||||
# the tool list will be handled separately
|
||||
pass
|
||||
elif isinstance(
|
||||
input_item,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
):
|
||||
# these tool calls are tracked internally but not converted to chat messages
|
||||
pass
|
||||
elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
|
||||
input_item, OpenAIResponseMCPApprovalResponse
|
||||
):
|
||||
|
|
@ -176,7 +295,7 @@ async def convert_response_input_to_chat_messages(
|
|||
pass
|
||||
elif isinstance(input_item, OpenAIResponseMessage):
|
||||
# Narrow type to OpenAIResponseMessage which has content and role attributes
|
||||
content = await convert_response_content_to_chat_content(input_item.content)
|
||||
content = await convert_response_content_to_chat_content(input_item.content, files_api)
|
||||
message_type = await get_message_type_by_role(input_item.role)
|
||||
if message_type is None:
|
||||
raise ValueError(
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@ def available_providers() -> list[ProviderSpec]:
|
|||
Api.tool_runtime,
|
||||
Api.tool_groups,
|
||||
Api.conversations,
|
||||
Api.prompts,
|
||||
Api.files,
|
||||
],
|
||||
optional_api_dependencies=[
|
||||
Api.safety,
|
||||
|
|
|
|||
|
|
@ -213,6 +213,19 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
|||
|
||||
return api_key
|
||||
|
||||
def _validate_model_allowed(self, provider_model_id: str) -> None:
|
||||
"""
|
||||
Validate that the model is in the allowed_models list if configured.
|
||||
|
||||
:param provider_model_id: The provider-specific model ID to validate
|
||||
:raises ValueError: If the model is not in the allowed_models list
|
||||
"""
|
||||
if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models:
|
||||
raise ValueError(
|
||||
f"Model '{provider_model_id}' is not in the allowed models list. "
|
||||
f"Allowed models: {self.config.allowed_models}"
|
||||
)
|
||||
|
||||
async def _get_provider_model_id(self, model: str) -> str:
|
||||
"""
|
||||
Get the provider-specific model ID from the model store.
|
||||
|
|
@ -259,8 +272,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
|||
Direct OpenAI completion API call.
|
||||
"""
|
||||
# TODO: fix openai_completion to return type compatible with OpenAI's API response
|
||||
provider_model_id = await self._get_provider_model_id(params.model)
|
||||
self._validate_model_allowed(provider_model_id)
|
||||
|
||||
completion_kwargs = await prepare_openai_completion_params(
|
||||
model=await self._get_provider_model_id(params.model),
|
||||
model=provider_model_id,
|
||||
prompt=params.prompt,
|
||||
best_of=params.best_of,
|
||||
echo=params.echo,
|
||||
|
|
@ -292,6 +308,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
|||
"""
|
||||
Direct OpenAI chat completion API call.
|
||||
"""
|
||||
provider_model_id = await self._get_provider_model_id(params.model)
|
||||
self._validate_model_allowed(provider_model_id)
|
||||
|
||||
messages = params.messages
|
||||
|
||||
if self.download_images:
|
||||
|
|
@ -313,7 +332,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
|||
messages = [await _localize_image_url(m) for m in messages]
|
||||
|
||||
request_params = await prepare_openai_completion_params(
|
||||
model=await self._get_provider_model_id(params.model),
|
||||
model=provider_model_id,
|
||||
messages=messages,
|
||||
frequency_penalty=params.frequency_penalty,
|
||||
function_call=params.function_call,
|
||||
|
|
@ -351,10 +370,13 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
|||
"""
|
||||
Direct OpenAI embeddings API call.
|
||||
"""
|
||||
provider_model_id = await self._get_provider_model_id(params.model)
|
||||
self._validate_model_allowed(provider_model_id)
|
||||
|
||||
# Build request params conditionally to avoid NotGiven/Omit type mismatch
|
||||
# The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven
|
||||
request_params: dict[str, Any] = {
|
||||
"model": await self._get_provider_model_id(params.model),
|
||||
"model": provider_model_id,
|
||||
"input": params.input,
|
||||
}
|
||||
if params.encoding_format is not None:
|
||||
|
|
|
|||
|
|
@ -25,6 +25,13 @@ from llama_stack.providers.utils.responses.responses_store import (
|
|||
ResponsesStore,
|
||||
_OpenAIResponseObjectWithInputAndMessages,
|
||||
)
|
||||
from llama_stack_api import (
|
||||
OpenAIChatCompletionContentPartImageParam,
|
||||
OpenAIFile,
|
||||
OpenAIFileObject,
|
||||
OpenAISystemMessageParam,
|
||||
Prompt,
|
||||
)
|
||||
from llama_stack_api.agents import Order
|
||||
from llama_stack_api.inference import (
|
||||
OpenAIAssistantMessageParam,
|
||||
|
|
@ -38,6 +45,8 @@ from llama_stack_api.inference import (
|
|||
)
|
||||
from llama_stack_api.openai_responses import (
|
||||
ListOpenAIResponseInputItem,
|
||||
OpenAIResponseInputMessageContentFile,
|
||||
OpenAIResponseInputMessageContentImage,
|
||||
OpenAIResponseInputMessageContentText,
|
||||
OpenAIResponseInputToolFunction,
|
||||
OpenAIResponseInputToolMCP,
|
||||
|
|
@ -47,6 +56,7 @@ from llama_stack_api.openai_responses import (
|
|||
OpenAIResponseOutputMessageFunctionToolCall,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
OpenAIResponsePrompt,
|
||||
OpenAIResponseText,
|
||||
OpenAIResponseTextFormat,
|
||||
WebSearchToolTypes,
|
||||
|
|
@ -98,6 +108,19 @@ def mock_safety_api():
|
|||
return safety_api
|
||||
|
||||
|
||||
@pytest.fixture
def mock_prompts_api():
    """Mock prompts API for testing."""
    return AsyncMock()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_files_api():
    """Provide an ``AsyncMock`` that stands in for the files API."""
    return AsyncMock()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def openai_responses_impl(
|
||||
mock_inference_api,
|
||||
|
|
@ -107,6 +130,8 @@ def openai_responses_impl(
|
|||
mock_vector_io_api,
|
||||
mock_safety_api,
|
||||
mock_conversations_api,
|
||||
mock_prompts_api,
|
||||
mock_files_api,
|
||||
):
|
||||
return OpenAIResponsesImpl(
|
||||
inference_api=mock_inference_api,
|
||||
|
|
@ -116,6 +141,8 @@ def openai_responses_impl(
|
|||
vector_io_api=mock_vector_io_api,
|
||||
safety_api=mock_safety_api,
|
||||
conversations_api=mock_conversations_api,
|
||||
prompts_api=mock_prompts_api,
|
||||
files_api=mock_files_api,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -499,7 +526,7 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(ope
|
|||
mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
|
||||
|
||||
|
||||
async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
|
||||
async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api, mock_files_api):
|
||||
"""Test creating an OpenAI response with multiple messages."""
|
||||
# Setup
|
||||
input_messages = [
|
||||
|
|
@ -710,7 +737,7 @@ async def test_create_openai_response_with_instructions(openai_responses_impl, m
|
|||
|
||||
|
||||
async def test_create_openai_response_with_instructions_and_multiple_messages(
|
||||
openai_responses_impl, mock_inference_api
|
||||
openai_responses_impl, mock_inference_api, mock_files_api
|
||||
):
|
||||
# Setup
|
||||
input_messages = [
|
||||
|
|
@ -1242,3 +1269,489 @@ async def test_create_openai_response_with_output_types_as_input(
|
|||
|
||||
assert stored_with_outputs.input == input_with_output_types
|
||||
assert len(stored_with_outputs.input) == 3
|
||||
|
||||
|
||||
async def test_create_openai_response_with_prompt(openai_responses_impl, mock_inference_api, mock_prompts_api):
    """A stored prompt with text variables is rendered into a system message and echoed on the response."""
    # Arrange: stored prompt template plus the request-side reference that fills its variables.
    user_question = "What is the capital of Ireland?"
    model_name = "meta-llama/Llama-3.1-8B-Instruct"
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"

    mock_prompts_api.get_prompt.return_value = Prompt(
        prompt="You are a helpful {{ area_name }} assistant at {{ company_name }}. Always provide accurate information.",
        prompt_id=prompt_id,
        version=1,
        variables=["area_name", "company_name"],
        is_default=True,
    )
    mock_inference_api.openai_chat_completion.return_value = fake_stream()

    prompt_variables = {
        "area_name": OpenAIResponseInputMessageContentText(text="geography"),
        "company_name": OpenAIResponseInputMessageContentText(text="Dummy Company"),
    }
    prompt_ref = OpenAIResponsePrompt(id=prompt_id, version="1", variables=prompt_variables)

    # Act
    response = await openai_responses_impl.create_openai_response(
        input=user_question,
        model=model_name,
        prompt=prompt_ref,
    )

    # Assert: the prompt was looked up with the numeric version and inference was invoked.
    mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1)
    mock_inference_api.openai_chat_completion.assert_called()

    outgoing = mock_inference_api.openai_chat_completion.call_args.args[0].messages
    assert len(outgoing) == 2

    by_role = {
        role: [message for message in outgoing if message.role == role]
        for role in ("system", "user")
    }

    assert len(by_role["system"]) == 1
    rendered = "You are a helpful geography assistant at Dummy Company. Always provide accurate information."
    assert by_role["system"][0].content == rendered

    assert len(by_role["user"]) == 1
    assert by_role["user"][0].content == user_question

    # The response carries the model, status and the prompt reference that was supplied.
    assert response.model == model_name
    assert response.status == "completed"
    assert isinstance(response.prompt, OpenAIResponsePrompt)
    assert response.prompt.id == prompt_id
    assert response.prompt.variables == prompt_ref.variables
    assert response.prompt.version == "1"
|
||||
|
||||
|
||||
async def test_prepend_prompt_successful_without_variables(openai_responses_impl, mock_prompts_api, mock_inference_api):
    """Test prepend_prompt function without variables.

    A prompt that declares no variables must be sent verbatim as the single
    system message ahead of the user's input.
    """
    input_text = "What is the capital of Ireland?"
    model = "meta-llama/Llama-3.1-8B-Instruct"
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
    prompt = Prompt(
        prompt="You are a helpful assistant. Always provide accurate information.",
        prompt_id=prompt_id,
        version=1,
        variables=[],
        is_default=True,
    )

    openai_response_prompt = OpenAIResponsePrompt(id=prompt_id, version="1")

    mock_prompts_api.get_prompt.return_value = prompt
    mock_inference_api.openai_chat_completion.return_value = fake_stream()

    await openai_responses_impl.create_openai_response(
        input=input_text,
        model=model,
        prompt=openai_response_prompt,
    )

    mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1)
    mock_inference_api.openai_chat_completion.assert_called()
    call_args = mock_inference_api.openai_chat_completion.call_args
    sent_messages = call_args.args[0].messages
    assert len(sent_messages) == 2

    system_messages = [msg for msg in sent_messages if msg.role == "system"]
    # Guard the index below: without this, a missing system message would
    # surface as an IndexError instead of a clear assertion failure.
    assert len(system_messages) == 1
    assert system_messages[0].content == "You are a helpful assistant. Always provide accurate information."
|
||||
|
||||
|
||||
async def test_prepend_prompt_invalid_variable(openai_responses_impl, mock_prompts_api):
    """Supplying a variable the stored prompt does not declare makes _prepend_prompt raise ValueError."""
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"

    # The stored prompt only declares "role".
    mock_prompts_api.get_prompt.return_value = Prompt(
        prompt="You are a {{ role }} assistant.",
        prompt_id=prompt_id,
        version=1,
        variables=["role"],
        is_default=True,
    )

    # The request additionally passes "company", which is not in prompt.variables.
    supplied_variables = {
        "role": OpenAIResponseInputMessageContentText(text="helpful"),
        "company": OpenAIResponseInputMessageContentText(text="Dummy Company"),
    }
    prompt_ref = OpenAIResponsePrompt(id=prompt_id, version="1", variables=supplied_variables)

    conversation = [OpenAIUserMessageParam(content="Test prompt")]

    with pytest.raises(ValueError, match="Variable company not found in prompt"):
        await openai_responses_impl._prepend_prompt(conversation, prompt_ref)

    # The prompt lookup happened exactly once, with the integer version.
    mock_prompts_api.get_prompt.assert_called_once_with(prompt_id, 1)
|
||||
|
||||
|
||||
async def test_prepend_prompt_not_found(openai_responses_impl, mock_prompts_api):
    """When the prompts API returns None, _prepend_prompt returns None and leaves the messages alone."""
    prompt_id = "pmpt_nonexistent"
    prompt_ref = OpenAIResponsePrompt(id=prompt_id, version="1")

    # Simulate a missing prompt.
    mock_prompts_api.get_prompt.return_value = None

    conversation = [OpenAIUserMessageParam(content="Test prompt")]
    size_before = len(conversation)

    outcome = await openai_responses_impl._prepend_prompt(conversation, prompt_ref)

    mock_prompts_api.get_prompt.assert_called_once_with(prompt_id, 1)

    # Nothing is returned for a missing prompt ...
    assert outcome is None

    # ... and the message list is left exactly as it was passed in.
    assert len(conversation) == size_before
    assert conversation[0].content == "Test prompt"
|
||||
|
||||
|
||||
async def test_prepend_prompt_variable_substitution(openai_responses_impl, mock_prompts_api):
    """Text variables are substituted at every occurrence, whatever whitespace surrounds the name in the braces."""
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"

    # The template mixes {{name}}, {{ name }}, {{ company}} and {{company}} on purpose.
    mock_prompts_api.get_prompt.return_value = Prompt(
        prompt="Hello {{name}}! You are working at {{ company}}. Your role is {{role}} at {{company}}. Remember, {{ name }}, to be {{ tone }}.",
        prompt_id=prompt_id,
        version=1,
        variables=["name", "company", "role", "tone"],
        is_default=True,
    )

    text_values = {
        "name": OpenAIResponseInputMessageContentText(text="Alice"),
        "company": OpenAIResponseInputMessageContentText(text="Dummy Company"),
        "role": OpenAIResponseInputMessageContentText(text="AI Assistant"),
        "tone": OpenAIResponseInputMessageContentText(text="professional"),
    }
    prompt_ref = OpenAIResponsePrompt(id=prompt_id, version="1", variables=text_values)

    conversation = [OpenAIUserMessageParam(content="Test")]

    await openai_responses_impl._prepend_prompt(conversation, prompt_ref)

    # A system message is inserted in front of the original user message.
    assert len(conversation) == 2
    system_message = conversation[0]
    assert isinstance(system_message, OpenAISystemMessageParam)
    rendered = "Hello Alice! You are working at Dummy Company. Your role is AI Assistant at Dummy Company. Remember, Alice, to be professional."
    assert system_message.content == rendered
|
||||
|
||||
|
||||
async def test_prepend_prompt_with_image_variable(openai_responses_impl, mock_prompts_api, mock_files_api):
    """An image variable given by file_id yields a placeholder in the system message and a trailing user image message."""
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
    image_bytes = b"fake_image_data"

    mock_prompts_api.get_prompt.return_value = Prompt(
        prompt="Analyze this {{product_image}} and describe what you see.",
        prompt_id=prompt_id,
        version=1,
        variables=["product_image"],
        is_default=True,
    )

    # Files API: the content lookup returns an object exposing `.body`; the metadata lookup returns the file record.
    content_response = type("obj", (object,), {"body": image_bytes})()
    mock_files_api.openai_retrieve_file_content.return_value = content_response
    mock_files_api.openai_retrieve_file.return_value = OpenAIFileObject(
        object="file",
        id="file-abc123",
        bytes=len(image_bytes),
        created_at=1234567890,
        expires_at=1234567890,
        filename="product.jpg",
        purpose="assistants",
    )

    image_variable = OpenAIResponseInputMessageContentImage(file_id="file-abc123", detail="high")
    prompt_ref = OpenAIResponsePrompt(
        id=prompt_id,
        version="1",
        variables={"product_image": image_variable},
    )

    conversation = [OpenAIUserMessageParam(content="What do you think?")]

    await openai_responses_impl._prepend_prompt(conversation, prompt_ref)

    assert len(conversation) == 3
    system_message, original_user, media_message = conversation

    # System message carries a textual placeholder for the image.
    assert isinstance(system_message, OpenAISystemMessageParam)
    assert system_message.content == "Analyze this [Image: product_image] and describe what you see."

    # The caller's user message is preserved in the middle.
    assert isinstance(original_user, OpenAIUserMessageParam)
    assert original_user.content == "What do you think?"

    # The image itself arrives as a new user message appended at the end.
    assert isinstance(media_message, OpenAIUserMessageParam)
    assert isinstance(media_message.content, list)
    assert len(media_message.content) == 1

    # It is delivered as a data URL and keeps the requested detail level.
    image_part = media_message.content[0]
    assert isinstance(image_part, OpenAIChatCompletionContentPartImageParam)
    assert image_part.image_url.url.startswith("data:image/")
    assert image_part.image_url.detail == "high"
|
||||
|
||||
|
||||
async def test_prepend_prompt_with_file_variable(openai_responses_impl, mock_prompts_api, mock_files_api):
    """A file variable yields a placeholder in the system message and a trailing user message holding the file."""
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
    pdf_bytes = b"fake_pdf_content"

    mock_prompts_api.get_prompt.return_value = Prompt(
        prompt="Review the document {{contract_file}} and summarize key points.",
        prompt_id=prompt_id,
        version=1,
        variables=["contract_file"],
        is_default=True,
    )

    # Files API: the content lookup returns an object exposing `.body`; the metadata lookup returns the file record.
    content_response = type("obj", (object,), {"body": pdf_bytes})()
    mock_files_api.openai_retrieve_file_content.return_value = content_response
    mock_files_api.openai_retrieve_file.return_value = OpenAIFileObject(
        object="file",
        id="file-contract-789",
        bytes=len(pdf_bytes),
        created_at=1234567890,
        expires_at=1234567890,
        filename="contract.pdf",
        purpose="assistants",
    )

    file_variable = OpenAIResponseInputMessageContentFile(
        file_id="file-contract-789",
        filename="contract.pdf",
    )
    prompt_ref = OpenAIResponsePrompt(
        id=prompt_id,
        version="1",
        variables={"contract_file": file_variable},
    )

    conversation = [OpenAIUserMessageParam(content="Please review this.")]

    await openai_responses_impl._prepend_prompt(conversation, prompt_ref)

    assert len(conversation) == 3
    system_message, original_user, media_message = conversation

    # System message carries a textual placeholder for the file.
    assert isinstance(system_message, OpenAISystemMessageParam)
    assert system_message.content == "Review the document [File: contract_file] and summarize key points."

    # The caller's user message is preserved in the middle.
    assert isinstance(original_user, OpenAIUserMessageParam)
    assert original_user.content == "Please review this."

    # The file itself arrives as a new user message appended at the end.
    assert isinstance(media_message, OpenAIUserMessageParam)
    assert isinstance(media_message.content, list)
    assert len(media_message.content) == 1

    # The file part is inlined as a base64 data URL; the file_id is not forwarded.
    file_part = media_message.content[0]
    assert isinstance(file_part, OpenAIFile)
    assert file_part.file.file_data.startswith("data:application/pdf;base64,")
    assert file_part.file.filename == "contract.pdf"
    assert file_part.file.file_id is None
|
||||
|
||||
|
||||
async def test_prepend_prompt_with_mixed_variables(openai_responses_impl, mock_prompts_api, mock_files_api):
    """Test prepend_prompt with text, image, and file variables mixed together.

    Text variables are substituted inline, image/file variables become
    placeholders in the system message, and the media is appended as one
    extra user message in variable order (image first, then file).
    """
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
    prompt = Prompt(
        prompt="Hello {{name}}! Analyze {{photo}} and review {{document}}. Provide insights for {{company}}.",
        prompt_id=prompt_id,
        version=1,
        variables=["name", "photo", "document", "company"],
        is_default=True,
    )

    # Mock file retrieval for image and file
    mock_image_content = b"fake_image_data"
    mock_file_content = b"fake_doc_content"

    # Single source of truth for the two known files, keyed by file id.
    known_files = {
        "file-photo-123": ("photo.jpg", mock_image_content),
        "file-doc-456": ("doc.pdf", mock_file_content),
    }

    def _lookup(file_id):
        # Fail loudly on an unexpected id instead of handing back None, which
        # would only surface later as an unrelated AttributeError.
        if file_id not in known_files:
            raise AssertionError(f"Unexpected file_id requested from mock files API: {file_id!r}")
        return known_files[file_id]

    async def mock_retrieve_file_content(file_id):
        _, body = _lookup(file_id)
        return type("obj", (object,), {"body": body})()

    mock_files_api.openai_retrieve_file_content.side_effect = mock_retrieve_file_content

    def mock_retrieve_file(file_id):
        filename, body = _lookup(file_id)
        return OpenAIFileObject(
            object="file",
            id=file_id,
            bytes=len(body),
            created_at=1234567890,
            expires_at=1234567890,
            filename=filename,
            purpose="assistants",
        )

    mock_files_api.openai_retrieve_file.side_effect = mock_retrieve_file

    openai_response_prompt = OpenAIResponsePrompt(
        id=prompt_id,
        version="1",
        variables={
            "name": OpenAIResponseInputMessageContentText(text="Alice"),
            "photo": OpenAIResponseInputMessageContentImage(file_id="file-photo-123", detail="auto"),
            "document": OpenAIResponseInputMessageContentFile(file_id="file-doc-456", filename="doc.pdf"),
            "company": OpenAIResponseInputMessageContentText(text="Acme Corp"),
        },
    )

    mock_prompts_api.get_prompt.return_value = prompt

    # Initial messages
    messages = [OpenAIUserMessageParam(content="Here's my question.")]

    # Execute
    await openai_responses_impl._prepend_prompt(messages, openai_response_prompt)

    assert len(messages) == 3

    # Check system message has text and placeholders
    assert isinstance(messages[0], OpenAISystemMessageParam)
    expected_system = "Hello Alice! Analyze [Image: photo] and review [File: document]. Provide insights for Acme Corp."
    assert messages[0].content == expected_system

    # Check original user message is still there
    assert isinstance(messages[1], OpenAIUserMessageParam)
    assert messages[1].content == "Here's my question."

    # Check new user message with media is appended (2 media items)
    assert isinstance(messages[2], OpenAIUserMessageParam)
    assert isinstance(messages[2].content, list)
    assert len(messages[2].content) == 2

    # First part should be image with data URL
    assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam)
    assert messages[2].content[0].image_url.url.startswith("data:image/")

    # Second part should be file with data URL
    assert isinstance(messages[2].content[1], OpenAIFile)
    assert messages[2].content[1].file.file_data.startswith("data:application/pdf;base64,")
    assert messages[2].content[1].file.filename == "doc.pdf"
    assert messages[2].content[1].file.file_id is None
|
||||
|
||||
|
||||
async def test_prepend_prompt_with_image_using_image_url(openai_responses_impl, mock_prompts_api):
    """Test prepend_prompt with image variable using image_url instead of file_id.

    No files API interaction is set up: the provided URL is expected to be
    passed through unchanged on the appended user message.
    """
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
    prompt = Prompt(
        prompt="Describe {{screenshot}}.",
        prompt_id=prompt_id,
        version=1,
        variables=["screenshot"],
        is_default=True,
    )

    openai_response_prompt = OpenAIResponsePrompt(
        id=prompt_id,
        version="1",
        variables={
            "screenshot": OpenAIResponseInputMessageContentImage(
                image_url="https://example.com/screenshot.png",
                detail="low",
            )
        },
    )

    mock_prompts_api.get_prompt.return_value = prompt

    # Initial messages
    messages = [OpenAIUserMessageParam(content="What is this?")]

    # Execute
    await openai_responses_impl._prepend_prompt(messages, openai_response_prompt)

    assert len(messages) == 3

    # Check system message has placeholder
    assert isinstance(messages[0], OpenAISystemMessageParam)
    assert messages[0].content == "Describe [Image: screenshot]."

    # Check original user message is still there
    assert isinstance(messages[1], OpenAIUserMessageParam)
    assert messages[1].content == "What is this?"

    # Check new user message with image is appended
    assert isinstance(messages[2], OpenAIUserMessageParam)
    assert isinstance(messages[2].content, list)
    # Exactly one media part is expected, mirroring the file_id-based image test;
    # this also guards the [0] index below.
    assert len(messages[2].content) == 1

    # Image should use the provided URL
    assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam)
    assert messages[2].content[0].image_url.url == "https://example.com/screenshot.png"
    assert messages[2].content[0].image_url.detail == "low"
|
||||
|
||||
|
||||
async def test_prepend_prompt_image_variable_missing_required_fields(openai_responses_impl, mock_prompts_api):
    """An image variable with neither file_id nor image_url makes _prepend_prompt raise ValueError."""
    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"

    mock_prompts_api.get_prompt.return_value = Prompt(
        prompt="Analyze {{bad_image}}.",
        prompt_id=prompt_id,
        version=1,
        variables=["bad_image"],
        is_default=True,
    )

    # Image content built with no source at all: no file_id, no image_url.
    empty_image = OpenAIResponseInputMessageContentImage()
    prompt_ref = OpenAIResponsePrompt(
        id=prompt_id,
        version="1",
        variables={"bad_image": empty_image},
    )

    conversation = [OpenAIUserMessageParam(content="Test")]

    with pytest.raises(ValueError, match="Image content must have either 'image_url' or 'file_id'"):
        await openai_responses_impl._prepend_prompt(conversation, prompt_ref)
|
||||
|
|
|
|||
|
|
@ -39,6 +39,8 @@ def responses_impl_with_conversations(
|
|||
mock_vector_io_api,
|
||||
mock_conversations_api,
|
||||
mock_safety_api,
|
||||
mock_prompts_api,
|
||||
mock_files_api,
|
||||
):
|
||||
"""Create OpenAIResponsesImpl instance with conversations API."""
|
||||
return OpenAIResponsesImpl(
|
||||
|
|
@ -49,6 +51,8 @@ def responses_impl_with_conversations(
|
|||
vector_io_api=mock_vector_io_api,
|
||||
conversations_api=mock_conversations_api,
|
||||
safety_api=mock_safety_api,
|
||||
prompts_api=mock_prompts_api,
|
||||
files_api=mock_files_api,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
|
||||
|
|
@ -46,6 +48,12 @@ from llama_stack_api.openai_responses import (
|
|||
)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_files_api():
    """Provide an ``AsyncMock`` that stands in for the files API."""
    files_api = AsyncMock()
    return files_api
|
||||
|
||||
|
||||
class TestConvertChatChoiceToResponseMessage:
|
||||
async def test_convert_string_content(self):
|
||||
choice = OpenAIChoice(
|
||||
|
|
@ -78,17 +86,17 @@ class TestConvertChatChoiceToResponseMessage:
|
|||
|
||||
|
||||
class TestConvertResponseContentToChatContent:
|
||||
async def test_convert_string_content(self):
|
||||
result = await convert_response_content_to_chat_content("Simple string")
|
||||
async def test_convert_string_content(self, mock_files_api):
|
||||
result = await convert_response_content_to_chat_content("Simple string", mock_files_api)
|
||||
assert result == "Simple string"
|
||||
|
||||
async def test_convert_text_content_parts(self):
|
||||
async def test_convert_text_content_parts(self, mock_files_api):
|
||||
content = [
|
||||
OpenAIResponseInputMessageContentText(text="First part"),
|
||||
OpenAIResponseOutputMessageContentOutputText(text="Second part"),
|
||||
]
|
||||
|
||||
result = await convert_response_content_to_chat_content(content)
|
||||
result = await convert_response_content_to_chat_content(content, mock_files_api)
|
||||
|
||||
assert len(result) == 2
|
||||
assert isinstance(result[0], OpenAIChatCompletionContentPartTextParam)
|
||||
|
|
@ -96,10 +104,10 @@ class TestConvertResponseContentToChatContent:
|
|||
assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam)
|
||||
assert result[1].text == "Second part"
|
||||
|
||||
async def test_convert_image_content(self):
|
||||
async def test_convert_image_content(self, mock_files_api):
|
||||
content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")]
|
||||
|
||||
result = await convert_response_content_to_chat_content(content)
|
||||
result = await convert_response_content_to_chat_content(content, mock_files_api)
|
||||
|
||||
assert len(result) == 1
|
||||
assert isinstance(result[0], OpenAIChatCompletionContentPartImageParam)
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ def mock_apis():
|
|||
"vector_io_api": AsyncMock(),
|
||||
"conversations_api": AsyncMock(),
|
||||
"safety_api": AsyncMock(),
|
||||
"prompts_api": AsyncMock(),
|
||||
"files_api": AsyncMock(),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,8 @@ def mock_deps():
|
|||
tool_runtime_api = AsyncMock()
|
||||
tool_groups_api = AsyncMock()
|
||||
conversations_api = AsyncMock()
|
||||
prompts_api = AsyncMock()
|
||||
files_api = AsyncMock()
|
||||
|
||||
return {
|
||||
Api.inference: inference_api,
|
||||
|
|
@ -59,6 +61,8 @@ def mock_deps():
|
|||
Api.tool_runtime: tool_runtime_api,
|
||||
Api.tool_groups: tool_groups_api,
|
||||
Api.conversations: conversations_api,
|
||||
Api.prompts: prompts_api,
|
||||
Api.files: files_api,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -144,6 +148,8 @@ class TestGuardrailsFunctionality:
|
|||
vector_io_api=mock_deps[Api.vector_io],
|
||||
safety_api=None, # No Safety API
|
||||
conversations_api=mock_deps[Api.conversations],
|
||||
prompts_api=mock_deps[Api.prompts],
|
||||
files_api=mock_deps[Api.files],
|
||||
)
|
||||
|
||||
# Test with string guardrail
|
||||
|
|
@ -191,6 +197,8 @@ class TestGuardrailsFunctionality:
|
|||
vector_io_api=mock_deps[Api.vector_io],
|
||||
safety_api=None, # No Safety API
|
||||
conversations_api=mock_deps[Api.conversations],
|
||||
prompts_api=mock_deps[Api.prompts],
|
||||
files_api=mock_deps[Api.files],
|
||||
)
|
||||
|
||||
# Should not raise when no guardrails requested
|
||||
|
|
|
|||
|
|
@ -15,7 +15,14 @@ from pydantic import BaseModel, Field
|
|||
from llama_stack.core.request_headers import request_provider_data_context
|
||||
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
|
||||
from llama_stack_api import (
|
||||
Model,
|
||||
ModelType,
|
||||
OpenAIChatCompletionRequestWithExtraBody,
|
||||
OpenAICompletionRequestWithExtraBody,
|
||||
OpenAIEmbeddingsRequestWithExtraBody,
|
||||
OpenAIUserMessageParam,
|
||||
)
|
||||
|
||||
|
||||
class OpenAIMixinImpl(OpenAIMixin):
|
||||
|
|
@ -834,3 +841,96 @@ class TestOpenAIMixinProviderDataApiKey:
|
|||
error_message = str(exc_info.value)
|
||||
assert "test_api_key" in error_message
|
||||
assert "x-llamastack-provider-data" in error_message
|
||||
|
||||
|
||||
class TestOpenAIMixinAllowedModelsInference:
    """Checks that `allowed_models` is enforced when inference requests are made."""

    async def test_inference_with_allowed_models(self, mixin, mock_client_context):
        """Chat completion, completion and embeddings all reach the client for allowed models."""
        mixin.config.allowed_models = ["gpt-4", "text-davinci-003", "text-embedding-ada-002"]

        # Embeddings need a response exposing `.data` and `.usage`.
        embedding_response = MagicMock()
        embedding_response.data = [MagicMock(embedding=[0.1, 0.2, 0.3])]
        embedding_response.usage = MagicMock(prompt_tokens=5, total_tokens=5)

        client = MagicMock()
        client.chat.completions.create = AsyncMock(return_value=MagicMock())
        client.completions.create = AsyncMock(return_value=MagicMock())
        client.embeddings.create = AsyncMock(return_value=embedding_response)

        with mock_client_context(mixin, client):
            # Chat completion
            chat_request = OpenAIChatCompletionRequestWithExtraBody(
                model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
            )
            await mixin.openai_chat_completion(chat_request)
            client.chat.completions.create.assert_called_once()

            # Completion
            completion_request = OpenAICompletionRequestWithExtraBody(model="text-davinci-003", prompt="Hello")
            await mixin.openai_completion(completion_request)
            client.completions.create.assert_called_once()

            # Embeddings
            embeddings_request = OpenAIEmbeddingsRequestWithExtraBody(
                model="text-embedding-ada-002", input="test text"
            )
            await mixin.openai_embeddings(embeddings_request)
            client.embeddings.create.assert_called_once()

    async def test_inference_with_disallowed_models(self, mixin, mock_client_context):
        """Each inference method raises ValueError for a model outside the allow-list and never hits the client."""
        mixin.config.allowed_models = ["gpt-4"]

        client = MagicMock()

        with mock_client_context(mixin, client):
            # Chat completion with a disallowed model
            with pytest.raises(ValueError, match="Model 'gpt-4-turbo' is not in the allowed models list"):
                await mixin.openai_chat_completion(
                    OpenAIChatCompletionRequestWithExtraBody(
                        model="gpt-4-turbo", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
                    )
                )

            # Completion with a disallowed model
            with pytest.raises(ValueError, match="Model 'text-davinci-002' is not in the allowed models list"):
                await mixin.openai_completion(
                    OpenAICompletionRequestWithExtraBody(model="text-davinci-002", prompt="Hello")
                )

            # Embeddings with a disallowed model
            with pytest.raises(ValueError, match="Model 'text-embedding-3-large' is not in the allowed models list"):
                await mixin.openai_embeddings(
                    OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-3-large", input="test text")
                )

        # None of the rejected requests may have been forwarded to the client.
        for endpoint in (client.chat.completions, client.completions, client.embeddings):
            endpoint.create.assert_not_called()

    async def test_inference_with_no_restrictions(self, mixin, mock_client_context):
        """`allowed_models=None` permits any model, whereas an empty list rejects every model."""
        # Part 1: the fixture's config has allowed_models unset, so there is no restriction.
        assert mixin.config.allowed_models is None

        client = MagicMock()
        client.chat.completions.create = AsyncMock(return_value=MagicMock())

        with mock_client_context(mixin, client):
            unrestricted_request = OpenAIChatCompletionRequestWithExtraBody(
                model="any-model", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
            )
            await mixin.openai_chat_completion(unrestricted_request)
            client.chat.completions.create.assert_called_once()

        # Part 2: an empty allow-list blocks all models.
        mixin.config.allowed_models = []
        with mock_client_context(mixin, client):
            with pytest.raises(ValueError, match="Model 'gpt-4' is not in the allowed models list"):
                await mixin.openai_chat_completion(
                    OpenAIChatCompletionRequestWithExtraBody(
                        model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
                    )
                )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue