From 0757d5a9170011f691c955438736726e6b5fd70e Mon Sep 17 00:00:00 2001 From: Ian Miller <75687988+r3v5@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:48:11 +0000 Subject: [PATCH 1/5] feat(responses)!: implement support for OpenAI compatible prompts in Responses API (#3965) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? This PR provides the actual implementation of OpenAI-compatible prompts in the Responses API. It is the follow-up PR with the actual implementation, building on what was introduced in #3942. The need for this functionality was first raised in #3514. > Note: https://github.com/llamastack/llama-stack/pull/3514 is divided into three separate PRs. The current PR is the third of the three. Closes #3321 ## Test Plan Manual testing and CI workflow with added unit tests. Comprehensive manual testing with the new implementation: **Test Prompts with images that contain text in the Responses API:** I used this image for testing purposes: [iphone 17 image](https://github.com/user-attachments/assets/9e2ee821-e394-4bbd-b1c8-d48a3fa315de) 1. Upload an image: ``` curl -X POST http://localhost:8321/v1/files \ -H "Content-Type: multipart/form-data" \ -F "file=@/Users/ianmiller/iphone.jpeg" \ -F "purpose=assistants" ``` `{"object":"file","id":"file-d6d375f238e14f21952cc40246bc8504","bytes":556241,"created_at":1761750049,"expires_at":1793286049,"filename":"iphone.jpeg","purpose":"assistants"}%` 2. Create prompt: ``` curl -X POST http://localhost:8321/v1/prompts \ -H "Content-Type: application/json" \ -d '{ "prompt": "You are a product analysis expert. Analyze the following product:\n\nProduct Name: {{product_name}}\nDescription: {{description}}\n\nImage: {{product_photo}}\n\nProvide a detailed analysis including quality assessment, target audience, and pricing recommendations.", "variables": ["product_name", "description", "product_photo"] }' ``` `{"prompt":"You are a product analysis expert. Analyze the following product:\n\nProduct Name: {{product_name}}\nDescription: {{description}}\n\nImage: {{product_photo}}\n\nProvide a detailed analysis including quality assessment, target audience, and pricing recommendations.","version":1,"prompt_id":"pmpt_7be2208cb82cdbc35356354dae1f335d1e9b7baeca21ea62","variables":["product_name","description","product_photo"],"is_default":false}%` 3. 
Create response: ``` curl -X POST http://localhost:8321/v1/responses \ -H "Accept: application/json, text/event-stream" \ -H "Content-Type: application/json" \ -d '{ "input": "Please analyze this product", "model": "openai/gpt-4o", "store": true, "prompt": { "id": "pmpt_7be2208cb82cdbc35356354dae1f335d1e9b7baeca21ea62", "version": "1", "variables": { "product_name": { "type": "input_text", "text": "iPhone 17 Pro Max" }, "product_photo": { "type": "input_image", "file_id": "file-d6d375f238e14f21952cc40246bc8504", "detail": "high" } } } }' ``` `{"created_at":1761750427,"error":null,"id":"resp_f897f914-e3b8-4783-8223-3ed0d32fcbc6","model":"openai/gpt-4o","object":"response","output":[{"content":[{"text":"### Product Analysis: iPhone 17 Pro Max\n\n**Quality Assessment:**\n\n- **Display & Design:**\n - The 6.9-inch display is large, ideal for streaming and productivity.\n - Anti-reflective technology and 120Hz refresh rate enhance viewing experience, providing smoother visuals and reducing glare.\n - Titanium frame suggests a premium build, offering durability and a sleek appearance.\n\n- **Performance:**\n - The Apple A19 Pro chip promises significant performance improvements, likely leading to faster processing and efficient multitasking.\n - 12GB RAM is substantial for a smartphone, ensuring smooth operation for demanding apps and games.\n\n- **Camera System:**\n - The triple 48MP camera setup (wide, ultra-wide, telephoto) is designed for versatile photography needs, capturing high-resolution photos and videos.\n - The 24MP front camera will appeal to selfie enthusiasts and content creators needing quality front-facing shots.\n\n- **Connectivity:**\n - Wi-Fi 7 support indicates future-proof wireless capabilities, providing faster and more reliable internet connectivity.\n\n**Target Audience:**\n\n- **Tech Enthusiasts:** Individuals interested in cutting-edge technology and performance.\n- **Content Creators:** Users who need a robust camera system for photo and video production.\n- **Luxury Consumers:** Those who prefer premium materials and top-of-the-line specs.\n- **Professionals:** Users who require efficient multitasking and productivity features.\n\n**Pricing Recommendations:**\n\n- Given the premium specifications, a higher price point is expected. 
Consider pricing competitively within the high-end smartphone market while justifying cost through unique features like the titanium frame and advanced connectivity options.\n- Positioning around the $1,200 to $1,500 range would align with expectations for top-tier devices, catering to its target audience while ensuring profitability.\n\nOverall, the iPhone 17 Pro Max showcases a blend of innovative features and premium design, aimed at users seeking high performance and superior aesthetics.","type":"output_text","annotations":[]}],"role":"assistant","type":"message","id":"msg_66f4d844-4d9e-4102-80fc-eb75b34b6dbd","status":"completed"}],"parallel_tool_calls":false,"previous_response_id":null,"prompt":{"id":"pmpt_7be2208cb82cdbc35356354dae1f335d1e9b7baeca21ea62","variables":{"product_name":{"text":"iPhone 17 Pro Max","type":"input_text"},"product_photo":{"detail":"high","type":"input_image","file_id":"file-d6d375f238e14f21952cc40246bc8504","image_url":null}},"version":"1"},"status":"completed","temperature":null,"text":{"format":{"type":"text"}},"top_p":null,"tools":[],"truncation":null,"usage":{"input_tokens":830,"output_tokens":394,"total_tokens":1224,"input_tokens_details":{"cached_tokens":0},"output_tokens_details":{"reasoning_tokens":0}},"instructions":null}%` **Test Prompts with PDF files in Responses API:** I used this PDF file for testing purposes: [invoicesample.pdf](https://github.com/user-attachments/files/22958943/invoicesample.pdf) 1. Upload PDF: ``` curl -X POST http://localhost:8321/v1/files \ -H "Content-Type: multipart/form-data" \ -F "file=@/Users/ianmiller/invoicesample.pdf" \ -F "purpose=assistants" ``` `{"object":"file","id":"file-7fbb1043a4bb468cab60ffe4b8631d8e","bytes":149568,"created_at":1761750730,"expires_at":1793286730,"filename":"invoicesample.pdf","purpose":"assistants"}%` 2. Create prompt: ``` curl -X POST http://localhost:8321/v1/prompts \ -H "Content-Type: application/json" \ -d '{ "prompt": "You are an accounting and financial analysis expert. Analyze the following invoice document:\n\nInvoice Document: {{invoice_doc}}\n\nProvide a comprehensive analysis", "variables": ["invoice_doc"] }' ``` `{"prompt":"You are an accounting and financial analysis expert. Analyze the following invoice document:\n\nInvoice Document: {{invoice_doc}}\n\nProvide a comprehensive analysis","version":1,"prompt_id":"pmpt_72e2a184a86f32a568b6afb5455dca5c16bf3cc3f80092dc","variables":["invoice_doc"],"is_default":false}%` 3. 
Create response: ``` curl -X POST http://localhost:8321/v1/responses \ -H "Content-Type: application/json" \ -d '{ "input": "Please provide a detailed analysis of this invoice", "model": "openai/gpt-4o", "store": true, "prompt": { "id": "pmpt_72e2a184a86f32a568b6afb5455dca5c16bf3cc3f80092dc", "version": "1", "variables": { "invoice_doc": { "type": "input_file", "file_id": "file-7fbb1043a4bb468cab60ffe4b8631d8e", "filename": "invoicesample.pdf" } } } }' ``` `{"created_at":1761750881,"error":null,"id":"resp_da866913-db06-4702-8000-174daed9dbbb","model":"openai/gpt-4o","object":"response","output":[{"content":[{"text":"Here's a detailed analysis of the invoice provided:\n\n### Seller Information\n- **Business Name:** The invoice features a logo with \"Sunny Farm\" indicating the business identity.\n- **Address:** 123 Somewhere St, Melbourne VIC 3000\n- **Contact Information:** Phone number (03) 1234 5678\n\n### Buyer Information\n- **Name:** Denny Gunawan\n- **Address:** 221 Queen St, Melbourne VIC 3000\n\n### Transaction Details\n- **Invoice Number:** #20130304\n- **Date of Transaction:** Not explicitly mentioned, likely inferred from the invoice number or needs clarification.\n\n### Items Purchased\n1. **Apple**\n - Price: $5.00/kg\n - Quantity: 1 kg\n - Subtotal: $5.00\n\n2. **Orange**\n - Price: $1.99/kg\n - Quantity: 2 kg\n - Subtotal: $3.98\n\n3. **Watermelon**\n - Price: $1.69/kg\n - Quantity: 3 kg\n - Subtotal: $5.07\n\n4. **Mango**\n - Price: $9.56/kg\n - Quantity: 2 kg\n - Subtotal: $19.12\n\n5. **Peach**\n - Price: $2.99/kg\n - Quantity: 1 kg\n - Subtotal: $2.99\n\n### Financial Summary\n- **Subtotal for Items:** $36.00\n- **GST (Goods and Services Tax):** 10% of $36.00, which amounts to $3.60\n- **Total Amount Due:** $39.60\n\n### Notes\n- The invoice includes a placeholder text: \"Lorem ipsum dolor sit amet...\" which is typically used as filler text. This might indicate a section intended for terms, conditions, or additional notes that haven’t been completed.\n\n### Visual and Design Elements\n- The invoice uses a simple and clear layout, featuring the business logo prominently and stating essential information such as contact and transaction details in a structured manner.\n- There is a \"Thank You\" note at the bottom, which adds a professional and courteous touch.\n\n### Considerations\n- Ensure the date of the transaction is clear if there are any future references needed.\n- Replace filler text with relevant terms and conditions or any special instructions pertaining to the transaction.\n\nThis invoice appears standard, representing a small business transaction with clearly itemized products and applicable taxes.","type":"output_text","annotations":[]}],"role":"assistant","type":"message","id":"msg_39f3b39e-4684-4444-8e4d-e7395f88c9dc","status":"completed"}],"parallel_tool_calls":false,"previous_response_id":null,"prompt":{"id":"pmpt_72e2a184a86f32a568b6afb5455dca5c16bf3cc3f80092dc","variables":{"invoice_doc":{"type":"input_file","file_data":null,"file_id":"file-7fbb1043a4bb468cab60ffe4b8631d8e","file_url":null,"filename":"invoicesample.pdf"}},"version":"1"},"status":"completed","temperature":null,"text":{"format":{"type":"text"}},"top_p":null,"tools":[],"truncation":null,"usage":{"input_tokens":529,"output_tokens":513,"total_tokens":1042,"input_tokens_details":{"cached_tokens":0},"output_tokens_details":{"reasoning_tokens":0}},"instructions":null}%` **Test simple text Prompt in Responses API:** 1. 
Create prompt: ``` curl -X POST http://localhost:8321/v1/prompts \ -H "Content-Type: application/json" \ -d '{ "prompt": "Hello {{name}}! You are working at {{company}}. Your role is {{role}} at {{company}}. Remember, {{name}}, to be {{tone}}.", "variables": ["name", "company", "role", "tone"] }' ``` `{"prompt":"Hello {{name}}! You are working at {{company}}. Your role is {{role}} at {{company}}. Remember, {{name}}, to be {{tone}}.","version":1,"prompt_id":"pmpt_f340a3164a4f65d975c774ffe38ea42d15e7ce4a835919ef","variables":["name","company","role","tone"],"is_default":false}%` 2. Create response: ``` curl -X POST http://localhost:8321/v1/responses \ -H "Accept: application/json, text/event-stream" \ -H "Content-Type: application/json" \ -d '{ "input": "What is the capital of Ireland?", "model": "openai/gpt-4o", "store": true, "prompt": { "id": "pmpt_f340a3164a4f65d975c774ffe38ea42d15e7ce4a835919ef", "version": "1", "variables": { "name": { "type": "input_text", "text": "Alice" }, "company": { "type": "input_text", "text": "Dummy Company" }, "role": { "type": "input_text", "text": "Geography expert" }, "tone": { "type": "input_text", "text": "professional and helpful" } } } }' ``` `{"created_at":1761751097,"error":null,"id":"resp_1b037b95-d9ae-4ad0-8e76-d953897ecaef","model":"openai/gpt-4o","object":"response","output":[{"content":[{"text":"The capital of Ireland is Dublin.","type":"output_text","annotations":[]}],"role":"assistant","type":"message","id":"msg_8e7c72b6-2aa2-4da6-8e57-da4e12fa3ce2","status":"completed"}],"parallel_tool_calls":false,"previous_response_id":null,"prompt":{"id":"pmpt_f340a3164a4f65d975c774ffe38ea42d15e7ce4a835919ef","variables":{"name":{"text":"Alice","type":"input_text"},"company":{"text":"Dummy Company","type":"input_text"},"role":{"text":"Geography expert","type":"input_text"},"tone":{"text":"professional and helpful","type":"input_text"}},"version":"1"},"status":"completed","temperature":null,"text":{"format":{"type":"text"}},"top_p":null,"tools":[],"truncation":null,"usage":{"input_tokens":47,"output_tokens":7,"total_tokens":54,"input_tokens_details":{"cached_tokens":0},"output_tokens_details":{"reasoning_tokens":0}},"instructions":null}%` --- .../inline/agents/meta_reference/__init__.py | 4 +- .../inline/agents/meta_reference/agents.py | 9 +- .../responses/openai_responses.py | 98 +++- .../agents/meta_reference/responses/utils.py | 123 ++++- src/llama_stack/providers/registry/agents.py | 2 + .../meta_reference/test_openai_responses.py | 517 +++++++++++++++++- .../test_openai_responses_conversations.py | 4 + .../test_response_conversion_utils.py | 20 +- .../test_responses_safety_utils.py | 2 + .../meta_reference/test_safety_optional.py | 8 + 10 files changed, 770 insertions(+), 17 deletions(-) diff --git a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py index b3fb814e3..9683baf00 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -27,8 +27,10 @@ async def get_provider_impl( deps[Api.tool_runtime], deps[Api.tool_groups], deps[Api.conversations], - policy, + deps[Api.prompts], + deps[Api.files], telemetry_enabled, + policy, ) await impl.initialize() return impl diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 2d5aa6c04..ca419a51a 100644 --- 
a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -12,6 +12,7 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore from llama_stack_api import ( Agents, Conversations, + Files, Inference, ListOpenAIResponseInputItem, ListOpenAIResponseObject, @@ -22,6 +23,7 @@ from llama_stack_api import ( OpenAIResponsePrompt, OpenAIResponseText, Order, + Prompts, ResponseGuardrail, Safety, ToolGroups, @@ -45,6 +47,8 @@ class MetaReferenceAgentsImpl(Agents): tool_runtime_api: ToolRuntime, tool_groups_api: ToolGroups, conversations_api: Conversations, + prompts_api: Prompts, + files_api: Files, policy: list[AccessRule], telemetry_enabled: bool = False, ): @@ -56,7 +60,8 @@ class MetaReferenceAgentsImpl(Agents): self.tool_groups_api = tool_groups_api self.conversations_api = conversations_api self.telemetry_enabled = telemetry_enabled - + self.prompts_api = prompts_api + self.files_api = files_api self.in_memory_store = InmemoryKVStoreImpl() self.openai_responses_impl: OpenAIResponsesImpl | None = None self.policy = policy @@ -73,6 +78,8 @@ class MetaReferenceAgentsImpl(Agents): vector_io_api=self.vector_io_api, safety_api=self.safety_api, conversations_api=self.conversations_api, + prompts_api=self.prompts_api, + files_api=self.files_api, ) async def shutdown(self) -> None: diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 11bfb1417..c8282df69 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import re import time import uuid from collections.abc import AsyncIterator @@ -18,13 +19,17 @@ from llama_stack.providers.utils.responses.responses_store import ( from llama_stack_api import ( ConversationItem, Conversations, + Files, Inference, InvalidConversationIdError, ListOpenAIResponseInputItem, ListOpenAIResponseObject, + OpenAIChatCompletionContentPartParam, OpenAIDeleteResponseObject, OpenAIMessageParam, OpenAIResponseInput, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentText, OpenAIResponseInputTool, OpenAIResponseMessage, @@ -34,7 +39,9 @@ from llama_stack_api import ( OpenAIResponseText, OpenAIResponseTextFormat, OpenAISystemMessageParam, + OpenAIUserMessageParam, Order, + Prompts, ResponseGuardrailSpec, Safety, ToolGroups, @@ -46,6 +53,7 @@ from .streaming import StreamingResponseOrchestrator from .tool_executor import ToolExecutor from .types import ChatCompletionContext, ToolContext from .utils import ( + convert_response_content_to_chat_content, convert_response_input_to_chat_messages, convert_response_text_to_chat_response_format, extract_guardrail_ids, @@ -69,6 +77,8 @@ class OpenAIResponsesImpl: vector_io_api: VectorIO, # VectorIO safety_api: Safety | None, conversations_api: Conversations, + prompts_api: Prompts, + files_api: Files, ): self.inference_api = inference_api self.tool_groups_api = tool_groups_api @@ -82,6 +92,8 @@ class OpenAIResponsesImpl: tool_runtime_api=tool_runtime_api, vector_io_api=vector_io_api, ) + self.prompts_api = prompts_api + self.files_api = files_api async def _prepend_previous_response( self, @@ -122,11 +134,13 @@ class OpenAIResponsesImpl: # Use stored messages directly and convert only new input message_adapter = TypeAdapter(list[OpenAIMessageParam]) messages = message_adapter.validate_python(previous_response.messages) - new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages) + new_messages = await convert_response_input_to_chat_messages( + input, previous_messages=messages, files_api=self.files_api + ) messages.extend(new_messages) else: # Backward compatibility: reconstruct from inputs - messages = await convert_response_input_to_chat_messages(all_input) + messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api) tool_context.recover_tools_from_previous_response(previous_response) elif conversation is not None: @@ -138,7 +152,7 @@ class OpenAIResponsesImpl: all_input = input if not conversation_items.data: # First turn - just convert the new input - messages = await convert_response_input_to_chat_messages(input) + messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api) else: if not stored_messages: all_input = conversation_items.data @@ -154,14 +168,82 @@ class OpenAIResponsesImpl: all_input = input messages = stored_messages or [] - new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages) + new_messages = await convert_response_input_to_chat_messages( + all_input, previous_messages=messages, files_api=self.files_api + ) messages.extend(new_messages) else: all_input = input - messages = await convert_response_input_to_chat_messages(all_input) + messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api) return all_input, messages, tool_context + async def _prepend_prompt( + self, + messages: list[OpenAIMessageParam], + openai_response_prompt: OpenAIResponsePrompt | None, + ) -> None: 
+ """Prepend prompt template to messages, resolving text/image/file variables. + + :param messages: List of OpenAIMessageParam objects + :param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables + :returns: string of utf-8 characters + """ + if not openai_response_prompt or not openai_response_prompt.id: + return + + prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None + cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version) + + if not cur_prompt or not cur_prompt.prompt: + return + + cur_prompt_text = cur_prompt.prompt + cur_prompt_variables = cur_prompt.variables + + if not openai_response_prompt.variables: + messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text)) + return + + # Validate that all provided variables exist in the prompt + for name in openai_response_prompt.variables.keys(): + if name not in cur_prompt_variables: + raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}") + + # Separate text and media variables + text_substitutions = {} + media_content_parts: list[OpenAIChatCompletionContentPartParam] = [] + + for name, value in openai_response_prompt.variables.items(): + # Text variable found + if isinstance(value, OpenAIResponseInputMessageContentText): + text_substitutions[name] = value.text + + # Media variable found + elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile): + converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api) + if isinstance(converted_parts, list): + media_content_parts.extend(converted_parts) + + # Eg: {{product_photo}} becomes "[Image: product_photo]" + # This gives the model textual context about what media exists in the prompt + var_type = value.type.replace("input_", "").replace("_", " ").title() + text_substitutions[name] = f"[{var_type}: {name}]" + + def replace_variable(match: re.Match[str]) -> str: + var_name = match.group(1).strip() + return str(text_substitutions.get(var_name, match.group(0))) + + pattern = r"\{\{\s*(\w+)\s*\}\}" + processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text) + + # Insert system message with resolved text + messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text)) + + # If we have media, create a new user message because allows to ingest images and files + if media_content_parts: + messages.append(OpenAIUserMessageParam(content=media_content_parts)) + async def get_openai_response( self, response_id: str, @@ -297,6 +379,7 @@ class OpenAIResponsesImpl: input=input, conversation=conversation, model=model, + prompt=prompt, instructions=instructions, previous_response_id=previous_response_id, store=store, @@ -350,6 +433,7 @@ class OpenAIResponsesImpl: instructions: str | None = None, previous_response_id: str | None = None, conversation: str | None = None, + prompt: OpenAIResponsePrompt | None = None, store: bool | None = True, temperature: float | None = None, text: OpenAIResponseText | None = None, @@ -372,6 +456,9 @@ class OpenAIResponsesImpl: if instructions: messages.insert(0, OpenAISystemMessageParam(content=instructions)) + # Prepend reusable prompt (if provided) + await self._prepend_prompt(messages, prompt) + # Structured outputs response_format = await convert_response_text_to_chat_response_format(text) @@ -394,6 +481,7 @@ class OpenAIResponsesImpl: ctx=ctx, response_id=response_id, created_at=created_at, + prompt=prompt, text=text, 
max_infer_iters=max_infer_iters, parallel_tool_calls=parallel_tool_calls, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py index 25460bcfe..7bbf6bd30 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py @@ -5,11 +5,14 @@ # the root directory of this source tree. import asyncio +import base64 +import mimetypes import re import uuid from collections.abc import Sequence from llama_stack_api import ( + Files, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartParam, @@ -18,6 +21,8 @@ from llama_stack_api import ( OpenAIChatCompletionToolCallFunction, OpenAIChoice, OpenAIDeveloperMessageParam, + OpenAIFile, + OpenAIFileFile, OpenAIImageURL, OpenAIJSONSchema, OpenAIMessageParam, @@ -29,6 +34,7 @@ from llama_stack_api import ( OpenAIResponseInput, OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentFile, OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentText, OpenAIResponseInputTool, @@ -37,9 +43,11 @@ from llama_stack_api import ( OpenAIResponseMessage, OpenAIResponseOutputMessageContent, OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAISystemMessageParam, OpenAIToolMessageParam, @@ -49,6 +57,46 @@ from llama_stack_api import ( ) + +async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes: + """ + Extract raw bytes from file using the Files API. + + :param file_id: The file identifier (e.g., "file-abc123") + :param files_api: Files API instance + :returns: Raw file content as bytes + :raises: ValueError if file cannot be retrieved + """ + try: + response = await files_api.openai_retrieve_file_content(file_id) + return bytes(response.body) + except Exception as e: + raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e + + +def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str: + """ + Converts raw binary bytes into a safe ASCII text representation for URLs + + :param raw_bytes: the actual bytes that represent the file content + :returns: base64-encoded ASCII string + """ + return base64.b64encode(raw_bytes).decode("utf-8") + + +def construct_data_url(ascii_text: str, mime_type: str | None) -> str: + """ + Construct a data URL with the base64-encoded data inside + + :param ascii_text: ASCII content + :param mime_type: MIME type of file + :returns: data url string (eg. 
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...) + """ + if not mime_type: + mime_type = "application/octet-stream" + + return f"data:{mime_type};base64,{ascii_text}" + + async def convert_chat_choice_to_response_message( choice: OpenAIChoice, citation_files: dict[str, str] | None = None, @@ -78,11 +126,15 @@ async def convert_chat_choice_to_response_message( async def convert_response_content_to_chat_content( content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent], + files_api: Files | None, ) -> str | list[OpenAIChatCompletionContentPartParam]: """ Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts. The content schemas of each API look similar, but are not exactly the same. + + :param content: The content to convert + :param files_api: Files API for resolving file_id to raw file content (required if content contains files/images) """ if isinstance(content, str): return content @@ -95,9 +147,68 @@ async def convert_response_content_to_chat_content( elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) elif isinstance(content_part, OpenAIResponseInputMessageContentImage): + detail = content_part.detail + image_mime_type = None if content_part.image_url: - image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) + image_url = OpenAIImageURL(url=content_part.image_url, detail=detail) converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) + elif content_part.file_id: + if files_api is None: + raise ValueError("file_ids are not supported by this implementation of the Stack") + image_file_response = await files_api.openai_retrieve_file(content_part.file_id) + if image_file_response.filename: + image_mime_type, _ = mimetypes.guess_type(image_file_response.filename) + raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api) + ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes) + image_data_url = construct_data_url(ascii_text, image_mime_type) + image_url = OpenAIImageURL(url=image_data_url, detail=detail) + converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) + else: + raise ValueError( + f"Image content must have either 'image_url' or 'file_id'. " + f"Got image_url={content_part.image_url}, file_id={content_part.file_id}" + ) + elif isinstance(content_part, OpenAIResponseInputMessageContentFile): + resolved_file_data = None + file_data = content_part.file_data + file_id = content_part.file_id + file_url = content_part.file_url + filename = content_part.filename + file_mime_type = None + if not any([file_data, file_id, file_url]): + raise ValueError( + f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. 
" + f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}" + ) + if file_id: + if files_api is None: + raise ValueError("file_ids are not supported by this implementation of the Stack") + + file_response = await files_api.openai_retrieve_file(file_id) + if not filename: + filename = file_response.filename + file_mime_type, _ = mimetypes.guess_type(file_response.filename) + raw_file_bytes = await extract_bytes_from_file(file_id, files_api) + ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes) + resolved_file_data = construct_data_url(ascii_text, file_mime_type) + elif file_data: + if file_data.startswith("data:"): + resolved_file_data = file_data + else: + # Raw base64 data, wrap in data URL format + if filename: + file_mime_type, _ = mimetypes.guess_type(filename) + resolved_file_data = construct_data_url(file_data, file_mime_type) + elif file_url: + resolved_file_data = file_url + converted_parts.append( + OpenAIFile( + file=OpenAIFileFile( + file_data=resolved_file_data, + filename=filename, + ) + ) + ) elif isinstance(content_part, str): converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) else: @@ -110,12 +221,14 @@ async def convert_response_content_to_chat_content( async def convert_response_input_to_chat_messages( input: str | list[OpenAIResponseInput], previous_messages: list[OpenAIMessageParam] | None = None, + files_api: Files | None = None, ) -> list[OpenAIMessageParam]: """ Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. :param input: The input to convert :param previous_messages: Optional previous messages to check for function_call references + :param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content) """ messages: list[OpenAIMessageParam] = [] if isinstance(input, list): @@ -169,6 +282,12 @@ async def convert_response_input_to_chat_messages( elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools): # the tool list will be handled separately pass + elif isinstance( + input_item, + OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall, + ): + # these tool calls are tracked internally but not converted to chat messages + pass elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance( input_item, OpenAIResponseMCPApprovalResponse ): @@ -176,7 +295,7 @@ async def convert_response_input_to_chat_messages( pass elif isinstance(input_item, OpenAIResponseMessage): # Narrow type to OpenAIResponseMessage which has content and role attributes - content = await convert_response_content_to_chat_content(input_item.content) + content = await convert_response_content_to_chat_content(input_item.content, files_api) message_type = await get_message_type_by_role(input_item.role) if message_type is None: raise ValueError( diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py index e85be99d6..22bb45faf 100644 --- a/src/llama_stack/providers/registry/agents.py +++ b/src/llama_stack/providers/registry/agents.py @@ -34,6 +34,8 @@ def available_providers() -> list[ProviderSpec]: Api.tool_runtime, Api.tool_groups, Api.conversations, + Api.prompts, + Api.files, ], optional_api_dependencies=[ Api.safety, diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 256df6baf..97bccbfe4 100644 --- 
a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -25,6 +25,13 @@ from llama_stack.providers.utils.responses.responses_store import ( ResponsesStore, _OpenAIResponseObjectWithInputAndMessages, ) +from llama_stack_api import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIFile, + OpenAIFileObject, + OpenAISystemMessageParam, + Prompt, +) from llama_stack_api.agents import Order from llama_stack_api.inference import ( OpenAIAssistantMessageParam, @@ -38,6 +45,8 @@ from llama_stack_api.inference import ( ) from llama_stack_api.openai_responses import ( ListOpenAIResponseInputItem, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentText, OpenAIResponseInputToolFunction, OpenAIResponseInputToolMCP, @@ -47,6 +56,7 @@ from llama_stack_api.openai_responses import ( OpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponsePrompt, OpenAIResponseText, OpenAIResponseTextFormat, WebSearchToolTypes, @@ -98,6 +108,19 @@ def mock_safety_api(): return safety_api +@pytest.fixture +def mock_prompts_api(): + prompts_api = AsyncMock() + return prompts_api + + +@pytest.fixture +def mock_files_api(): + """Mock files API for testing.""" + files_api = AsyncMock() + return files_api + + @pytest.fixture def openai_responses_impl( mock_inference_api, @@ -107,6 +130,8 @@ def openai_responses_impl( mock_vector_io_api, mock_safety_api, mock_conversations_api, + mock_prompts_api, + mock_files_api, ): return OpenAIResponsesImpl( inference_api=mock_inference_api, @@ -116,6 +141,8 @@ def openai_responses_impl( vector_io_api=mock_vector_io_api, safety_api=mock_safety_api, conversations_api=mock_conversations_api, + prompts_api=mock_prompts_api, + files_api=mock_files_api, ) @@ -499,7 +526,7 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(ope mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall() -async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api): +async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api, mock_files_api): """Test creating an OpenAI response with multiple messages.""" # Setup input_messages = [ @@ -710,7 +737,7 @@ async def test_create_openai_response_with_instructions(openai_responses_impl, m async def test_create_openai_response_with_instructions_and_multiple_messages( - openai_responses_impl, mock_inference_api + openai_responses_impl, mock_inference_api, mock_files_api ): # Setup input_messages = [ @@ -1242,3 +1269,489 @@ async def test_create_openai_response_with_output_types_as_input( assert stored_with_outputs.input == input_with_output_types assert len(stored_with_outputs.input) == 3 + + +async def test_create_openai_response_with_prompt(openai_responses_impl, mock_inference_api, mock_prompts_api): + """Test creating an OpenAI response with a prompt.""" + input_text = "What is the capital of Ireland?" + model = "meta-llama/Llama-3.1-8B-Instruct" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="You are a helpful {{ area_name }} assistant at {{ company_name }}. 
Always provide accurate information.", + prompt_id=prompt_id, + version=1, + variables=["area_name", "company_name"], + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "area_name": OpenAIResponseInputMessageContentText(text="geography"), + "company_name": OpenAIResponseInputMessageContentText(text="Dummy Company"), + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + prompt=openai_response_prompt, + ) + + mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1) + mock_inference_api.openai_chat_completion.assert_called() + call_args = mock_inference_api.openai_chat_completion.call_args + sent_messages = call_args.args[0].messages + assert len(sent_messages) == 2 + + system_messages = [msg for msg in sent_messages if msg.role == "system"] + assert len(system_messages) == 1 + assert ( + system_messages[0].content + == "You are a helpful geography assistant at Dummy Company. Always provide accurate information." + ) + + user_messages = [msg for msg in sent_messages if msg.role == "user"] + assert len(user_messages) == 1 + assert user_messages[0].content == input_text + + assert result.model == model + assert result.status == "completed" + assert isinstance(result.prompt, OpenAIResponsePrompt) + assert result.prompt.id == prompt_id + assert result.prompt.variables == openai_response_prompt.variables + assert result.prompt.version == "1" + + +async def test_prepend_prompt_successful_without_variables(openai_responses_impl, mock_prompts_api, mock_inference_api): + """Test prepend_prompt function without variables.""" + input_text = "What is the capital of Ireland?" + model = "meta-llama/Llama-3.1-8B-Instruct" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="You are a helpful assistant. Always provide accurate information.", + prompt_id=prompt_id, + version=1, + variables=[], + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt(id=prompt_id, version="1") + + mock_prompts_api.get_prompt.return_value = prompt + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + prompt=openai_response_prompt, + ) + + mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1) + mock_inference_api.openai_chat_completion.assert_called() + call_args = mock_inference_api.openai_chat_completion.call_args + sent_messages = call_args.args[0].messages + assert len(sent_messages) == 2 + system_messages = [msg for msg in sent_messages if msg.role == "system"] + assert system_messages[0].content == "You are a helpful assistant. Always provide accurate information." 
+ + +async def test_prepend_prompt_invalid_variable(openai_responses_impl, mock_prompts_api): + """Test error handling in prepend_prompt function when prompt parameters contain invalid variables.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="You are a {{ role }} assistant.", + prompt_id=prompt_id, + version=1, + variables=["role"], # Only "role" is valid + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "role": OpenAIResponseInputMessageContentText(text="helpful"), + "company": OpenAIResponseInputMessageContentText( + text="Dummy Company" + ), # company is not in prompt.variables + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="Test prompt")] + + # Execute - should raise ValueError for invalid variable + with pytest.raises(ValueError, match="Variable company not found in prompt"): + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + # Verify + mock_prompts_api.get_prompt.assert_called_once_with(prompt_id, 1) + + +async def test_prepend_prompt_not_found(openai_responses_impl, mock_prompts_api): + """Test prepend_prompt function when prompt is not found.""" + prompt_id = "pmpt_nonexistent" + openai_response_prompt = OpenAIResponsePrompt(id=prompt_id, version="1") + + mock_prompts_api.get_prompt.return_value = None # Prompt not found + + # Initial messages + messages = [OpenAIUserMessageParam(content="Test prompt")] + initial_length = len(messages) + + # Execute + result = await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + # Verify + mock_prompts_api.get_prompt.assert_called_once_with(prompt_id, 1) + + # Should return None when prompt not found + assert result is None + + # Messages should not be modified + assert len(messages) == initial_length + assert messages[0].content == "Test prompt" + + +async def test_prepend_prompt_variable_substitution(openai_responses_impl, mock_prompts_api): + """Test complex variable substitution with multiple occurrences and special characters in prepend_prompt function.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + + # Support all whitespace variations: {{name}}, {{ name }}, {{ name}}, {{name }}, etc. + prompt = Prompt( + prompt="Hello {{name}}! You are working at {{ company}}. Your role is {{role}} at {{company}}. Remember, {{ name }}, to be {{ tone }}.", + prompt_id=prompt_id, + version=1, + variables=["name", "company", "role", "tone"], + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "name": OpenAIResponseInputMessageContentText(text="Alice"), + "company": OpenAIResponseInputMessageContentText(text="Dummy Company"), + "role": OpenAIResponseInputMessageContentText(text="AI Assistant"), + "tone": OpenAIResponseInputMessageContentText(text="professional"), + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="Test")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + # Verify + assert len(messages) == 2 + assert isinstance(messages[0], OpenAISystemMessageParam) + expected_content = "Hello Alice! You are working at Dummy Company. Your role is AI Assistant at Dummy Company. Remember, Alice, to be professional." 
+ assert messages[0].content == expected_content + + +async def test_prepend_prompt_with_image_variable(openai_responses_impl, mock_prompts_api, mock_files_api): + """Test prepend_prompt with image variable - should create placeholder in system message and append image as separate user message.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Analyze this {{product_image}} and describe what you see.", + prompt_id=prompt_id, + version=1, + variables=["product_image"], + is_default=True, + ) + + # Mock file content and file metadata + mock_file_content = b"fake_image_data" + mock_files_api.openai_retrieve_file_content.return_value = type("obj", (object,), {"body": mock_file_content})() + mock_files_api.openai_retrieve_file.return_value = OpenAIFileObject( + object="file", + id="file-abc123", + bytes=len(mock_file_content), + created_at=1234567890, + expires_at=1234567890, + filename="product.jpg", + purpose="assistants", + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "product_image": OpenAIResponseInputMessageContentImage( + file_id="file-abc123", + detail="high", + ) + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="What do you think?")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + assert len(messages) == 3 + + # Check system message has placeholder + assert isinstance(messages[0], OpenAISystemMessageParam) + assert messages[0].content == "Analyze this [Image: product_image] and describe what you see." + + # Check original user message is still there + assert isinstance(messages[1], OpenAIUserMessageParam) + assert messages[1].content == "What do you think?" 
+ + # Check new user message with image is appended + assert isinstance(messages[2], OpenAIUserMessageParam) + assert isinstance(messages[2].content, list) + assert len(messages[2].content) == 1 + + # Should be image with data URL + assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam) + assert messages[2].content[0].image_url.url.startswith("data:image/") + assert messages[2].content[0].image_url.detail == "high" + + +async def test_prepend_prompt_with_file_variable(openai_responses_impl, mock_prompts_api, mock_files_api): + """Test prepend_prompt with file variable - should create placeholder in system message and append file as separate user message.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Review the document {{contract_file}} and summarize key points.", + prompt_id=prompt_id, + version=1, + variables=["contract_file"], + is_default=True, + ) + + # Mock file retrieval + mock_file_content = b"fake_pdf_content" + mock_files_api.openai_retrieve_file_content.return_value = type("obj", (object,), {"body": mock_file_content})() + mock_files_api.openai_retrieve_file.return_value = OpenAIFileObject( + object="file", + id="file-contract-789", + bytes=len(mock_file_content), + created_at=1234567890, + expires_at=1234567890, + filename="contract.pdf", + purpose="assistants", + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "contract_file": OpenAIResponseInputMessageContentFile( + file_id="file-contract-789", + filename="contract.pdf", + ) + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="Please review this.")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + assert len(messages) == 3 + + # Check system message has placeholder + assert isinstance(messages[0], OpenAISystemMessageParam) + assert messages[0].content == "Review the document [File: contract_file] and summarize key points." + + # Check original user message is still there + assert isinstance(messages[1], OpenAIUserMessageParam) + assert messages[1].content == "Please review this." + + # Check new user message with file is appended + assert isinstance(messages[2], OpenAIUserMessageParam) + assert isinstance(messages[2].content, list) + assert len(messages[2].content) == 1 + + # First part should be file with data URL + assert isinstance(messages[2].content[0], OpenAIFile) + assert messages[2].content[0].file.file_data.startswith("data:application/pdf;base64,") + assert messages[2].content[0].file.filename == "contract.pdf" + assert messages[2].content[0].file.file_id is None + + +async def test_prepend_prompt_with_mixed_variables(openai_responses_impl, mock_prompts_api, mock_files_api): + """Test prepend_prompt with text, image, and file variables mixed together.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Hello {{name}}! Analyze {{photo}} and review {{document}}. 
Provide insights for {{company}}.", + prompt_id=prompt_id, + version=1, + variables=["name", "photo", "document", "company"], + is_default=True, + ) + + # Mock file retrieval for image and file + mock_image_content = b"fake_image_data" + mock_file_content = b"fake_doc_content" + + async def mock_retrieve_file_content(file_id): + if file_id == "file-photo-123": + return type("obj", (object,), {"body": mock_image_content})() + elif file_id == "file-doc-456": + return type("obj", (object,), {"body": mock_file_content})() + + mock_files_api.openai_retrieve_file_content.side_effect = mock_retrieve_file_content + + def mock_retrieve_file(file_id): + if file_id == "file-photo-123": + return OpenAIFileObject( + object="file", + id="file-photo-123", + bytes=len(mock_image_content), + created_at=1234567890, + expires_at=1234567890, + filename="photo.jpg", + purpose="assistants", + ) + elif file_id == "file-doc-456": + return OpenAIFileObject( + object="file", + id="file-doc-456", + bytes=len(mock_file_content), + created_at=1234567890, + expires_at=1234567890, + filename="doc.pdf", + purpose="assistants", + ) + + mock_files_api.openai_retrieve_file.side_effect = mock_retrieve_file + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "name": OpenAIResponseInputMessageContentText(text="Alice"), + "photo": OpenAIResponseInputMessageContentImage(file_id="file-photo-123", detail="auto"), + "document": OpenAIResponseInputMessageContentFile(file_id="file-doc-456", filename="doc.pdf"), + "company": OpenAIResponseInputMessageContentText(text="Acme Corp"), + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="Here's my question.")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + assert len(messages) == 3 + + # Check system message has text and placeholders + assert isinstance(messages[0], OpenAISystemMessageParam) + expected_system = "Hello Alice! Analyze [Image: photo] and review [File: document]. Provide insights for Acme Corp." + assert messages[0].content == expected_system + + # Check original user message is still there + assert isinstance(messages[1], OpenAIUserMessageParam) + assert messages[1].content == "Here's my question." 
+ + # Check new user message with media is appended (2 media items) + assert isinstance(messages[2], OpenAIUserMessageParam) + assert isinstance(messages[2].content, list) + assert len(messages[2].content) == 2 + + # First part should be image with data URL + assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam) + assert messages[2].content[0].image_url.url.startswith("data:image/") + + # Second part should be file with data URL + assert isinstance(messages[2].content[1], OpenAIFile) + assert messages[2].content[1].file.file_data.startswith("data:application/pdf;base64,") + assert messages[2].content[1].file.filename == "doc.pdf" + assert messages[2].content[1].file.file_id is None + + +async def test_prepend_prompt_with_image_using_image_url(openai_responses_impl, mock_prompts_api): + """Test prepend_prompt with image variable using image_url instead of file_id.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Describe {{screenshot}}.", + prompt_id=prompt_id, + version=1, + variables=["screenshot"], + is_default=True, + ) + + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={ + "screenshot": OpenAIResponseInputMessageContentImage( + image_url="https://example.com/screenshot.png", + detail="low", + ) + }, + ) + + mock_prompts_api.get_prompt.return_value = prompt + + # Initial messages + messages = [OpenAIUserMessageParam(content="What is this?")] + + # Execute + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) + + assert len(messages) == 3 + + # Check system message has placeholder + assert isinstance(messages[0], OpenAISystemMessageParam) + assert messages[0].content == "Describe [Image: screenshot]." + + # Check original user message is still there + assert isinstance(messages[1], OpenAIUserMessageParam) + assert messages[1].content == "What is this?" 
+ + # Check new user message with image is appended + assert isinstance(messages[2], OpenAIUserMessageParam) + assert isinstance(messages[2].content, list) + + # Image should use the provided URL + assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam) + assert messages[2].content[0].image_url.url == "https://example.com/screenshot.png" + assert messages[2].content[0].image_url.detail == "low" + + +async def test_prepend_prompt_image_variable_missing_required_fields(openai_responses_impl, mock_prompts_api): + """Test prepend_prompt with image variable that has neither file_id nor image_url - should raise error.""" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="Analyze {{bad_image}}.", + prompt_id=prompt_id, + version=1, + variables=["bad_image"], + is_default=True, + ) + + # Create image content with neither file_id nor image_url + openai_response_prompt = OpenAIResponsePrompt( + id=prompt_id, + version="1", + variables={"bad_image": OpenAIResponseInputMessageContentImage()}, # No file_id or image_url + ) + + mock_prompts_api.get_prompt.return_value = prompt + messages = [OpenAIUserMessageParam(content="Test")] + + # Execute - should raise ValueError + with pytest.raises(ValueError, match="Image content must have either 'image_url' or 'file_id'"): + await openai_responses_impl._prepend_prompt(messages, openai_response_prompt) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py b/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py index fa1ddae78..5a3e6bf21 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py @@ -39,6 +39,8 @@ def responses_impl_with_conversations( mock_vector_io_api, mock_conversations_api, mock_safety_api, + mock_prompts_api, + mock_files_api, ): """Create OpenAIResponsesImpl instance with conversations API.""" return OpenAIResponsesImpl( @@ -49,6 +51,8 @@ def responses_impl_with_conversations( vector_io_api=mock_vector_io_api, conversations_api=mock_conversations_api, safety_api=mock_safety_api, + prompts_api=mock_prompts_api, + files_api=mock_files_api, ) diff --git a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py index b7a437686..e496a96e3 100644 --- a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py +++ b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py @@ -5,6 +5,8 @@ # the root directory of this source tree. 
+from unittest.mock import AsyncMock + import pytest from llama_stack.providers.inline.agents.meta_reference.responses.utils import ( @@ -46,6 +48,12 @@ from llama_stack_api.openai_responses import ( ) +@pytest.fixture +def mock_files_api(): + """Mock files API for testing.""" + return AsyncMock() + + class TestConvertChatChoiceToResponseMessage: async def test_convert_string_content(self): choice = OpenAIChoice( @@ -78,17 +86,17 @@ class TestConvertChatChoiceToResponseMessage: class TestConvertResponseContentToChatContent: - async def test_convert_string_content(self): - result = await convert_response_content_to_chat_content("Simple string") + async def test_convert_string_content(self, mock_files_api): + result = await convert_response_content_to_chat_content("Simple string", mock_files_api) assert result == "Simple string" - async def test_convert_text_content_parts(self): + async def test_convert_text_content_parts(self, mock_files_api): content = [ OpenAIResponseInputMessageContentText(text="First part"), OpenAIResponseOutputMessageContentOutputText(text="Second part"), ] - result = await convert_response_content_to_chat_content(content) + result = await convert_response_content_to_chat_content(content, mock_files_api) assert len(result) == 2 assert isinstance(result[0], OpenAIChatCompletionContentPartTextParam) @@ -96,10 +104,10 @@ class TestConvertResponseContentToChatContent: assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam) assert result[1].text == "Second part" - async def test_convert_image_content(self): + async def test_convert_image_content(self, mock_files_api): content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")] - result = await convert_response_content_to_chat_content(content) + result = await convert_response_content_to_chat_content(content, mock_files_api) assert len(result) == 1 assert isinstance(result[0], OpenAIChatCompletionContentPartImageParam) diff --git a/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py b/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py index d4d1b872a..a914bbef4 100644 --- a/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py +++ b/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py @@ -30,6 +30,8 @@ def mock_apis(): "vector_io_api": AsyncMock(), "conversations_api": AsyncMock(), "safety_api": AsyncMock(), + "prompts_api": AsyncMock(), + "files_api": AsyncMock(), } diff --git a/tests/unit/providers/agents/meta_reference/test_safety_optional.py b/tests/unit/providers/agents/meta_reference/test_safety_optional.py index b48d38b29..c2311b68f 100644 --- a/tests/unit/providers/agents/meta_reference/test_safety_optional.py +++ b/tests/unit/providers/agents/meta_reference/test_safety_optional.py @@ -52,6 +52,8 @@ def mock_deps(): tool_runtime_api = AsyncMock() tool_groups_api = AsyncMock() conversations_api = AsyncMock() + prompts_api = AsyncMock() + files_api = AsyncMock() return { Api.inference: inference_api, @@ -59,6 +61,8 @@ def mock_deps(): Api.tool_runtime: tool_runtime_api, Api.tool_groups: tool_groups_api, Api.conversations: conversations_api, + Api.prompts: prompts_api, + Api.files: files_api, } @@ -144,6 +148,8 @@ class TestGuardrailsFunctionality: vector_io_api=mock_deps[Api.vector_io], safety_api=None, # No Safety API conversations_api=mock_deps[Api.conversations], + prompts_api=mock_deps[Api.prompts], + files_api=mock_deps[Api.files], ) # Test with string guardrail @@ 
-191,6 +197,8 @@ class TestGuardrailsFunctionality: vector_io_api=mock_deps[Api.vector_io], safety_api=None, # No Safety API conversations_api=mock_deps[Api.conversations], + prompts_api=mock_deps[Api.prompts], + files_api=mock_deps[Api.files], ) # Should not raise when no guardrails requested From aa2a7dae07d7ecd9213c3ab4f7fc9fa19eed22cc Mon Sep 17 00:00:00 2001 From: Sam El-Borai Date: Wed, 19 Nov 2025 20:53:20 +0100 Subject: [PATCH 2/5] chore(ci): make stainless workflow more DRY (#4195) # What does this PR do? Addresses feedback from https://github.com/llamastack/llama-stack/pull/4187#discussion_r2542797437 ## Test Plan --- .github/workflows/stainless-builds.yml | 88 +++++++++++--------------- 1 file changed, 38 insertions(+), 50 deletions(-) diff --git a/.github/workflows/stainless-builds.yml b/.github/workflows/stainless-builds.yml index a18c70887..28869fdd8 100644 --- a/.github/workflows/stainless-builds.yml +++ b/.github/workflows/stainless-builds.yml @@ -43,7 +43,41 @@ env: # Stainless organization dashboard jobs: + compute-branch: + runs-on: ubuntu-latest + outputs: + preview_branch: ${{ steps.compute.outputs.preview_branch }} + base_branch: ${{ steps.compute.outputs.base_branch }} + merge_branch: ${{ steps.compute.outputs.merge_branch }} + steps: + - name: Compute branch names + id: compute + run: | + HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}" + BASE_REPO="${{ github.repository }}" + BRANCH_NAME="${{ github.event.pull_request.head.ref }}" + FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}" + + if [ "$HEAD_REPO" != "$BASE_REPO" ]; then + # Fork PR: prefix with fork owner for isolation + if [ -z "$FORK_OWNER" ]; then + echo "Error: Fork PR detected but fork owner is empty" >&2 + exit 1 + fi + PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}" + BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}" + else + # Same-repo PR + PREVIEW_BRANCH="preview/${BRANCH_NAME}" + BASE_BRANCH="preview/base/${BRANCH_NAME}" + fi + + echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT + echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT + echo "merge_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT + preview: + needs: compute-branch if: github.event.action != 'closed' runs-on: ubuntu-latest permissions: @@ -59,32 +93,6 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 2 - # Compute the Stainless branch name, prefixing with fork owner if PR is from a fork. - # For fork PRs like "contributor:fix/issue-123", this creates "preview/contributor/fix/issue-123" - # For same-repo PRs, this creates "preview/fix/issue-123" - - name: Compute branch names - id: branch-names - run: | - HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}" - BASE_REPO="${{ github.repository }}" - BRANCH_NAME="${{ github.event.pull_request.head.ref }}" - - if [ "$HEAD_REPO" != "$BASE_REPO" ]; then - # Fork PR: prefix with fork owner for isolation - FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}" - PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}" - BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}" - else - # Same-repo PR - PREVIEW_BRANCH="preview/${BRANCH_NAME}" - BASE_BRANCH="preview/base/${BRANCH_NAME}" - fi - - echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT - echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT - - # This action builds preview SDKs from the OpenAPI spec changes and - # posts/updates a comment on the PR with build results and links to the preview. 
- name: Run preview builds uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0 with: @@ -97,10 +105,11 @@ jobs: base_sha: ${{ github.event.pull_request.base.sha }} base_ref: ${{ github.event.pull_request.base.ref }} head_sha: ${{ github.event.pull_request.head.sha }} - branch: ${{ steps.branch-names.outputs.preview_branch }} - base_branch: ${{ steps.branch-names.outputs.base_branch }} + branch: ${{ needs.compute-branch.outputs.preview_branch }} + base_branch: ${{ needs.compute-branch.outputs.base_branch }} merge: + needs: compute-branch if: github.event.action == 'closed' && github.event.pull_request.merged == true runs-on: ubuntu-latest permissions: @@ -116,27 +125,6 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 2 - # Compute the Stainless branch name, prefixing with fork owner if PR is from a fork. - # For fork PRs like "contributor:fix/issue-123", this creates "preview/contributor/fix/issue-123" - # For same-repo PRs, this creates "preview/fix/issue-123" - - name: Compute branch names - id: branch-names - run: | - HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}" - BASE_REPO="${{ github.repository }}" - BRANCH_NAME="${{ github.event.pull_request.head.ref }}" - - if [ "$HEAD_REPO" != "$BASE_REPO" ]; then - # Fork PR: prefix with fork owner for isolation - FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}" - MERGE_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}" - else - # Same-repo PR - MERGE_BRANCH="preview/${BRANCH_NAME}" - fi - - echo "merge_branch=${MERGE_BRANCH}" >> $GITHUB_OUTPUT - # Note that this only merges in changes that happened on the last build on # the computed preview branch. It's possible that there are OAS/config # changes that haven't been built, if the preview job didn't finish @@ -155,4 +143,4 @@ jobs: base_sha: ${{ github.event.pull_request.base.sha }} base_ref: ${{ github.event.pull_request.base.ref }} head_sha: ${{ github.event.pull_request.head.sha }} - merge_branch: ${{ steps.branch-names.outputs.merge_branch }} + merge_branch: ${{ needs.compute-branch.outputs.merge_branch }} From b6ce2428083fd3ec46f4422473dbc2512b835e66 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 19 Nov 2025 13:43:11 -0800 Subject: [PATCH 3/5] chore: update code owners (#4199) Update code owners given changed affiliations, projects, etc. --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8fff470f6..418d3113a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # These owners will be the default owners for everything in # the repo. Unless a later match takes precedence, -* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo +* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo From d649c3663e3293a86cf1e9d4d83c91cba7032857 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 19 Nov 2025 14:49:44 -0800 Subject: [PATCH 4/5] fix: enforce allowed_models during inference requests (#4197) The `allowed_models` configuration was only being applied when listing models via the `/v1/models` endpoint, but the actual inference requests weren't checking this restriction. This meant users could directly request any model the provider supports by specifying it in their inference call, completely bypassing the intended cost controls. 
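For illustration, the enforcement is a plain membership check against the configured list. The following is a standalone sketch, not the actual provider code; the real check is the `_validate_model_allowed` helper on `OpenAIMixin` shown in the diff below, which reads the list from `self.config.allowed_models` rather than taking it as an argument:

```python
def validate_model_allowed(provider_model_id: str, allowed_models: list[str] | None) -> None:
    # None means "no restriction"; an empty list blocks every model.
    if allowed_models is not None and provider_model_id not in allowed_models:
        raise ValueError(
            f"Model '{provider_model_id}' is not in the allowed models list. "
            f"Allowed models: {allowed_models}"
        )


validate_model_allowed("gpt-4", ["gpt-4"])          # passes silently
validate_model_allowed("gpt-4-turbo", ["gpt-4"])    # raises ValueError
```

A `None` value keeps the previous unrestricted behavior, while an empty list rejects every model; the new unit tests below assert both behaviors.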
The fix adds validation to all three inference methods (chat completions, completions, and embeddings) that checks the requested model against the allowed_models list before making the provider API call. ### Test plan Added unit tests --- .../providers/utils/inference/openai_mixin.py | 28 ++++- .../utils/inference/test_openai_mixin.py | 102 +++++++++++++++++- 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index 559ac90ce..30511a341 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -213,6 +213,19 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): return api_key + def _validate_model_allowed(self, provider_model_id: str) -> None: + """ + Validate that the model is in the allowed_models list if configured. + + :param provider_model_id: The provider-specific model ID to validate + :raises ValueError: If the model is not in the allowed_models list + """ + if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models: + raise ValueError( + f"Model '{provider_model_id}' is not in the allowed models list. " + f"Allowed models: {self.config.allowed_models}" + ) + async def _get_provider_model_id(self, model: str) -> str: """ Get the provider-specific model ID from the model store. @@ -259,8 +272,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): Direct OpenAI completion API call. """ # TODO: fix openai_completion to return type compatible with OpenAI's API response + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + completion_kwargs = await prepare_openai_completion_params( - model=await self._get_provider_model_id(params.model), + model=provider_model_id, prompt=params.prompt, best_of=params.best_of, echo=params.echo, @@ -292,6 +308,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ Direct OpenAI chat completion API call. """ + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + messages = params.messages if self.download_images: @@ -313,7 +332,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): messages = [await _localize_image_url(m) for m in messages] request_params = await prepare_openai_completion_params( - model=await self._get_provider_model_id(params.model), + model=provider_model_id, messages=messages, frequency_penalty=params.frequency_penalty, function_call=params.function_call, @@ -351,10 +370,13 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ Direct OpenAI embeddings API call. 
""" + provider_model_id = await self._get_provider_model_id(params.model) + self._validate_model_allowed(provider_model_id) + # Build request params conditionally to avoid NotGiven/Omit type mismatch # The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven request_params: dict[str, Any] = { - "model": await self._get_provider_model_id(params.model), + "model": provider_model_id, "input": params.input, } if params.encoding_format is not None: diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index 5b13a75f4..02d44f2ba 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -15,7 +15,14 @@ from pydantic import BaseModel, Field from llama_stack.core.request_headers import request_provider_data_context from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin -from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam +from llama_stack_api import ( + Model, + ModelType, + OpenAIChatCompletionRequestWithExtraBody, + OpenAICompletionRequestWithExtraBody, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIUserMessageParam, +) class OpenAIMixinImpl(OpenAIMixin): @@ -834,3 +841,96 @@ class TestOpenAIMixinProviderDataApiKey: error_message = str(exc_info.value) assert "test_api_key" in error_message assert "x-llamastack-provider-data" in error_message + + +class TestOpenAIMixinAllowedModelsInference: + """Test cases for allowed_models enforcement during inference requests""" + + async def test_inference_with_allowed_models(self, mixin, mock_client_context): + """Test that all inference methods succeed with allowed models""" + mixin.config.allowed_models = ["gpt-4", "text-davinci-003", "text-embedding-ada-002"] + + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock(return_value=MagicMock()) + mock_client.completions.create = AsyncMock(return_value=MagicMock()) + mock_embedding_response = MagicMock() + mock_embedding_response.data = [MagicMock(embedding=[0.1, 0.2, 0.3])] + mock_embedding_response.usage = MagicMock(prompt_tokens=5, total_tokens=5) + mock_client.embeddings.create = AsyncMock(return_value=mock_embedding_response) + + with mock_client_context(mixin, mock_client): + # Test chat completion + await mixin.openai_chat_completion( + OpenAIChatCompletionRequestWithExtraBody( + model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")] + ) + ) + mock_client.chat.completions.create.assert_called_once() + + # Test completion + await mixin.openai_completion( + OpenAICompletionRequestWithExtraBody(model="text-davinci-003", prompt="Hello") + ) + mock_client.completions.create.assert_called_once() + + # Test embeddings + await mixin.openai_embeddings( + OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-ada-002", input="test text") + ) + mock_client.embeddings.create.assert_called_once() + + async def test_inference_with_disallowed_models(self, mixin, mock_client_context): + """Test that all inference methods fail with disallowed models""" + mixin.config.allowed_models = ["gpt-4"] + + mock_client = MagicMock() + + with mock_client_context(mixin, mock_client): + # Test chat completion with disallowed model + with pytest.raises(ValueError, match="Model 'gpt-4-turbo' is not in the allowed models list"): + await 
mixin.openai_chat_completion( + OpenAIChatCompletionRequestWithExtraBody( + model="gpt-4-turbo", messages=[OpenAIUserMessageParam(role="user", content="Hello")] + ) + ) + + # Test completion with disallowed model + with pytest.raises(ValueError, match="Model 'text-davinci-002' is not in the allowed models list"): + await mixin.openai_completion( + OpenAICompletionRequestWithExtraBody(model="text-davinci-002", prompt="Hello") + ) + + # Test embeddings with disallowed model + with pytest.raises(ValueError, match="Model 'text-embedding-3-large' is not in the allowed models list"): + await mixin.openai_embeddings( + OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-3-large", input="test text") + ) + + mock_client.chat.completions.create.assert_not_called() + mock_client.completions.create.assert_not_called() + mock_client.embeddings.create.assert_not_called() + + async def test_inference_with_no_restrictions(self, mixin, mock_client_context): + """Test that inference succeeds when allowed_models is None or empty list blocks all""" + # Test with None (no restrictions) + assert mixin.config.allowed_models is None + mock_client = MagicMock() + mock_client.chat.completions.create = AsyncMock(return_value=MagicMock()) + + with mock_client_context(mixin, mock_client): + await mixin.openai_chat_completion( + OpenAIChatCompletionRequestWithExtraBody( + model="any-model", messages=[OpenAIUserMessageParam(role="user", content="Hello")] + ) + ) + mock_client.chat.completions.create.assert_called_once() + + # Test with empty list (blocks all models) + mixin.config.allowed_models = [] + with mock_client_context(mixin, mock_client): + with pytest.raises(ValueError, match="Model 'gpt-4' is not in the allowed models list"): + await mixin.openai_chat_completion( + OpenAIChatCompletionRequestWithExtraBody( + model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")] + ) + ) From acf74cb8df904b16612dbdca4819b2db9b2bb64d Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 19 Nov 2025 16:25:30 -0800 Subject: [PATCH 5/5] feat(ci): add --typescript-only flag to skip Python tests in integration test script (#4201) This adds a `--typescript-only` flag to `scripts/integration-tests.sh` that skips pytest execution entirely while still starting the Llama Stack server (required for TS client tests). The TypeScript client can now be tested independently without Python test dependencies. --- scripts/integration-tests.sh | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 20ecd0c4d..2adef892d 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -20,6 +20,7 @@ TEST_PATTERN="" INFERENCE_MODE="replay" EXTRA_PARAMS="" COLLECT_ONLY=false +TYPESCRIPT_ONLY=false # Function to display usage usage() { @@ -34,6 +35,7 @@ Options: --subdirs STRING Comma-separated list of test subdirectories to run (overrides suite) --pattern STRING Regex pattern to pass to pytest -k --collect-only Collect tests only without running them (skips server startup) + --typescript-only Skip Python tests and run only TypeScript client tests --help Show this help message Suites are defined in tests/integration/suites.py and define which tests to run. 
@@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do COLLECT_ONLY=true shift ;; + --typescript-only) + TYPESCRIPT_ONLY=true + shift + ;; --help) usage exit 0 @@ -544,16 +550,23 @@ if [[ -n "$STACK_CONFIG" ]]; then STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG" fi -pytest -s -v $PYTEST_TARGET \ - $STACK_CONFIG_ARG \ - --inference-mode="$INFERENCE_MODE" \ - -k "$PYTEST_PATTERN" \ - $EXTRA_PARAMS \ - --color=yes \ - --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \ - --color=yes $EXTRA_PARAMS \ - --capture=tee-sys -exit_code=$? +# Run Python tests unless typescript-only mode +if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then + pytest -s -v $PYTEST_TARGET \ + $STACK_CONFIG_ARG \ + --inference-mode="$INFERENCE_MODE" \ + -k "$PYTEST_PATTERN" \ + $EXTRA_PARAMS \ + --color=yes \ + --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \ + --color=yes $EXTRA_PARAMS \ + --capture=tee-sys + exit_code=$? +else + echo "Skipping Python tests (--typescript-only mode)" + exit_code=0 +fi + set +x set -e
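As a closing usage note for the `--typescript-only` change above: a hypothetical invocation that starts the Llama Stack server but skips pytest so only the TypeScript client tests run. Any suite or stack-config selection flags follow the script's existing usage text and are omitted here.

```bash
# Hypothetical invocation; assumes the script's default suite/config selection is acceptable.
./scripts/integration-tests.sh --typescript-only
```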