Merge branch 'main' into fix/nvidia-safety-provider-endpoint-4189

2025-12-03 09:53:45 +00:00 · 2025-11-20 13:30:11 +02:00 · 2025-11-20 13:30:11 +02:00 · f8f28344a5
commit f8f28344a5
parent 1458e881e5 acf74cb8df
117 changed files with 16294 additions and 769 deletions
--- a/tests/integration/README.md
+++ b/tests/integration/README.md
@ -211,3 +211,23 @@ def test_asymmetric_embeddings(llama_stack_client, embedding_model_id):

    assert query_response.embeddings is not None
 ```
+
+## TypeScript Client Replays
+
+TypeScript SDK tests can run alongside the Python tests when testing against `server:<config>` stacks. To enable them, set `TS_CLIENT_PATH` to either a local checkout path or a published npm version of `llama-stack-client-typescript`:
+
+```bash
+# Use published npm package (responses suite)
+TS_CLIENT_PATH=^0.3.2 scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt
+
+# Use local checkout from ~/.cache (recommended for development)
+git clone https://github.com/llamastack/llama-stack-client-typescript.git ~/.cache/llama-stack-client-typescript
+TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt
+
+# Run base suite with TypeScript tests
+TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite base --setup ollama
+```
+
+TypeScript tests run immediately after Python tests pass, using the same replay fixtures. The mapping between Python suites/setups and TypeScript test files is defined in `tests/integration/client-typescript/suites.json`.
+
+If `TS_CLIENT_PATH` is unset, TypeScript tests are skipped entirely.
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@ -516,169 +516,3 @@ def test_response_with_instructions(openai_client, client_with_models, text_mode

    # Verify instructions from previous response was not carried over to the next response
    assert response_with_instructions2.instructions == instructions2
-
-
-@pytest.mark.skip(reason="Tool calling is not reliable.")
-def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id):
-    """Test handling of max_tool_calls with function tools in responses."""
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
-        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
-
-    client = openai_client
-    max_tool_calls = 1
-
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get weather information for a specified location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city name (e.g., 'New York', 'London')",
-                    },
-                },
-            },
-        },
-        {
-            "type": "function",
-            "name": "get_time",
-            "description": "Get current time for a specified location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city name (e.g., 'New York', 'London')",
-                    },
-                },
-            },
-        },
-    ]
-
-    # First create a response that triggers function tools
-    response = client.responses.create(
-        model=text_model_id,
-        input="Can you tell me the weather in Paris and the current time?",
-        tools=tools,
-        stream=False,
-        max_tool_calls=max_tool_calls,
-    )
-
-    # Verify we got two function calls and that the max_tool_calls do not affect function tools
-    assert len(response.output) == 2
-    assert response.output[0].type == "function_call"
-    assert response.output[0].name == "get_weather"
-    assert response.output[0].status == "completed"
-    assert response.output[1].type == "function_call"
-    assert response.output[1].name == "get_time"
-    assert response.output[0].status == "completed"
-
-    # Verify we have a valid max_tool_calls field
-    assert response.max_tool_calls == max_tool_calls
-
-
-def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id):
-    """Test handling of invalid max_tool_calls in responses."""
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
-        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
-
-    client = openai_client
-
-    input = "Search for today's top technology news."
-    invalid_max_tool_calls = 0
-    tools = [
-        {"type": "web_search"},
-    ]
-
-    # Create a response with an invalid max_tool_calls value i.e. 0
-    # Handle ValueError from LLS and BadRequestError from OpenAI client
-    with pytest.raises((ValueError, BadRequestError)) as excinfo:
-        client.responses.create(
-            model=text_model_id,
-            input=input,
-            tools=tools,
-            stream=False,
-            max_tool_calls=invalid_max_tool_calls,
-        )
-
-    error_message = str(excinfo.value)
-    assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, (
-        f"Expected error message about invalid max_tool_calls, got: {error_message}"
-    )
-
-
-def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id):
-    """Test handling of max_tool_calls with built-in tools in responses."""
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
-        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
-
-    client = openai_client
-
-    input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls."
-    max_tool_calls = [1, 5]
-    tools = [
-        {"type": "web_search"},
-    ]
-
-    # First create a response that triggers web_search tools without max_tool_calls
-    response = client.responses.create(
-        model=text_model_id,
-        input=input,
-        tools=tools,
-        stream=False,
-    )
-
-    # Verify we got two web search calls followed by a message
-    assert len(response.output) == 3
-    assert response.output[0].type == "web_search_call"
-    assert response.output[0].status == "completed"
-    assert response.output[1].type == "web_search_call"
-    assert response.output[1].status == "completed"
-    assert response.output[2].type == "message"
-    assert response.output[2].status == "completed"
-    assert response.output[2].role == "assistant"
-
-    # Next create a response that triggers web_search tools with max_tool_calls set to 1
-    response_2 = client.responses.create(
-        model=text_model_id,
-        input=input,
-        tools=tools,
-        stream=False,
-        max_tool_calls=max_tool_calls[0],
-    )
-
-    # Verify we got one web search tool call followed by a message
-    assert len(response_2.output) == 2
-    assert response_2.output[0].type == "web_search_call"
-    assert response_2.output[0].status == "completed"
-    assert response_2.output[1].type == "message"
-    assert response_2.output[1].status == "completed"
-    assert response_2.output[1].role == "assistant"
-
-    # Verify we have a valid max_tool_calls field
-    assert response_2.max_tool_calls == max_tool_calls[0]
-
-    # Finally create a response that triggers web_search tools with max_tool_calls set to 5
-    response_3 = client.responses.create(
-        model=text_model_id,
-        input=input,
-        tools=tools,
-        stream=False,
-        max_tool_calls=max_tool_calls[1],
-    )
-
-    # Verify we got two web search calls followed by a message
-    assert len(response_3.output) == 3
-    assert response_3.output[0].type == "web_search_call"
-    assert response_3.output[0].status == "completed"
-    assert response_3.output[1].type == "web_search_call"
-    assert response_3.output[1].status == "completed"
-    assert response_3.output[2].type == "message"
-    assert response_3.output[2].status == "completed"
-    assert response_3.output[2].role == "assistant"
-
-    # Verify we have a valid max_tool_calls field
-    assert response_3.max_tool_calls == max_tool_calls[1]
--- a/tests/integration/client-typescript/tests/inference.test.ts
+++ b/tests/integration/client-typescript/tests/inference.test.ts
@ -0,0 +1,104 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the terms described in the LICENSE file in
+// the root directory of this source tree.
+
+/**
+ * Integration tests for Inference API (Chat Completions).
+ * Ported from: llama-stack/tests/integration/inference/test_openai_completion.py
+ *
+ * IMPORTANT: Test cases must match EXACTLY with Python tests to use recorded API responses.
+ */
+
+import { createTestClient, requireTextModel } from '../setup';
+
+describe('Inference API - Chat Completions', () => {
+  // Test cases matching llama-stack/tests/integration/test_cases/inference/chat_completion.json
+  // Each case carries the pytest node ID (`testId`) that is passed to createTestClient for that case.
+  const chatCompletionTestCases = [
+    {
+      id: 'non_streaming_01',
+      question: 'Which planet do humans live on?',
+      expected: 'earth',
+      testId:
+        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_01]',
+    },
+    {
+      id: 'non_streaming_02',
+      question: 'Which planet has rings around it with a name starting with letter S?',
+      expected: 'saturn',
+      testId:
+        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_02]',
+    },
+  ];
+
+  const streamingTestCases = [
+    {
+      id: 'streaming_01',
+      question: "What's the name of the Sun in latin?",
+      expected: 'sol',
+      testId:
+        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_01]',
+    },
+    {
+      id: 'streaming_02',
+      // NOTE(review): 'captial' is misspelled, but the question presumably has to stay byte-identical
+      // to the Python test case so the recorded response is matched — confirm before correcting.
+      question: 'What is the name of the US captial?',
+      expected: 'washington',
+      testId:
+        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_02]',
+    },
+  ];
+
+  // Non-streaming: ask the question once and check the answer mentions the expected keyword.
+  test.each(chatCompletionTestCases)(
+    'chat completion non-streaming: $id',
+    async ({ question, expected, testId }) => {
+      const client = createTestClient(testId);
+      const model = requireTextModel();
+
+      const completion = await client.chat.completions.create({
+        model,
+        messages: [
+          {
+            role: 'user',
+            content: question,
+          },
+        ],
+        stream: false,
+      });
+
+      // A non-streaming completion exposes the reply under choices[0].message
+      const [firstChoice] = completion.choices;
+      expect(firstChoice).toBeDefined();
+      if (!firstChoice || !('message' in firstChoice)) {
+        throw new Error('Expected non-streaming response with message');
+      }
+
+      const { content } = firstChoice.message;
+      expect(content).toBeDefined();
+
+      // Non-string content is treated as empty so the length check below fails loudly
+      const answer = typeof content === 'string' ? content.toLowerCase().trim() : '';
+      expect(answer.length).toBeGreaterThan(0);
+      expect(answer).toContain(expected.toLowerCase());
+    },
+  );
+
+  // Streaming: concatenate the content deltas and check the combined text for the expected keyword.
+  test.each(streamingTestCases)('chat completion streaming: $id', async ({ question, expected, testId }) => {
+    const client = createTestClient(testId);
+    const model = requireTextModel();
+
+    const stream = await client.chat.completions.create({
+      model,
+      messages: [{ role: 'user', content: question }],
+      stream: true,
+    });
+
+    // Keep only chunks whose first choice carries a non-empty content delta
+    const deltas: string[] = [];
+    for await (const chunk of stream) {
+      const piece = chunk.choices?.[0]?.delta?.content;
+      if (piece) {
+        deltas.push(piece);
+      }
+    }
+
+    expect(deltas.length).toBeGreaterThan(0);
+    const combined = deltas.join('').toLowerCase().trim();
+    expect(combined).toContain(expected.toLowerCase());
+  });
+});
--- a/tests/integration/client-typescript/tests/responses.test.ts
+++ b/tests/integration/client-typescript/tests/responses.test.ts
@ -0,0 +1,132 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the terms described in the LICENSE file in
+// the root directory of this source tree.
+
+/**
+ * Integration tests for Responses API.
+ * Ported from: llama-stack/tests/integration/responses/test_basic_responses.py
+ *
+ * IMPORTANT: Test cases and IDs must match EXACTLY with Python tests to use recorded API responses.
+ */
+
+import { createTestClient, requireTextModel, getResponseOutputText } from '../setup';
+
+describe('Responses API - Basic', () => {
+  // Test cases matching llama-stack/tests/integration/responses/fixtures/test_cases.py
+  const basicTestCases = [
+    {
+      id: 'earth',
+      input: 'Which planet do humans live on?',
+      expected: 'earth',
+      // Use client_with_models fixture to match non-streaming recordings
+      testId:
+        'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-earth]',
+    },
+    {
+      id: 'saturn',
+      input: 'Which planet has rings around it with a name starting with letter S?',
+      expected: 'saturn',
+      testId:
+        'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-saturn]',
+    },
+  ];
+
+  // Non-streaming: create a response, check text and usage, re-fetch it, then chain a follow-up.
+  test.each(basicTestCases)('non-streaming basic response: $id', async ({ input, expected, testId }) => {
+    // Every request from this client carries the test ID
+    const client = createTestClient(testId);
+    const model = requireTextModel();
+
+    const first = await client.responses.create({
+      model,
+      input,
+      stream: false,
+    });
+
+    // The answer must be non-empty and mention the expected keyword
+    const firstText = getResponseOutputText(first);
+    const normalized = firstText.toLowerCase().trim();
+    expect(normalized.length).toBeGreaterThan(0);
+    expect(normalized).toContain(expected.toLowerCase());
+
+    // Token accounting must be present and internally consistent
+    const usage = first.usage;
+    expect(usage).toBeDefined();
+    expect(usage!.input_tokens).toBeGreaterThan(0);
+    expect(usage!.output_tokens).toBeGreaterThan(0);
+    expect(usage!.total_tokens).toBe(usage!.input_tokens + usage!.output_tokens);
+
+    // The stored copy must yield the same text as the response we just received
+    const stored = await client.responses.retrieve(first.id);
+    expect(getResponseOutputText(stored)).toBe(firstText);
+
+    // A follow-up linked through previous_response_id should see the earlier answer
+    const followUp = await client.responses.create({
+      model,
+      input: 'Repeat your previous response in all caps.',
+      previous_response_id: first.id,
+    });
+    const followUpText = getResponseOutputText(followUp).trim();
+    expect(followUpText).toContain(expected.toUpperCase());
+  });
+
+  // Streaming: check event ordering, the completed payload, and the stored copy of the response.
+  test.each(basicTestCases)('streaming basic response: $id', async ({ input, expected, testId }) => {
+    // Reuse the non-streaming case's ID with the streaming test's name substituted in
+    const client = createTestClient(
+      testId.replace('test_response_non_streaming_basic', 'test_response_streaming_basic'),
+    );
+    const model = requireTextModel();
+
+    const stream = await client.responses.create({
+      model,
+      input,
+      stream: true,
+    });
+
+    const seen: any[] = [];
+    let createdId = '';
+
+    for await (const event of stream) {
+      seen.push(event);
+
+      switch (event.type) {
+        case 'response.created':
+          // Must be the very first event of the stream
+          expect(seen.length).toBe(1);
+          expect(event.response.status).toBe('in_progress');
+          createdId = event.response.id;
+          break;
+
+        case 'response.completed': {
+          // Must follow response.created and refer to the same response
+          expect(seen.length).toBeGreaterThanOrEqual(2);
+          expect(event.response.status).toBe('completed');
+          expect(event.response.id).toBe(createdId);
+
+          // The final text must be non-empty and mention the expected keyword
+          const finalText = getResponseOutputText(event.response).toLowerCase().trim();
+          expect(finalText.length).toBeGreaterThan(0);
+          expect(finalText).toContain(expected.toLowerCase());
+
+          // Token accounting must be present and internally consistent
+          const usage = event.response.usage;
+          expect(usage).toBeDefined();
+          expect(usage!.input_tokens).toBeGreaterThan(0);
+          expect(usage!.output_tokens).toBeGreaterThan(0);
+          expect(usage!.total_tokens).toBe(usage!.input_tokens + usage!.output_tokens);
+          break;
+        }
+
+        default:
+          break;
+      }
+    }
+
+    // The stream must open with response.created and close with response.completed
+    expect(seen.length).toBeGreaterThanOrEqual(2);
+    const opening = seen[0];
+    const closing = seen[seen.length - 1];
+    expect(opening.type).toBe('response.created');
+    expect(closing.type).toBe('response.completed');
+
+    // The stored copy must yield the same text as the streamed final response
+    const stored = await client.responses.retrieve(createdId);
+    expect(getResponseOutputText(stored)).toBe(getResponseOutputText(closing.response));
+  });
+});
--- a/tests/integration/client-typescript/jest.integration.config.js
+++ b/tests/integration/client-typescript/jest.integration.config.js
@ -0,0 +1,31 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the terms described in the LICENSE file in
+// the root directory of this source tree.
+
+/**
+ * Jest configuration for the TypeScript client integration tests.
+ * @type {import('ts-jest').JestConfigWithTsJest}
+ */
+module.exports = {
+  // ts-jest's ESM preset; `.ts` files are treated as ES modules below
+  preset: 'ts-jest/presets/default-esm',
+  testEnvironment: 'node',
+  extensionsToTreatAsEsm: ['.ts'],
+  moduleNameMapper: {
+    // Drop a trailing `.js` from relative import specifiers (`./x.js` -> `./x`)
+    '^(\\.{1,2}/.*)\\.js$': '$1',
+  },
+  transform: {
+    // Run every .ts/.tsx file through ts-jest in ESM mode with an inline tsconfig override
+    '^.+\\.tsx?$': [
+      'ts-jest',
+      {
+        useESM: true,
+        tsconfig: {
+          module: 'ES2022',
+          moduleResolution: 'bundler',
+        },
+      },
+    ],
+  },
+  // Only `*.test.ts` files below `__tests__/` are collected
+  testMatch: ['<rootDir>/__tests__/**/*.test.ts'],
+  // setup.ts is loaded via setupFilesAfterEnv, so it may use Jest globals such as beforeAll
+  setupFilesAfterEnv: ['<rootDir>/setup.ts'],
+  testTimeout: 60000, // 60 seconds (integration tests can be slow)
+  watchman: false, // Disable watchman to avoid permission issues
+};
--- a/tests/integration/client-typescript/package-lock.json
+++ b/tests/integration/client-typescript/package-lock.json
--- a/tests/integration/client-typescript/package.json
+++ b/tests/integration/client-typescript/package.json
@ -0,0 +1,18 @@
+{
+  "name": "llama-stack-typescript-integration-tests",
+  "version": "0.0.1",
+  "private": true,
+  "description": "TypeScript client integration tests for Llama Stack",
+  "scripts": {
+    "test": "node run-tests.js"
+  },
+  "devDependencies": {
+    "@swc/core": "^1.3.102",
+    "@swc/jest": "^0.2.29",
+    "@types/jest": "^29.4.0",
+    "@types/node": "^20.0.0",
+    "jest": "^29.4.0",
+    "ts-jest": "^29.1.0",
+    "typescript": "^5.0.0"
+  }
+}
--- a/tests/integration/client-typescript/run-tests.js
+++ b/tests/integration/client-typescript/run-tests.js
@ -0,0 +1,63 @@
+#!/usr/bin/env node
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the terms described in the LICENSE file in
+// the root directory of this source tree.
+
+/**
+ * Test runner that finds and executes TypeScript tests based on suite/setup mapping.
+ * Called by integration-tests.sh via npm test.
+ */
+
+const fs = require('fs');
+const path = require('path');
+const { execSync } = require('child_process');
+
+// Suite and setup selection come from the environment; setup is optional
+const { LLAMA_STACK_TEST_SUITE: suite } = process.env;
+const setup = process.env.LLAMA_STACK_TEST_SETUP || '';
+
+if (!suite) {
+  console.error('Error: LLAMA_STACK_TEST_SUITE environment variable is required');
+  process.exit(1);
+}
+
+// suites.json maps suite/setup pairs to test files; if it is absent there is nothing to run
+const suitesPath = path.join(__dirname, 'suites.json');
+if (!fs.existsSync(suitesPath)) {
+  console.log(`No TypeScript tests configured (${suitesPath} not found)`);
+  process.exit(0);
+}
+
+const suites = JSON.parse(fs.readFileSync(suitesPath, 'utf-8'));
+
+// Take the first entry for this suite whose setup is either unspecified or equal to ours
+const matchedEntry = suites.find(
+  (entry) => entry.suite === suite && (!entry.setup || entry.setup === setup),
+);
+const testFiles = (matchedEntry && matchedEntry.files) || [];
+
+if (testFiles.length === 0) {
+  console.log(`No TypeScript integration tests mapped for suite ${suite} (setup ${setup})`);
+  process.exit(0);
+}
+
+console.log(`Running TypeScript tests for suite ${suite} (setup ${setup}): ${testFiles.join(', ')}`);
+
+// Hand the mapped files to Jest; on failure exit with its status (or 1 if none was reported)
+const jestCommand = `npx jest --config jest.integration.config.js ${testFiles.join(' ')}`;
+try {
+  execSync(jestCommand, {
+    stdio: 'inherit',
+    cwd: __dirname,
+  });
+} catch (error) {
+  process.exit(error.status || 1);
+}
--- a/tests/integration/client-typescript/setup.ts
+++ b/tests/integration/client-typescript/setup.ts
@ -0,0 +1,162 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the terms described in the LICENSE file in
+// the root directory of this source tree.
+
+/**
+ * Global setup for integration tests.
+ * This file mimics pytest's fixture system by providing shared test configuration.
+ */
+
+import LlamaStackClient from 'llama-stack-client';
+
+/**
+ * Build the shared test configuration from environment variables.
+ *
+ * Only `TEST_API_BASE_URL` is mandatory. The setup name and the model IDs are
+ * passed through unchanged and may be undefined.
+ * NOTE(review): these variables are presumably exported by
+ * scripts/integration-tests.sh from the Python setup definitions — confirm
+ * against the script.
+ *
+ * @returns The base URL, text/embedding model IDs and setup name read from the environment.
+ * @throws Error if `TEST_API_BASE_URL` is unset or empty.
+ */
+function loadTestConfig() {
+  const baseURL = process.env['TEST_API_BASE_URL'];
+  const setupName = process.env['LLAMA_STACK_TEST_SETUP'];
+  const textModel = process.env['LLAMA_STACK_TEST_TEXT_MODEL'];
+  const embeddingModel = process.env['LLAMA_STACK_TEST_EMBEDDING_MODEL'];
+
+  if (!baseURL) {
+    // Point at the real runner script name (integration-tests.sh, plural)
+    throw new Error(
+      'TEST_API_BASE_URL is required for integration tests. ' +
+        'Run tests using: ./scripts/integration-tests.sh',
+    );
+  }
+
+  return {
+    baseURL,
+    textModel,
+    embeddingModel,
+    setupName,
+  };
+}
+
+// Read configuration from environment variables (set by scripts/integration-tests.sh)
+export const TEST_CONFIG = loadTestConfig();
+
+// Print the effective configuration once before any test runs
+beforeAll(() => {
+  const { baseURL, setupName, textModel, embeddingModel } = TEST_CONFIG;
+
+  // One console.log call per line, in this order
+  const banner = [
+    '\n=== Integration Test Configuration ===',
+    `Base URL: ${baseURL}`,
+    `Setup: ${setupName || 'NOT SET'}`,
+    `Text Model: ${textModel || 'NOT SET - tests requiring text model will be skipped'}`,
+    `Embedding Model: ${embeddingModel || 'NOT SET - tests requiring embedding model will be skipped'}`,
+    '=====================================\n',
+  ];
+
+  for (const line of banner) {
+    console.log(line);
+  }
+});
+
+/**
+ * Build a LlamaStackClient pointed at the configured base URL.
+ * Plays the role of pytest's `llama_stack_client` fixture.
+ *
+ * @param testId - Optional test ID, sent as `__test_id` inside the X-LlamaStack-Provider-Data
+ *                 header when the stack config type is `server`.
+ *                 Format: "tests/integration/responses/test_basic_responses.py::test_name[params]"
+ * @returns A client with a 60 second timeout and, when applicable, the test ID header.
+ */
+export function createTestClient(testId?: string): LlamaStackClient {
+  const isServerMode = process.env['LLAMA_STACK_TEST_STACK_CONFIG_TYPE'] === 'server';
+
+  // The header is only attached in server mode and only when a test ID was supplied
+  const defaultHeaders: Record<string, string> =
+    isServerMode && testId ?
+      { 'X-LlamaStack-Provider-Data': JSON.stringify({ __test_id: testId }) }
+    : {};
+
+  return new LlamaStackClient({
+    baseURL: TEST_CONFIG.baseURL,
+    timeout: 60000, // 60 seconds
+    defaultHeaders,
+  });
+}
+
+/**
+ * Pick `test` or `test.skip` depending on whether the required model is configured.
+ * Mimics pytest's `skip_if_no_model` autouse fixture.
+ *
+ * @param modelType - Which configured model the tests need: 'text' or 'embedding'.
+ * @returns `test` when the model is configured; otherwise a bound `test.skip`,
+ *          after warning which environment variable is missing.
+ */
+export function skipIfNoModel(modelType: 'text' | 'embedding'): typeof test {
+  const model = modelType === 'text' ? TEST_CONFIG.textModel : TEST_CONFIG.embeddingModel;
+  if (model) {
+    return test;
+  }
+
+  const envVar = modelType === 'text' ? 'LLAMA_STACK_TEST_TEXT_MODEL' : 'LLAMA_STACK_TEST_EMBEDDING_MODEL';
+  // Report why the tests are skipped so a missing env var is easy to spot
+  console.warn(`Skipping: ${modelType} model not configured (set ${envVar})`);
+  return test.skip.bind(test) as typeof test;
+}
+
+/**
+ * Get the configured text model, throwing if not set.
+ * Use this in tests that absolutely require a text model.
+ *
+ * @returns The text model ID from `TEST_CONFIG.textModel`.
+ * @throws Error if no text model is configured.
+ */
+export function requireTextModel(): string {
+  if (!TEST_CONFIG.textModel) {
+    // Point at the real runner script name (integration-tests.sh, plural)
+    throw new Error(
+      'LLAMA_STACK_TEST_TEXT_MODEL environment variable is required. ' +
+        'Run tests using: ./scripts/integration-tests.sh',
+    );
+  }
+  return TEST_CONFIG.textModel;
+}
+
+/**
+ * Get the configured embedding model, throwing if not set.
+ * Use this in tests that absolutely require an embedding model.
+ *
+ * @returns The embedding model ID from `TEST_CONFIG.embeddingModel`.
+ * @throws Error if no embedding model is configured.
+ */
+export function requireEmbeddingModel(): string {
+  if (!TEST_CONFIG.embeddingModel) {
+    // Point at the real runner script name (integration-tests.sh, plural)
+    throw new Error(
+      'LLAMA_STACK_TEST_EMBEDDING_MODEL environment variable is required. ' +
+        'Run tests using: ./scripts/integration-tests.sh',
+    );
+  }
+  return TEST_CONFIG.embeddingModel;
+}
+
+/**
+ * Concatenate the text carried by a response's `message` outputs.
+ *
+ * Outputs whose `type` is not 'message' are ignored. A message's `content` may be a plain
+ * string or an array; within an array, plain strings and `output_text` items with a string
+ * `text` contribute, everything else is skipped.
+ *
+ * Copied from llama-stack-client's response-helpers until it's available in published version.
+ *
+ * @param response - Response object; a missing `output` is treated as empty.
+ * @returns All collected text joined without a separator ('' when there is none).
+ */
+export function getResponseOutputText(response: any): string {
+  const collected: string[] = [];
+
+  for (const entry of response.output ?? []) {
+    if (entry?.type !== 'message') {
+      continue;
+    }
+
+    // Normalise content to a list: a bare string becomes a one-element list, non-arrays contribute nothing
+    const { content } = entry;
+    const parts: any[] =
+      typeof content === 'string' ? [content]
+      : Array.isArray(content) ? content
+      : [];
+
+    for (const part of parts) {
+      if (typeof part === 'string') {
+        collected.push(part);
+      } else if (part && part.type === 'output_text' && 'text' in part && typeof part.text === 'string') {
+        collected.push(part.text);
+      }
+    }
+  }
+
+  return collected.join('');
+}
--- a/tests/integration/client-typescript/suites.json
+++ b/tests/integration/client-typescript/suites.json
@ -0,0 +1,12 @@
+[
+  {
+    "suite": "responses",
+    "setup": "gpt",
+    "files": ["__tests__/responses.test.ts"]
+  },
+  {
+    "suite": "base",
+    "setup": "ollama",
+    "files": ["__tests__/inference.test.ts"]
+  }
+]
--- a/tests/integration/client-typescript/tsconfig.json
+++ b/tests/integration/client-typescript/tsconfig.json
@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "lib": ["ES2022"],
+    "moduleResolution": "bundler",
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "strict": true,
+    "skipLibCheck": true,
+    "resolveJsonModule": true,
+    "types": ["jest", "node"]
+  },
+  "include": ["**/*.ts"],
+  "exclude": ["node_modules"]
+}
--- a/tests/integration/responses/recordings/1997dc007d202497ce456683d24ddde3553f0db5d5a673146d8bb99c072e77cd.json
+++ b/tests/integration/responses/recordings/1997dc007d202497ce456683d24ddde3553f0db5d5a673146d8bb99c072e77cd.json
@ -0,0 +1,773 @@
+{
+  "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[client_with_models-txt=openai/gpt-4o]",
+  "request": {
+    "method": "POST",
+    "url": "https://api.openai.com/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-4o",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'"
+        }
+      ],
+      "stream": true,
+      "stream_options": {
+        "include_usage": true
+      },
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_user_id",
+            "description": "\n        Get the user ID for a given username. This ID is needed for other operations.\n\n        :param username: The username to look up\n        :return: The user ID for the username\n        ",
+            "parameters": {
+              "properties": {
+                "username": {
+                  "title": "Username",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "username"
+              ],
+              "title": "get_user_idArguments",
+              "type": "object"
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "get_user_permissions",
+            "description": "\n        Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n        :param user_id: The user ID to check permissions for\n        :return: The permissions for the user\n        ",
+            "parameters": {
+              "properties": {
+                "user_id": {
+                  "title": "User Id",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "user_id"
+              ],
+              "title": "get_user_permissionsArguments",
+              "type": "object"
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "check_file_access",
+            "description": "\n        Check if a user can access a specific file. Requires a valid user ID.\n\n        :param user_id: The user ID to check access for\n        :param filename: The filename to check access to\n        :return: Whether the user can access the file (yes/no)\n        ",
+            "parameters": {
+              "properties": {
+                "user_id": {
+                  "title": "User Id",
+                  "type": "string"
+                },
+                "filename": {
+                  "title": "Filename",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "user_id",
+                "filename"
+              ],
+              "title": "check_file_accessArguments",
+              "type": "object"
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "get_experiment_id",
+            "description": "\n        Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n        :param experiment_name: The name of the experiment\n        :return: The experiment ID\n        ",
+            "parameters": {
+              "properties": {
+                "experiment_name": {
+                  "title": "Experiment Name",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "experiment_name"
+              ],
+              "title": "get_experiment_idArguments",
+              "type": "object"
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "get_experiment_results",
+            "description": "\n        Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n        :param experiment_id: The experiment ID to get results for\n        :return: The experiment results\n        ",
+            "parameters": {
+              "properties": {
+                "experiment_id": {
+                  "title": "Experiment Id",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "experiment_id"
+              ],
+              "title": "get_experiment_resultsArguments",
+              "type": "object"
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-4o"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "1V9w3bXnppL"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": "call_y8S7JKR2Qhu4Bh1uxdHRcNDg",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_experiment_id"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "YEsj"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"ex",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "n"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "perim",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "Q"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "ent_na",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": ""
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "me\":",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "U"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": " \"boi",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": ""
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "ling_p",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": ""
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "oint",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "ha"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "d5D"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": "call_HELkyZOm2fzLx2CeTH3bEcS2",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_user_id"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "0LbsjDcKz6"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"us",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "c"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "ernam",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "9"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "e\": \"c",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "7C0WFn181I3y3l"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "harl",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "wf"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "ie\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "r"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "tool_calls",
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "FAci"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-1997dc007d20",
+          "choices": [],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": {
+            "completion_tokens": 51,
+            "prompt_tokens": 393,
+            "total_tokens": 444,
+            "completion_tokens_details": {
+              "accepted_prediction_tokens": 0,
+              "audio_tokens": 0,
+              "reasoning_tokens": 0,
+              "rejected_prediction_tokens": 0
+            },
+            "prompt_tokens_details": {
+              "audio_tokens": 0,
+              "cached_tokens": 0
+            }
+          },
+          "obfuscation": "6xgpRRdKjviPT"
+        }
+      }
+    ],
+    "is_streaming": true
+  },
+  "id_normalization_mapping": {}
+}
--- a/tests/integration/responses/recordings/463ab0e2f2914026cfa3c742259c43af318468eb4ef84fd4008ebb40824b7e86.json
+++ b/tests/integration/responses/recordings/463ab0e2f2914026cfa3c742259c43af318468eb4ef84fd4008ebb40824b7e86.json
@ -0,0 +1,593 @@
+{
+  "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_function_tools[openai_client-txt=openai/gpt-4o]",
+  "request": {
+    "method": "POST",
+    "url": "https://api.openai.com/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-4o",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Can you tell me the weather in Paris and the current time?"
+        }
+      ],
+      "stream": true,
+      "stream_options": {
+        "include_usage": true
+      },
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get weather information for a specified location",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city name (e.g., 'New York', 'London')"
+                }
+              }
+            },
+            "strict": null
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "type": "function",
+            "name": "get_time",
+            "description": "Get current time for a specified location",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city name (e.g., 'New York', 'London')"
+                }
+              }
+            },
+            "strict": null
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-4o"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "QmTXstGvpa8"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": "call_HJMoLtHXfCzhlMQOfqIKt0n3",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_weather"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "iFjmkK23KL"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"lo",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "7"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "catio",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "L"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "n\": \"P",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "THa6gWbrWhVmZ6"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "aris",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "eL"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "jng"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": "call_vGKvTKZM7aALMaUw3Jas7lRg",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_time"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "LSailgMcgSl54"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"lo",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "z"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "catio",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "4"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "n\": \"P",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "0engr6vRvqXTEP"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "aris",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "Pe"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "LU9"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "tool_calls",
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "kD7d"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-463ab0e2f291",
+          "choices": [],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": {
+            "completion_tokens": 44,
+            "prompt_tokens": 110,
+            "total_tokens": 154,
+            "completion_tokens_details": {
+              "accepted_prediction_tokens": 0,
+              "audio_tokens": 0,
+              "reasoning_tokens": 0,
+              "rejected_prediction_tokens": 0
+            },
+            "prompt_tokens_details": {
+              "audio_tokens": 0,
+              "cached_tokens": 0
+            }
+          },
+          "obfuscation": "R4ICoxqTqj7ZY"
+        }
+      }
+    ],
+    "is_streaming": true
+  },
+  "id_normalization_mapping": {}
+}
--- a/tests/integration/responses/recordings/b218af7fa0663e60b12633f54cfddbcf60a1fedd85c501850b9f7e759443809f.json
+++ b/tests/integration/responses/recordings/b218af7fa0663e60b12633f54cfddbcf60a1fedd85c501850b9f7e759443809f.json
@ -0,0 +1,773 @@
+{
+  "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[openai_client-txt=openai/gpt-4o]",
+  "request": {
+    "method": "POST",
+    "url": "https://api.openai.com/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-4o",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'"
+        }
+      ],
+      "stream": true,
+      "stream_options": {
+        "include_usage": true
+      },
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_user_id",
+            "description": "\n        Get the user ID for a given username. This ID is needed for other operations.\n\n        :param username: The username to look up\n        :return: The user ID for the username\n        ",
+            "parameters": {
+              "properties": {
+                "username": {
+                  "title": "Username",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "username"
+              ],
+              "title": "get_user_idArguments",
+              "type": "object"
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "get_user_permissions",
+            "description": "\n        Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n        :param user_id: The user ID to check permissions for\n        :return: The permissions for the user\n        ",
+            "parameters": {
+              "properties": {
+                "user_id": {
+                  "title": "User Id",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "user_id"
+              ],
+              "title": "get_user_permissionsArguments",
+              "type": "object"
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "check_file_access",
+            "description": "\n        Check if a user can access a specific file. Requires a valid user ID.\n\n        :param user_id: The user ID to check access for\n        :param filename: The filename to check access to\n        :return: Whether the user can access the file (yes/no)\n        ",
+            "parameters": {
+              "properties": {
+                "user_id": {
+                  "title": "User Id",
+                  "type": "string"
+                },
+                "filename": {
+                  "title": "Filename",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "user_id",
+                "filename"
+              ],
+              "title": "check_file_accessArguments",
+              "type": "object"
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "get_experiment_id",
+            "description": "\n        Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n        :param experiment_name: The name of the experiment\n        :return: The experiment ID\n        ",
+            "parameters": {
+              "properties": {
+                "experiment_name": {
+                  "title": "Experiment Name",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "experiment_name"
+              ],
+              "title": "get_experiment_idArguments",
+              "type": "object"
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "get_experiment_results",
+            "description": "\n        Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n        :param experiment_id: The experiment ID to get results for\n        :return: The experiment results\n        ",
+            "parameters": {
+              "properties": {
+                "experiment_id": {
+                  "title": "Experiment Id",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "experiment_id"
+              ],
+              "title": "get_experiment_resultsArguments",
+              "type": "object"
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-4o"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "N5OTLR9CfmU"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": "call_z8P1RQv54BLxyMlRdMFkcCGd",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_experiment_id"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "3EKK"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"ex",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "R"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "perim",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "Q"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "ent_na",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": ""
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "me\":",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "6"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": " \"boi",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": ""
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "ling_p",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": ""
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "oint",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "pw"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "Gfk"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": "call_I5tcLgyMADoVwLKDj9HkTCs5",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_user_id"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "Yp7IueDs5V"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"us",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "8"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "ernam",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "X"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "e\": \"c",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "2oif8BwVnTCnAF"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "harl",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "hv"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "ie\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "C"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "tool_calls",
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": null,
+          "obfuscation": "ctjO"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-b218af7fa066",
+          "choices": [],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_c98e05ca17",
+          "usage": {
+            "completion_tokens": 51,
+            "prompt_tokens": 393,
+            "total_tokens": 444,
+            "completion_tokens_details": {
+              "accepted_prediction_tokens": 0,
+              "audio_tokens": 0,
+              "reasoning_tokens": 0,
+              "rejected_prediction_tokens": 0
+            },
+            "prompt_tokens_details": {
+              "audio_tokens": 0,
+              "cached_tokens": 0
+            }
+          },
+          "obfuscation": "fclbZeBSSKN4C"
+        }
+      }
+    ],
+    "is_streaming": true
+  },
+  "id_normalization_mapping": {}
+}
--- a/tests/integration/responses/recordings/b2b5903325356ef0d90af4f2bb8c2a685da5e743820a68de74640451f0072184.json
+++ b/tests/integration/responses/recordings/b2b5903325356ef0d90af4f2bb8c2a685da5e743820a68de74640451f0072184.json
--- a/tests/integration/responses/recordings/b376e47c185753246e6b47e33dd6700e308ebbe9389bc5a1da8f4840fc9031ef.json
+++ b/tests/integration/responses/recordings/b376e47c185753246e6b47e33dd6700e308ebbe9389bc5a1da8f4840fc9031ef.json
--- a/tests/integration/responses/recordings/c1b953d78e040ae516301c6dd5004cf049a522bd106852b6d09e9baf41df88d3.json
+++ b/tests/integration/responses/recordings/c1b953d78e040ae516301c6dd5004cf049a522bd106852b6d09e9baf41df88d3.json
--- a/tests/integration/responses/recordings/d073f434d28c2f72bea92232de0de4d4f415f237e22b2b6983677a1e1319a0d3.json
+++ b/tests/integration/responses/recordings/d073f434d28c2f72bea92232de0de4d4f415f237e22b2b6983677a1e1319a0d3.json
@ -0,0 +1,593 @@
+{
+  "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_function_tools[client_with_models-txt=openai/gpt-4o]",
+  "request": {
+    "method": "POST",
+    "url": "https://api.openai.com/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-4o",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Can you tell me the weather in Paris and the current time?"
+        }
+      ],
+      "stream": true,
+      "stream_options": {
+        "include_usage": true
+      },
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get weather information for a specified location",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city name (e.g., 'New York', 'London')"
+                }
+              }
+            },
+            "strict": null
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "type": "function",
+            "name": "get_time",
+            "description": "Get current time for a specified location",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city name (e.g., 'New York', 'London')"
+                }
+              }
+            },
+            "strict": null
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-4o"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "iUduPiCYBRb"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": "call_Wv3G8aEQOJLNXGRaK3hAWzq3",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_weather"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "cqZKgzm65y"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"lo",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "8"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "catio",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "L"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "n\": \"P",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "zbBLzavvnEdLz0"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "aris",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "Gj"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "LQo"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": "call_8xkOmOgJpV77n5W2dSx6ytW6",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_time"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "eltoncGlxI8Go"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"lo",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "S"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "catio",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "N"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "n\": \"P",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "2bTn1MaAXYFoVK"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "aris",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "VF"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 1,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "BHi"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "tool_calls",
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": null,
+          "obfuscation": "WaYG"
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-d073f434d28c",
+          "choices": [],
+          "created": 0,
+          "model": "gpt-4o-2024-08-06",
+          "object": "chat.completion.chunk",
+          "service_tier": "default",
+          "system_fingerprint": "fp_b1442291a8",
+          "usage": {
+            "completion_tokens": 44,
+            "prompt_tokens": 110,
+            "total_tokens": 154,
+            "completion_tokens_details": {
+              "accepted_prediction_tokens": 0,
+              "audio_tokens": 0,
+              "reasoning_tokens": 0,
+              "rejected_prediction_tokens": 0
+            },
+            "prompt_tokens_details": {
+              "audio_tokens": 0,
+              "cached_tokens": 0
+            }
+          },
+          "obfuscation": "aevj6ZWLqfCK6"
+        }
+      }
+    ],
+    "is_streaming": true
+  },
+  "id_normalization_mapping": {}
+}
--- a/tests/integration/responses/recordings/e3e2e64c57bb36f2a6ba5f68410d0b947d35c870ff825f06d8997a84dca1f5bf.json
+++ b/tests/integration/responses/recordings/e3e2e64c57bb36f2a6ba5f68410d0b947d35c870ff825f06d8997a84dca1f5bf.json
--- a/tests/integration/responses/test_tool_responses.py
+++ b/tests/integration/responses/test_tool_responses.py
@ -600,3 +600,155 @@ def test_response_streaming_multi_turn_tool_execution(responses_client, text_mod
            assert expected_output.lower() in final_response.output_text.lower(), (
                f"Expected '{expected_output}' to appear in response: {final_response.output_text}"
            )
+
+
+def test_max_tool_calls_with_function_tools(responses_client, text_model_id):
+    """Check that max_tool_calls does not limit function tools.
+
+    Two function tools are offered together with a limit of one tool call.
+    The response is still expected to contain both function calls and to
+    report the requested limit in its max_tool_calls field.
+    """
+    limit = 1
+    tool_specs = (
+        ("get_weather", "Get weather information for a specified location"),
+        ("get_time", "Get current time for a specified location"),
+    )
+    # Both tools take the same single "location" string parameter.
+    function_tools = [
+        {
+            "type": "function",
+            "name": tool_name,
+            "description": tool_description,
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city name (e.g., 'New York', 'London')",
+                    },
+                },
+            },
+        }
+        for tool_name, tool_description in tool_specs
+    ]
+
+    response = responses_client.responses.create(
+        model=text_model_id,
+        input="Can you tell me the weather in Paris and the current time?",
+        tools=function_tools,
+        stream=False,
+        max_tool_calls=limit,
+    )
+
+    # Both function calls must come back, in request order, despite the limit of one.
+    assert len(response.output) == 2
+    for item, tool_name in zip(response.output, ("get_weather", "get_time")):
+        assert item.type == "function_call"
+        assert item.name == tool_name
+        assert item.status == "completed"
+
+    # The limit that was sent is reported on the response object.
+    assert response.max_tool_calls == limit
+
+
+def test_max_tool_calls_invalid(responses_client, text_model_id):
+    """Test that max_tool_calls=0 is rejected with a descriptive error.
+
+    The create call is expected to raise, and the error text must name the
+    offending value and state that it should be >= 1.
+    """
+
+    # Named input_text (not input) so the builtin input() is not shadowed.
+    input_text = "Search for today's top technology news."
+    invalid_max_tool_calls = 0
+    tools = [
+        {"type": "web_search"},
+    ]
+
+    # Create a response with an invalid max_tool_calls value i.e. 0
+    # Handle ValueError from LLS and BadRequestError from OpenAI client
+    with pytest.raises((ValueError, llama_stack_client.BadRequestError, openai.BadRequestError)) as excinfo:
+        responses_client.responses.create(
+            model=text_model_id,
+            input=input_text,
+            tools=tools,
+            stream=False,
+            max_tool_calls=invalid_max_tool_calls,
+        )
+
+    # Whichever exception type was raised, its message must carry the validation text.
+    error_message = str(excinfo.value)
+    assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, (
+        f"Expected error message about invalid max_tool_calls, got: {error_message}"
+    )
+
+
+def test_max_tool_calls_with_mcp_tools(responses_client, text_model_id):
+    """Test handling of max_tool_calls with mcp tools in responses.
+
+    The same prompt is sent three times against a local MCP server: with no
+    limit, with max_tool_calls=1 and with max_tool_calls=5. Each response is
+    checked for the number of mcp_list_tools, mcp_call and message outputs,
+    and the limited responses must report the limit that was requested.
+    """
+
+    with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
+        # Named input_text (not input) so the builtin input() is not shadowed.
+        input_text = "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'"
+        max_tool_calls = [1, 5]
+        tools = [
+            {"type": "mcp", "server_label": "localmcp", "server_url": mcp_server_info["server_url"]},
+        ]
+
+        # First create a response that triggers mcp tools without max_tool_calls
+        response = responses_client.responses.create(
+            model=text_model_id,
+            input=input_text,
+            tools=tools,
+            stream=False,
+        )
+
+        # Verify we got four outputs: one tool listing, two mcp tool calls and one message
+        assert len(response.output) == 4
+        mcp_list_tools = [output for output in response.output if output.type == "mcp_list_tools"]
+        mcp_calls = [output for output in response.output if output.type == "mcp_call"]
+        message_outputs = [output for output in response.output if output.type == "message"]
+        assert len(mcp_list_tools) == 1
+        assert len(mcp_calls) == 2, f"Expected two mcp calls, got {len(mcp_calls)}"
+        assert len(message_outputs) == 1, f"Expected one message output, got {len(message_outputs)}"
+
+        # Next create a response that triggers mcp tools with max_tool_calls set to 1
+        response_2 = responses_client.responses.create(
+            model=text_model_id,
+            input=input_text,
+            tools=tools,
+            stream=False,
+            max_tool_calls=max_tool_calls[0],
+        )
+
+        # Verify we got three outputs: one tool listing, one mcp tool call and one message
+        assert len(response_2.output) == 3
+        mcp_list_tools = [output for output in response_2.output if output.type == "mcp_list_tools"]
+        mcp_calls = [output for output in response_2.output if output.type == "mcp_call"]
+        message_outputs = [output for output in response_2.output if output.type == "message"]
+        assert len(mcp_list_tools) == 1
+        assert len(mcp_calls) == 1, f"Expected one mcp call, got {len(mcp_calls)}"
+        assert len(message_outputs) == 1, f"Expected one message output, got {len(message_outputs)}"
+
+        # Verify we have a valid max_tool_calls field
+        assert response_2.max_tool_calls == max_tool_calls[0]
+
+        # Finally create a response that triggers mcp tools with max_tool_calls set to 5
+        response_3 = responses_client.responses.create(
+            model=text_model_id,
+            input=input_text,
+            tools=tools,
+            stream=False,
+            max_tool_calls=max_tool_calls[1],
+        )
+
+        # Verify we got four outputs again: the limit of 5 is above the two calls needed
+        assert len(response_3.output) == 4
+        mcp_list_tools = [output for output in response_3.output if output.type == "mcp_list_tools"]
+        mcp_calls = [output for output in response_3.output if output.type == "mcp_call"]
+        message_outputs = [output for output in response_3.output if output.type == "message"]
+        assert len(mcp_list_tools) == 1
+        assert len(mcp_calls) == 2, f"Expected two mcp calls, got {len(mcp_calls)}"
+        assert len(message_outputs) == 1, f"Expected one message output, got {len(message_outputs)}"
+
+        # Verify we have a valid max_tool_calls field
+        assert response_3.max_tool_calls == max_tool_calls[1]
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@ -50,7 +50,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
        name="ollama",
        description="Local Ollama provider with text + safety models",
        env={
-            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "OLLAMA_URL": "http://0.0.0.0:11434/v1",
            "SAFETY_MODEL": "ollama/llama-guard3:1b",
        },
        defaults={
@ -64,7 +64,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
        name="ollama",
        description="Local Ollama provider with a vision model",
        env={
-            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "OLLAMA_URL": "http://0.0.0.0:11434/v1",
        },
        defaults={
            "vision_model": "ollama/llama3.2-vision:11b",
@ -75,7 +75,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
        name="ollama-postgres",
        description="Server-mode tests with Postgres-backed persistence",
        env={
-            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "OLLAMA_URL": "http://0.0.0.0:11434/v1",
            "SAFETY_MODEL": "ollama/llama-guard3:1b",
            "POSTGRES_HOST": "127.0.0.1",
            "POSTGRES_PORT": "5432",
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@ -25,6 +25,13 @@ from llama_stack.providers.utils.responses.responses_store import (
    ResponsesStore,
    _OpenAIResponseObjectWithInputAndMessages,
 )
+from llama_stack_api import (
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIFile,
+    OpenAIFileObject,
+    OpenAISystemMessageParam,
+    Prompt,
+)
 from llama_stack_api.agents import Order
 from llama_stack_api.inference import (
    OpenAIAssistantMessageParam,
@ -38,6 +45,8 @@ from llama_stack_api.inference import (
 )
 from llama_stack_api.openai_responses import (
    ListOpenAIResponseInputItem,
+    OpenAIResponseInputMessageContentFile,
+    OpenAIResponseInputMessageContentImage,
    OpenAIResponseInputMessageContentText,
    OpenAIResponseInputToolFunction,
    OpenAIResponseInputToolMCP,
@ -47,6 +56,7 @@ from llama_stack_api.openai_responses import (
    OpenAIResponseOutputMessageFunctionToolCall,
    OpenAIResponseOutputMessageMCPCall,
    OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIResponsePrompt,
    OpenAIResponseText,
    OpenAIResponseTextFormat,
    WebSearchToolTypes,
@ -98,6 +108,19 @@ def mock_safety_api():
    return safety_api


+@pytest.fixture
+def mock_prompts_api():
+    """Provide an AsyncMock standing in for the prompts API."""
+    return AsyncMock()
+
+
+@pytest.fixture
+def mock_files_api():
+    """Provide an AsyncMock standing in for the files API."""
+    return AsyncMock()
+
+
@pytest.fixture
 def openai_responses_impl(
    mock_inference_api,
@ -107,6 +130,8 @@ def openai_responses_impl(
    mock_vector_io_api,
    mock_safety_api,
    mock_conversations_api,
+    mock_prompts_api,
+    mock_files_api,
 ):
    return OpenAIResponsesImpl(
        inference_api=mock_inference_api,
@ -116,6 +141,8 @@ def openai_responses_impl(
        vector_io_api=mock_vector_io_api,
        safety_api=mock_safety_api,
        conversations_api=mock_conversations_api,
+        prompts_api=mock_prompts_api,
+        files_api=mock_files_api,
    )


@ -499,7 +526,7 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(ope
    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()


-async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
+async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api, mock_files_api):
    """Test creating an OpenAI response with multiple messages."""
    # Setup
    input_messages = [
@ -710,7 +737,7 @@ async def test_create_openai_response_with_instructions(openai_responses_impl, m


 async def test_create_openai_response_with_instructions_and_multiple_messages(
-    openai_responses_impl, mock_inference_api
+    openai_responses_impl, mock_inference_api, mock_files_api
 ):
    # Setup
    input_messages = [
@ -1242,3 +1269,489 @@ async def test_create_openai_response_with_output_types_as_input(

    assert stored_with_outputs.input == input_with_output_types
    assert len(stored_with_outputs.input) == 3
+
+
+async def test_create_openai_response_with_prompt(openai_responses_impl, mock_inference_api, mock_prompts_api):
+    """A stored prompt is rendered with the request's variables and sent as the system message."""
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    question = "What is the capital of Ireland?"
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+
+    # Stored prompt template handed back by the mocked prompts API.
+    mock_prompts_api.get_prompt.return_value = Prompt(
+        prompt="You are a helpful {{ area_name }} assistant at {{ company_name }}. Always provide accurate information.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=["area_name", "company_name"],
+        is_default=True,
+    )
+    mock_inference_api.openai_chat_completion.return_value = fake_stream()
+
+    # Request-side prompt reference carrying the values to substitute.
+    requested_prompt = OpenAIResponsePrompt(
+        id=prompt_id,
+        version="1",
+        variables={
+            "area_name": OpenAIResponseInputMessageContentText(text="geography"),
+            "company_name": OpenAIResponseInputMessageContentText(text="Dummy Company"),
+        },
+    )
+
+    response = await openai_responses_impl.create_openai_response(
+        input=question,
+        model=model,
+        prompt=requested_prompt,
+    )
+
+    # The request carries version "1"; the lookup is expected with integer 1.
+    mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1)
+    mock_inference_api.openai_chat_completion.assert_called()
+
+    sent_messages = mock_inference_api.openai_chat_completion.call_args.args[0].messages
+    assert len(sent_messages) == 2
+
+    # Exactly one system message, holding the fully rendered template.
+    system_messages = [msg for msg in sent_messages if msg.role == "system"]
+    assert len(system_messages) == 1
+    rendered_prompt = "You are a helpful geography assistant at Dummy Company. Always provide accurate information."
+    assert system_messages[0].content == rendered_prompt
+
+    # Exactly one user message, holding the untouched input text.
+    user_messages = [msg for msg in sent_messages if msg.role == "user"]
+    assert len(user_messages) == 1
+    assert user_messages[0].content == question
+
+    # The response echoes the prompt reference it was created with.
+    assert response.model == model
+    assert response.status == "completed"
+    assert isinstance(response.prompt, OpenAIResponsePrompt)
+    assert response.prompt.id == prompt_id
+    assert response.prompt.variables == requested_prompt.variables
+    assert response.prompt.version == "1"
+
+
+async def test_prepend_prompt_successful_without_variables(openai_responses_impl, mock_prompts_api, mock_inference_api):
+    """A prompt that declares no variables is sent unchanged as the system message."""
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    question = "What is the capital of Ireland?"
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+    system_text = "You are a helpful assistant. Always provide accurate information."
+
+    mock_prompts_api.get_prompt.return_value = Prompt(
+        prompt=system_text,
+        prompt_id=prompt_id,
+        version=1,
+        variables=[],
+        is_default=True,
+    )
+    mock_inference_api.openai_chat_completion.return_value = fake_stream()
+
+    await openai_responses_impl.create_openai_response(
+        input=question,
+        model=model,
+        prompt=OpenAIResponsePrompt(id=prompt_id, version="1"),
+    )
+
+    mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1)
+    mock_inference_api.openai_chat_completion.assert_called()
+
+    sent_messages = mock_inference_api.openai_chat_completion.call_args.args[0].messages
+    assert len(sent_messages) == 2
+
+    # With nothing to substitute, the template text must arrive verbatim.
+    system_messages = [msg for msg in sent_messages if msg.role == "system"]
+    assert system_messages[0].content == system_text
+
+
+async def test_prepend_prompt_invalid_variable(openai_responses_impl, mock_prompts_api):
+    """Supplying a variable the stored prompt does not declare makes _prepend_prompt raise ValueError."""
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+
+    # The stored prompt declares "role" as its only variable.
+    mock_prompts_api.get_prompt.return_value = Prompt(
+        prompt="You are a {{ role }} assistant.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=["role"],
+        is_default=True,
+    )
+
+    # "company" is supplied by the request but is absent from the prompt's variables.
+    supplied_variables = {
+        "role": OpenAIResponseInputMessageContentText(text="helpful"),
+        "company": OpenAIResponseInputMessageContentText(text="Dummy Company"),
+    }
+    requested_prompt = OpenAIResponsePrompt(id=prompt_id, version="1", variables=supplied_variables)
+    conversation = [OpenAIUserMessageParam(content="Test prompt")]
+
+    with pytest.raises(ValueError, match="Variable company not found in prompt"):
+        await openai_responses_impl._prepend_prompt(conversation, requested_prompt)
+
+    # The prompt is still fetched exactly once before validation rejects the request.
+    mock_prompts_api.get_prompt.assert_called_once_with(prompt_id, 1)
+
+
+async def test_prepend_prompt_not_found(openai_responses_impl, mock_prompts_api):
+    """When the prompts API yields no prompt, _prepend_prompt returns None and leaves the messages untouched."""
+    prompt_id = "pmpt_nonexistent"
+    mock_prompts_api.get_prompt.return_value = None  # simulate a missing prompt
+
+    conversation = [OpenAIUserMessageParam(content="Test prompt")]
+    size_before = len(conversation)
+
+    outcome = await openai_responses_impl._prepend_prompt(
+        conversation,
+        OpenAIResponsePrompt(id=prompt_id, version="1"),
+    )
+
+    mock_prompts_api.get_prompt.assert_called_once_with(prompt_id, 1)
+    assert outcome is None
+
+    # No system message may have been inserted into the caller's list.
+    assert len(conversation) == size_before
+    assert conversation[0].content == "Test prompt"
+
+
+async def test_prepend_prompt_variable_substitution(openai_responses_impl, mock_prompts_api):
+    """Repeated variables and every brace-spacing variant are all substituted by _prepend_prompt."""
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+    values = {
+        "name": "Alice",
+        "company": "Dummy Company",
+        "role": "AI Assistant",
+        "tone": "professional",
+    }
+
+    # The template mixes {{name}}, {{ name }}, {{ company}} and similar spacings on purpose.
+    mock_prompts_api.get_prompt.return_value = Prompt(
+        prompt="Hello {{name}}! You are working at {{ company}}. Your role is {{role}} at {{company}}. Remember, {{ name }}, to be {{ tone }}.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=list(values),
+        is_default=True,
+    )
+
+    requested_prompt = OpenAIResponsePrompt(
+        id=prompt_id,
+        version="1",
+        variables={key: OpenAIResponseInputMessageContentText(text=text) for key, text in values.items()},
+    )
+    conversation = [OpenAIUserMessageParam(content="Test")]
+
+    await openai_responses_impl._prepend_prompt(conversation, requested_prompt)
+
+    # The rendered prompt is inserted in front of the existing user message.
+    assert len(conversation) == 2
+    assert isinstance(conversation[0], OpenAISystemMessageParam)
+    rendered = "Hello Alice! You are working at Dummy Company. Your role is AI Assistant at Dummy Company. Remember, Alice, to be professional."
+    assert conversation[0].content == rendered
+
+
+async def test_prepend_prompt_with_image_variable(openai_responses_impl, mock_prompts_api, mock_files_api):
+    """An image variable given by file_id yields a placeholder plus a trailing user message.
+
+    The system message carries "[Image: <name>]" in place of the variable, and
+    the image itself is appended after the original user message as a data URL.
+    """
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+    image_file_id = "file-abc123"
+    image_bytes = b"fake_image_data"
+
+    mock_prompts_api.get_prompt.return_value = Prompt(
+        prompt="Analyze this {{product_image}} and describe what you see.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=["product_image"],
+        is_default=True,
+    )
+
+    # Files API stubs: the raw content and the metadata record for the same file.
+    mock_files_api.openai_retrieve_file_content.return_value = type("obj", (object,), {"body": image_bytes})()
+    mock_files_api.openai_retrieve_file.return_value = OpenAIFileObject(
+        object="file",
+        id=image_file_id,
+        bytes=len(image_bytes),
+        created_at=1234567890,
+        expires_at=1234567890,
+        filename="product.jpg",
+        purpose="assistants",
+    )
+
+    image_variable = OpenAIResponseInputMessageContentImage(
+        file_id=image_file_id,
+        detail="high",
+    )
+    requested_prompt = OpenAIResponsePrompt(
+        id=prompt_id,
+        version="1",
+        variables={"product_image": image_variable},
+    )
+    conversation = [OpenAIUserMessageParam(content="What do you think?")]
+
+    await openai_responses_impl._prepend_prompt(conversation, requested_prompt)
+
+    assert len(conversation) == 3
+    system_msg, original_msg, media_msg = conversation
+
+    # System message: the variable is replaced by a textual placeholder.
+    assert isinstance(system_msg, OpenAISystemMessageParam)
+    assert system_msg.content == "Analyze this [Image: product_image] and describe what you see."
+
+    # The caller's user message keeps its position and content.
+    assert isinstance(original_msg, OpenAIUserMessageParam)
+    assert original_msg.content == "What do you think?"
+
+    # Appended user message: a single image part.
+    assert isinstance(media_msg, OpenAIUserMessageParam)
+    assert isinstance(media_msg.content, list)
+    assert len(media_msg.content) == 1
+
+    # The image part is a data URL and keeps the requested detail level.
+    image_part = media_msg.content[0]
+    assert isinstance(image_part, OpenAIChatCompletionContentPartImageParam)
+    assert image_part.image_url.url.startswith("data:image/")
+    assert image_part.image_url.detail == "high"
+
+
+async def test_prepend_prompt_with_file_variable(openai_responses_impl, mock_prompts_api, mock_files_api):
+    """A file variable yields a placeholder in the system message plus a trailing user message.
+
+    The system message carries "[File: <name>]" in place of the variable, and
+    the file is appended after the original user message as base64 file data.
+    """
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+    contract_file_id = "file-contract-789"
+    contract_bytes = b"fake_pdf_content"
+
+    mock_prompts_api.get_prompt.return_value = Prompt(
+        prompt="Review the document {{contract_file}} and summarize key points.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=["contract_file"],
+        is_default=True,
+    )
+
+    # Files API stubs: the raw content and the metadata record for the same file.
+    mock_files_api.openai_retrieve_file_content.return_value = type("obj", (object,), {"body": contract_bytes})()
+    mock_files_api.openai_retrieve_file.return_value = OpenAIFileObject(
+        object="file",
+        id=contract_file_id,
+        bytes=len(contract_bytes),
+        created_at=1234567890,
+        expires_at=1234567890,
+        filename="contract.pdf",
+        purpose="assistants",
+    )
+
+    file_variable = OpenAIResponseInputMessageContentFile(
+        file_id=contract_file_id,
+        filename="contract.pdf",
+    )
+    requested_prompt = OpenAIResponsePrompt(
+        id=prompt_id,
+        version="1",
+        variables={"contract_file": file_variable},
+    )
+    conversation = [OpenAIUserMessageParam(content="Please review this.")]
+
+    await openai_responses_impl._prepend_prompt(conversation, requested_prompt)
+
+    assert len(conversation) == 3
+    system_msg, original_msg, media_msg = conversation
+
+    # System message: the variable is replaced by a textual placeholder.
+    assert isinstance(system_msg, OpenAISystemMessageParam)
+    assert system_msg.content == "Review the document [File: contract_file] and summarize key points."
+
+    # The caller's user message keeps its position and content.
+    assert isinstance(original_msg, OpenAIUserMessageParam)
+    assert original_msg.content == "Please review this."
+
+    # Appended user message: a single file part.
+    assert isinstance(media_msg, OpenAIUserMessageParam)
+    assert isinstance(media_msg.content, list)
+    assert len(media_msg.content) == 1
+
+    # The file part carries inline data, so file_id is expected to be cleared.
+    file_part = media_msg.content[0]
+    assert isinstance(file_part, OpenAIFile)
+    assert file_part.file.file_data.startswith("data:application/pdf;base64,")
+    assert file_part.file.filename == "contract.pdf"
+    assert file_part.file.file_id is None
+
+
+async def test_prepend_prompt_with_mixed_variables(openai_responses_impl, mock_prompts_api, mock_files_api):
+    """Test prepend_prompt with text, image, and file variables mixed together.
+
+    Text variables are substituted inline, while the image and file variables
+    leave placeholders in the system message and are delivered together in one
+    extra user message appended after the original conversation.
+    """
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+    prompt = Prompt(
+        prompt="Hello {{name}}! Analyze {{photo}} and review {{document}}. Provide insights for {{company}}.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=["name", "photo", "document", "company"],
+        is_default=True,
+    )
+
+    # Mock file retrieval for image and file
+    mock_image_content = b"fake_image_data"
+    mock_file_content = b"fake_doc_content"
+
+    async def mock_retrieve_file_content(file_id):
+        if file_id == "file-photo-123":
+            return type("obj", (object,), {"body": mock_image_content})()
+        if file_id == "file-doc-456":
+            return type("obj", (object,), {"body": mock_file_content})()
+        # Fail loudly instead of implicitly returning None for an unknown id,
+        # which would otherwise surface later as an unrelated AttributeError.
+        raise AssertionError(f"Unexpected file_id requested: {file_id!r}")
+
+    mock_files_api.openai_retrieve_file_content.side_effect = mock_retrieve_file_content
+
+    # A plain (non-async) function is fine here: AsyncMock returns its result
+    # as the awaited value.
+    def mock_retrieve_file(file_id):
+        if file_id == "file-photo-123":
+            return OpenAIFileObject(
+                object="file",
+                id="file-photo-123",
+                bytes=len(mock_image_content),
+                created_at=1234567890,
+                expires_at=1234567890,
+                filename="photo.jpg",
+                purpose="assistants",
+            )
+        if file_id == "file-doc-456":
+            return OpenAIFileObject(
+                object="file",
+                id="file-doc-456",
+                bytes=len(mock_file_content),
+                created_at=1234567890,
+                expires_at=1234567890,
+                filename="doc.pdf",
+                purpose="assistants",
+            )
+        # Same reasoning as above: never hand back an implicit None.
+        raise AssertionError(f"Unexpected file_id requested: {file_id!r}")
+
+    mock_files_api.openai_retrieve_file.side_effect = mock_retrieve_file
+
+    openai_response_prompt = OpenAIResponsePrompt(
+        id=prompt_id,
+        version="1",
+        variables={
+            "name": OpenAIResponseInputMessageContentText(text="Alice"),
+            "photo": OpenAIResponseInputMessageContentImage(file_id="file-photo-123", detail="auto"),
+            "document": OpenAIResponseInputMessageContentFile(file_id="file-doc-456", filename="doc.pdf"),
+            "company": OpenAIResponseInputMessageContentText(text="Acme Corp"),
+        },
+    )
+
+    mock_prompts_api.get_prompt.return_value = prompt
+
+    # Initial messages
+    messages = [OpenAIUserMessageParam(content="Here's my question.")]
+
+    # Execute
+    await openai_responses_impl._prepend_prompt(messages, openai_response_prompt)
+
+    assert len(messages) == 3
+
+    # Check system message has text and placeholders
+    assert isinstance(messages[0], OpenAISystemMessageParam)
+    expected_system = "Hello Alice! Analyze [Image: photo] and review [File: document]. Provide insights for Acme Corp."
+    assert messages[0].content == expected_system
+
+    # Check original user message is still there
+    assert isinstance(messages[1], OpenAIUserMessageParam)
+    assert messages[1].content == "Here's my question."
+
+    # Check new user message with media is appended (2 media items)
+    assert isinstance(messages[2], OpenAIUserMessageParam)
+    assert isinstance(messages[2].content, list)
+    assert len(messages[2].content) == 2
+
+    # First part should be image with data URL
+    assert isinstance(messages[2].content[0], OpenAIChatCompletionContentPartImageParam)
+    assert messages[2].content[0].image_url.url.startswith("data:image/")
+
+    # Second part should be file with data URL
+    assert isinstance(messages[2].content[1], OpenAIFile)
+    assert messages[2].content[1].file.file_data.startswith("data:application/pdf;base64,")
+    assert messages[2].content[1].file.filename == "doc.pdf"
+    assert messages[2].content[1].file.file_id is None
+
+
+async def test_prepend_prompt_with_image_using_image_url(openai_responses_impl, mock_prompts_api):
+    """An image variable given by image_url is forwarded with that URL unchanged."""
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+    screenshot_url = "https://example.com/screenshot.png"
+
+    mock_prompts_api.get_prompt.return_value = Prompt(
+        prompt="Describe {{screenshot}}.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=["screenshot"],
+        is_default=True,
+    )
+
+    screenshot_variable = OpenAIResponseInputMessageContentImage(
+        image_url=screenshot_url,
+        detail="low",
+    )
+    requested_prompt = OpenAIResponsePrompt(
+        id=prompt_id,
+        version="1",
+        variables={"screenshot": screenshot_variable},
+    )
+    conversation = [OpenAIUserMessageParam(content="What is this?")]
+
+    await openai_responses_impl._prepend_prompt(conversation, requested_prompt)
+
+    assert len(conversation) == 3
+    system_msg, original_msg, media_msg = conversation
+
+    # System message: the variable is replaced by a textual placeholder.
+    assert isinstance(system_msg, OpenAISystemMessageParam)
+    assert system_msg.content == "Describe [Image: screenshot]."
+
+    # The caller's user message keeps its position and content.
+    assert isinstance(original_msg, OpenAIUserMessageParam)
+    assert original_msg.content == "What is this?"
+
+    # Appended user message holds the image part.
+    assert isinstance(media_msg, OpenAIUserMessageParam)
+    assert isinstance(media_msg.content, list)
+
+    # The URL and detail level pass through exactly as supplied.
+    image_part = media_msg.content[0]
+    assert isinstance(image_part, OpenAIChatCompletionContentPartImageParam)
+    assert image_part.image_url.url == screenshot_url
+    assert image_part.image_url.detail == "low"
+
+
+async def test_prepend_prompt_image_variable_missing_required_fields(openai_responses_impl, mock_prompts_api):
+    """An image variable carrying neither file_id nor image_url makes _prepend_prompt raise ValueError."""
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+
+    mock_prompts_api.get_prompt.return_value = Prompt(
+        prompt="Analyze {{bad_image}}.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=["bad_image"],
+        is_default=True,
+    )
+
+    # The image content is built with no arguments, so both sources are unset.
+    empty_image = OpenAIResponseInputMessageContentImage()
+    requested_prompt = OpenAIResponsePrompt(
+        id=prompt_id,
+        version="1",
+        variables={"bad_image": empty_image},
+    )
+    conversation = [OpenAIUserMessageParam(content="Test")]
+
+    with pytest.raises(ValueError, match="Image content must have either 'image_url' or 'file_id'"):
+        await openai_responses_impl._prepend_prompt(conversation, requested_prompt)
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses_conversations.py
@ -39,6 +39,8 @@ def responses_impl_with_conversations(
    mock_vector_io_api,
    mock_conversations_api,
    mock_safety_api,
+    mock_prompts_api,
+    mock_files_api,
 ):
    """Create OpenAIResponsesImpl instance with conversations API."""
    return OpenAIResponsesImpl(
@ -49,6 +51,8 @@ def responses_impl_with_conversations(
        vector_io_api=mock_vector_io_api,
        conversations_api=mock_conversations_api,
        safety_api=mock_safety_api,
+        prompts_api=mock_prompts_api,
+        files_api=mock_files_api,
    )


--- a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
+++ b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
@ -5,6 +5,8 @@
 # the root directory of this source tree.


+from unittest.mock import AsyncMock
+
 import pytest

 from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
@ -46,6 +48,12 @@ from llama_stack_api.openai_responses import (
 )


+@pytest.fixture
+def mock_files_api():
+    """Provide an AsyncMock stand-in for the files API passed to the conversion helpers."""
+    files_api = AsyncMock()
+    return files_api
+
+
 class TestConvertChatChoiceToResponseMessage:
    async def test_convert_string_content(self):
        choice = OpenAIChoice(
@ -78,17 +86,17 @@ class TestConvertChatChoiceToResponseMessage:


 class TestConvertResponseContentToChatContent:
-    async def test_convert_string_content(self):
-        result = await convert_response_content_to_chat_content("Simple string")
+    async def test_convert_string_content(self, mock_files_api):
+        result = await convert_response_content_to_chat_content("Simple string", mock_files_api)
        assert result == "Simple string"

-    async def test_convert_text_content_parts(self):
+    async def test_convert_text_content_parts(self, mock_files_api):
        content = [
            OpenAIResponseInputMessageContentText(text="First part"),
            OpenAIResponseOutputMessageContentOutputText(text="Second part"),
        ]

-        result = await convert_response_content_to_chat_content(content)
+        result = await convert_response_content_to_chat_content(content, mock_files_api)

        assert len(result) == 2
        assert isinstance(result[0], OpenAIChatCompletionContentPartTextParam)
@ -96,10 +104,10 @@ class TestConvertResponseContentToChatContent:
        assert isinstance(result[1], OpenAIChatCompletionContentPartTextParam)
        assert result[1].text == "Second part"

-    async def test_convert_image_content(self):
+    async def test_convert_image_content(self, mock_files_api):
        content = [OpenAIResponseInputMessageContentImage(image_url="https://example.com/image.jpg", detail="high")]

-        result = await convert_response_content_to_chat_content(content)
+        result = await convert_response_content_to_chat_content(content, mock_files_api)

        assert len(result) == 1
        assert isinstance(result[0], OpenAIChatCompletionContentPartImageParam)
--- a/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py
+++ b/tests/unit/providers/agents/meta_reference/test_responses_safety_utils.py
@ -30,6 +30,8 @@ def mock_apis():
        "vector_io_api": AsyncMock(),
        "conversations_api": AsyncMock(),
        "safety_api": AsyncMock(),
+        "prompts_api": AsyncMock(),
+        "files_api": AsyncMock(),
    }


--- a/tests/unit/providers/agents/meta_reference/test_safety_optional.py
+++ b/tests/unit/providers/agents/meta_reference/test_safety_optional.py
@ -0,0 +1,214 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Tests for making Safety API optional in meta-reference agents provider.
+
+This test suite validates the changes introduced to fix issue #4165, which
+allows running the meta-reference agents provider without the Safety API.
+Safety API is now an optional dependency, and errors are raised at request time
+when guardrails are explicitly requested without Safety API configured.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from llama_stack.core.datatypes import Api
+from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
+from llama_stack.providers.inline.agents.meta_reference import get_provider_impl
+from llama_stack.providers.inline.agents.meta_reference.config import (
+    AgentPersistenceConfig,
+    MetaReferenceAgentsImplConfig,
+)
+from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
+    run_guardrails,
+)
+
+
+@pytest.fixture
+def mock_persistence_config():
+    """Build an AgentPersistenceConfig pointing at the kv_default and sql_default backends."""
+    agent_state_ref = KVStoreReference(backend="kv_default", namespace="agents")
+    responses_ref = ResponsesStoreReference(backend="sql_default", table_name="responses")
+    return AgentPersistenceConfig(agent_state=agent_state_ref, responses=responses_ref)
+
+
+@pytest.fixture
+def mock_deps():
+    """Map each API dependency used by these tests to its own fresh AsyncMock.
+
+    Api.safety is deliberately left out; tests that need it add it themselves.
+    """
+    required_apis = (
+        Api.inference,
+        Api.vector_io,
+        Api.tool_runtime,
+        Api.tool_groups,
+        Api.conversations,
+        Api.prompts,
+        Api.files,
+    )
+    return {api: AsyncMock() for api in required_apis}
+
+
+class TestProviderInitialization:
+    """Provider construction must succeed whether or not a Safety API dependency is supplied."""
+
+    async def test_initialization_with_safety_api_present(self, mock_persistence_config, mock_deps):
+        """get_provider_impl returns a provider when Api.safety is among the dependencies."""
+        mock_deps[Api.safety] = AsyncMock()
+        config = MetaReferenceAgentsImplConfig(persistence=mock_persistence_config)
+
+        # Replace initialize() with an AsyncMock so no real setup work runs.
+        initialize_patch = patch(
+            "llama_stack.providers.inline.agents.meta_reference.agents.MetaReferenceAgentsImpl.initialize",
+            new_callable=AsyncMock,
+        )
+        with initialize_patch:
+            provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
+            assert provider is not None
+
+    async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps):
+        """get_provider_impl still returns a provider, with safety_api set to None, when Api.safety is absent."""
+        config = MetaReferenceAgentsImplConfig(persistence=mock_persistence_config)
+
+        # mock_deps carries no Api.safety entry; construction must not require it.
+        initialize_patch = patch(
+            "llama_stack.providers.inline.agents.meta_reference.agents.MetaReferenceAgentsImpl.initialize",
+            new_callable=AsyncMock,
+        )
+        with initialize_patch:
+            provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
+            assert provider is not None
+            assert provider.safety_api is None
+
+
+class TestGuardrailsFunctionality:
+    """Behaviour of run_guardrails and create_openai_response when the Safety API is optional."""
+
+    async def test_run_guardrails_with_none_safety_api(self):
+        """run_guardrails yields None when called with safety_api=None."""
+        outcome = await run_guardrails(safety_api=None, messages="test message", guardrail_ids=["llama-guard"])
+        assert outcome is None
+
+    async def test_run_guardrails_with_empty_messages(self):
+        """run_guardrails yields None for an empty message, with or without a Safety API."""
+        # First without a Safety API, then with a mocked one.
+        for safety_api in (None, AsyncMock()):
+            outcome = await run_guardrails(safety_api=safety_api, messages="", guardrail_ids=["llama-guard"])
+            assert outcome is None
+
+    async def test_run_guardrails_with_none_safety_api_ignores_guardrails(self):
+        """Several guardrail ids are skipped without raising when safety_api is None."""
+        requested_guardrails = ["llama-guard", "content-filter"]
+        outcome = await run_guardrails(
+            safety_api=None,
+            messages="potentially harmful content",
+            guardrail_ids=requested_guardrails,
+        )
+        assert outcome is None
+
+    async def test_create_response_rejects_guardrails_without_safety_api(self, mock_persistence_config, mock_deps):
+        """create_openai_response raises ValueError when guardrails are requested but no Safety API exists."""
+        from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
+            OpenAIResponsesImpl,
+        )
+        from llama_stack_api import ResponseGuardrailSpec
+
+        with patch("llama_stack.providers.inline.agents.meta_reference.responses.openai_responses.ResponsesStore"):
+            responses_impl = OpenAIResponsesImpl(
+                inference_api=mock_deps[Api.inference],
+                tool_groups_api=mock_deps[Api.tool_groups],
+                tool_runtime_api=mock_deps[Api.tool_runtime],
+                responses_store=MagicMock(),
+                vector_io_api=mock_deps[Api.vector_io],
+                safety_api=None,  # the condition under test
+                conversations_api=mock_deps[Api.conversations],
+                prompts_api=mock_deps[Api.prompts],
+                files_api=mock_deps[Api.files],
+            )
+
+            # Both accepted spellings of a guardrail: a bare id and a spec object.
+            guardrail_variants = (
+                ["llama-guard"],
+                [ResponseGuardrailSpec(type="llama-guard")],
+            )
+            for guardrails in guardrail_variants:
+                with pytest.raises(ValueError, match="Cannot process guardrails: Safety API is not configured"):
+                    await responses_impl.create_openai_response(
+                        input="test input",
+                        model="test-model",
+                        guardrails=guardrails,
+                    )
+
+    async def test_create_response_succeeds_without_guardrails_and_no_safety_api(
+        self, mock_persistence_config, mock_deps
+    ):
+        """Without guardrails, create_openai_response never complains about the missing Safety API.
+
+        Any other exception is tolerated: the call may still fail later because
+        of the mocking, so only the Safety API error message is checked for.
+        """
+        from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
+            OpenAIResponsesImpl,
+        )
+
+        with (
+            patch("llama_stack.providers.inline.agents.meta_reference.responses.openai_responses.ResponsesStore"),
+            patch.object(OpenAIResponsesImpl, "_create_streaming_response", new_callable=AsyncMock) as mock_stream,
+        ):
+            # Stand-in stream producing a single opaque chunk.
+            async def mock_generator():
+                yield MagicMock()
+
+            mock_stream.return_value = mock_generator()
+
+            responses_impl = OpenAIResponsesImpl(
+                inference_api=mock_deps[Api.inference],
+                tool_groups_api=mock_deps[Api.tool_groups],
+                tool_runtime_api=mock_deps[Api.tool_runtime],
+                responses_store=MagicMock(),
+                vector_io_api=mock_deps[Api.vector_io],
+                safety_api=None,  # the condition under test
+                conversations_api=mock_deps[Api.conversations],
+                prompts_api=mock_deps[Api.prompts],
+                files_api=mock_deps[Api.files],
+            )
+
+            try:
+                await responses_impl.create_openai_response(
+                    input="test input",
+                    model="test-model",
+                    guardrails=None,  # nothing requested, so validation must pass
+                )
+            except Exception as e:
+                # Unrelated failures are acceptable; the Safety API error is not.
+                assert "Cannot process guardrails: Safety API is not configured" not in str(e)
--- a/tests/unit/providers/inference/test_inference_client_caching.py
+++ b/tests/unit/providers/inference/test_inference_client_caching.py
@ -120,7 +120,7 @@ from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInfere
            VLLMInferenceAdapter,
            "llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator",
            {
-                "url": "http://fake",
+                "base_url": "http://fake",
            },
        ),
    ],
@ -153,7 +153,7 @@ def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_valid
    """Validate data for LiteLLM-based providers.  Similar to test_openai_provider_data_used, but without the
    assumption that there is an OpenAI-compatible client object."""

-    inference_adapter = adapter_cls(config=config_cls())
+    inference_adapter = adapter_cls(config=config_cls(base_url="http://fake"))

    inference_adapter.__provider_spec__ = MagicMock()
    inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@ -40,7 +40,7 @@ from llama_stack_api import (

@pytest.fixture(scope="function")
 async def vllm_inference_adapter():
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    inference_adapter = VLLMInferenceAdapter(config=config)
    inference_adapter.model_store = AsyncMock()
    await inference_adapter.initialize()
@ -204,7 +204,7 @@ async def test_vllm_completion_extra_body():
    via extra_body to the underlying OpenAI client through the InferenceRouter.
    """
    # Set up the vLLM adapter
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    vllm_adapter = VLLMInferenceAdapter(config=config)
    vllm_adapter.__provider_id__ = "vllm"
    await vllm_adapter.initialize()
@ -277,7 +277,7 @@ async def test_vllm_chat_completion_extra_body():
    via extra_body to the underlying OpenAI client through the InferenceRouter for chat completion.
    """
    # Set up the vLLM adapter
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    vllm_adapter = VLLMInferenceAdapter(config=config)
    vllm_adapter.__provider_id__ = "vllm"
    await vllm_adapter.initialize()
--- a/tests/unit/providers/nvidia/test_rerank_inference.py
+++ b/tests/unit/providers/nvidia/test_rerank_inference.py
@ -146,7 +146,7 @@ async def test_hosted_model_not_in_endpoint_mapping():

 async def test_self_hosted_ignores_endpoint():
    adapter = create_adapter(
-        config=NVIDIAConfig(url="http://localhost:8000", api_key=None),
+        config=NVIDIAConfig(base_url="http://localhost:8000", api_key=None),
        rerank_endpoints={"test-model": "https://model.endpoint/rerank"},  # This should be ignored for self-hosted.
    )
    mock_session = MockSession(MockResponse())
--- a/tests/unit/providers/test_configs.py
+++ b/tests/unit/providers/test_configs.py
@ -4,8 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import get_args, get_origin
+
 import pytest
-from pydantic import BaseModel
+from pydantic import BaseModel, HttpUrl

 from llama_stack.core.distribution import get_provider_registry, providable_apis
 from llama_stack.core.utils.dynamic import instantiate_class_type
@ -41,3 +43,55 @@ class TestProviderConfigurations:

        sample_config = config_type.sample_run_config(__distro_dir__="foobarbaz")
        assert isinstance(sample_config, dict), f"{config_class_name}.sample_run_config() did not return a dict"
+
+    def test_remote_inference_url_standardization(self):
+        """Verify all remote inference providers use standardized base_url configuration.
+
+        Every ``remote::`` provider registered for the inference API has its config
+        class loaded and its pydantic ``model_fields`` inspected:
+
+        - a field named ``url`` is reported (old naming, should be ``base_url``);
+        - a ``base_url`` field, when present, must be annotated as ``HttpUrl`` or
+          as a union containing ``HttpUrl`` (e.g. ``HttpUrl | None``).
+
+        Violations are accumulated across providers and reported through a single
+        ``pytest.fail`` so one run lists every offender.
+        """
+        import types
+        from typing import Union
+
+        # Optional[X] / Union[X, Y] report typing.Union as their origin, whereas the
+        # PEP 604 spelling (X | Y) reports types.UnionType; accept both.
+        union_origins = (Union, types.UnionType)
+
+        provider_registry = get_provider_registry()
+        # Match on the key's value so the lookup works whether the registry is keyed
+        # by plain strings or by enum members whose value is "inference".
+        inference_providers = next(
+            (
+                providers
+                for api, providers in provider_registry.items()
+                if getattr(api, "value", api) == "inference"
+            ),
+            {},
+        )
+
+        # Filter for remote providers only
+        remote_providers = {k: v for k, v in inference_providers.items() if k.startswith("remote::")}
+        # An empty selection would let the loop below pass without checking anything.
+        assert remote_providers, "No remote inference providers found in the provider registry"
+
+        failures = []
+        for provider_type, provider_spec in remote_providers.items():
+            try:
+                config_type = instantiate_class_type(provider_spec.config_class)
+
+                # Configs without pydantic model_fields cannot be inspected; skip them.
+                if not hasattr(config_type, "model_fields"):
+                    continue
+                fields = config_type.model_fields
+
+                # Should NOT have 'url' field (old pattern)
+                if "url" in fields:
+                    failures.append(
+                        f"{provider_type}: Uses deprecated 'url' field instead of 'base_url'. "
+                        f"Please rename to 'base_url' for consistency."
+                    )
+
+                # base_url is optional for a provider, but when declared it must be
+                # HttpUrl or a union that includes HttpUrl.
+                if "base_url" in fields:
+                    annotation = fields["base_url"].annotation
+
+                    if get_origin(annotation) in union_origins:
+                        is_valid = HttpUrl in get_args(annotation)
+                    else:
+                        # Plain HttpUrl; other generics such as list[HttpUrl] are rejected.
+                        is_valid = annotation == HttpUrl
+
+                    if not is_valid:
+                        failures.append(
+                            f"{provider_type}: base_url field has incorrect type annotation. "
+                            f"Expected 'HttpUrl | None', got '{annotation}'"
+                        )
+
+            except Exception as e:
+                # Loading or inspecting a config must not abort the scan; record it as a failure.
+                failures.append(f"{provider_type}: Error checking URL standardization: {e}")
+
+        if failures:
+            pytest.fail("URL standardization violations found:\n" + "\n".join(f"  - {f}" for f in failures))
--- a/tests/unit/providers/utils/inference/test_openai_mixin.py
+++ b/tests/unit/providers/utils/inference/test_openai_mixin.py
@ -15,7 +15,14 @@ from pydantic import BaseModel, Field
 from llama_stack.core.request_headers import request_provider_data_context
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIUserMessageParam,
+)


 class OpenAIMixinImpl(OpenAIMixin):
@ -834,3 +841,96 @@ class TestOpenAIMixinProviderDataApiKey:
        error_message = str(exc_info.value)
        assert "test_api_key" in error_message
        assert "x-llamastack-provider-data" in error_message
+
+
+class TestOpenAIMixinAllowedModelsInference:
+    """Test cases for allowed_models enforcement during inference requests.
+
+    Each test sets ``mixin.config.allowed_models`` and then drives the chat
+    completion, completion and embeddings entry points against a mocked client.
+    The ``mixin`` and ``mock_client_context`` arguments are pytest fixtures that
+    are defined outside this class.
+    """
+
+    async def test_inference_with_allowed_models(self, mixin, mock_client_context):
+        """Test that all inference methods succeed with allowed models"""
+        mixin.config.allowed_models = ["gpt-4", "text-davinci-003", "text-embedding-ada-002"]
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=MagicMock())
+        mock_client.completions.create = AsyncMock(return_value=MagicMock())
+        # The embeddings response needs concrete data/usage attributes rather than
+        # auto-generated mocks.
+        mock_embedding_response = MagicMock()
+        mock_embedding_response.data = [MagicMock(embedding=[0.1, 0.2, 0.3])]
+        mock_embedding_response.usage = MagicMock(prompt_tokens=5, total_tokens=5)
+        mock_client.embeddings.create = AsyncMock(return_value=mock_embedding_response)
+
+        with mock_client_context(mixin, mock_client):
+            # Test chat completion
+            await mixin.openai_chat_completion(
+                OpenAIChatCompletionRequestWithExtraBody(
+                    model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
+                )
+            )
+            mock_client.chat.completions.create.assert_called_once()
+
+            # Test completion
+            await mixin.openai_completion(
+                OpenAICompletionRequestWithExtraBody(model="text-davinci-003", prompt="Hello")
+            )
+            mock_client.completions.create.assert_called_once()
+
+            # Test embeddings
+            await mixin.openai_embeddings(
+                OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-ada-002", input="test text")
+            )
+            mock_client.embeddings.create.assert_called_once()
+
+    async def test_inference_with_disallowed_models(self, mixin, mock_client_context):
+        """Test that all inference methods fail with disallowed models"""
+        mixin.config.allowed_models = ["gpt-4"]
+
+        mock_client = MagicMock()
+
+        with mock_client_context(mixin, mock_client):
+            # Test chat completion with disallowed model
+            with pytest.raises(ValueError, match="Model 'gpt-4-turbo' is not in the allowed models list"):
+                await mixin.openai_chat_completion(
+                    OpenAIChatCompletionRequestWithExtraBody(
+                        model="gpt-4-turbo", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
+                    )
+                )
+
+            # Test completion with disallowed model
+            with pytest.raises(ValueError, match="Model 'text-davinci-002' is not in the allowed models list"):
+                await mixin.openai_completion(
+                    OpenAICompletionRequestWithExtraBody(model="text-davinci-002", prompt="Hello")
+                )
+
+            # Test embeddings with disallowed model
+            with pytest.raises(ValueError, match="Model 'text-embedding-3-large' is not in the allowed models list"):
+                await mixin.openai_embeddings(
+                    OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-3-large", input="test text")
+                )
+
+            # Rejection must happen before any request is handed to the client.
+            mock_client.chat.completions.create.assert_not_called()
+            mock_client.completions.create.assert_not_called()
+            mock_client.embeddings.create.assert_not_called()
+
+    async def test_inference_with_no_restrictions(self, mixin, mock_client_context):
+        """Test that allowed_models=None permits any model while an empty list blocks every model"""
+        # Test with None (no restrictions)
+        assert mixin.config.allowed_models is None
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=MagicMock())
+
+        with mock_client_context(mixin, mock_client):
+            await mixin.openai_chat_completion(
+                OpenAIChatCompletionRequestWithExtraBody(
+                    model="any-model", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
+                )
+            )
+            mock_client.chat.completions.create.assert_called_once()
+
+        # Test with empty list (blocks all models)
+        mixin.config.allowed_models = []
+        with mock_client_context(mixin, mock_client):
+            with pytest.raises(ValueError, match="Model 'gpt-4' is not in the allowed models list"):
+                await mixin.openai_chat_completion(
+                    OpenAIChatCompletionRequestWithExtraBody(
+                        model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
+                    )
+                )
+            # The call count is still the single unrestricted call made above, so the
+            # blocked request never reached the client.
+            mock_client.chat.completions.create.assert_called_once()
--- a/tests/unit/providers/vector_io/test_vector_utils.py
+++ b/tests/unit/providers/vector_io/test_vector_utils.py
@ -5,7 +5,7 @@
 # the root directory of this source tree.

 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
-from llama_stack_api import Chunk, ChunkMetadata
+from llama_stack_api import Chunk, ChunkMetadata, VectorStoreFileObject

 # This test is a unit test for the chunk_utils.py helpers. This should only contain
 # tests which are specific to this file. More general (API-level) tests should be placed in
@ -78,3 +78,77 @@ def test_chunk_serialization():
    serialized_chunk = chunk.model_dump()
    assert serialized_chunk["chunk_id"] == "test-chunk-id"
    assert "chunk_id" in serialized_chunk
+
+
+def test_vector_store_file_object_attributes_validation():
+    """Check how VectorStoreFileObject sanitizes a mixed bag of attribute values on construction."""
+    from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto
+
+    # Input mixes a list, primitives and a nested dict to cover each sanitization path.
+    raw_attributes = {
+        "tags": ["transformers", "h100-compatible", "region:us"],  # list, expected to be joined
+        "model_name": "granite-3.3-8b",  # plain string
+        "score": 0.95,  # float
+        "active": True,  # bool
+        "count": 42,  # int
+        "nested": {"key": "value"},  # dict, expected to be dropped
+    }
+    file_obj = VectorStoreFileObject(
+        id="file-123",
+        attributes=raw_attributes,
+        chunking_strategy=VectorStoreChunkingStrategyAuto(),
+        created_at=1234567890,
+        status="completed",
+        vector_store_id="vs-123",
+    )
+    sanitized = file_obj.attributes
+
+    # The list collapses into one comma-separated string.
+    assert sanitized["tags"] == "transformers, h100-compatible, region:us"
+    # Scalar values come through with the same value.
+    assert sanitized["model_name"] == "granite-3.3-8b"
+    assert sanitized["score"] == 0.95
+    assert sanitized["active"] is True
+    assert sanitized["count"] == 42.0  # compares equal for an int as well as a float
+    # The nested dict is not kept.
+    assert "nested" not in sanitized
+
+
+def test_vector_store_file_object_attributes_constraints():
+    """Check the size limits VectorStoreFileObject applies to attributes: count, key length, value length."""
+    from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto
+
+    def sanitized_attributes(file_id, raw_attributes):
+        # Build a file object that differs only in id and attributes; return what it stored.
+        return VectorStoreFileObject(
+            id=file_id,
+            attributes=raw_attributes,
+            chunking_strategy=VectorStoreChunkingStrategyAuto(),
+            created_at=1234567890,
+            status="completed",
+            vector_store_id="vs-123",
+        ).attributes
+
+    # 20 properties supplied, only 16 may remain.
+    capped = sanitized_attributes("file-123", {f"key{i}": f"value{i}" for i in range(20)})
+    assert len(capped) == 16  # Max 16 properties
+
+    # A 65-character key exceeds the 64-character limit and is removed; the short key stays.
+    oversized_key = "a" * 65
+    key_filtered = sanitized_attributes("file-124", {oversized_key: "value", "valid_key": "value"})
+    assert oversized_key not in key_filtered
+    assert "valid_key" in key_filtered
+
+    # A 600-character string value is cut down to 512 characters.
+    truncated = sanitized_attributes("file-125", {"key": "x" * 600})
+    assert len(truncated["key"]) == 512