Merge branch 'main' into use-openai-for-ollama

2025-10-09 21:18:38 +00:00 · 2025-09-15 15:31:03 -04:00 · 2025-09-15 15:31:03 -04:00 · 91fb6f42cb
commit 91fb6f42cb
parent 7b5685b1d9 01bdcce4d2
74 changed files with 8761 additions and 971 deletions
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -6,12 +6,25 @@


 import time
+import unicodedata

 import pytest

 from ..test_cases.test_case import TestCase


+def _normalize_text(text: str) -> str:
+    """
+    Return ``text`` lowercased with diacritical marks removed, for lenient comparison.
+
+    Models sometimes answer with accented spellings: asked for the Latin name of the Sun, a model
+    may return "sōl" (with a macron over the 'o') where the test case expects "Sol". Decomposing
+    the text to NFD and dropping the combining marks makes both sides compare equal as "sol".
+
+    Only combining marks are dropped. Other non-ASCII characters (punctuation, emoji, non-Latin
+    scripts) are kept, so an expected value written in such characters is never reduced to an
+    empty string, which would be a substring of any response.
+
+    :param text: The text to normalize.
+    :returns: The lowercased text with combining marks removed.
+    """
+    decomposed = unicodedata.normalize("NFD", text)
+    return "".join(ch for ch in decomposed if not unicodedata.combining(ch)).lower()
+
+
 def provider_from_model(client_with_models, model_id):
    models = {m.identifier: m for m in client_with_models.models.list()}
    models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
@ -42,6 +55,10 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        "remote::groq",
        "remote::gemini",  # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
        "remote::anthropic",  # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
+        "remote::azure",  # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation
+        #  does not work with the specified model, gpt-5-mini. Please choose different model and try
+        #  again. You can learn more about which models can be used with each operation here:
+        #  https://go.microsoft.com/fwlink/?linkid=2197993.'}}
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")

@ -157,7 +174,8 @@ def test_openai_completion_non_streaming_suffix(llama_stack_client, client_with_
    assert len(response.choices) > 0
    choice = response.choices[0]
    assert len(choice.text) > 5
-    assert "france" in choice.text.lower()
+    normalized_text = _normalize_text(choice.text)
+    assert "france" in normalized_text


@pytest.mark.parametrize(
@ -248,7 +266,9 @@ def test_openai_chat_completion_non_streaming(compat_client, client_with_models,
    )
    message_content = response.choices[0].message.content.lower().strip()
    assert len(message_content) > 0
-    assert expected.lower() in message_content
+    normalized_expected = _normalize_text(expected)
+    normalized_content = _normalize_text(message_content)
+    assert normalized_expected in normalized_content


@pytest.mark.parametrize(
@ -272,10 +292,13 @@ def test_openai_chat_completion_streaming(compat_client, client_with_models, tex
    )
    streamed_content = []
    for chunk in response:
-        if chunk.choices[0].delta.content:
+        # On some providers like Azure, the choices are empty on the first chunk, so we need to check for that
+        if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
            streamed_content.append(chunk.choices[0].delta.content.lower().strip())
    assert len(streamed_content) > 0
-    assert expected.lower() in "".join(streamed_content)
+    normalized_expected = _normalize_text(expected)
+    normalized_content = _normalize_text("".join(streamed_content))
+    assert normalized_expected in normalized_content


@pytest.mark.parametrize(
@ -308,8 +331,12 @@ def test_openai_chat_completion_streaming_with_n(compat_client, client_with_mode
                    streamed_content.get(choice.index, "") + choice.delta.content.lower().strip()
                )
    assert len(streamed_content) == 2
+    normalized_expected = _normalize_text(expected)
    for i, content in streamed_content.items():
-        assert expected.lower() in content, f"Choice {i}: Expected {expected.lower()} in {content}"
+        normalized_content = _normalize_text(content)
+        assert normalized_expected in normalized_content, (
+            f"Choice {i}: Expected {normalized_expected} in {normalized_content}"
+        )


@pytest.mark.parametrize(
@ -339,9 +366,9 @@ def test_inference_store(compat_client, client_with_models, text_model_id, strea
        content = ""
        response_id = None
        for chunk in response:
-            if response_id is None:
+            if response_id is None and chunk.id:
                response_id = chunk.id
-            if chunk.choices[0].delta.content:
+            if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
    else:
        response_id = response.id
@ -410,11 +437,12 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
        content = ""
        response_id = None
        for chunk in response:
-            if response_id is None:
+            if response_id is None and chunk.id:
                response_id = chunk.id
-            if delta := chunk.choices[0].delta:
-                if delta.content:
-                    content += delta.content
+            if chunk.choices and len(chunk.choices) > 0:
+                if delta := chunk.choices[0].delta:
+                    if delta.content:
+                        content += delta.content
    else:
        response_id = response.id
        content = response.choices[0].message.content
@ -484,4 +512,5 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
        stream=False,
    )
    message_content = response.choices[0].message.content.lower().strip()
-    assert "hello world" in message_content
+    normalized_content = _normalize_text(message_content)
+    assert "hello world" in normalized_content
--- a/tests/integration/inference/test_text_inference.py
+++ b/tests/integration/inference/test_text_inference.py
@ -32,6 +32,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
            "remote::vertexai",
            "remote::groq",
            "remote::sambanova",
+            "remote::azure",
        )
        or "openai-compat" in provider.provider_type
    ):
@ -44,7 +45,7 @@ def skip_if_model_doesnt_support_json_schema_structured_output(client_with_model
    provider_id = models[model_id].provider_id
    providers = {p.provider_id: p for p in client_with_models.providers.list()}
    provider = providers[provider_id]
-    if provider.provider_type in ("remote::sambanova",):
+    if provider.provider_type in ("remote::sambanova", "remote::azure"):
        pytest.skip(
            f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
        )
--- a/tests/integration/recordings/responses/0fda25b9241c.json
+++ b/tests/integration/recordings/responses/0fda25b9241c.json
@ -0,0 +1,71 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-5-mini",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Which planet do humans live on?"
+        }
+      ],
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-5-mini"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-CECIXqfvjuluKkZtG3q2QJoSQhBU0",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "Humans live on Earth \u2014 the third planet from the Sun. It's the only known planet that naturally supports life, with a breathable atmosphere, liquid water, and temperatures suitable for living organisms.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": [],
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            },
+            "content_filter_results": {}
+          }
+        ],
+        "created": 1757499901,
+        "model": "gpt-5-mini-2025-08-07",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 112,
+          "prompt_tokens": 13,
+          "total_tokens": 125,
+          "completion_tokens_details": {
+            "accepted_prediction_tokens": 0,
+            "audio_tokens": 0,
+            "reasoning_tokens": 64,
+            "rejected_prediction_tokens": 0
+          },
+          "prompt_tokens_details": {
+            "audio_tokens": 0,
+            "cached_tokens": 0
+          }
+        },
+        "prompt_filter_results": [
+          {
+            "prompt_index": 0,
+            "content_filter_results": {}
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/2b2ad549510d.json
+++ b/tests/integration/recordings/responses/2b2ad549510d.json
@ -0,0 +1,448 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-5-mini",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Hello, world!"
+        }
+      ],
+      "stream": true
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-5-mini"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "",
+          "choices": [],
+          "created": 0,
+          "model": "",
+          "object": "",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null,
+          "prompt_filter_results": [
+            {
+              "prompt_index": 0,
+              "content_filter_results": {}
+            }
+          ]
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": "Hello",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " world",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": "!",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " Hi",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " \u2014",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " how",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " can",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " I",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " help",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " you",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": " today",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": "?",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "stop",
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499910,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      }
+    ],
+    "is_streaming": true
+  }
+}
--- a/tests/integration/recordings/responses/57b67d1b1a36.json
+++ b/tests/integration/recordings/responses/57b67d1b1a36.json
@ -0,0 +1,71 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-5-mini",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Which planet has rings around it with a name starting with letter S?"
+        }
+      ],
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-5-mini"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-CECIkT5cbqFazpungtewksVePcUNa",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "Saturn. It's the planet famous for its prominent ring system made of ice and rock.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": [],
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            },
+            "content_filter_results": {}
+          }
+        ],
+        "created": 1757499914,
+        "model": "gpt-5-mini-2025-08-07",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 156,
+          "prompt_tokens": 20,
+          "total_tokens": 176,
+          "completion_tokens_details": {
+            "accepted_prediction_tokens": 0,
+            "audio_tokens": 0,
+            "reasoning_tokens": 128,
+            "rejected_prediction_tokens": 0
+          },
+          "prompt_tokens_details": {
+            "audio_tokens": 0,
+            "cached_tokens": 0
+          }
+        },
+        "prompt_filter_results": [
+          {
+            "prompt_index": 0,
+            "content_filter_results": {}
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/8752115f8d0c.json
+++ b/tests/integration/recordings/responses/8752115f8d0c.json
@ -0,0 +1,71 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-5-mini",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Hello, world!"
+        }
+      ],
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-5-mini"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-CECIuyylsMNXspa83k8LrD8SQadNY",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "Hello! \ud83d\udc4b How can I help you today \u2014 answer a question, write or edit something, debug code, brainstorm ideas, or anything else?",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": [],
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            },
+            "content_filter_results": {}
+          }
+        ],
+        "created": 1757499924,
+        "model": "gpt-5-mini-2025-08-07",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 40,
+          "prompt_tokens": 10,
+          "total_tokens": 50,
+          "completion_tokens_details": {
+            "accepted_prediction_tokens": 0,
+            "audio_tokens": 0,
+            "reasoning_tokens": 0,
+            "rejected_prediction_tokens": 0
+          },
+          "prompt_tokens_details": {
+            "audio_tokens": 0,
+            "cached_tokens": 0
+          }
+        },
+        "prompt_filter_results": [
+          {
+            "prompt_index": 0,
+            "content_filter_results": {}
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/94d11daee205.json
+++ b/tests/integration/recordings/responses/94d11daee205.json
--- a/tests/integration/recordings/responses/9f3d749cc1c8.json
+++ b/tests/integration/recordings/responses/9f3d749cc1c8.json
--- a/tests/integration/recordings/responses/c791119e6359.json
+++ b/tests/integration/recordings/responses/c791119e6359.json
@ -0,0 +1,98 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-5-mini",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
+        }
+      ],
+      "stream": false,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the weather in a given city",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "city": {
+                  "type": "string",
+                  "description": "The city to get the weather for"
+                }
+              }
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-5-mini"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-CECIwq9Odd0mOJMmw7ytv8iEazH4H",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": null,
+              "refusal": null,
+              "role": "assistant",
+              "annotations": [],
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "call_yw18spRc1jjUlEyabbXBhB33",
+                  "function": {
+                    "arguments": "{\"city\":\"Tokyo\"}",
+                    "name": "get_weather"
+                  },
+                  "type": "function"
+                }
+              ]
+            },
+            "content_filter_results": {}
+          }
+        ],
+        "created": 1757499926,
+        "model": "gpt-5-mini-2025-08-07",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 88,
+          "prompt_tokens": 151,
+          "total_tokens": 239,
+          "completion_tokens_details": {
+            "accepted_prediction_tokens": 0,
+            "audio_tokens": 0,
+            "reasoning_tokens": 64,
+            "rejected_prediction_tokens": 0
+          },
+          "prompt_tokens_details": {
+            "audio_tokens": 0,
+            "cached_tokens": 0
+          }
+        },
+        "prompt_filter_results": [
+          {
+            "prompt_index": 0,
+            "content_filter_results": {}
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/d3e27b7234e2.json
+++ b/tests/integration/recordings/responses/d3e27b7234e2.json
--- a/tests/integration/recordings/responses/fb785db7fafd.json
+++ b/tests/integration/recordings/responses/fb785db7fafd.json
@ -0,0 +1,310 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-5-mini",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
+        }
+      ],
+      "stream": true,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the weather in a given city",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "city": {
+                  "type": "string",
+                  "description": "The city to get the weather for"
+                }
+              }
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-5-mini"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "",
+          "choices": [],
+          "created": 0,
+          "model": "",
+          "object": "",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null,
+          "prompt_filter_results": [
+            {
+              "prompt_index": 0,
+              "content_filter_results": {}
+            }
+          ]
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": "call_TMbEoYn9q0ZKtoxav5LpD9Ts",
+                    "function": {
+                      "arguments": "",
+                      "name": "get_weather"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499912,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499912,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "city",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499912,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "\":\"",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499912,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "Tokyo",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499912,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499912,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "tool_calls",
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499912,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      }
+    ],
+    "is_streaming": true
+  }
+}
--- a/tests/integration/recordings/responses/ff3271401fb4.json
+++ b/tests/integration/recordings/responses/ff3271401fb4.json
@ -0,0 +1,556 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "gpt-5-mini",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the name of the US captial?"
+        }
+      ],
+      "stream": true
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "gpt-5-mini"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "",
+          "choices": [],
+          "created": 0,
+          "model": "",
+          "object": "",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null,
+          "prompt_filter_results": [
+            {
+              "prompt_index": 0,
+              "content_filter_results": {}
+            }
+          ]
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": "The",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " capital",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " of",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " United",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " States",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " is",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " Washington",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " D",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": ".C",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " (",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": "District",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " of",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": " Columbia",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": ").",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "stop",
+              "index": 0,
+              "logprobs": null,
+              "content_filter_results": {}
+            }
+          ],
+          "created": 1757499916,
+          "model": "gpt-5-mini-2025-08-07",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      }
+    ],
+    "is_streaming": true
+  }
+}
--- a/tests/integration/telemetry/test_openai_telemetry.py
+++ b/tests/integration/telemetry/test_openai_telemetry.py
@ -49,16 +49,13 @@ def setup_openai_telemetry_data(llama_stack_client, text_model_id):
        traces = llama_stack_client.telemetry.query_traces(limit=10)
        if len(traces) >= 5:  # 5 OpenAI completion traces
            break
-        time.sleep(1)
+        time.sleep(0.1)

    if len(traces) < 5:
        pytest.fail(
            f"Failed to create sufficient OpenAI completion telemetry data after 30s. Got {len(traces)} traces."
        )

-    # Wait for 5 seconds to ensure traces has completed logging
-    time.sleep(5)
-
    yield


@ -185,11 +182,13 @@ def test_openai_completion_creates_telemetry(llama_stack_client, text_model_id):
    assert len(response.choices) > 0, "Response should have at least one choice"

    # Wait for telemetry to be recorded
-    time.sleep(3)
-
-    # Check that we have more traces now
-    final_traces = llama_stack_client.telemetry.query_traces(limit=20)
-    final_count = len(final_traces)
+    start_time = time.time()
+    while time.time() - start_time < 30:
+        final_traces = llama_stack_client.telemetry.query_traces(limit=20)
+        final_count = len(final_traces)
+        if final_count > initial_count:
+            break
+        time.sleep(0.1)

    # Should have at least as many traces as before (might have more due to other activity)
    assert final_count >= initial_count, "Should have at least as many traces after OpenAI call"
--- a/tests/integration/telemetry/test_telemetry.py
+++ b/tests/integration/telemetry/test_telemetry.py
@ -42,14 +42,11 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
        traces = llama_stack_client.telemetry.query_traces(limit=10)
        if len(traces) >= 4:
            break
-        time.sleep(1)
+        time.sleep(0.1)

    if len(traces) < 4:
        pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")

-    # Wait for 5 seconds to ensure traces has completed logging
-    time.sleep(5)
-
    yield


--- a/tests/integration/telemetry/test_telemetry_metrics.py
+++ b/tests/integration/telemetry/test_telemetry_metrics.py
@ -46,10 +46,7 @@ def setup_telemetry_metrics_data(openai_client, client_with_models, text_model_i
                break
        except Exception:
            pass
-        time.sleep(1)
-
-    # Wait additional time to ensure all metrics are processed
-    time.sleep(5)
+        time.sleep(0.1)

    # Return the token lists for use in tests
    return {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": total_tokens}
--- a/tests/integration/tool_runtime/test_rag_tool.py
+++ b/tests/integration/tool_runtime/test_rag_tool.py
@ -183,6 +183,110 @@ def test_vector_db_insert_from_url_and_query(
    assert any("llama2" in chunk.content.lower() for chunk in response2.chunks)


+def test_rag_tool_openai_apis(client_with_empty_registry, embedding_model_id, embedding_dimension):
+    """
+    Insert text, URL, and data-URL documents through the RAG tool and check the results.
+
+    After the insert, the test expects at least one entry per document from both
+    `files.list()` and `vector_io.openai_list_files_in_vector_store(...)`, and expects a RAG
+    query to return text mentioning "machine learning" or "deep learning".
+    """
+    vector_db_id = "test_openai_vector_db"
+
+    client_with_empty_registry.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model=embedding_model_id,
+        embedding_dimension=embedding_dimension,
+    )
+
+    # Use the identifier reported by the server instead of the requested `vector_db_id`.
+    # NOTE(review): index 0 presumably is the DB registered above because the fixture starts
+    # from an empty registry -- confirm against the fixture definition.
+    available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
+    actual_vector_db_id = available_vector_dbs[0]
+
+    # different document formats that should work with OpenAI APIs
+    documents = [
+        Document(
+            document_id="text-doc",
+            content="This is a plain text document about machine learning algorithms.",
+            metadata={"type": "text", "category": "AI"},
+        ),
+        Document(
+            document_id="url-doc",
+            content="https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/chat.rst",
+            mime_type="text/plain",
+            metadata={"type": "url", "source": "pytorch"},
+        ),
+        Document(
+            document_id="data-url-doc",
+            content="data:text/plain;base64,VGhpcyBpcyBhIGRhdGEgVVJMIGRvY3VtZW50IGFib3V0IGRlZXAgbGVhcm5pbmcu",  # "This is a data URL document about deep learning."
+            metadata={"type": "data_url", "encoding": "base64"},
+        ),
+    ]
+
+    client_with_empty_registry.tool_runtime.rag_tool.insert(
+        documents=documents,
+        vector_db_id=actual_vector_db_id,
+        chunk_size_in_tokens=256,
+    )
+
+    # ">=" rather than "==": the check only requires one file per inserted document and
+    # tolerates additional files being listed.
+    files_list = client_with_empty_registry.files.list()
+    assert len(files_list.data) >= len(documents), (
+        f"Expected at least {len(documents)} files, got {len(files_list.data)}"
+    )
+
+    vector_store_files = client_with_empty_registry.vector_io.openai_list_files_in_vector_store(
+        vector_store_id=actual_vector_db_id
+    )
+    assert len(vector_store_files.data) >= len(documents), f"Expected at least {len(documents)} files in vector store"
+
+    response = client_with_empty_registry.tool_runtime.rag_tool.query(
+        vector_db_ids=[actual_vector_db_id],
+        content="Tell me about machine learning and deep learning",
+    )
+
+    # `assert_valid_text_response` is defined outside this hunk.
+    assert_valid_text_response(response)
+    # Join the text of all returned items and lower-case it so the phrase check is
+    # case-insensitive.
+    content_text = " ".join([chunk.text for chunk in response.content]).lower()
+    assert "machine learning" in content_text or "deep learning" in content_text
+
+
+def test_rag_tool_exception_handling(client_with_empty_registry, embedding_model_id, embedding_dimension):
+    """
+    Insert a batch that mixes two inline-text documents with one URL document tagged "invalid_url".
+
+    The insert call is not wrapped in any exception expectation, so the test only passes if
+    the insert returns without raising and a later query for "valid document" returns text
+    containing that phrase.
+    """
+    vector_db_id = "test_exception_handling"
+
+    client_with_empty_registry.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model=embedding_model_id,
+        embedding_dimension=embedding_dimension,
+    )
+
+    # Use the identifier reported by the server instead of the requested `vector_db_id`.
+    # NOTE(review): index 0 presumably is the DB registered above because the fixture starts
+    # from an empty registry -- confirm against the fixture definition.
+    available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
+    actual_vector_db_id = available_vector_dbs[0]
+
+    # The URL document sits between the two text documents, so both a document listed
+    # before it and one listed after it are part of the same insert call.
+    documents = [
+        Document(
+            document_id="valid-doc",
+            content="This is a valid document that should be processed successfully.",
+            metadata={"status": "valid"},
+        ),
+        Document(
+            document_id="invalid-url-doc",
+            content="https://nonexistent-domain-12345.com/invalid.txt",
+            metadata={"status": "invalid_url"},
+        ),
+        Document(
+            document_id="another-valid-doc",
+            content="This is another valid document for testing resilience.",
+            metadata={"status": "valid"},
+        ),
+    ]
+
+    client_with_empty_registry.tool_runtime.rag_tool.insert(
+        documents=documents,
+        vector_db_id=actual_vector_db_id,
+        chunk_size_in_tokens=256,
+    )
+
+    response = client_with_empty_registry.tool_runtime.rag_tool.query(
+        vector_db_ids=[actual_vector_db_id],
+        content="valid document",
+    )
+
+    # `assert_valid_text_response` is defined outside this hunk.
+    assert_valid_text_response(response)
+    # Both text documents contain the phrase "valid document", so a match from either
+    # one satisfies the assertion.
+    content_text = " ".join([chunk.text for chunk in response.content]).lower()
+    assert "valid document" in content_text
+
+
 def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_id, embedding_dimension):
    providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
    assert len(providers) > 0
@ -249,3 +353,107 @@ def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_i
                "chunk_template": "This should raise a ValueError because it is missing the proper template variables",
            },
        )
+
+
+def test_rag_tool_query_generation(client_with_empty_registry, embedding_model_id, embedding_dimension):
+    """Ask a question about AI and check that the AI document's wording is returned.
+
+    Two documents are inserted, one in category "AI" and one in category
+    "wisdom". The query "Tell me about AI" must produce a valid text response
+    mentioning "artificial intelligence" or "machine learning".
+    """
+    client = client_with_empty_registry
+
+    client.vector_dbs.register(
+        vector_db_id="test_query_generation_db",
+        embedding_model=embedding_model_id,
+        embedding_dimension=embedding_dimension,
+    )
+
+    # Work with the identifier reported by the listing, not the one passed to register().
+    registered_ids = [db.identifier for db in client.vector_dbs.list()]
+    target_db_id = registered_ids[0]
+
+    ai_document = Document(
+        document_id="ai-doc",
+        content="Artificial intelligence and machine learning are transforming technology.",
+        metadata={"category": "AI"},
+    )
+    banana_document = Document(
+        document_id="banana-doc",
+        content="Don't bring a banana to a knife fight.",
+        metadata={"category": "wisdom"},
+    )
+
+    client.tool_runtime.rag_tool.insert(
+        documents=[ai_document, banana_document],
+        vector_db_id=target_db_id,
+        chunk_size_in_tokens=256,
+    )
+
+    result = client.tool_runtime.rag_tool.query(
+        vector_db_ids=[target_db_id],
+        content="Tell me about AI",
+    )
+
+    assert_valid_text_response(result)
+    joined_text = " ".join(item.text for item in result.content).lower()
+    expected_phrases = ("artificial intelligence", "machine learning")
+    assert any(phrase in joined_text for phrase in expected_phrases)
+
+
+def test_rag_tool_pdf_data_url_handling(client_with_empty_registry, embedding_model_id, embedding_dimension):
+    """Insert a PDF supplied as a base64 ``data:`` URL and verify how it is stored and retrieved.
+
+    Checks, in order:
+      1. the Files API lists a file whose name contains the document id and
+         whose size equals the original PDF's byte length;
+      2. the stored file content starts with the ``%PDF-`` magic bytes;
+      3. the vector store reports at least one attached file;
+      4. a RAG query for "sample title" returns valid text containing "title".
+    """
+    import base64
+
+    client = client_with_empty_registry
+
+    client.vector_dbs.register(
+        vector_db_id="test_pdf_data_url_db",
+        embedding_model=embedding_model_id,
+        embedding_dimension=embedding_dimension,
+    )
+
+    # Work with the identifier reported by the listing, not the one passed to register().
+    available_vector_dbs = [vector_db.identifier for vector_db in client.vector_dbs.list()]
+    actual_vector_db_id = available_vector_dbs[0]
+
+    # Single-page PDF (written by PyFPDF, per its /Producer entry) kept inline so the
+    # test needs no fixture file.
+    sample_pdf = b"%PDF-1.3\n3 0 obj\n<</Type /Page\n/Parent 1 0 R\n/Resources 2 0 R\n/Contents 4 0 R>>\nendobj\n4 0 obj\n<</Filter /FlateDecode /Length 115>>\nstream\nx\x9c\x15\xcc1\x0e\x820\x18@\xe1\x9dS\xbcM]jk$\xd5\xd5(\x83!\x86\xa1\x17\xf8\xa3\xa5`LIh+\xd7W\xc6\xf7\r\xef\xc0\xbd\xd2\xaa\xb6,\xd5\xc5\xb1o\x0c\xa6VZ\xe3znn%\xf3o\xab\xb1\xe7\xa3:Y\xdc\x8bm\xeb\xf3&1\xc8\xd7\xd3\x97\xc82\xe6\x81\x87\xe42\xcb\x87Vb(\x12<\xdd<=}Jc\x0cL\x91\xee\xda$\xb5\xc3\xbd\xd7\xe9\x0f\x8d\x97 $\nendstream\nendobj\n1 0 obj\n<</Type /Pages\n/Kids [3 0 R ]\n/Count 1\n/MediaBox [0 0 595.28 841.89]\n>>\nendobj\n5 0 obj\n<</Type /Font\n/BaseFont /Helvetica\n/Subtype /Type1\n/Encoding /WinAnsiEncoding\n>>\nendobj\n2 0 obj\n<<\n/ProcSet [/PDF /Text /ImageB /ImageC /ImageI]\n/Font <<\n/F1 5 0 R\n>>\n/XObject <<\n>>\n>>\nendobj\n6 0 obj\n<<\n/Producer (PyFPDF 1.7.2 http://pyfpdf.googlecode.com/)\n/Title (This is a sample title.)\n/Author (Llama Stack Developers)\n/CreationDate (D:20250312165548)\n>>\nendobj\n7 0 obj\n<<\n/Type /Catalog\n/Pages 1 0 R\n/OpenAction [3 0 R /FitH null]\n/PageLayout /OneColumn\n>>\nendobj\nxref\n0 8\n0000000000 65535 f \n0000000272 00000 n \n0000000455 00000 n \n0000000009 00000 n \n0000000087 00000 n \n0000000359 00000 n \n0000000559 00000 n \n0000000734 00000 n \ntrailer\n<<\n/Size 8\n/Root 7 0 R\n/Info 6 0 R\n>>\nstartxref\n837\n%%EOF\n"
+
+    pdf_base64 = base64.b64encode(sample_pdf).decode("utf-8")
+    pdf_data_url = f"data:application/pdf;base64,{pdf_base64}"
+
+    documents = [
+        Document(
+            document_id="test-pdf-data-url",
+            content=pdf_data_url,
+            metadata={"type": "pdf", "source": "data_url"},
+        ),
+    ]
+
+    client.tool_runtime.rag_tool.insert(
+        documents=documents,
+        vector_db_id=actual_vector_db_id,
+        chunk_size_in_tokens=256,
+    )
+
+    files_list = client.files.list()
+    assert len(files_list.data) >= 1, "PDF should have been uploaded to Files API"
+
+    # First listed file whose name mentions the document id, or None when there is none.
+    pdf_file = next(
+        (entry for entry in files_list.data if entry.filename and "test-pdf-data-url" in entry.filename),
+        None,
+    )
+
+    assert pdf_file is not None, "PDF file should be found in Files API"
+    assert pdf_file.bytes == len(sample_pdf), f"File size should match original PDF ({len(sample_pdf)} bytes)"
+
+    file_content = client.files.retrieve_content(pdf_file.id)
+    assert file_content.startswith(b"%PDF-"), "Retrieved file should be a valid PDF"
+
+    vector_store_files = client.vector_io.openai_list_files_in_vector_store(
+        vector_store_id=actual_vector_db_id
+    )
+    assert len(vector_store_files.data) >= 1, "PDF should be attached to vector store"
+
+    response = client.tool_runtime.rag_tool.query(
+        vector_db_ids=[actual_vector_db_id],
+        content="sample title",
+    )
+
+    assert_valid_text_response(response)
+    content_text = " ".join(chunk.text for chunk in response.content).lower()
+    # Checking for "title" alone is the effective condition: any text containing
+    # "sample title" necessarily contains "title", so a separate check adds nothing.
+    assert "title" in content_text