Merge branch 'main' into use-openai-for-cerebras

Matthew Farrellee 2025-09-23 16:21:31 -04:00
commit e3ad762383
29 changed files with 11729 additions and 172 deletions

@@ -9,13 +9,13 @@ Databricks inference provider for running models on Databricks' unified analytics platform.

 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
-| `api_token` | `<class 'str'>` | No | | The Databricks API token |
+| `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |

 ## Sample Configuration

 ```yaml
-url: ${env.DATABRICKS_URL:=}
-api_token: ${env.DATABRICKS_API_TOKEN:=}
+url: ${env.DATABRICKS_HOST:=}
+api_token: ${env.DATABRICKS_TOKEN:=}
 ```

@@ -25,7 +25,6 @@ from typing import Annotated, Any, get_origin
 import httpx
 import rich.pretty
 import yaml
-from aiohttp import hdrs
 from fastapi import Body, FastAPI, HTTPException, Request, Response
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
@@ -45,17 +44,13 @@ from llama_stack.core.datatypes import (
     process_cors_config,
 )
 from llama_stack.core.distribution import builtin_automatically_routed_apis
-from llama_stack.core.external import ExternalApiSpec, load_external_apis
+from llama_stack.core.external import load_external_apis
 from llama_stack.core.request_headers import (
     PROVIDER_DATA_VAR,
     request_provider_data_context,
     user_from_scope,
 )
-from llama_stack.core.server.routes import (
-    find_matching_route,
-    get_all_api_routes,
-    initialize_route_impls,
-)
+from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.core.stack import (
     Stack,
     cast_image_name_to_string,
@@ -73,13 +68,12 @@ from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
 )
 from llama_stack.providers.utils.telemetry.tracing import (
     CURRENT_TRACE_CONTEXT,
-    end_trace,
     setup_logger,
-    start_trace,
 )

 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
+from .tracing import TracingMiddleware

 REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -299,65 +293,6 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
     return route_handler


-class TracingMiddleware:
-    def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]):
-        self.app = app
-        self.impls = impls
-        self.external_apis = external_apis
-        # FastAPI built-in paths that should bypass custom routing
-        self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static")
-
-    async def __call__(self, scope, receive, send):
-        if scope.get("type") == "lifespan":
-            return await self.app(scope, receive, send)
-
-        path = scope.get("path", "")
-
-        # Check if the path is a FastAPI built-in path
-        if path.startswith(self.fastapi_paths):
-            # Pass through to FastAPI's built-in handlers
-            logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}")
-            return await self.app(scope, receive, send)
-
-        if not hasattr(self, "route_impls"):
-            self.route_impls = initialize_route_impls(self.impls, self.external_apis)
-
-        try:
-            _, _, route_path, webmethod = find_matching_route(
-                scope.get("method", hdrs.METH_GET), path, self.route_impls
-            )
-        except ValueError:
-            # If no matching endpoint is found, pass through to FastAPI
-            logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
-            return await self.app(scope, receive, send)
-
-        trace_attributes = {"__location__": "server", "raw_path": path}
-
-        # Extract W3C trace context headers and store as trace attributes
-        headers = dict(scope.get("headers", []))
-        traceparent = headers.get(b"traceparent", b"").decode()
-        if traceparent:
-            trace_attributes["traceparent"] = traceparent
-        tracestate = headers.get(b"tracestate", b"").decode()
-        if tracestate:
-            trace_attributes["tracestate"] = tracestate
-
-        trace_path = webmethod.descriptive_name or route_path
-        trace_context = await start_trace(trace_path, trace_attributes)
-
-        async def send_with_trace_id(message):
-            if message["type"] == "http.response.start":
-                headers = message.get("headers", [])
-                headers.append([b"x-trace-id", str(trace_context.trace_id).encode()])
-                message["headers"] = headers
-            await send(message)
-
-        try:
-            return await self.app(scope, receive, send_with_trace_id)
-        finally:
-            await end_trace()
-
-
 class ClientVersionMiddleware:
     def __init__(self, app):
         self.app = app

@@ -0,0 +1,72 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from aiohttp import hdrs
+
+from llama_stack.core.external import ExternalApiSpec
+from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace
+
+logger = get_logger(name=__name__, category="core::server")
+
+
+class TracingMiddleware:
+    def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]):
+        self.app = app
+        self.impls = impls
+        self.external_apis = external_apis
+        # FastAPI built-in paths that should bypass custom routing
+        self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static")
+
+    async def __call__(self, scope, receive, send):
+        if scope.get("type") == "lifespan":
+            return await self.app(scope, receive, send)
+
+        path = scope.get("path", "")
+
+        # Check if the path is a FastAPI built-in path
+        if path.startswith(self.fastapi_paths):
+            # Pass through to FastAPI's built-in handlers
+            logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}")
+            return await self.app(scope, receive, send)
+
+        if not hasattr(self, "route_impls"):
+            self.route_impls = initialize_route_impls(self.impls, self.external_apis)
+
+        try:
+            _, _, route_path, webmethod = find_matching_route(
+                scope.get("method", hdrs.METH_GET), path, self.route_impls
+            )
+        except ValueError:
+            # If no matching endpoint is found, pass through to FastAPI
+            logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
+            return await self.app(scope, receive, send)
+
+        trace_attributes = {"__location__": "server", "raw_path": path}
+
+        # Extract W3C trace context headers and store as trace attributes
+        headers = dict(scope.get("headers", []))
+        traceparent = headers.get(b"traceparent", b"").decode()
+        if traceparent:
+            trace_attributes["traceparent"] = traceparent
+        tracestate = headers.get(b"tracestate", b"").decode()
+        if tracestate:
+            trace_attributes["tracestate"] = tracestate
+
+        trace_path = webmethod.descriptive_name or route_path
+        trace_context = await start_trace(trace_path, trace_attributes)
+
+        async def send_with_trace_id(message):
+            if message["type"] == "http.response.start":
+                headers = message.get("headers", [])
+                headers.append([b"x-trace-id", str(trace_context.trace_id).encode()])
+                message["headers"] = headers
+            await send(message)
+
+        try:
+            return await self.app(scope, receive, send_with_trace_id)
+        finally:
+            await end_trace()
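
Note: the relocated middleware is a bare ASGI callable rather than a Starlette `BaseHTTPMiddleware`, so it composes by wrapping the app object directly. A minimal wiring sketch (not the actual server.py code; `impls` and `external_apis` are empty placeholders that the real server builds from the stack config):

```python
# Hypothetical wiring, for illustration only.
import uvicorn
from fastapi import FastAPI

from llama_stack.core.server.tracing import TracingMiddleware

api = FastAPI()

# TracingMiddleware(app, impls, external_apis) is itself an ASGI app,
# so any ASGI server can run the wrapped object.
asgi_app = TracingMiddleware(api, impls={}, external_apis={})

if __name__ == "__main__":
    uvicorn.run(asgi_app, host="127.0.0.1", port=8321)
```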

@@ -142,7 +142,7 @@ def available_providers() -> list[ProviderSpec]:
             api=Api.inference,
             adapter_type="databricks",
             provider_type="remote::databricks",
-            pip_packages=[],
+            pip_packages=["databricks-sdk"],
             module="llama_stack.providers.remote.inference.databricks",
             config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
             description="Databricks inference provider for running models on Databricks' unified analytics platform.",

@@ -5,10 +5,11 @@
 # the root directory of this source tree.

 from .config import DatabricksImplConfig
-from .databricks import DatabricksInferenceAdapter


 async def get_adapter_impl(config: DatabricksImplConfig, _deps):
+    from .databricks import DatabricksInferenceAdapter
+
     assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}"
     impl = DatabricksInferenceAdapter(config)
     await impl.initialize()

@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, SecretStr

 from llama_stack.schema_utils import json_schema_type

@@ -17,16 +17,16 @@ class DatabricksImplConfig(BaseModel):
         default=None,
         description="The URL for the Databricks model serving endpoint",
     )
-    api_token: str = Field(
-        default=None,
+    api_token: SecretStr = Field(
+        default=SecretStr(None),
         description="The Databricks API token",
     )

     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.DATABRICKS_URL:=}",
-        api_token: str = "${env.DATABRICKS_API_TOKEN:=}",
+        url: str = "${env.DATABRICKS_HOST:=}",
+        api_token: str = "${env.DATABRICKS_TOKEN:=}",
         **kwargs: Any,
     ) -> dict[str, Any]:
         return {
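
Note: with `api_token` now a `SecretStr`, the token is masked in `repr()` and logs and has to be read out explicitly, which is what the adapter's `get_api_key()` does via `get_secret_value()`. A quick illustration of the pydantic behavior, using a stand-in model:

```python
from pydantic import BaseModel, SecretStr


class Demo(BaseModel):  # stand-in for DatabricksImplConfig
    api_token: SecretStr


cfg = Demo(api_token="dapi-example-token")  # placeholder token
print(cfg)                               # api_token=SecretStr('**********'), masked
print(cfg.api_token.get_secret_value())  # dapi-example-token, explicit access only
```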

@@ -4,23 +4,26 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from collections.abc import AsyncGenerator
+from collections.abc import AsyncIterator
+from typing import Any

-from openai import OpenAI
+from databricks.sdk import WorkspaceClient

 from llama_stack.apis.common.content_types import (
     InterleavedContent,
     InterleavedContentItem,
 )
 from llama_stack.apis.inference import (
-    ChatCompletionRequest,
     ChatCompletionResponse,
+    ChatCompletionResponseStreamChunk,
+    CompletionResponse,
+    CompletionResponseStreamChunk,
     EmbeddingsResponse,
     EmbeddingTaskType,
     Inference,
     LogProbConfig,
     Message,
-    OpenAIEmbeddingsResponse,
+    OpenAICompletion,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -29,49 +32,50 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.apis.models import Model, ModelType
+from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import (
-    ModelRegistryHelper,
-    build_hf_repo_model_entry,
-)
-from llama_stack.providers.utils.inference.openai_compat import (
-    OpenAIChatCompletionToLlamaStackMixin,
-    OpenAICompletionToLlamaStackMixin,
-    get_sampling_options,
-    process_chat_completion_response,
-    process_chat_completion_stream_response,
-)
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    chat_completion_request_to_prompt,
+    ProviderModelEntry,
 )
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import DatabricksImplConfig

-SAFETY_MODELS_ENTRIES = []
+logger = get_logger(name=__name__, category="inference::databricks")

-# https://docs.databricks.com/aws/en/machine-learning/model-serving/foundation-model-overview
-MODEL_ENTRIES = [
-    build_hf_repo_model_entry(
-        "databricks-meta-llama-3-1-70b-instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_hf_repo_model_entry(
-        "databricks-meta-llama-3-1-405b-instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-] + SAFETY_MODELS_ENTRIES
+# source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
+EMBEDDING_MODEL_ENTRIES = {
+    "databricks-gte-large-en": ProviderModelEntry(
+        provider_model_id="databricks-gte-large-en",
+        metadata={
+            "embedding_dimension": 1024,
+            "context_length": 8192,
+        },
+    ),
+    "databricks-bge-large-en": ProviderModelEntry(
+        provider_model_id="databricks-bge-large-en",
+        metadata={
+            "embedding_dimension": 1024,
+            "context_length": 512,
+        },
+    ),
+}


 class DatabricksInferenceAdapter(
-    ModelRegistryHelper,
+    OpenAIMixin,
     Inference,
-    OpenAIChatCompletionToLlamaStackMixin,
-    OpenAICompletionToLlamaStackMixin,
 ):
     def __init__(self, config: DatabricksImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
         self.config = config

+    def get_api_key(self) -> str:
+        return self.config.api_token.get_secret_value()
+
+    def get_base_url(self) -> str:
+        return f"{self.config.url}/serving-endpoints"
+
     async def initialize(self) -> None:
         return
@@ -80,72 +84,54 @@ class DatabricksInferenceAdapter(
     async def completion(
         self,
-        model: str,
+        model_id: str,
         content: InterleavedContent,
         sampling_params: SamplingParams | None = None,
         response_format: ResponseFormat | None = None,
         stream: bool | None = False,
         logprobs: LogProbConfig | None = None,
-    ) -> AsyncGenerator:
+    ) -> CompletionResponse | AsyncIterator[CompletionResponseStreamChunk]:
+        raise NotImplementedError()
+
+    async def openai_completion(
+        self,
+        model: str,
+        prompt: str | list[str] | list[int] | list[list[int]],
+        best_of: int | None = None,
+        echo: bool | None = None,
+        frequency_penalty: float | None = None,
+        logit_bias: dict[str, float] | None = None,
+        logprobs: bool | None = None,
+        max_tokens: int | None = None,
+        n: int | None = None,
+        presence_penalty: float | None = None,
+        seed: int | None = None,
+        stop: str | list[str] | None = None,
+        stream: bool | None = None,
+        stream_options: dict[str, Any] | None = None,
+        temperature: float | None = None,
+        top_p: float | None = None,
+        user: str | None = None,
+        guided_choice: list[str] | None = None,
+        prompt_logprobs: int | None = None,
+        suffix: str | None = None,
+    ) -> OpenAICompletion:
         raise NotImplementedError()

     async def chat_completion(
         self,
-        model: str,
+        model_id: str,
         messages: list[Message],
         sampling_params: SamplingParams | None = None,
+        response_format: ResponseFormat | None = None,
         tools: list[ToolDefinition] | None = None,
         tool_choice: ToolChoice | None = ToolChoice.auto,
         tool_prompt_format: ToolPromptFormat | None = None,
-        response_format: ResponseFormat | None = None,
         stream: bool | None = False,
         logprobs: LogProbConfig | None = None,
         tool_config: ToolConfig | None = None,
-    ) -> AsyncGenerator:
-        if sampling_params is None:
-            sampling_params = SamplingParams()
-        request = ChatCompletionRequest(
-            model=model,
-            messages=messages,
-            sampling_params=sampling_params,
-            tools=tools or [],
-            stream=stream,
-            logprobs=logprobs,
-            tool_config=tool_config,
-        )
-        client = OpenAI(base_url=self.config.url, api_key=self.config.api_token)
-        if stream:
-            return self._stream_chat_completion(request, client)
-        else:
-            return await self._nonstream_chat_completion(request, client)
-
-    async def _nonstream_chat_completion(
-        self, request: ChatCompletionRequest, client: OpenAI
-    ) -> ChatCompletionResponse:
-        params = self._get_params(request)
-        r = client.completions.create(**params)
-        return process_chat_completion_response(r, request)
-
-    async def _stream_chat_completion(self, request: ChatCompletionRequest, client: OpenAI) -> AsyncGenerator:
-        params = self._get_params(request)
-
-        async def _to_async_generator():
-            s = client.completions.create(**params)
-            for chunk in s:
-                yield chunk
-
-        stream = _to_async_generator()
-        async for chunk in process_chat_completion_stream_response(stream, request):
-            yield chunk
-
-    def _get_params(self, request: ChatCompletionRequest) -> dict:
-        return {
-            "model": request.model,
-            "prompt": chat_completion_request_to_prompt(request, self.get_llama_model(request.model)),
-            "stream": request.stream,
-            **get_sampling_options(request.sampling_params),
-        }
+    ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]:
+        raise NotImplementedError()

     async def embeddings(
         self,
@@ -157,12 +143,39 @@ class DatabricksInferenceAdapter(
     ) -> EmbeddingsResponse:
         raise NotImplementedError()

-    async def openai_embeddings(
-        self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
-    ) -> OpenAIEmbeddingsResponse:
-        raise NotImplementedError()
+    async def list_models(self) -> list[Model] | None:
+        self._model_cache = {}  # from OpenAIMixin
+        ws_client = WorkspaceClient(host=self.config.url, token=self.get_api_key())  # TODO: this is not async
+        endpoints = ws_client.serving_endpoints.list()
+        for endpoint in endpoints:
+            model = Model(
+                provider_id=self.__provider_id__,
+                provider_resource_id=endpoint.name,
+                identifier=endpoint.name,
+            )
+            if endpoint.task == "llm/v1/chat":
+                model.model_type = ModelType.llm  # this is redundant, but informative
+            elif endpoint.task == "llm/v1/embeddings":
+                if endpoint.name not in EMBEDDING_MODEL_ENTRIES:
+                    logger.warning(f"No metadata information available for embedding model {endpoint.name}, skipping.")
+                    continue
+                model.model_type = ModelType.embedding
+                model.metadata = EMBEDDING_MODEL_ENTRIES[endpoint.name].metadata
+            else:
+                logger.warning(f"Unknown model type, skipping: {endpoint}")
+                continue
+            self._model_cache[endpoint.name] = model
+
+        return list(self._model_cache.values())
+
+    async def register_model(self, model: Model) -> Model:
+        if not await self.check_model_availability(model.provider_resource_id):
+            raise ValueError(f"Model {model.provider_resource_id} is not available in Databricks workspace.")
+        return model
+
+    async def unregister_model(self, model_id: str) -> None:
+        pass
+
+    async def should_refresh_models(self) -> bool:
+        return False
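
For reference, these two hooks are all `OpenAIMixin` needs: it builds an OpenAI client against the workspace's `/serving-endpoints` path and issues standard OpenAI-compatible calls. A rough equivalent with the raw client (host and token are placeholders; the model name is taken from the recordings below):

```python
from openai import OpenAI

client = OpenAI(
    base_url="https://my-workspace.cloud.databricks.com/serving-endpoints",  # get_base_url()
    api_key="dapi-example-token",                                            # get_api_key()
)

resp = client.chat.completions.create(
    model="databricks-meta-llama-3-3-70b-instruct",
    messages=[{"role": "user", "content": "Hello, world!"}],
)
print(resp.choices[0].message.content)
```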

@@ -504,7 +504,7 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
         except ValueError:
             pass  # Ignore statically unknown model, will check live listing
         try:
-            res = await self.client.models.list()
+            res = self.client.models.list()
         except APIConnectionError as e:
             raise ValueError(
                 f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."

@@ -296,7 +296,7 @@ class OpenAIMixin(ABC):

         return OpenAIEmbeddingsResponse(
             data=data,
-            model=response.model,
+            model=model,
             usage=usage,
         )
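
This matters for Databricks in particular: in the chat recordings below the endpoint reports the underlying deployment name rather than the requested alias, and the embeddings path presumably behaves the same way, so echoing `response.model` would not round-trip:

```python
# Values taken from the recorded responses in this diff:
requested = "databricks-meta-llama-3-3-70b-instruct"  # what the caller asked for
reported = "meta-llama-3.3-70b-instruct-121024"       # what response.model contains
assert requested != reported  # hence the fix: echo the requested model back
```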

@@ -267,6 +267,10 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
         raise ValueError(f"Unknown client type: {client_type}")

     url = base_url.rstrip("/") + endpoint
+    # Special handling for Databricks URLs to avoid leaking workspace info
+    # e.g. https://adb-1234567890123456.7.cloud.databricks.com -> https://...cloud.databricks.com
+    if "cloud.databricks.com" in url:
+        url = "__databricks__" + url.split("cloud.databricks.com")[-1]
     method = "POST"
     headers = {}
     body = kwargs
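
A standalone check of the rewrite rule (same logic as above; the workspace host is made up, and the expected output matches the `__databricks__` URLs in the recordings below):

```python
url = "https://adb-1234567890123456.7.cloud.databricks.com/serving-endpoints/v1/chat/completions"
if "cloud.databricks.com" in url:
    url = "__databricks__" + url.split("cloud.databricks.com")[-1]
assert url == "__databricks__/serving-endpoints/v1/chat/completions"
```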

@@ -99,6 +99,7 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
         "remote::together",  # `n` > 1 is not supported when streaming tokens. Please disable `stream`
         # Error code 400 - {'message': '"n" > 1 is not currently supported', 'type': 'invalid_request_error', 'param': 'n', 'code': 'wrong_api_format'}
         "remote::cerebras",
+        "remote::databricks",  # Bad request: parameter "n" must be equal to 1 for streaming mode
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")

@@ -111,6 +112,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
         "inline::vllm",
         "remote::bedrock",
         "remote::databricks",
+        "remote::cerebras",
         "remote::runpod",
         "remote::watsonx",  # watsonx returns 404 when hitting the /openai/v1 endpoint
     ):

@@ -42,6 +42,7 @@ def skip_if_model_doesnt_support_encoding_format_base64(client, model_id):
     provider = provider_from_model(client, model_id)
     if provider.provider_type in (
         "remote::together",  # param silently ignored, always returns floats
+        "remote::databricks",  # param silently ignored, always returns floats
         "remote::fireworks",  # param silently ignored, always returns list of floats
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} does not support encoding_format='base64'.")

@@ -52,6 +53,8 @@ def skip_if_model_doesnt_support_variable_dimensions(client_with_models, model_i
     if provider.provider_type in (
         "remote::together",  # returns 400
         "inline::sentence-transformers",
+        # Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Bad request: json: unknown field "dimensions"\n'}
+        "remote::databricks",
     ):
         pytest.skip(
             f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions."

@@ -75,7 +78,6 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):
         "inline::meta-reference",
         "remote::bedrock",
         "remote::cerebras",
-        "remote::databricks",
         "remote::runpod",
         "remote::sambanova",
         "remote::tgi",

@@ -0,0 +1,728 @@
{
"request": {
"method": "POST",
"url": "__databricks__/serving-endpoints/v1/chat/completions",
"headers": {},
"body": {
"model": "databricks-meta-llama-3-3-70b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "databricks-meta-llama-3-3-70b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 3,
"prompt_tokens": 14,
"total_tokens": 17,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "Hello! ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 3,
"prompt_tokens": 14,
"total_tokens": 17,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "It's ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 5,
"prompt_tokens": 14,
"total_tokens": 19,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "nice ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 6,
"prompt_tokens": 14,
"total_tokens": 20,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "to ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 7,
"prompt_tokens": 14,
"total_tokens": 21,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "meet ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 8,
"prompt_tokens": 14,
"total_tokens": 22,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "you. ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 10,
"prompt_tokens": 14,
"total_tokens": 24,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "Is ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 11,
"prompt_tokens": 14,
"total_tokens": 25,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "there ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 12,
"prompt_tokens": 14,
"total_tokens": 26,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "something ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 13,
"prompt_tokens": 14,
"total_tokens": 27,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "I ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 14,
"prompt_tokens": 14,
"total_tokens": 28,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "can ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 15,
"prompt_tokens": 14,
"total_tokens": 29,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "help ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 14,
"total_tokens": 30,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "you ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 17,
"prompt_tokens": 14,
"total_tokens": 31,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "with ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 18,
"prompt_tokens": 14,
"total_tokens": 32,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "or ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 19,
"prompt_tokens": 14,
"total_tokens": 33,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "would ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 20,
"prompt_tokens": 14,
"total_tokens": 34,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "you ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 21,
"prompt_tokens": 14,
"total_tokens": 35,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "like ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 22,
"prompt_tokens": 14,
"total_tokens": 36,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "to ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 23,
"prompt_tokens": 14,
"total_tokens": 37,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "chat?",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 25,
"prompt_tokens": 14,
"total_tokens": 39,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_7268e4ee-3b8e-461e-80dc-608e76f3801d",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758326500,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 25,
"prompt_tokens": 14,
"total_tokens": 39,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "__databricks__/serving-endpoints/v1/chat/completions",
"headers": {},
"body": {
"model": "databricks-meta-llama-3-3-70b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "databricks-meta-llama-3-3-70b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl_52eec823-4235-473d-b25a-f0af4ebd4837",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's great to meet you. Is there something I can help you with, or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758326506,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "__databricks__/serving-endpoints/v1/chat/completions",
"headers": {},
"body": {
"model": "databricks-meta-llama-3-3-70b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet do humans live on?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "databricks-meta-llama-3-3-70b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl_e846ea96-9636-4eb4-bde4-84510478617b",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Humans live on the planet Earth.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 8,
"prompt_tokens": 17,
"total_tokens": 25,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "__databricks__/serving-endpoints/v1/chat/completions",
"headers": {},
"body": {
"model": "databricks-meta-llama-3-3-70b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet has rings around it with a name starting with letter S?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "databricks-meta-llama-3-3-70b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl_094a74d8-2e39-45ce-8eb9-64d505bd24e9",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "The answer is Saturn! Saturn is a planet in our solar system that is known for its stunning ring system. The rings of Saturn are made up of ice and rock particles that range in size from tiny dust grains to massive boulders. They are a beautiful sight to behold, and astronomers and space enthusiasts alike have been fascinated by them for centuries.\n\nSo, the planet with rings around it with a name starting with the letter S is indeed Saturn!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 91,
"prompt_tokens": 24,
"total_tokens": 115,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

@@ -0,0 +1,344 @@
{
"request": {
"method": "POST",
"url": "__databricks__/serving-endpoints/v1/chat/completions",
"headers": {},
"body": {
"model": "databricks-meta-llama-3-3-70b-instruct",
"messages": [
{
"role": "user",
"content": "What's the name of the Sun in latin?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "databricks-meta-llama-3-3-70b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 2,
"prompt_tokens": 20,
"total_tokens": 22,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "The ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 2,
"prompt_tokens": 20,
"total_tokens": 22,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "Latin ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 3,
"prompt_tokens": 20,
"total_tokens": 23,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "name ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 4,
"prompt_tokens": 20,
"total_tokens": 24,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "for ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 5,
"prompt_tokens": 20,
"total_tokens": 25,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "the ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 6,
"prompt_tokens": 20,
"total_tokens": 26,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "Sun ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 7,
"prompt_tokens": 20,
"total_tokens": 27,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "is ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326497,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 8,
"prompt_tokens": 20,
"total_tokens": 28,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "\"Sol\".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326498,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 11,
"prompt_tokens": 20,
"total_tokens": 31,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_2c653de2-afd4-4075-bc8d-8200562a191b",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758326498,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 11,
"prompt_tokens": 20,
"total_tokens": 31,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

(File diff suppressed because it is too large.)

(File diff suppressed because it is too large.)

(File diff suppressed because it is too large.)

@@ -0,0 +1,83 @@
{
"request": {
"method": "POST",
"url": "__databricks__/serving-endpoints/v1/chat/completions",
"headers": {},
"body": {
"model": "databricks-meta-llama-3-3-70b-instruct",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "databricks-meta-llama-3-3-70b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl_e54eaa97-ace3-4af6-b3a2-b1627bc77488",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": null,
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "call_9c7f9e5f-c6eb-4c3c-a7b3-e9fe0e786b50",
"function": {
"arguments": "{ \"city\": \"Tokyo\" }",
"name": "get_weather"
},
"type": "function"
}
]
}
}
],
"created": 1758326507,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 15,
"prompt_tokens": 682,
"total_tokens": 697,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

(File diff suppressed because it is too large.)

(File diff suppressed because it is too large.)

(File diff suppressed because it is too large.)

@@ -0,0 +1,168 @@
{
"request": {
"method": "POST",
"url": "__databricks__/serving-endpoints/v1/chat/completions",
"headers": {},
"body": {
"model": "databricks-meta-llama-3-3-70b-instruct",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": true,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "databricks-meta-llama-3-3-70b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_4c3ae1bf-991d-4266-a12d-b1e97ecbb7a0",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_87aed80e-f856-468f-9523-52db3018d83d",
"function": {
"arguments": "",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326502,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 15,
"prompt_tokens": 682,
"total_tokens": 697,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_4c3ae1bf-991d-4266-a12d-b1e97ecbb7a0",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{ \"city\": \"Tokyo\" }",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326502,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 15,
"prompt_tokens": 682,
"total_tokens": 697,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_4c3ae1bf-991d-4266-a12d-b1e97ecbb7a0",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "",
"name": null
},
"type": null
}
]
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 1758326502,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 15,
"prompt_tokens": 682,
"total_tokens": 697,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

@@ -0,0 +1,536 @@
{
"request": {
"method": "POST",
"url": "__databricks__/serving-endpoints/v1/chat/completions",
"headers": {},
"body": {
"model": "databricks-meta-llama-3-3-70b-instruct",
"messages": [
{
"role": "user",
"content": "What is the name of the US captial?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "databricks-meta-llama-3-3-70b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 2,
"prompt_tokens": 20,
"total_tokens": 22,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "The ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 2,
"prompt_tokens": 20,
"total_tokens": 22,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "capital ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 3,
"prompt_tokens": 20,
"total_tokens": 23,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "of ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 4,
"prompt_tokens": 20,
"total_tokens": 24,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "the ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 5,
"prompt_tokens": 20,
"total_tokens": 25,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "United ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 6,
"prompt_tokens": 20,
"total_tokens": 26,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "States ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 7,
"prompt_tokens": 20,
"total_tokens": 27,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "is ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 8,
"prompt_tokens": 20,
"total_tokens": 28,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "Washington, ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 10,
"prompt_tokens": 20,
"total_tokens": 30,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "D.C. ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 13,
"prompt_tokens": 20,
"total_tokens": 33,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "(short ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 15,
"prompt_tokens": 20,
"total_tokens": 35,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "for ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 20,
"total_tokens": 36,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "District ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 17,
"prompt_tokens": 20,
"total_tokens": 37,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "of ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 18,
"prompt_tokens": 20,
"total_tokens": 38,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "Columbia).",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 20,
"prompt_tokens": 20,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl_40266680-5422-4e7a-bc40-74eb1efdafbc",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758326504,
"model": "meta-llama-3.3-70b-instruct-121024",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 20,
"prompt_tokens": 20,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
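The second recording is a plain text stream: the visible answer is just the concatenation of each chunk's `delta.content`. A minimal sketch of that reassembly, assuming an OpenAI-compatible client pointed at a Databricks serving endpoint; the workspace URL and token are placeholders, not values from this diff:

```python
# Sketch: rebuild the assistant text from a content stream, as recorded above.
# The workspace URL and token below are placeholders.
from openai import OpenAI

client = OpenAI(
    base_url="https://example.cloud.databricks.com/serving-endpoints",  # placeholder
    api_key="dapi-placeholder-token",
)

stream = client.chat.completions.create(
    model="databricks-meta-llama-3-3-70b-instruct",
    messages=[{"role": "user", "content": "What is the name of the US capital?"}],
    stream=True,
)

answer = "".join(
    chunk.choices[0].delta.content or "" for chunk in stream if chunk.choices
)
print(answer)  # e.g. "The capital of the United States is Washington, D.C. ..."
```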

File diff suppressed because it is too large

View file

@@ -87,7 +87,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
         description="OpenAI GPT models for high-quality responses and tool calling",
         defaults={
             "text_model": "openai/gpt-4o",
-            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
+            "embedding_model": "openai/text-embedding-3-small",
         },
     ),
     "tgi": Setup(
@@ -115,6 +115,14 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "text_model": "cerebras/llama-3.3-70b",
         },
     ),
+    "databricks": Setup(
+        name="databricks",
+        description="Databricks models",
+        defaults={
+            "text_model": "databricks/databricks-meta-llama-3-3-70b-instruct",
+            "embedding_model": "databricks/databricks-bge-large-en",
+        },
+    ),
     "fireworks": Setup(
         name="fireworks",
         description="Fireworks provider with a text model",

View file

@@ -62,7 +62,7 @@ from llama_stack.providers.remote.inference.vllm.vllm import (
 @pytest.fixture(scope="module")
 def mock_openai_models_list():
-    with patch("openai.resources.models.AsyncModels.list", new_callable=AsyncMock) as mock_list:
+    with patch("openai.resources.models.AsyncModels.list") as mock_list:
         yield mock_list
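Dropping `new_callable=AsyncMock` is safe here: since Python 3.8, `unittest.mock.patch` substitutes an `AsyncMock` automatically when the patch target is an async function. A self-contained illustration of that behavior (not the project's test code):

```python
# Demonstrates patch() auto-selecting AsyncMock for async targets (Python >= 3.8).
import asyncio
from unittest.mock import AsyncMock, patch


class Models:
    async def list(self):  # async target, analogous to AsyncModels.list
        raise RuntimeError("would hit the network")


async def main() -> None:
    with patch(f"{__name__}.Models.list") as mock_list:
        assert isinstance(mock_list, AsyncMock)  # chosen automatically by patch()
        mock_list.return_value = ["model-a", "model-b"]
        assert await Models().list() == ["model-a", "model-b"]


asyncio.run(main())
```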