mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-03 19:57:35 +00:00
feat: update Cerebras inference provider to support dynamic model listing (#3481)
# What does this PR do?

- update Cerebras to use OpenAIMixin
- enable openai completions tests
- enable openai chat completions tests
- disable with n > 1 tests
- add recording for --setup cerebras --subdirs inference --pattern openai

## Test Plan

`./scripts/integration-tests.sh --stack-config server:ci-tests --setup cerebras --subdirs inference --pattern openai`

```
tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=cerebras/llama-3.3-70b-inference:completion:sanity]
instantiating llama_stack_client
Port 8321 is already in use, assuming server is already running...
llama_stack_client instantiated in 0.053s
PASSED [ 2%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=cerebras/llama-3.3-70b-inference:completion:suffix] SKIPPED (Suffix is not supported for the model: cerebras/llama-3.3-70b.) [ 4%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=cerebras/llama-3.3-70b-inference:completion:sanity] PASSED [ 6%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=cerebras/llama-3.3-70b-1] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support vllm extra_body parameters.) [ 8%]
tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=cerebras/llama-3.3-70b] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support vllm extra_body parameters.) [ 10%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_01] PASSED [ 12%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_01] PASSED [ 14%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_01] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cere...) [ 17%]
tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=cerebras/llama-3.3-70b-True] PASSED [ 19%]
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=cerebras/llama-3.3-70b-True] PASSED [ 21%]
tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=cerebras/llama-3.3-70b] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support chat completion calls wit...)
[ 23%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 25%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 27%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 29%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 31%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 34%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 36%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 38%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 40%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 42%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 44%] tests/integration/inference/test_openai_completion.py::test_openai_completion_prompt_logprobs[txt=cerebras/llama-3.3-70b-0] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support vllm extra_body parameters.) [ 46%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_02] PASSED [ 48%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] PASSED [ 51%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote::cere...) 
[ 53%] tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=cerebras/llama-3.3-70b-False] PASSED [ 55%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=cerebras/llama-3.3-70b-False] PASSED [ 57%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 59%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 61%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 63%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 65%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 68%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 70%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 72%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 74%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 76%] tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-cerebras/llama-3.3-70b-None-None-None-384] SKIPPED (embedding_model_id empty - skipping test) [ 78%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_01] PASSED [ 80%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_01] PASSED [ 82%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_01] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote:...) 
[ 85%] tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=cerebras/llama-3.3-70b-True] PASSED [ 87%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=cerebras/llama-3.3-70b-True] PASSED [ 89%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_02] PASSED [ 91%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] PASSED [ 93%] tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] SKIPPED (Model cerebras/llama-3.3-70b hosted by remote:...) [ 95%] tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=cerebras/llama-3.3-70b-False] PASSED [ 97%] tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=cerebras/llama-3.3-70b-False] PASSED [100%] =================================================================================================================== slowest 10 durations ==================================================================================================================== 0.37s call tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=cerebras/llama-3.3-70b-inference:chat_completion:non_streaming_01] 0.34s call tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=cerebras/llama-3.3-70b-False] 0.18s call tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=cerebras/llama-3.3-70b-True] 0.17s setup tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=cerebras/llama-3.3-70b-inference:completion:sanity] 0.15s call tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=cerebras/llama-3.3-70b-True] 0.13s call tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=cerebras/llama-3.3-70b-True] 0.12s call tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=cerebras/llama-3.3-70b-False] 0.12s call tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=cerebras/llama-3.3-70b-True] 0.12s call tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=cerebras/llama-3.3-70b-False] 0.08s call tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=cerebras/llama-3.3-70b-inference:chat_completion:streaming_02] ================================================================================================================== short test summary info ================================================================================================================== SKIPPED [1] tests/integration/inference/test_openai_completion.py:75: Suffix is not supported for the model: cerebras/llama-3.3-70b. 
SKIPPED [3] tests/integration/inference/test_openai_completion.py:123: Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support vllm extra_body parameters. SKIPPED [4] tests/integration/inference/test_openai_completion.py:103: Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support n param. SKIPPED [1] tests/integration/inference/test_openai_completion.py:129: Model cerebras/llama-3.3-70b hosted by remote::cerebras doesn't support chat completion calls with base64 encoded files. SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:90: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:112: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:136: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:154: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:175: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:195: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:206: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:217: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:244: embedding_model_id empty - skipping test SKIPPED [2] tests/integration/inference/test_openai_embeddings.py:278: embedding_model_id empty - skipping test ================================================================================================= 18 passed, 29 skipped, 50 deselected, 4 warnings in 3.02s ================================================================================================= ```
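For context, a minimal sketch of the surface this change relies on: the adapter now supplies `get_api_key()` / `get_base_url()` and inherits the OpenAI-compatible path, including dynamic model listing, from `OpenAIMixin`. The mixin's internals are not part of this diff, so the sketch below is an assumption of its shape (an `AsyncOpenAI` client built from the two hooks, models discovered via the provider's `/v1/models` endpoint), not the actual implementation in `llama_stack/providers/utils/inference/openai_mixin.py`.

```python
# Illustrative sketch only; the real OpenAIMixin may differ.
from openai import AsyncOpenAI


class OpenAIMixinSketch:
    # The adapter provides these two hooks (see cerebras.py in the diff below).
    def get_api_key(self) -> str:
        raise NotImplementedError

    def get_base_url(self) -> str:
        raise NotImplementedError

    @property
    def client(self) -> AsyncOpenAI:
        # OpenAI-compatible client pointed at https://api.cerebras.ai/v1
        return AsyncOpenAI(base_url=self.get_base_url(), api_key=self.get_api_key())

    async def list_provider_models(self) -> list[str]:
        # "Dynamic model listing": ask the provider instead of relying solely
        # on a static MODEL_ENTRIES table.
        return [model.id async for model in self.client.models.list()]
```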
This commit is contained in: parent d07ebce4d9, commit ce7a3b4dff

16 changed files with 3370 additions and 13 deletions
@@ -9,7 +9,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | Cerebras API Key |
+| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |

## Sample Configuration

@@ -5,6 +5,7 @@
# the root directory of this source tree.

from collections.abc import AsyncGenerator
+from urllib.parse import urljoin

from cerebras.cloud.sdk import AsyncCerebras

@@ -35,14 +36,13 @@ from llama_stack.providers.utils.inference.model_registry import (
    ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
-    OpenAIChatCompletionToLlamaStackMixin,
-    OpenAICompletionToLlamaStackMixin,
    get_sampling_options,
    process_chat_completion_response,
    process_chat_completion_stream_response,
    process_completion_response,
    process_completion_stream_response,
)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack.providers.utils.inference.prompt_adapter import (
    chat_completion_request_to_prompt,
    completion_request_to_prompt,
@@ -53,10 +53,9 @@ from .models import MODEL_ENTRIES


class CerebrasInferenceAdapter(
+    OpenAIMixin,
    ModelRegistryHelper,
    Inference,
-    OpenAIChatCompletionToLlamaStackMixin,
-    OpenAICompletionToLlamaStackMixin,
):
    def __init__(self, config: CerebrasImplConfig) -> None:
        ModelRegistryHelper.__init__(
@@ -66,11 +65,17 @@ class CerebrasInferenceAdapter(
        self.config = config

        # TODO: make this use provider data, etc. like other providers
-        self.client = AsyncCerebras(
+        self._cerebras_client = AsyncCerebras(
            base_url=self.config.base_url,
            api_key=self.config.api_key.get_secret_value(),
        )

+    def get_api_key(self) -> str:
+        return self.config.api_key.get_secret_value()
+
+    def get_base_url(self) -> str:
+        return urljoin(self.config.base_url, "v1")
+
    async def initialize(self) -> None:
        return

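Two notes on the hunk above, with a standard-library illustration: the SDK client is renamed to `_cerebras_client`, presumably so the plain `client` name stays free for the OpenAI-compatible client managed through `OpenAIMixin`, and `get_base_url()` builds the OpenAI endpoint with `urljoin`, whose trailing-slash behavior is worth keeping in mind if `base_url` is ever pointed at a path (the gateway hosts below are made up for illustration):

```python
from urllib.parse import urljoin

# Default configuration: the host has no path, so "v1" is simply appended.
assert urljoin("https://api.cerebras.ai", "v1") == "https://api.cerebras.ai/v1"

# With a path and no trailing slash, urljoin replaces the last segment...
assert urljoin("https://gateway.example/cerebras", "v1") == "https://gateway.example/v1"
# ...so a custom base_url with a path component needs the trailing slash.
assert urljoin("https://gateway.example/cerebras/", "v1") == "https://gateway.example/cerebras/v1"
```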
@@ -107,14 +112,14 @@ class CerebrasInferenceAdapter(
    async def _nonstream_completion(self, request: CompletionRequest) -> CompletionResponse:
        params = await self._get_params(request)

-        r = await self.client.completions.create(**params)
+        r = await self._cerebras_client.completions.create(**params)

        return process_completion_response(r)

    async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)

-        stream = await self.client.completions.create(**params)
+        stream = await self._cerebras_client.completions.create(**params)

        async for chunk in process_completion_stream_response(stream):
            yield chunk
@@ -156,14 +161,14 @@ class CerebrasInferenceAdapter(
    async def _nonstream_chat_completion(self, request: CompletionRequest) -> CompletionResponse:
        params = await self._get_params(request)

-        r = await self.client.completions.create(**params)
+        r = await self._cerebras_client.completions.create(**params)

        return process_chat_completion_response(r, request)

    async def _stream_chat_completion(self, request: CompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)

-        stream = await self.client.completions.create(**params)
+        stream = await self._cerebras_client.completions.create(**params)

        async for chunk in process_chat_completion_stream_response(stream, request):
            yield chunk

@@ -20,8 +20,8 @@ class CerebrasImplConfig(BaseModel):
        default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
        description="Base URL for the Cerebras API",
    )
-    api_key: SecretStr | None = Field(
-        default=os.environ.get("CEREBRAS_API_KEY"),
+    api_key: SecretStr = Field(
+        default=SecretStr(os.environ.get("CEREBRAS_API_KEY")),
        description="Cerebras API Key",
    )

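A small runnable sketch of what the tightened `api_key` type above means for callers (illustrative only; `ConfigSketch` mirrors the new field declaration, and the empty-string fallback exists solely so the sketch runs without `CEREBRAS_API_KEY` set):

```python
import os

from pydantic import BaseModel, Field, SecretStr


class ConfigSketch(BaseModel):
    # Mirrors the new CerebrasImplConfig.api_key: always a SecretStr, never None.
    api_key: SecretStr = Field(
        default=SecretStr(os.environ.get("CEREBRAS_API_KEY", "")),
        description="Cerebras API Key",
    )


cfg = ConfigSketch()
# get_secret_value() is what get_api_key() hands to the client; SecretStr keeps
# the raw key out of repr() and logs.
raw_key = cfg.api_key.get_secret_value()
assert isinstance(raw_key, str)
```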
@@ -40,7 +40,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        "inline::sentence-transformers",
        "inline::vllm",
        "remote::bedrock",
-        "remote::cerebras",
        "remote::databricks",
        # Technically Nvidia does support OpenAI completions, but none of their hosted models
        # support both completions and chat completions endpoint and all the Llama models are
@@ -98,6 +97,8 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
        # the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
        "remote::tgi",  # TGI ignores n param silently
        "remote::together",  # `n` > 1 is not supported when streaming tokens. Please disable `stream`
+        # Error code 400 - {'message': '"n" > 1 is not currently supported', 'type': 'invalid_request_error', 'param': 'n', 'code': 'wrong_api_format'}
+        "remote::cerebras",
        "remote::databricks",  # Bad request: parameter "n" must be equal to 1 for streaming mode
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
@@ -110,6 +111,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
        "inline::sentence-transformers",
        "inline::vllm",
        "remote::bedrock",
        "remote::databricks",
        "remote::cerebras",
        "remote::runpod",
        "remote::watsonx",  # watsonx returns 404 when hitting the /openai/v1 endpoint

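The three test hunks above all edit the same kind of provider skip list; a self-contained paraphrase of the pattern follows (the real helpers and their `provider_from_model` lookup live in `tests/integration/inference/test_openai_completion.py`; the stub here only exists to make the sketch runnable):

```python
from types import SimpleNamespace

import pytest


def provider_from_model_stub(client_with_models, model_id):
    # Stand-in for the real lookup, which resolves the provider serving model_id.
    return SimpleNamespace(provider_type="remote::cerebras")


def skip_if_doesnt_support_n(client_with_models, model_id):
    # Same shape as the helper extended in this PR: resolve the provider,
    # then skip for providers known to reject n > 1.
    provider = provider_from_model_stub(client_with_models, model_id)
    if provider.provider_type in (
        "remote::cerebras",  # 400: '"n" > 1 is not currently supported' ... 'code': 'wrong_api_format'
        "remote::databricks",
        "remote::tgi",
        "remote::together",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
```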

tests/integration/recordings/responses/0547d0909f24.json (new file, 53 lines)
@ -0,0 +1,53 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.cerebras.ai/v1/v1/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama-3.3-70b",
|
||||
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
|
||||
"stream": false,
|
||||
"extra_body": {}
|
||||
},
|
||||
"endpoint": "/v1/completions",
|
||||
"model": "llama-3.3-70b"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.completion.Completion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-6438a448-bbbd-4da1-af88-19390676b0e9",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"text": " blue, sugar is white, but my heart is ________________________.\nA) black\nB) pink\nC) blank\nD) broken\nMy answer is D) broken. This is because the traditional romantic poem has a positive tone until it comes to the heart, which represents the speaker's emotional state. The word \"broken\" shows that the speaker is hurting, which adds a element of sadness to the poem. This is a typical way to express sorrow or longing in poetry.\nThe best answer is D.<|eot_id|>"
|
||||
}
|
||||
],
|
||||
"created": 1758191351,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "text_completion",
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": {
|
||||
"completion_tokens": 105,
|
||||
"prompt_tokens": 26,
|
||||
"total_tokens": 131,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": null,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"time_info": {
|
||||
"queue_time": 0.00016155,
|
||||
"prompt_time": 0.001595551,
|
||||
"completion_time": 0.107480394,
|
||||
"total_time": 0.11038637161254883,
|
||||
"created": 1758191351
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
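For orientation, each recording file in this PR pairs one captured request with its canned response, presumably keyed by a hash of the request in the filename; a minimal sketch of inspecting the recording above (standard library only; the path and field names come straight from that JSON):

```python
import json
from pathlib import Path

recording = json.loads(
    Path("tests/integration/recordings/responses/0547d0909f24.json").read_text()
)

# The request side mirrors what the adapter sent to Cerebras...
assert recording["request"]["endpoint"] == "/v1/completions"
assert recording["request"]["body"]["model"] == "llama-3.3-70b"

# ...and the response side is the serialized OpenAI Completion object that
# gets replayed when the integration tests run without live credentials.
body = recording["response"]["body"]
assert body["__type__"] == "openai.types.completion.Completion"
print(body["__data__"]["choices"][0]["finish_reason"])  # "stop"
```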

tests/integration/recordings/responses/0648374e43e7.json (new file, 146 lines)
@ -0,0 +1,146 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama-3.3-70b",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
|
||||
}
|
||||
],
|
||||
"stream": true,
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the weather in a given city",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"city": {
|
||||
"type": "string",
|
||||
"description": "The city to get the weather for"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama-3.3-70b"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191362,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 0,
|
||||
"id": "439c86fe5",
|
||||
"function": {
|
||||
"arguments": "{\"city\": \"Tokyo\"}",
|
||||
"name": "get_weather"
|
||||
},
|
||||
"type": "function"
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191362,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-8b6a9499-1a5f-46dc-96b7-3d2b71eecd99",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "tool_calls",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191362,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": {
|
||||
"completion_tokens": 12,
|
||||
"prompt_tokens": 248,
|
||||
"total_tokens": 260,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": null,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"time_info": {
|
||||
"queue_time": 0.00016941,
|
||||
"prompt_time": 0.007276727,
|
||||
"completion_time": 0.00388514,
|
||||
"total_time": 0.013146162033081055,
|
||||
"created": 1758191362
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|

tests/integration/recordings/responses/0d3290adae1d.json (new file, 93 lines)
@ -0,0 +1,93 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama-3.3-70b",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
|
||||
}
|
||||
],
|
||||
"stream": false,
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the weather in a given city",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"city": {
|
||||
"type": "string",
|
||||
"description": "The city to get the weather for"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama-3.3-70b"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-6228def9-c13d-4d7a-9029-e2c638a16f1b",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "tool_calls",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "1c40cbc30",
|
||||
"function": {
|
||||
"arguments": "{\"city\": \"Tokyo\"}",
|
||||
"name": "get_weather"
|
||||
},
|
||||
"type": "function"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758191364,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": {
|
||||
"completion_tokens": 12,
|
||||
"prompt_tokens": 248,
|
||||
"total_tokens": 260,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": null,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"time_info": {
|
||||
"queue_time": 0.00041449,
|
||||
"prompt_time": 0.007237483,
|
||||
"completion_time": 0.003803105,
|
||||
"total_time": 0.013348102569580078,
|
||||
"created": 1758191364
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|

tests/integration/recordings/responses/2983cc1d79f0.json (new file, 742 lines)
@ -0,0 +1,742 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama-3.3-70b",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, world!"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama-3.3-70b"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Hello",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "!",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " It",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "'s",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " nice",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " to",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " meet",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " you",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ".",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Is",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " there",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " something",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " I",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " can",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " help",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " you",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " with",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " or",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " would",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " you",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " like",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " to",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " chat",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "?",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-de2bf7d0-0f5d-4f44-977c-209ab8ffa29d",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191361,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": {
|
||||
"completion_tokens": 25,
|
||||
"prompt_tokens": 39,
|
||||
"total_tokens": 64,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": null,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"time_info": {
|
||||
"queue_time": 0.00030481,
|
||||
"prompt_time": 0.002094315,
|
||||
"completion_time": 0.011856632,
|
||||
"total_time": 0.016039371490478516,
|
||||
"created": 1758191361
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|

tests/integration/recordings/responses/3cdb5cab6ce6.json (new file, 66 lines)
@ -0,0 +1,66 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama-3.3-70b",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Which planet do humans live on?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama-3.3-70b"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-839aab91-21a7-4ed9-b224-d22e524eda37",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Humans live on Earth.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1758191360,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": {
|
||||
"completion_tokens": 6,
|
||||
"prompt_tokens": 42,
|
||||
"total_tokens": 48,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": null,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"time_info": {
|
||||
"queue_time": 0.00028033,
|
||||
"prompt_time": 0.001467015,
|
||||
"completion_time": 0.007069593,
|
||||
"total_time": 0.010509490966796875,
|
||||
"created": 1758191360
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|

tests/integration/recordings/responses/50a8dc5b8ece.json (new file, 612 lines)
@ -0,0 +1,612 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://api.cerebras.ai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama-3.3-70b",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the name of the US captial?"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama-3.3-70b"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191363,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "The",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191363,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " name",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1758191363,
|
||||
"model": "llama-3.3-70b",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_c5ec625e72d41732d8fd",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " of",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
              "logprobs": null
            }
          ],
          "created": 1758191363,
          "model": "llama-3.3-70b",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_c5ec625e72d41732d8fd",
          "usage": null
        }
      },
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " the", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " US", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " capital", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " is", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " Washington", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": ",", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " D", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": ".C", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": ".", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " (", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": "short", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " for", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " District", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " of", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": " Columbia", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": ").", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-adc9cfae-89ba-4938-9137-37a1f46d1596", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 1758191363, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": {"completion_tokens": 20, "prompt_tokens": 45, "total_tokens": 65, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}}, "time_info": {"queue_time": 0.000509825, "prompt_time": 0.002284829, "completion_time": 0.008430168, "total_time": 0.012710094451904297, "created": 1758191363}}}
    ],
    "is_streaming": true
  }
}
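For readers skimming these fixtures, each element of `response.body` is a serialized openai chunk (`__type__` plus its `__data__`). A minimal sketch of how such a recording could be rehydrated and the generated text reassembled; this is illustrative only, not the test harness used by this PR, and the file path is a placeholder:

```python
import json

from openai.types.chat import ChatCompletionChunk

# Placeholder path: any of the streaming recordings added in this PR.
path = "tests/integration/recordings/responses/<recording>.json"
with open(path) as f:
    recording = json.load(f)

# Rebuild the pydantic chunk objects and join their content deltas.
chunks = [
    ChatCompletionChunk.model_validate(item["__data__"])
    for item in recording["response"]["body"]
]
text = "".join(c.choices[0].delta.content or "" for c in chunks if c.choices)
print(text)
```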
tests/integration/recordings/responses/a1c5bf09ea53.json (new file, 66 lines)
@@ -0,0 +1,66 @@
{
  "request": {
    "method": "POST",
    "url": "https://api.cerebras.ai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama-3.3-70b",
      "messages": [
        {
          "role": "user",
          "content": "Hello, world!"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama-3.3-70b"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-1dcfef1f-f955-4158-a1fc-0c2643b60e4e",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 1758191362,
        "model": "llama-3.3-70b",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_c5ec625e72d41732d8fd",
        "usage": {
          "completion_tokens": 25,
          "prompt_tokens": 39,
          "total_tokens": 64,
          "completion_tokens_details": null,
          "prompt_tokens_details": {
            "audio_tokens": null,
            "cached_tokens": 0
          }
        },
        "time_info": {
          "queue_time": 0.083508803,
          "prompt_time": 0.003352167,
          "completion_time": 0.011506416,
          "total_time": 0.09965348243713379,
          "created": 1758191362
        }
      }
    },
    "is_streaming": false
  }
}
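The a1c5bf09ea53.json fixture above captures a plain non-streaming chat completion against the Cerebras OpenAI-compatible endpoint. A minimal sketch of the kind of call that produces it, assuming an API key in a `CEREBRAS_API_KEY` environment variable; the client wiring here is illustrative, not the provider's internal code path:

```python
import os

from openai import OpenAI

# Point the standard OpenAI client at the endpoint recorded in the fixture.
client = OpenAI(
    base_url="https://api.cerebras.ai/v1",
    api_key=os.environ["CEREBRAS_API_KEY"],  # assumed env var
)

resp = client.chat.completions.create(
    model="llama-3.3-70b",
    messages=[{"role": "user", "content": "Hello, world!"}],
    stream=False,
)
print(resp.choices[0].message.content)
```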
tests/integration/recordings/responses/af6ca03dcbc3.json (new file, 1025 lines)
File diff suppressed because it is too large.
tests/integration/recordings/responses/b459f403a5ae.json (new file, 66 lines)
@@ -0,0 +1,66 @@
{
  "request": {
    "method": "POST",
    "url": "https://api.cerebras.ai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama-3.3-70b",
      "messages": [
        {
          "role": "user",
          "content": "Which planet has rings around it with a name starting with letter S?"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama-3.3-70b"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-e9e83004-bcd0-47f8-97c3-8e3d789a6573",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "The planet with rings around it that starts with the letter S is Saturn. Saturn's rings are one of the most prominent and well-known ring systems in our solar system.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 1758191362,
        "model": "llama-3.3-70b",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_c5ec625e72d41732d8fd",
        "usage": {
          "completion_tokens": 35,
          "prompt_tokens": 49,
          "total_tokens": 84,
          "completion_tokens_details": null,
          "prompt_tokens_details": {
            "audio_tokens": null,
            "cached_tokens": 0
          }
        },
        "time_info": {
          "queue_time": 0.00091223,
          "prompt_time": 0.00239449,
          "completion_time": 0.013951346,
          "total_time": 0.01872849464416504,
          "created": 1758191362
        }
      }
    },
    "is_streaming": false
  }
}
tests/integration/recordings/responses/eefb4206a4a9.json (new file, 378 lines)
@@ -0,0 +1,378 @@
{
  "request": {
    "method": "POST",
    "url": "https://api.cerebras.ai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama-3.3-70b",
      "messages": [
        {
          "role": "user",
          "content": "What's the name of the Sun in latin?"
        }
      ],
      "stream": true
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama-3.3-70b"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": "The", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": " Latin", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": " name", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": " for", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": " the", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": " Sun", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": " is", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": " \"", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": "Sol", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": "\".", "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-9decaa3e-f7e6-4e9b-a7f3-c00fdb748534", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 1758191360, "model": "llama-3.3-70b", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_c5ec625e72d41732d8fd", "usage": {"completion_tokens": 11, "prompt_tokens": 45, "total_tokens": 56, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}}, "time_info": {"queue_time": 9.281e-05, "prompt_time": 0.002694912, "completion_time": 0.003747467, "total_time": 0.008375167846679688, "created": 1758191360}}}
    ],
    "is_streaming": true
  }
}
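The eefb4206a4a9.json fixture records the streaming variant of the same endpoint. A minimal sketch of how such a stream is consumed with the standard OpenAI client, printing deltas as they arrive; again illustrative rather than the provider's internal code path, with `CEREBRAS_API_KEY` assumed:

```python
import os

from openai import OpenAI

client = OpenAI(
    base_url="https://api.cerebras.ai/v1",
    api_key=os.environ["CEREBRAS_API_KEY"],  # assumed env var
)

# Same question captured in the fixture, streamed chunk by chunk.
stream = client.chat.completions.create(
    model="llama-3.3-70b",
    messages=[{"role": "user", "content": "What's the name of the Sun in latin?"}],
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
print()
```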
@@ -0,0 +1,96 @@
{
  "request": {
    "method": "POST",
    "url": "https://api.cerebras.ai/v1/v1/models",
    "headers": {},
    "body": {},
    "endpoint": "/v1/models",
    "model": ""
  },
  "response": {
    "body": [
      {"__type__": "openai.types.model.Model", "__data__": {"id": "llama-4-maverick-17b-128e-instruct", "created": 0, "object": "model", "owned_by": "Cerebras"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "llama-4-scout-17b-16e-instruct", "created": 0, "object": "model", "owned_by": "Cerebras"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "qwen-3-235b-a22b-instruct-2507", "created": 0, "object": "model", "owned_by": "Cerebras"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "llama3.1-8b", "created": 0, "object": "model", "owned_by": "Cerebras"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "qwen-3-32b", "created": 0, "object": "model", "owned_by": "Cerebras"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "gpt-oss-120b", "created": 0, "object": "model", "owned_by": "Cerebras"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "qwen-3-235b-a22b-thinking-2507", "created": 0, "object": "model", "owned_by": "Cerebras"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "llama-3.3-70b", "created": 0, "object": "model", "owned_by": "Cerebras"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "qwen-3-coder-480b", "created": 0, "object": "model", "owned_by": "Cerebras"}}
    ],
    "is_streaming": false
  }
}
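The /v1/models recording above is what backs dynamic model listing for this provider: it captures the model catalog the Cerebras endpoint advertises, including llama-3.3-70b used throughout the other fixtures. A minimal sketch of the equivalent client-side call, illustrative only:

```python
import os

from openai import OpenAI

client = OpenAI(
    base_url="https://api.cerebras.ai/v1",
    api_key=os.environ["CEREBRAS_API_KEY"],  # assumed env var
)

# Enumerate the models the endpoint exposes, e.g. "llama-3.3-70b".
for model in client.models.list():
    print(model.id, model.owned_by)
```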
@@ -108,6 +108,13 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "embedding_model": "together/togethercomputer/m2-bert-80M-32k-retrieval",
         },
     ),
+    "cerebras": Setup(
+        name="cerebras",
+        description="Cerebras models",
+        defaults={
+            "text_model": "cerebras/llama-3.3-70b",
+        },
+    ),
     "databricks": Setup(
         name="databricks",
         description="Databricks models",
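The hunk above registers a `cerebras` entry in `SETUP_DEFINITIONS`, giving the integration suite a default text model to resolve when the cerebras setup is selected. A minimal sketch of reading that entry back; the import path is an assumption for illustration, the keys and values come from the hunk itself:

```python
# Assumed import path; SETUP_DEFINITIONS and Setup live in the module edited above.
from tests.integration.suites import SETUP_DEFINITIONS

cerebras = SETUP_DEFINITIONS["cerebras"]
assert cerebras.defaults["text_model"] == "cerebras/llama-3.3-70b"
print(cerebras.description)  # "Cerebras models"
```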