Merge remote-tracking branch 'origin/main' into TamiTakamiya/tool-param-definition-update

Ashwin Bharambe 2025-09-27 11:24:11 -07:00
commit 27c5365f99
47 changed files with 49304 additions and 1334 deletions

View file

@@ -210,55 +210,6 @@
}
}
},
"/v1/inference/completion": {
"post": {
"responses": {
"200": {
"description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompletionResponse"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/CompletionResponseStreamChunk"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Inference"
],
"summary": "Generate a completion for the given content using the specified model.",
"description": "Generate a completion for the given content using the specified model.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompletionRequest"
}
}
},
"required": true
}
}
},
"/v1/agents": {
"get": {
"responses": {
@@ -7324,126 +7275,6 @@
"title": "ToolCallDelta",
"description": "A tool call content delta for streaming responses."
},
"CompletionRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content to generate a completion for."
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams",
"description": "(Optional) Parameters to control the sampling strategy."
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat",
"description": "(Optional) Grammar specification for guided (structured) decoding."
},
"stream": {
"type": "boolean",
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
},
"logprobs": {
"type": "object",
"properties": {
"top_k": {
"type": "integer",
"default": 0,
"description": "How many tokens (for each position) to return log probabilities for."
}
},
"additionalProperties": false,
"description": "(Optional) If specified, log probabilities for each token position will be returned."
}
},
"additionalProperties": false,
"required": [
"model_id",
"content"
],
"title": "CompletionRequest"
},
"CompletionResponse": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
},
"description": "(Optional) List of metrics associated with the API response"
},
"content": {
"type": "string",
"description": "The generated completion text"
},
"stop_reason": {
"type": "string",
"enum": [
"end_of_turn",
"end_of_message",
"out_of_tokens"
],
"description": "Reason why generation stopped"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
},
"description": "Optional log probabilities for generated tokens"
}
},
"additionalProperties": false,
"required": [
"content",
"stop_reason"
],
"title": "CompletionResponse",
"description": "Response from a completion request."
},
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
},
"description": "(Optional) List of metrics associated with the API response"
},
"delta": {
"type": "string",
"description": "New content generated since last chunk. This can be one or more tokens."
},
"stop_reason": {
"type": "string",
"enum": [
"end_of_turn",
"end_of_message",
"out_of_tokens"
],
"description": "Optional reason why generation stopped, if complete"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
},
"description": "Optional log probabilities for generated tokens"
}
},
"additionalProperties": false,
"required": [
"delta"
],
"title": "CompletionResponseStreamChunk",
"description": "A chunk of a streamed completion response."
},
"AgentConfig": {
"type": "object",
"properties": {

View file

@@ -132,43 +132,6 @@ paths:
schema:
$ref: '#/components/schemas/ChatCompletionRequest'
required: true
/v1/inference/completion:
post:
responses:
'200':
description: >-
If stream=False, returns a CompletionResponse with the full completion.
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionResponse'
text/event-stream:
schema:
$ref: '#/components/schemas/CompletionResponseStreamChunk'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate a completion for the given content using the specified model.
description: >-
Generate a completion for the given content using the specified model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionRequest'
required: true
/v1/agents:
get:
responses:
@@ -5302,112 +5265,6 @@ components:
title: ToolCallDelta
description: >-
A tool call content delta for streaming responses.
CompletionRequest:
type: object
properties:
model_id:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content to generate a completion for.
sampling_params:
$ref: '#/components/schemas/SamplingParams'
description: >-
(Optional) Parameters to control the sampling strategy.
response_format:
$ref: '#/components/schemas/ResponseFormat'
description: >-
(Optional) Grammar specification for guided (structured) decoding.
stream:
type: boolean
description: >-
(Optional) If True, generate an SSE event stream of the response. Defaults
to False.
logprobs:
type: object
properties:
top_k:
type: integer
default: 0
description: >-
How many tokens (for each position) to return log probabilities for.
additionalProperties: false
description: >-
(Optional) If specified, log probabilities for each token position will
be returned.
additionalProperties: false
required:
- model_id
- content
title: CompletionRequest
CompletionResponse:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
description: >-
(Optional) List of metrics associated with the API response
content:
type: string
description: The generated completion text
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: Reason why generation stopped
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
description: >-
Optional log probabilities for generated tokens
additionalProperties: false
required:
- content
- stop_reason
title: CompletionResponse
description: Response from a completion request.
CompletionResponseStreamChunk:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
description: >-
(Optional) List of metrics associated with the API response
delta:
type: string
description: >-
New content generated since last chunk. This can be one or more tokens.
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: >-
Optional reason why generation stopped, if complete
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
description: >-
Optional log probabilities for generated tokens
additionalProperties: false
required:
- delta
title: CompletionResponseStreamChunk
description: >-
A chunk of a streamed completion response.
AgentConfig:
type: object
properties:

View file

@@ -1008,7 +1008,6 @@ class InferenceProvider(Protocol):
model_store: ModelStore | None = None
@webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion(
self,
model_id: str,

View file

@@ -224,10 +224,6 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
return _GLOBAL_STORAGE["gauges"][name]
def _log_metric(self, event: MetricEvent) -> None:
# Always log to console if console sink is enabled (debug)
if TelemetrySink.CONSOLE in self.config.sinks:
logger.debug(f"METRIC: {event.metric}={event.value} {event.unit} {event.attributes}")
# Add metric as an event to the current span
try:
with self._lock:

View file

@@ -61,6 +61,7 @@ logger = get_logger(name=__name__, category="inference::fireworks")
class FireworksInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, NeedsRequestProviderData):
embedding_model_metadata = {
"nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
"accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
}
def __init__(self, config: FireworksImplConfig) -> None:
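
The added metadata entry registers Fireworks' qwen3-embedding-8b with a 4096-dimensional embedding and a 40960-token context. A minimal sketch of exercising that model through the OpenAI-compatible embeddings endpoint, matching the recordings added later in this commit; the client setup and API key are assumptions:

from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",  # base URL seen in the recordings below
    api_key="YOUR_FIREWORKS_API_KEY",  # assumption: replace with a real key
)

resp = client.embeddings.create(
    model="accounts/fireworks/models/qwen3-embedding-8b",
    input=["hello world"],
    encoding_format="float",
)
# Expected to match embedding_dimension=4096 from the metadata above.
assert len(resp.data[0].embedding) == 4096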

View file

@@ -192,6 +192,14 @@ async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
format = "png"
return content, format
elif uri.startswith("data"):
# data:image/{format};base64,{data}
match = re.match(r"data:image/(\w+);base64,(.+)", uri)
if not match:
raise ValueError(f"Invalid data URL format, {uri[:40]}...")
fmt, image_data = match.groups()
content = base64.b64decode(image_data)
return content, fmt
else:
return None
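
The new branch decodes base64 data URLs of the form data:image/{format};base64,{data}. A self-contained sketch of the same parsing logic (the helper name is illustrative, not part of the diff):

import base64
import re

def parse_image_data_url(uri: str) -> tuple[bytes, str]:
    # data:image/{format};base64,{data}
    match = re.match(r"data:image/(\w+);base64,(.+)", uri)
    if not match:
        raise ValueError(f"Invalid data URL format, {uri[:40]}...")
    fmt, image_data = match.groups()
    return base64.b64decode(image_data), fmt

# Example: round-trip a tiny payload through a data URL.
uri = "data:image/png;base64," + base64.b64encode(b"\x89PNG").decode()
content, fmt = parse_image_data_url(uri)
assert fmt == "png" and content.startswith(b"\x89PNG")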

View file

@@ -1,303 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
#
# Test plan:
#
# Types of input:
# - array of strings
# - array of images (ImageContentItem, either URL or base64 string)
# - array of texts (TextContentItem)
# Types of output:
# - list of list of floats
# Params:
# - text_truncation
# - absent w/ long text -> error
# - none w/ long text -> error
# - absent w/ short text -> ok
# - none w/ short text -> ok
# - end w/ long text -> ok
# - end w/ short text -> ok
# - start w/ long text -> ok
# - start w/ short text -> ok
# - output_dimension
# - response dimension matches
# - task_type, only for asymmetric models
# - query embedding != passage embedding
# Negative:
# - long string
# - long text
#
# Todo:
# - negative tests
# - empty
# - empty list
# - empty string
# - empty text
# - empty image
# - long
# - large image
# - appropriate combinations
# - batch size
# - many inputs
# - invalid
# - invalid URL
# - invalid base64
#
# Notes:
# - use llama_stack_client fixture
# - use pytest.mark.parametrize when possible
# - no accuracy tests: only check the type of output, not the content
#
import pytest
from llama_stack_client import BadRequestError as LlamaStackBadRequestError
from llama_stack_client.types import EmbeddingsResponse
from llama_stack_client.types.shared.interleaved_content import (
ImageContentItem,
ImageContentItemImage,
ImageContentItemImageURL,
TextContentItem,
)
from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.core.library_client import LlamaStackAsLibraryClient
DUMMY_STRING = "hello"
DUMMY_STRING2 = "world"
DUMMY_LONG_STRING = "NVDA " * 10240
DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text")
# TODO(mf): add a real image URL and base64 string
DUMMY_IMAGE_URL = ImageContentItem(
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
)
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
SUPPORTED_PROVIDERS = {"remote::nvidia"}
MODELS_SUPPORTING_MEDIA = {}
MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}
MODELS_REQUIRING_TASK_TYPE = {
"nvidia/llama-3.2-nv-embedqa-1b-v2",
"nvidia/nv-embedqa-e5-v5",
"nvidia/nv-embedqa-mistral-7b-v2",
"snowflake/arctic-embed-l",
}
MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE
def default_task_type(model_id):
"""
Some models require a task type parameter. This provides a default value for
testing those models.
"""
if model_id in MODELS_REQUIRING_TASK_TYPE:
return {"task_type": "query"}
return {}
@pytest.mark.parametrize(
"contents",
[
[DUMMY_STRING, DUMMY_STRING2],
[DUMMY_TEXT, DUMMY_TEXT2],
],
ids=[
"list[string]",
"list[text]",
],
)
def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_IMAGE_URL, DUMMY_IMAGE_BASE64],
[DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT],
],
ids=[
"list[url,base64]",
"list[url,string,base64,text]",
],
)
def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_MEDIA:
pytest.xfail(f"{embedding_model_id} doesn't support media")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
"end",
"start",
],
)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_LONG_TEXT],
[DUMMY_STRING],
],
ids=[
"long",
"short",
],
)
def test_embedding_truncation(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=contents,
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
None,
"none",
],
)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_LONG_TEXT],
[DUMMY_LONG_STRING],
],
ids=[
"long-text",
"long-str",
],
)
def test_embedding_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
# Using LlamaStackClient from llama_stack_client will raise llama_stack_client.BadRequestError,
# while using LlamaStackAsLibraryClient from llama_stack.core.library_client will raise whatever error the backend raises
error_type = (
OpenAIBadRequestError
if isinstance(llama_stack_client, LlamaStackAsLibraryClient)
else LlamaStackBadRequestError
)
with pytest.raises(error_type):
llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_LONG_TEXT],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION:
pytest.xfail(f"{embedding_model_id} doesn't support output_dimension")
base_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id)
)
test_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
**default_task_type(embedding_model_id),
output_dimension=32,
)
assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
assert len(test_response.embeddings[0]) == 32
def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE:
pytest.xfail(f"{embedding_model_id} doesn't support task_type")
query_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
)
document_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="document"
)
assert query_embedding.embeddings != document_embedding.embeddings
@pytest.mark.parametrize(
"text_truncation",
[
None,
"none",
"end",
"start",
],
)
def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
"NONE",
"END",
"START",
"left",
"right",
],
)
def test_embedding_text_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
error_type = ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
with pytest.raises(error_type):
llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)

View file

@@ -9,6 +9,7 @@ import time
import unicodedata
import pytest
from pydantic import BaseModel
from ..test_cases.test_case import TestCase
@@ -62,6 +63,14 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
def skip_if_doesnt_support_completions_logprobs(client_with_models, model_id):
provider_type = provider_from_model(client_with_models, model_id).provider_type
if provider_type in (
"remote::ollama", # logprobs is ignored
):
pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions logprobs.")
def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
# To test `fim` (fill-in-the-middle) completion, we need a model that supports the `suffix` parameter.
# Use this to specifically test that API functionality.
@@ -205,28 +214,6 @@ def test_openai_completion_streaming(llama_stack_client, client_with_models, tex
assert len(content_str) > 10
@pytest.mark.parametrize(
"prompt_logprobs",
[
1,
0,
],
)
def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
prompt = "Hello, world!"
response = llama_stack_client.completions.create(
model=text_model_id,
prompt=prompt,
stream=False,
prompt_logprobs=prompt_logprobs,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert len(choice.prompt_logprobs) > 0
def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
@@ -518,3 +505,214 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
message_content = response.choices[0].message.content.lower().strip()
normalized_content = _normalize_text(message_content)
assert "hello world" in normalized_content
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:stop_sequence",
],
)
def test_openai_completion_stop_sequence(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
stop="1963",
stream=False,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert "1963" not in choice.text
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
stop=["blathering", "1963"],
stream=False,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert "1963" not in choice.text
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_openai_completion_logprobs(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
logprobs=5,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert choice.text, "Response text should not be empty"
assert choice.logprobs, "Logprobs should not be empty"
logprobs = choice.logprobs
assert logprobs.token_logprobs, "Response tokens should not be empty"
assert len(logprobs.tokens) == len(logprobs.token_logprobs)
assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
assert logprobs.top_logprobs[i][token] == prob
assert len(logprobs.top_logprobs[i]) == 5
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_openai_completion_logprobs_streaming(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
logprobs=3,
stream=True,
max_tokens=5,
)
for chunk in response:
choice = chunk.choices[0]
if choice.text: # if there's a token, we expect logprobs
assert choice.logprobs, "Logprobs should not be empty"
logprobs = choice.logprobs
assert logprobs.token_logprobs, "Response tokens should not be empty"
assert len(logprobs.tokens) == len(logprobs.token_logprobs)
assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
assert logprobs.top_logprobs[i][token] == prob
assert len(logprobs.top_logprobs[i]) == 3
else: # no token, no logprobs
assert not choice.logprobs, "Logprobs should be empty"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tools(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=False,
)
assert len(response.choices) == 1
assert len(response.choices[0].message.tool_calls) == 1
tool_call = response.choices[0].message.tool_calls[0]
assert tool_call.function.name == tc["tools"][0]["function"]["name"]
assert "location" in tool_call.function.arguments
assert tc["expected"]["location"] in tool_call.function.arguments
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tools_and_streaming(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=True,
)
# Accumulate tool calls from streaming chunks
tool_calls = []
for chunk in response:
if chunk.choices and chunk.choices[0].delta.tool_calls:
for i, tc_delta in enumerate(chunk.choices[0].delta.tool_calls):
while len(tool_calls) <= i:
tool_calls.append({"function": {"name": "", "arguments": ""}})
if tc_delta.function and tc_delta.function.name:
tool_calls[i]["function"]["name"] = tc_delta.function.name
if tc_delta.function and tc_delta.function.arguments:
tool_calls[i]["function"]["arguments"] += tc_delta.function.arguments
assert len(tool_calls) == 1
tool_call = tool_calls[0]
assert tool_call["function"]["name"] == tc["tools"][0]["function"]["name"]
assert "location" in tool_call["function"]["arguments"]
assert tc["expected"]["location"] in tool_call["function"]["arguments"]
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tool_choice_none(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="none",
stream=False,
)
assert len(response.choices) == 1
tool_calls = response.choices[0].message.tool_calls
assert tool_calls is None or len(tool_calls) == 0
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:structured_output",
],
)
def test_openai_chat_completion_structured_output(openai_client, text_model_id, test_case):
# Note: Skip condition may need adjustment for OpenAI client
class AnswerFormat(BaseModel):
first_name: str
last_name: str
year_of_birth: int
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
response_format={
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": AnswerFormat.model_json_schema(),
},
},
stream=False,
)
print(response.choices[0].message.content)
answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
expected = tc["expected"]
assert answer.first_name == expected["first_name"]
assert answer.last_name == expected["last_name"]
assert answer.year_of_birth == expected["year_of_birth"]

View file

@@ -1,545 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from time import sleep
import pytest
from pydantic import BaseModel
from llama_stack.models.llama.sku_list import resolve_model
from ..test_cases.test_case import TestCase
PROVIDER_LOGPROBS_TOP_K = {"remote::together", "remote::fireworks", "remote::vllm"}
def skip_if_model_doesnt_support_completion(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if (
provider.provider_type
in (
"remote::openai",
"remote::anthropic",
"remote::gemini",
"remote::vertexai",
"remote::groq",
"remote::sambanova",
"remote::azure",
)
or "openai-compat" in provider.provider_type
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
def skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if provider.provider_type in ("remote::sambanova", "remote::azure", "remote::watsonx"):
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
)
def get_llama_model(client_with_models, model_id):
models = {}
for m in client_with_models.models.list():
models[m.identifier] = m
models[m.provider_resource_id] = m
assert model_id in models, f"Model {model_id} not found"
model = models[model_id]
ids = (model.identifier, model.provider_resource_id)
for mid in ids:
if resolve_model(mid):
return mid
return model.metadata.get("llama_model", None)
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_text_completion_non_streaming(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=False,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
assert len(response.content) > 10
# assert "blue" in response.content.lower().strip()
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_text_completion_streaming(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
streamed_content = [chunk.delta for chunk in response]
content_str = "".join(streamed_content).lower().strip()
# assert "blue" in content_str
assert len(content_str) > 10
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:stop_sequence",
],
)
def test_text_completion_stop_sequence(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
# This is only supported/tested for remote vLLM: https://github.com/meta-llama/llama-stack/issues/1771
if inference_provider_type != "remote::vllm":
pytest.xfail(f"{inference_provider_type} doesn't support 'stop' parameter yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
"stop": ["1963"],
},
)
streamed_content = [chunk.delta for chunk in response]
content_str = "".join(streamed_content).lower().strip()
assert "1963" not in content_str
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_text_completion_log_probs_non_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=False,
model_id=text_model_id,
sampling_params={
"max_tokens": 5,
},
logprobs={
"top_k": 1,
},
)
assert response.logprobs, "Logprobs should not be empty"
assert 1 <= len(response.logprobs) <= 5 # each token has 1 logprob and here max_tokens=5
assert all(len(logprob.logprobs_by_token) == 1 for logprob in response.logprobs)
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_text_completion_log_probs_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 5,
},
logprobs={
"top_k": 1,
},
)
streamed_content = list(response)
for chunk in streamed_content:
if chunk.delta: # if there's a token, we expect logprobs
assert chunk.logprobs, "Logprobs should not be empty"
assert all(len(logprob.logprobs_by_token) == 1 for logprob in chunk.logprobs)
else: # no token, no logprobs
assert not chunk.logprobs, "Logprobs should be empty"
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:structured_output",
],
)
def test_text_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class AnswerFormat(BaseModel):
name: str
year_born: str
year_retired: str
tc = TestCase(test_case)
user_input = tc["user_input"]
response = client_with_models.inference.completion(
model_id=text_model_id,
content=user_input,
stream=False,
sampling_params={
"max_tokens": 50,
},
response_format={
"type": "json_schema",
"json_schema": AnswerFormat.model_json_schema(),
},
)
answer = AnswerFormat.model_validate_json(response.content)
expected = tc["expected"]
assert answer.name == expected["name"]
assert answer.year_born == expected["year_born"]
assert answer.year_retired == expected["year_retired"]
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:non_streaming_01",
"inference:chat_completion:non_streaming_02",
],
)
def test_text_chat_completion_non_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=[
{
"role": "user",
"content": question,
}
],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
assert expected.lower() in message_content
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:streaming_01",
"inference:chat_completion:streaming_02",
],
)
def test_text_chat_completion_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=[{"role": "user", "content": question}],
stream=True,
timeout=120, # Increase timeout to 2 minutes for large conversation history
)
streamed_content = [str(chunk.event.delta.text.lower().strip()) for chunk in response]
assert len(streamed_content) > 0
assert expected.lower() in "".join(streamed_content)
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_calling_and_non_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=False,
)
# some models can return content for the response in addition to the tool call
assert response.completion_message.role == "assistant"
assert len(response.completion_message.tool_calls) == 1
assert response.completion_message.tool_calls[0].tool_name == tc["tools"][0]["tool_name"]
assert response.completion_message.tool_calls[0].arguments == tc["expected"]
# Extracts streamed text and separates it from the tool invocation content.
# The returned tool invocation content will be a string, so it's easy to compare with the expected value,
# e.g. "[get_weather, {'location': 'San Francisco, CA'}]"
def extract_tool_invocation_content(response):
tool_invocation_content: str = ""
for chunk in response:
delta = chunk.event.delta
if delta.type == "tool_call" and delta.parse_status == "succeeded":
call = delta.tool_call
tool_invocation_content += f"[{call.tool_name}, {call.arguments}]"
return tool_invocation_content
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_calling_and_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
expected_tool_name = tc["tools"][0]["tool_name"]
expected_argument = tc["expected"]
assert tool_invocation_content == f"[{expected_tool_name}, {expected_argument}]"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_choice_required(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_config={
"tool_choice": "required",
},
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
expected_tool_name = tc["tools"][0]["tool_name"]
expected_argument = tc["expected"]
assert tool_invocation_content == f"[{expected_tool_name}, {expected_argument}]"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_choice_none(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_config={"tool_choice": "none"},
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
assert tool_invocation_content == ""
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:structured_output",
],
)
def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class NBAStats(BaseModel):
year_for_draft: int
num_seasons_in_nba: int
class AnswerFormat(BaseModel):
first_name: str
last_name: str
year_of_birth: int
nba_stats: NBAStats
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
response_format={
"type": "json_schema",
"json_schema": AnswerFormat.model_json_schema(),
},
stream=False,
)
answer = AnswerFormat.model_validate_json(response.completion_message.content)
expected = tc["expected"]
assert answer.first_name == expected["first_name"]
assert answer.last_name == expected["last_name"]
assert answer.year_of_birth == expected["year_of_birth"]
assert answer.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"]
assert answer.nba_stats.year_for_draft == expected["year_for_draft"]
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling_tools_absent",
],
)
def test_text_chat_completion_tool_calling_tools_not_in_request(
client_with_models, text_model_id, test_case, streaming
):
tc = TestCase(test_case)
# TODO: more dynamic lookup on tool_prompt_format for model family
tool_prompt_format = "json" if "3.1" in text_model_id else "python_list"
request = {
"model_id": text_model_id,
"messages": tc["messages"],
"tools": tc["tools"],
"tool_choice": "auto",
"tool_prompt_format": tool_prompt_format,
"stream": streaming,
}
response = client_with_models.inference.chat_completion(**request)
if streaming:
for chunk in response:
delta = chunk.event.delta
if delta.type == "tool_call" and delta.parse_status == "succeeded":
assert delta.tool_call.tool_name == "get_object_namespace_list"
if delta.type == "tool_call" and delta.parse_status == "failed":
# expect raw message that failed to parse in tool_call
assert isinstance(delta.tool_call, str)
assert len(delta.tool_call) > 0
else:
for tc in response.completion_message.tool_calls:
assert tc.tool_name == "get_object_namespace_list"
@pytest.mark.parametrize(
"test_case",
[
# Tests if the model can handle simple messages like "Hi" or
# a message unrelated to one of the tool calls
"inference:chat_completion:text_then_tool",
# Tests if the model can do full tool call with responses correctly
"inference:chat_completion:tool_then_answer",
# Tests if model can generate multiple params and
# read outputs correctly
"inference:chat_completion:array_parameter",
],
)
def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
"""This test tests the model's tool calling loop in various scenarios"""
if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
pytest.xfail("Not tested for non-llama4 models yet")
tc = TestCase(test_case)
messages = []
# keep going until either
# 1. there are still messages left to test in multi-turn, or
# 2. no messages are left but the last message is a tool response
while len(tc["messages"]) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"):
# do not take new messages if last message is tool response
if len(messages) == 0 or messages[-1]["role"] != "tool":
new_messages = tc["messages"].pop(0)
messages += new_messages
# pprint(messages)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=messages,
tools=tc["tools"],
stream=False,
sampling_params={
"strategy": {
"type": "top_p",
"top_p": 0.9,
"temperature": 0.6,
}
},
)
op_msg = response.completion_message
messages.append(op_msg.model_dump())
# print(op_msg)
assert op_msg.role == "assistant"
expected = tc["expected"].pop(0)
assert len(op_msg.tool_calls) == expected["num_tool_calls"]
if expected["num_tool_calls"] > 0:
assert op_msg.tool_calls[0].tool_name == expected["tool_name"]
assert op_msg.tool_calls[0].arguments == expected["tool_arguments"]
tool_response = tc["tool_responses"].pop(0)
messages.append(
# Tool Response Message
{
"role": "tool",
"call_id": op_msg.tool_calls[0].call_id,
"content": tool_response["response"],
}
)
else:
actual_answer = op_msg.content.lower()
# pprint(actual_answer)
assert expected["answer"] in actual_answer
# sleep to avoid rate limit
sleep(1)

View file

@@ -25,16 +25,19 @@ def base64_image_data(image_path):
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
def base64_image_url(base64_image_data):
return f"data:image/png;base64,{base64_image_data}"
def test_image_chat_completion_non_streaming(client_with_models, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
},
{
@@ -43,12 +46,12 @@ def test_image_chat_completion_non_streaming(client_with_models, vision_model_id
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
@@ -68,8 +71,13 @@ def multi_image_data():
return encoded_files
@pytest.fixture
def multi_image_url(multi_image_data):
return [f"data:image/jpeg;base64,{data}" for data in multi_image_data]
@pytest.mark.parametrize("stream", [True, False])
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_url, stream):
supported_models = ["llama-4", "gpt-4o", "llama4"]
if not any(model in vision_model_id.lower() for model in supported_models):
pytest.skip(
@@ -81,15 +89,15 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": multi_image_data[0],
"type": "image_url",
"image_url": {
"url": multi_image_url[0],
},
},
{
"type": "image",
"image": {
"data": multi_image_data[1],
"type": "image_url",
"image_url": {
"url": multi_image_url[1],
},
},
{
@@ -99,17 +107,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
],
},
]
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=messages,
stream=stream,
)
if stream:
message_content = ""
for chunk in response:
message_content += chunk.event.delta.text
message_content += chunk.choices[0].delta.content
else:
message_content = response.completion_message.content
message_content = response.choices[0].message.content
assert len(message_content) > 0
assert any(expected in message_content.lower().strip() for expected in {"bedroom"}), message_content
@@ -125,17 +133,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": multi_image_data[2],
"type": "image_url",
"image_url": {
"url": multi_image_data[2],
},
},
{"type": "text", "text": "How about this one?"},
],
},
)
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=messages,
stream=stream,
)
@@ -144,7 +152,7 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
for chunk in response:
message_content += chunk.event.delta.text
else:
message_content = response.completion_message.content
message_content = response.choices[0].message.content
assert len(message_content) > 0
assert any(expected in message_content.lower().strip() for expected in {"sword", "shield"}), message_content
@@ -154,11 +162,9 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
},
{
@@ -167,23 +173,23 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=True,
)
streamed_content = ""
for chunk in response:
streamed_content += chunk.event.delta.text.lower()
streamed_content += chunk.choices[0].delta.content.lower()
assert len(streamed_content) > 0
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data):
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_url):
image_spec = {
"type": "image",
"image": {
"data": base64_image_data,
"type": "image_url",
"image_url": {
"url": base64_image_url,
},
}
@@ -197,10 +203,10 @@ def test_image_chat_completion_base64(client_with_models, vision_model_id, base6
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
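
These hunks migrate the vision tests from the legacy inference.chat_completion content items ("type": "image") to the OpenAI chat.completions message shape ("type": "image_url"). A minimal sketch of the two image forms the updated tests use; the URL, base64 placeholder, and the commented call are illustrative assumptions:

message = {
    "role": "user",
    "content": [
        # Remote image referenced by URL
        {"type": "image_url", "image_url": {"url": "https://example.com/dog.png"}},
        # Inline image as a base64 data URL (see the base64_image_url fixture above)
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,<BASE64_DATA>"}},
        {"type": "text", "text": "What do these images show?"},
    ],
}
# response = client_with_models.chat.completions.create(
#     model=vision_model_id, messages=[message], stream=False
# )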

View file

@@ -0,0 +1,31 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": [],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,89 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
},
{
"role": "user",
"content": "Please give me information about Michael Jordan."
}
],
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": {
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth"
],
"title": "AnswerFormat",
"type": "object"
}
}
},
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-433",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}\n\n \t\t\t\t\t\t\t\t\t\t\t \t\t ",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758979490,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 31,
"prompt_tokens": 60,
"total_tokens": 91,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
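
The recording above captures a json_schema structured-output request against a local Ollama server. A sketch of the client-side call that produces such a request, mirroring test_openai_chat_completion_structured_output added earlier in this commit; openai_client is assumed to be a pre-configured OpenAI client pointed at the stack:

from pydantic import BaseModel

class AnswerFormat(BaseModel):
    first_name: str
    last_name: str
    year_of_birth: int

response = openai_client.chat.completions.create(  # assumed pre-configured client
    model="llama3.2:3b-instruct-fp16",
    messages=[
        {"role": "system", "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."},
        {"role": "user", "content": "Please give me information about Michael Jordan."},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "AnswerFormat", "schema": AnswerFormat.model_json_schema()},
    },
    stream=False,
)
answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
assert answer.year_of_birth == 1963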

View file

@@ -0,0 +1,31 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": [],
"encoding_format": "base64"
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,316 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What's the name of the Sun in latin?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "The Latin",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " Sun",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " \"",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "Sol",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "\".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 11,
"prompt_tokens": 20,
"total_tokens": 31,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Hello, world!",
"logprobs": false,
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-74",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Hello! How can I assist you today?"
}
],
"created": 1758975636,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 29,
"total_tokens": 39,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
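
The recording above can be reproduced with the stock openai Python client pointed at Ollama's OpenAI-compatible endpoint. A minimal sketch, assuming the server from the recorded URL (http://0.0.0.0:11434/v1) and that the API key is unused but must be non-empty:

from openai import OpenAI

# Assumed local Ollama endpoint from the recording; the key is a placeholder.
client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")

completion = client.completions.create(
    model="llama3.2:3b-instruct-fp16",
    prompt="Hello, world!",
    stream=False,
)

print(completion.choices[0].text)     # e.g. "Hello! How can I assist you today?"
print(completion.usage.total_tokens)  # mirrors the recorded "usage" block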

File diff suppressed because one or more lines are too long

View file

@@ -0,0 +1,92 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": false,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-761",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "call_cj8ownwc",
"function": {
"arguments": "{\"location\":\"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function",
"index": 0
}
]
}
}
],
"created": 1758975113,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 18,
"prompt_tokens": 185,
"total_tokens": 203,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
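
The tool-calling request above maps directly onto the client's chat API; when the model elects to call the function, the arguments come back as a JSON string on message.tool_calls and finish_reason is "tool_calls". A sketch under the same assumed local setup:

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")  # assumed local setup

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state (both required), e.g. San Francisco, CA.",
                }
            },
            "required": ["location"],
        },
    },
}]

resp = client.chat.completions.create(
    model="llama3.2:3b-instruct-fp16",
    messages=[
        {"role": "system", "content": "Pretend you are a weather assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco, CA?"},
    ],
    tools=tools,
    tool_choice="auto",
    stream=False,
)

# The model returns the call rather than prose; arguments arrive as a JSON string.
call = resp.choices[0].message.tool_calls[0]
print(call.function.name)       # "get_weather"
print(call.function.arguments)  # '{"location":"San Francisco, CA"}'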

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Hello, world!",
"logprobs": true,
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-809",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Hello! It's nice to meet you. Is there anything I can help you with or would you like to chat?"
}
],
"created": 1758975633,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 25,
"prompt_tokens": 29,
"total_tokens": 54,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
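
This recording passes logprobs as a boolean and the backend answers with "logprobs": null, so callers should not assume the field is populated (the upstream OpenAI completions API documents logprobs as an integer count of alternatives). A defensive sketch under the same assumed local setup:

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")  # assumed local setup

completion = client.completions.create(
    model="llama3.2:3b-instruct-fp16",
    prompt="Hello, world!",
    logprobs=True,  # sent verbatim as in the recording
    stream=False,
)

choice = completion.choices[0]
# As the recording shows, this backend returns logprobs as null, so guard for it.
if choice.logprobs is not None:
    print(choice.logprobs.token_logprobs)
else:
    print("no logprobs returned:", choice.text)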

View file

@@ -0,0 +1,550 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What is the name of the US captial?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": "The name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " capital",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " Washington",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " D",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ".C",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " (",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": "short",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " District",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " Columbia",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ").",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 20,
"prompt_tokens": 20,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
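
The 550-line recording above is the chunk-by-chunk transcript of a single streaming call: each delta carries a token or two, and the final chunk pairs finish_reason "stop" with the usage block. A sketch that replays it, assuming the Fireworks key lives in FIREWORKS_API_KEY (the prompt is copied verbatim from the recording, typo included):

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=os.environ["FIREWORKS_API_KEY"],  # env var name is an assumption
)

stream = client.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p1-8b-instruct",
    messages=[{"role": "user", "content": "What is the name of the US captial?"}],
    stream=True,
)

# Rebuild the assistant message by concatenating the content deltas.
parts = []
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        parts.append(delta.content)
    if chunk.usage:  # in this recording, usage rides on the final "stop" chunk
        print("total tokens:", chunk.usage.total_tokens)
print("".join(parts))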

View file

@@ -0,0 +1,60 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-123",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! As of my knowledge cutoff on December 15th, I have the latest information for you. However, please note that my data may not be entirely up-to-date.\n\nCurrently, and based on historical climate patterns, it appears to be a partly cloudy day with mild temperatures in San Francisco, CA. Expect a temperature range of around 48\u00b0F (9\u00b0C) to 54\u00b0F (12\u00b0C). It's likely to be a breezy day, with winds blowing at about 13 mph (21 km/h).\n\nHowever, if I were to look into more recent weather patterns or forecasts, I would recommend checking the latest conditions directly from reliable sources such as the National Weather Service or local news outlets for more accurate and up-to-date information.\n\nPlease let me know how I can further assist you.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978071,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 163,
"prompt_tokens": 45,
"total_tokens": 208,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": "Test dimensions parameter",
"encoding_format": "float",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.9296875,
5.1875,
-2.140625,
0.171875,
-2.25,
-0.8359375,
-0.828125,
1.15625,
2.328125,
-1.0078125,
-3.0,
4.09375,
0.8359375,
0.1015625,
2.015625,
-1.0859375
],
"index": 0,
"object": "embedding",
"raw_output": null
}
],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}
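
The dimensions parameter in this recording asks the server to return a 16-component embedding. A sketch of the call, assuming the Fireworks key lives in FIREWORKS_API_KEY:

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=os.environ["FIREWORKS_API_KEY"],  # env var name is an assumption
)

resp = client.embeddings.create(
    model="accounts/fireworks/models/qwen3-embedding-8b",
    input="Test dimensions parameter",
    dimensions=16,              # server returns a 16-component vector
    encoding_format="float",
)

vector = resp.data[0].embedding
assert len(vector) == 16
print(vector[:4], resp.usage.prompt_tokens)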

View file

@@ -0,0 +1,39 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": "Test dimensions parameter",
"encoding_format": "base64",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": "AABuvwAApkAAAAnAAAAwPgAAEMAAAFa/AABUvwAAlD8AABVAAACBvwAAQMAAAINAAABWPwAA0D0AAAFAAACLvw==",
"index": 0,
"object": "embedding",
"raw_output": null
}
],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}
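
With encoding_format "base64" the server returns the same vector as the float recording above, just packed: the string is base64 over little-endian float32 values. A decoding sketch (the payload is copied from the response; decoding its first component reproduces -0.9296875 from the float recording):

import base64
import struct

payload = ("AABuvwAApkAAAAnAAAAwPgAAEMAAAFa/AABUvwAAlD8AABVAAACBvwAAQMAAAINA"
           "AABWPwAA0D0AAAFAAACLvw==")
raw = base64.b64decode(payload)
vector = struct.unpack(f"<{len(raw) // 4}f", raw)  # little-endian float32

assert len(vector) == 16        # matches the requested dimensions
assert vector[0] == -0.9296875  # first component of the float-format recording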

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "1d64ff81-b7c4-40c6-9509-cca71759da3e",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920401,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,74 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "0fe94e7d-f25b-4843-ba0a-e402e0764830",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I can\u2019t help with that. If you're looking for current weather information, I recommend checking a weather website or app, such as AccuWeather or Weather.com. Is there anything else I can help you with?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920402,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 45,
"prompt_tokens": 27,
"total_tokens": 72,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,55 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace 0"
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-272",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I'm happy to help you with a test. Since we are in the middle of a text-based conversation, I'll do my best to simulate a simple test tracing process.\n\n**Trace Test Results**\n\nTo perform this test, please follow these steps:\n\n1. Type \"test\" on command mode.\n2. Press Enter.\n\nNow, let's start tracing...\n\nTest Tracing Results:\nTest Case: General Functions\nTest Case Result: PASS\n\nSystem Response:\n\n```\n# System Boot Time: 2023-10-13T14:30:00\n# CPU Temperature: 35\u00b0C\n# Disk Space Available: 80%\n```\n\nNext Steps?\n\nType 'done' to exit the test, or 'run' for more tests.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978134,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 152,
"prompt_tokens": 29,
"total_tokens": 181,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
"stop": "1963",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-183",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Michael Jordan was born in the year of "
}
],
"created": 1758978053,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 48,
"total_tokens": 59,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
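
This recording exercises the stop parameter: generation halts before the stop string "1963" is emitted, so the returned text ends at "the year of ". A sketch under the same assumed local setup (the prompt is copied verbatim from the recording):

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")  # assumed local setup

completion = client.completions.create(
    model="llama3.2:3b-instruct-fp16",
    prompt=("Return the exact same sentence and don't add additional words): "
            "Michael Jordan was born in the year of 1963"),
    stop="1963",  # a list of strings also works, as in the companion recording below
    stream=False,
)

# The stop string itself is not included in the output.
print(completion.choices[0].text)  # "Michael Jordan was born in the year of "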

File diff suppressed because it is too large

View file

@@ -0,0 +1,112 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": true,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_wubm4yax",
"function": {
"arguments": "{\"location\":\"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758975115,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 1758975115,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}
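
When tool calls are streamed, fragments arrive keyed by index on delta.tool_calls and must be merged; this particular recording delivers the whole call in one chunk, but the general accumulation pattern is sketched below (same assumed local setup, same get_weather schema as the non-streaming recording):

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")  # assumed local setup

tools = [{"type": "function", "function": {
    "name": "get_weather",
    "description": "Get the current weather",
    "parameters": {
        "type": "object",
        "properties": {"location": {
            "type": "string",
            "description": "The city and state (both required), e.g. San Francisco, CA.",
        }},
        "required": ["location"],
    },
}}]

stream = client.chat.completions.create(
    model="llama3.2:3b-instruct-fp16",
    messages=[
        {"role": "system", "content": "Pretend you are a weather assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco, CA?"},
    ],
    tools=tools,
    tool_choice="auto",
    stream=True,
)

# Merge name/argument fragments per tool-call index until finish_reason fires.
calls = {}
for chunk in stream:
    choice = chunk.choices[0]
    for tc in choice.delta.tool_calls or []:
        slot = calls.setdefault(tc.index, {"name": "", "arguments": ""})
        if tc.function.name:
            slot["name"] += tc.function.name
        if tc.function.arguments:
            slot["arguments"] += tc.function.arguments
    if choice.finish_reason == "tool_calls":
        break

print(calls)  # {0: {'name': 'get_weather', 'arguments': '{"location":"San Francisco, CA"}'}}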

View file

@@ -0,0 +1,47 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
"stop": [
"blathering",
"1963"
],
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-381",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Michael Jordan was born in the year of "
}
],
"created": 1758978056,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 48,
"total_tokens": 59,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,55 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace 1"
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-122",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "It appears you're trying to initiate a conversation or test the functionality of this AI system. I'm happy to chat with you!\n\nWould you like to:\nA) Ask me a question on a specific topic\nB) Engage in a conversational dialogue on a topic of your choice\nC) Play a text-based game\nD) Test my language understanding capabilities\n\nPlease respond with the letter of your preferred activity.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978142,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 85,
"prompt_tokens": 29,
"total_tokens": 114,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,43 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "1bbb8db5-63e5-40cd-8ffe-59e0e88bf8f0",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": "4. At the beginning of the year, a woman has $5,000"
}
],
"created": 1758920353,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 25,
"total_tokens": 41,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -31,14 +31,14 @@
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-489",
"id": "chatcmpl-51",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "The image is a photograph of a young golden retriever puppy from the chest up. The puppy faces the camera and its tongue is out of its mouth, as if to say hello. It appears to be between 1 and 3 months of age. It is a fluffy little golden retriever puppy with very little fat. Its fur is light blond and very fluffy. It has a small, round black nose. It is in front of a blurry background of warm yellows and greys.",
"content": "The image features a close-up of a golden retriever puppy with its mouth agape. The puppy has cream-color fur with golden patches on its big ears, which are held slightly out to the sides. Its dark eyes appear black from across a great distance, while the black nose is surrounded by white fur. The puppy's mouth is wide open, revealing a healthy pink tongue and what appears to be a green leaf (likely a blade of grass) stuck in its mouth. The puppy is facing the camera directly, with its paws pressed up tight against its body. The puppy is sitting in an open field with a golden brown grass carpet. The puppy appears happy. The image is well-compressed with great digital sharpness.",
"refusal": null,
"role": "assistant",
"annotations": null,
@@ -48,15 +48,15 @@
}
}
],
"created": 1758461767,
"created": 1756724768,
"model": "llama3.2-vision:11b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 100,
"completion_tokens": 147,
"prompt_tokens": 18,
"total_tokens": 118,
"total_tokens": 165,
"completion_tokens_details": null,
"prompt_tokens_details": null
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet has rings around it with a name starting with letter S?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "26632ea9-3481-419d-bc0d-83c177257bc4",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "There are two planets in our solar system with ring systems that have names starting with the letter S:\n\n1. **Saturn** - Its ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice and rock particles that range in size from tiny dust grains to massive boulders.\n2. **Saturn's moon** - The ring system of **Saturn's moon, Rhea**, is sometimes referred to as a \"ring system\" even though it's much smaller and less prominent than Saturn's. However, it's worth noting that Rhea's ring system is not as well-known as Saturn's.\n\nIf you're looking for a planet with a ring system that starts with the letter S and is not a moon, then the answer is Saturn!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920397,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 164,
"prompt_tokens": 24,
"total_tokens": 188,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,706 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "Hello!",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " It",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "'s",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " nice",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " meet",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " Is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " there",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " something",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " I",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " can",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " help",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " or",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " would",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " like",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " chat",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,996 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"max_tokens": 50,
"stream": true,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " __________________"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "_____"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "##"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Step"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " "
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "1"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Identify"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " flower"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " mentioned"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " in"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sentence"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "The"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sentence"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " mentions"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " \""
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "vio"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\"\n\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "##"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Step"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " "
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "2"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Determine"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " flower"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " v"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "V"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": ""
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 50,
"prompt_tokens": 25,
"total_tokens": 75,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet do humans live on?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "0fd60cd7-dc72-45b7-808c-4da91de80093",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Humans live on a planet called Earth.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920388,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 9,
"prompt_tokens": 17,
"total_tokens": 26,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,527 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-dev-fp8",
"created": 1729532889,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
"created": 1743381121,
"object": "model",
"owned_by": "tvergho-87e44d",
"kind": "HF_PEFT_ADDON",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-max",
"created": 1750714611,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-pro",
"created": 1750488264,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
"created": 1748467427,
"object": "model",
"owned_by": "sentientfoundation-serverless",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3",
"created": 1735576668,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
"created": 1739563474,
"object": "model",
"owned_by": "sentientfoundation",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-120b",
"created": 1754345600,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
"created": 1753211090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
"created": 1753916446,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
"created": 1753124424,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
"created": 1753455434,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-embedding-8b",
"created": 1755707090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 40960
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3-0324",
"created": 1742827220,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
"created": 1758586241,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct",
"created": 1752259096,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-20b",
"created": 1754345466,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"created": 1743878495,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
"created": 1754063588,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"created": 1733442103,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
"created": 1743392739,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false,
"context_length": 128000
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b",
"created": 1745885249,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5-air",
"created": 1754089426,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1",
"created": 1737397673,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"created": 1721692808,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-basic",
"created": 1742306746,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1",
"created": 1755758988,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
"created": 1729535376,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5",
"created": 1753809636,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
"created": 1757018994,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
"created": 1721428386,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
"created": 1743878279,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b",
"created": 1745878133,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
"created": 1721287357,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-0528",
"created": 1748456377,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
"created": 1713375508,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 65536
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
"created": 1753808388,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
}
],
"is_streaming": false
}
}

View file

@ -127,9 +127,8 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
name="fireworks",
description="Fireworks provider with a text model",
defaults={
"text_model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"vision_model": "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
"embedding_model": "nomic-ai/nomic-embed-text-v1.5",
"text_model": "fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct",
"embedding_model": "fireworks/accounts/fireworks/models/qwen3-embedding-8b",
},
),
}

View file

@ -32,8 +32,8 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
)
for i in range(2):
llama_stack_client.inference.chat_completion(
model_id=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
llama_stack_client.chat.completions.create(
model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
)
start_time = time.time()

View file

@ -83,12 +83,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -116,12 +123,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -162,12 +176,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -192,66 +213,6 @@
]
}
},
"array_parameter": {
"data": {
"messages": [
[
{
"role": "user",
"content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
}
]
],
"tools": [
{
"tool_name": "addProduct",
"description": "Get the current weather",
"parameters": {
"name": {
"param_type": "string",
"description": "Name of the product"
},
"price": {
"param_type": "number",
"description": "Price of the product"
},
"inStock": {
"param_type": "boolean",
"description": "Availability status of the product."
},
"tags": {
"param_type": "list[str]",
"description": "List of product tags"
}
}
}
],
"tool_responses": [
{
"response": "{'response': 'Successfully added product with id: 123'}"
}
],
"expected": [
{
"num_tool_calls": 1,
"tool_name": "addProduct",
"tool_arguments": {
"name": "Widget",
"price": 19.99,
"inStock": true,
"tags": [
"new",
"sale"
]
}
},
{
"num_tool_calls": 0,
"answer": "123"
}
]
}
},
"sample_messages_tool_calling": {
"data": {
"messages": [
@ -270,13 +231,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state, e.g. San Francisco, CA",
"required": true
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -343,18 +310,23 @@
],
"tools": [
{
"tool_name": "get_object_namespace_list",
"description": "Get the list of objects in a namespace",
"parameters": {
"kind": {
"param_type": "string",
"description": "the type of object",
"required": true
},
"namespace": {
"param_type": "string",
"description": "the name of the namespace",
"required": true
"type": "function",
"function": {
"name": "get_object_namespace_list",
"description": "Get the list of objects in a namespace",
"parameters": {
"type": "object",
"properties": {
"kind": {
"type": "string",
"description": "the type of object"
},
"namespace": {
"type": "string",
"description": "the name of the namespace"
}
},
"required": ["kind", "namespace"]
}
}
}