Merge branch 'main' into store_registeration_bug_fix

Omar Abdelwahab 2025-09-16 18:18:56 -07:00 committed by GitHub
commit 43a2600158
58 changed files with 21962 additions and 343 deletions

View file

@ -1380,6 +1380,40 @@
}
}
]
},
"delete": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Benchmarks"
],
"description": "Unregister a benchmark.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to unregister.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/openai/v1/chat/completions/{completion_id}": {
@ -1620,6 +1654,40 @@
}
}
]
},
"delete": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"ScoringFunctions"
],
"description": "Unregister a scoring function.",
"parameters": [
{
"name": "scoring_fn_id",
"in": "path",
"description": "The ID of the scoring function to unregister.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/shields/{identifier}": {

View file

@ -954,6 +954,30 @@ paths:
required: true
schema:
type: string
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
description: Unregister a benchmark.
parameters:
- name: benchmark_id
in: path
description: The ID of the benchmark to unregister.
required: true
schema:
type: string
/v1/openai/v1/chat/completions/{completion_id}:
get:
responses:
@ -1119,6 +1143,31 @@ paths:
required: true
schema:
type: string
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
description: Unregister a scoring function.
parameters:
- name: scoring_fn_id
in: path
description: >-
The ID of the scoring function to unregister.
required: true
schema:
type: string
/v1/shields/{identifier}:
get:
responses:

View file

@ -0,0 +1,94 @@
# Llama Stack API Stability Leveling
In order to provide a stable experience in Llama Stack, the various APIs need different stability levels indicating the level of support, backwards compatibility, and overall production readiness.
## Different Levels
### v1alpha
- Little to no expectation of support between versions
- Breaking changes are permitted
- Datatypes and parameters can break
- Routes can be added and removed
#### Graduation Criteria
- An API can graduate from `v1alpha` to `v1beta` once the team has identified the extent of the non-optional routes and the shape of their parameters/return types for the API, e.g. `/v1/openai/chat/completions`. Optional types can change.
- CRUD must stay stable once in `v1beta`. This is a commitment to backward compatibility, guaranteeing that most code you write against the v1beta version will not break during future updates. We may make additive changes (like adding a new, optional field to a response), but we will not make breaking changes (like renaming an existing "modelName" field to "name", changing an ID's data type from an integer to a string, or altering an endpoint URL); see the sketch after this list.
- For OpenAI-compatible APIs, a comparison against the upstream OpenAI spec for the specific API can be done to ensure completeness.
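For illustration, a minimal sketch of the additive-vs-breaking distinction using hypothetical pydantic models (the field names are invented for this example, not taken from an actual API):

```python
from pydantic import BaseModel


class ModelInfo(BaseModel):
    """Hypothetical v1beta response shape."""

    modelName: str  # renaming this to `name` would be a breaking change
    id: int  # changing this to `str` would be a breaking change


class ModelInfoNext(ModelInfo):
    """An additive, backward-compatible evolution of the same shape."""

    description: str | None = None  # new optional field: allowed
```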
### v1beta
- API routes remain consistent between versions
- Parameters and return types are not guaranteed between versions
- The API, besides minor fixes and adjustments, should be _almost_ v1; changes should not be drastic.
#### Graduation Criteria
- An API can graduate from `v1beta` to `v1` once the API surface and datatypes are complete, as identified by the team, and the parameters and return types that are mandatory for each route are stable. All aspects of graduating from `v1alpha` to `v1beta` apply as well.
- Optional parameters, routes, or parts of the return type can be added after graduating to `v1`
### v1 (stable)
- Considered stable
- Backwards compatible between Z-streams
- Y-stream breaking changes must go through the proper approval and announcement process.
- Datatypes for a route and its return types cannot change between Z-streams
- Y-stream datatype changes should be made sparingly, unless the changes are additive, net-new parameters
- Must have proper conformance testing as outlined in https://github.com/llamastack/llama-stack/issues/3237
### v2+ (Major Versions)
Introducing a new major version like `/v2` is a significant and disruptive event that should be treated as a last resort. It is reserved for essential changes to a stable `/v1` API that are fundamentally backward-incompatible and cannot be implemented through additive, non-breaking changes or through the approved breaking-change process for X/Y-stream releases (x.y.z).
If a `/v2` version is deemed absolutely necessary, it must adhere to the following protocol to ensure a sane and predictable transition for users:
#### Lifecycle Progression
A new major version must follow the same stability lifecycle as `/v1`. It will be introduced as `/v2alpha`, mature to `/v2beta`, and finally become stable as `/v2`.
#### Coexistence
The new `/v2` API must be introduced alongside the existing `/v1` API and run in parallel. It must not replace the `/v1` API immediately.
#### Deprecation Policy
When a `/v2` API is introduced, a clear and generous deprecation policy for the `/v1` API must be published simultaneously. This policy must outline the timeline for the eventual removal of the `/v1` API, giving users ample time to migrate.
### API Stability vs. Provider Stability
The leveling introduced in this document relates to the stability of the API and not specifically the providers within the API.
Providers can iterate as much as they want on functionality as long as they work within the bounds of an API. If they need to change the API, then the API should not be `/v1`, or those breaking changes can only happen on a Y-stream release.
### Approval and Announcement Process for Breaking Changes
- **PR Labeling**: Any pull request that introduces a breaking API change must be clearly labeled with `breaking-change`.
- **PR Title/Commit**: Any pull request that introduces a breaking API change must contain `BREAKING CHANGE` in the title and commit footer. Alternatively, the commit can include `!`, e.g. `feat(api)!: title goes here`. This is outlined in the [conventional commits documentation](https://www.conventionalcommits.org/en/v1.0.0/#specification).
- **Maintainer Review**: At least one maintainer must explicitly acknowledge the breaking change during review by applying the `breaking-change` label. An approval must come with this label or an acknowledgement that the label has already been applied.
- **Announcement**: Breaking changes require inclusion in release notes and, if applicable, a separate communication (e.g., Discord, GitHub Issues, or GitHub Discussions) prior to release.
If a PR has proper approvals, labels, and commit/title hygiene, any failing API conformance tests can be bypassed.
## Enforcement
### Migration of API routes under `/v1alpha`, `/v1beta`, and `/v1`
Instead of placing every API under `/v1`, any API that is not fully stable or complete should go under `/v1alpha` or `/v1beta`. For example, at the time of this writing, `post_training` belongs here, as well as any OpenAI-compatible API whose surface does not exactly match the upstream OpenAI API it mimics.
This migration is crucial as we get Llama Stack in the hands of users who intend to productize various APIs. A clear view of what is stable and what is actively being developed will enable users to pick and choose various APIs to build their products on.
This migration will be a breaking change for any API moving out of `/v1`. Ideally, this should happen before 0.3.0 and especially 1.0.0.
### `x-stability` tags in the OpenAPI spec for oasdiff
`x-stability` tags allow tools like oasdiff to enforce different rules for different stability levels; these tags should match the routes: [oasdiff stability](https://github.com/oasdiff/oasdiff/blob/main/docs/STABILITY.md)
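A hypothetical excerpt showing how such tags could look (per oasdiff's docs the extension is `x-stability-level`, with values like `alpha`, `beta`, and `stable`; confirm the exact name and values against the linked documentation):

```yaml
paths:
  /v1alpha/post-training/jobs:
    get:
      x-stability-level: alpha   # oasdiff relaxes breaking-change checks here
  /v1/models:
    get:
      x-stability-level: stable  # breaking changes here fail oasdiff checks
```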
### Testing
The testing of each stable API is already outlined in [issue #3237](https://github.com/llamastack/llama-stack/issues/3237) and is being worked on. These conformance tests should apply primarily to `/v1` APIs; `/v1alpha` and `/v1beta` APIs need whatever tests the maintainers see fit, plus basic tests to ensure routing works properly.
### New APIs going forward
Any subsequently introduced API should be introduced as `/v1alpha`.

View file

@ -9,8 +9,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key, only needed of using the hosted service |
| `project_id` | `str \| None` | No | | The Project ID key, only needed of using the hosted service |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
| `project_id` | `str \| None` | No | | The Project ID key |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
## Sample Configuration

View file

@ -93,3 +93,11 @@ class Benchmarks(Protocol):
:param metadata: The metadata to use for the benchmark.
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark.
:param benchmark_id: The ID of the benchmark to unregister.
"""
...
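A minimal usage sketch for the new route, assuming a Llama Stack server on the default `localhost:8321`, that the route is served under the `/v1` prefix as in the OpenAPI spec above, and an already-registered benchmark ID (invented here):

```python
import requests

resp = requests.delete("http://localhost:8321/v1/eval/benchmarks/my-benchmark")
resp.raise_for_status()  # the spec above documents 200 OK on success
```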

View file

@ -197,3 +197,11 @@ class ScoringFunctions(Protocol):
:param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval.
"""
...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
"""Unregister a scoring function.
:param scoring_fn_id: The ID of the scoring function to unregister.
"""
...
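The `{scoring_fn_id:path}` converter is presumably there to let the router match IDs containing slashes or other separator characters as a single parameter. A sketch under the same assumptions as the benchmark example above, with an invented ID:

```python
import requests

# e.g. an ID in provider::name form
resp = requests.delete("http://localhost:8321/v1/scoring-functions/llm-as-judge::base")
resp.raise_for_status()
```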

View file

@ -56,3 +56,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
provider_resource_id=provider_benchmark_id,
)
await self.register_object(benchmark)
async def unregister_benchmark(self, benchmark_id: str) -> None:
existing_benchmark = await self.get_benchmark(benchmark_id)
await self.unregister_object(existing_benchmark)

View file

@ -64,6 +64,10 @@ async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
return await p.unregister_shield(obj.identifier)
elif api == Api.datasetio:
return await p.unregister_dataset(obj.identifier)
elif api == Api.eval:
return await p.unregister_benchmark(obj.identifier)
elif api == Api.scoring:
return await p.unregister_scoring_function(obj.identifier)
elif api == Api.tool_runtime:
return await p.unregister_toolgroup(obj.identifier)
else:

View file

@ -60,3 +60,7 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
)
scoring_fn.provider_id = provider_id
await self.register_object(scoring_fn)
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
existing_scoring_fn = await self.get_scoring_function(scoring_fn_id)
await self.unregister_object(existing_scoring_fn)

View file

@ -10,6 +10,7 @@ apis:
- telemetry
- tool_runtime
- vector_io
- files
providers:
inference:
- provider_id: watsonx
@ -94,6 +95,14 @@ providers:
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db

View file

@ -9,6 +9,7 @@ from pathlib import Path
from llama_stack.apis.models import ModelType
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -16,7 +17,7 @@ from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
def get_distribution_template() -> DistributionTemplate:
def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
providers = {
"inference": [
BuildProvider(provider_type="remote::watsonx"),
@ -42,6 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
BuildProvider(provider_type="inline::rag-runtime"),
BuildProvider(provider_type="remote::model-context-protocol"),
],
"files": [BuildProvider(provider_type="inline::localfs")],
}
inference_provider = Provider(
@ -79,9 +81,14 @@ def get_distribution_template() -> DistributionTemplate:
},
)
files_provider = Provider(
provider_id="meta-reference-files",
provider_type="inline::localfs",
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
default_models, _ = get_model_registry(available_models)
return DistributionTemplate(
name="watsonx",
name=name,
distro_type="remote_hosted",
description="Use watsonx for running LLM inference",
container_image=None,
@ -92,6 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider, embedding_provider],
"files": [files_provider],
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,

View file

@ -75,6 +75,13 @@ class MetaReferenceEvalImpl(
)
self.benchmarks[task_def.identifier] = task_def
async def unregister_benchmark(self, benchmark_id: str) -> None:
if benchmark_id in self.benchmarks:
del self.benchmarks[benchmark_id]
key = f"{EVAL_TASKS_PREFIX}{benchmark_id}"
await self.kvstore.delete(key)
async def run_eval(
self,
benchmark_id: str,

View file

@ -63,6 +63,9 @@ class LlmAsJudgeScoringImpl(
async def register_scoring_function(self, function_def: ScoringFn) -> None:
self.llm_as_judge_fn.register_scoring_fn_def(function_def)
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
self.llm_as_judge_fn.unregister_scoring_fn_def(scoring_fn_id)
async def score_batch(
self,
dataset_id: str,

View file

@ -51,18 +51,23 @@ class NVIDIAEvalImpl(
async def shutdown(self) -> None: ...
async def _evaluator_get(self, path):
async def _evaluator_get(self, path: str):
"""Helper for making GET requests to the evaluator service."""
response = requests.get(url=f"{self.config.evaluator_url}{path}")
response.raise_for_status()
return response.json()
async def _evaluator_post(self, path, data):
async def _evaluator_post(self, path: str, data: dict[str, Any]):
"""Helper for making POST requests to the evaluator service."""
response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data)
response.raise_for_status()
return response.json()
async def _evaluator_delete(self, path: str) -> None:
"""Helper for making DELETE requests to the evaluator service."""
response = requests.delete(url=f"{self.config.evaluator_url}{path}")
response.raise_for_status()
async def register_benchmark(self, task_def: Benchmark) -> None:
"""Register a benchmark as an evaluation configuration."""
await self._evaluator_post(
@ -75,6 +80,10 @@ class NVIDIAEvalImpl(
},
)
async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark evaluation configuration from NeMo Evaluator."""
await self._evaluator_delete(f"/v1/evaluation/configs/{DEFAULT_NAMESPACE}/{benchmark_id}")
async def run_eval(
self,
benchmark_id: str,

View file

@ -8,6 +8,7 @@
from collections.abc import AsyncGenerator
from huggingface_hub import AsyncInferenceClient, HfApi
from pydantic import SecretStr
from llama_stack.apis.common.content_types import (
InterleavedContent,
@ -33,6 +34,7 @@ from llama_stack.apis.inference import (
ToolPromptFormat,
)
from llama_stack.apis.models import Model
from llama_stack.apis.models.models import ModelType
from llama_stack.log import get_logger
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate
@ -41,16 +43,15 @@ from llama_stack.providers.utils.inference.model_registry import (
build_hf_repo_model_entry,
)
from llama_stack.providers.utils.inference.openai_compat import (
OpenAIChatCompletionToLlamaStackMixin,
OpenAICompatCompletionChoice,
OpenAICompatCompletionResponse,
OpenAICompletionToLlamaStackMixin,
get_sampling_options,
process_chat_completion_response,
process_chat_completion_stream_response,
process_completion_response,
process_completion_stream_response,
)
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_model_input_info,
completion_request_to_prompt_model_input_info,
@ -73,26 +74,49 @@ def build_hf_repo_model_entries():
class _HfAdapter(
OpenAIMixin,
Inference,
OpenAIChatCompletionToLlamaStackMixin,
OpenAICompletionToLlamaStackMixin,
ModelsProtocolPrivate,
):
client: AsyncInferenceClient
url: str
api_key: SecretStr
hf_client: AsyncInferenceClient
max_tokens: int
model_id: str
overwrite_completion_id = True # TGI always returns id=""
def __init__(self) -> None:
self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
self.huggingface_repo_to_llama_model_id = {
model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
}
def get_api_key(self):
return self.api_key.get_secret_value()
def get_base_url(self):
return self.url
async def shutdown(self) -> None:
pass
async def list_models(self) -> list[Model] | None:
models = []
async for model in self.client.models.list():
models.append(
Model(
identifier=model.id,
provider_resource_id=model.id,
provider_id=self.__provider_id__,
metadata={},
model_type=ModelType.llm,
)
)
return models
async def register_model(self, model: Model) -> Model:
model = await self.register_helper.register_model(model)
if model.provider_resource_id != self.model_id:
raise ValueError(
f"Model {model.provider_resource_id} does not match the model {self.model_id} served by TGI."
@ -176,7 +200,7 @@ class _HfAdapter(
params = await self._get_params_for_completion(request)
async def _generate_and_convert_to_openai_compat():
s = await self.client.text_generation(**params)
s = await self.hf_client.text_generation(**params)
async for chunk in s:
token_result = chunk.token
finish_reason = None
@ -194,7 +218,7 @@ class _HfAdapter(
async def _nonstream_completion(self, request: CompletionRequest) -> AsyncGenerator:
params = await self._get_params_for_completion(request)
r = await self.client.text_generation(**params)
r = await self.hf_client.text_generation(**params)
choice = OpenAICompatCompletionChoice(
finish_reason=r.details.finish_reason,
@ -241,7 +265,7 @@ class _HfAdapter(
async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
params = await self._get_params(request)
r = await self.client.text_generation(**params)
r = await self.hf_client.text_generation(**params)
choice = OpenAICompatCompletionChoice(
finish_reason=r.details.finish_reason,
@ -256,7 +280,7 @@ class _HfAdapter(
params = await self._get_params(request)
async def _generate_and_convert_to_openai_compat():
s = await self.client.text_generation(**params)
s = await self.hf_client.text_generation(**params)
async for chunk in s:
token_result = chunk.token
@ -308,18 +332,21 @@ class TGIAdapter(_HfAdapter):
if not config.url:
raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
log.info(f"Initializing TGI client with url={config.url}")
self.client = AsyncInferenceClient(model=config.url, provider="hf-inference")
endpoint_info = await self.client.get_endpoint_info()
self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference")
endpoint_info = await self.hf_client.get_endpoint_info()
self.max_tokens = endpoint_info["max_total_tokens"]
self.model_id = endpoint_info["model_id"]
self.url = f"{config.url.rstrip('/')}/v1"
self.api_key = SecretStr("NO_KEY")
class InferenceAPIAdapter(_HfAdapter):
async def initialize(self, config: InferenceAPIImplConfig) -> None:
self.client = AsyncInferenceClient(model=config.huggingface_repo, token=config.api_token.get_secret_value())
endpoint_info = await self.client.get_endpoint_info()
self.hf_client = AsyncInferenceClient(model=config.huggingface_repo, token=config.api_token.get_secret_value())
endpoint_info = await self.hf_client.get_endpoint_info()
self.max_tokens = endpoint_info["max_total_tokens"]
self.model_id = endpoint_info["model_id"]
# TODO: how do we set url for this?
class InferenceEndpointAdapter(_HfAdapter):
@ -331,6 +358,7 @@ class InferenceEndpointAdapter(_HfAdapter):
endpoint.wait(timeout=60)
# Initialize the adapter
self.client = endpoint.async_client
self.hf_client = endpoint.async_client
self.model_id = endpoint.repository
self.max_tokens = int(endpoint.raw["model"]["image"]["custom"]["env"]["MAX_TOTAL_TOKENS"])
# TODO: how do we set url for this?

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.models import ModelType
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry,
@ -21,7 +20,47 @@ SAFETY_MODELS_ENTRIES = [
CoreModelId.llama_guard_3_11b_vision.value,
),
]
MODEL_ENTRIES = [
# source: https://docs.together.ai/docs/serverless-models#embedding-models
EMBEDDING_MODEL_ENTRIES = {
"togethercomputer/m2-bert-80M-32k-retrieval": ProviderModelEntry(
provider_model_id="togethercomputer/m2-bert-80M-32k-retrieval",
metadata={
"embedding_dimension": 768,
"context_length": 32768,
},
),
"BAAI/bge-large-en-v1.5": ProviderModelEntry(
provider_model_id="BAAI/bge-large-en-v1.5",
metadata={
"embedding_dimension": 1024,
"context_length": 512,
},
),
"BAAI/bge-base-en-v1.5": ProviderModelEntry(
provider_model_id="BAAI/bge-base-en-v1.5",
metadata={
"embedding_dimension": 768,
"context_length": 512,
},
),
"Alibaba-NLP/gte-modernbert-base": ProviderModelEntry(
provider_model_id="Alibaba-NLP/gte-modernbert-base",
metadata={
"embedding_dimension": 768,
"context_length": 8192,
},
),
"intfloat/multilingual-e5-large-instruct": ProviderModelEntry(
provider_model_id="intfloat/multilingual-e5-large-instruct",
metadata={
"embedding_dimension": 1024,
"context_length": 512,
},
),
}
MODEL_ENTRIES = (
[
build_hf_repo_model_entry(
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
CoreModelId.llama3_1_8b_instruct.value,
@ -50,22 +89,6 @@ MODEL_ENTRIES = [
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
CoreModelId.llama3_3_70b_instruct.value,
),
ProviderModelEntry(
provider_model_id="togethercomputer/m2-bert-80M-8k-retrieval",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 768,
"context_length": 8192,
},
),
ProviderModelEntry(
provider_model_id="togethercomputer/m2-bert-80M-32k-retrieval",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 768,
"context_length": 32768,
},
),
build_hf_repo_model_entry(
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
CoreModelId.llama4_scout_17b_16e_instruct.value,
@ -74,4 +97,7 @@ MODEL_ENTRIES = [
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
CoreModelId.llama4_maverick_17b_128e_instruct.value,
),
] + SAFETY_MODELS_ENTRIES
]
+ SAFETY_MODELS_ENTRIES
+ list(EMBEDDING_MODEL_ENTRIES.values())
)

View file

@ -4,11 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import AsyncGenerator, AsyncIterator
from typing import Any
from collections.abc import AsyncGenerator
from openai import AsyncOpenAI
from openai import NOT_GIVEN, AsyncOpenAI
from together import AsyncTogether
from together.constants import BASE_URL
from llama_stack.apis.common.content_types import (
InterleavedContent,
@ -23,12 +23,7 @@ from llama_stack.apis.inference import (
Inference,
LogProbConfig,
Message,
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingsResponse,
OpenAIMessageParam,
OpenAIResponseFormatParam,
ResponseFormat,
ResponseFormatType,
SamplingParams,
@ -38,18 +33,20 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
from llama_stack.apis.models import Model, ModelType
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import (
convert_message_to_openai_dict,
get_sampling_options,
prepare_openai_completion_params,
process_chat_completion_response,
process_chat_completion_stream_response,
process_completion_response,
process_completion_stream_response,
)
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_prompt,
completion_request_to_prompt,
@ -59,15 +56,22 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
)
from .config import TogetherImplConfig
from .models import MODEL_ENTRIES
from .models import EMBEDDING_MODEL_ENTRIES, MODEL_ENTRIES
logger = get_logger(name=__name__, category="inference::together")
class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
class TogetherInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, NeedsRequestProviderData):
def __init__(self, config: TogetherImplConfig) -> None:
ModelRegistryHelper.__init__(self, MODEL_ENTRIES, config.allowed_models)
self.config = config
self._model_cache: dict[str, Model] = {}
def get_api_key(self):
return self.config.api_key.get_secret_value()
def get_base_url(self):
return BASE_URL
async def initialize(self) -> None:
pass
@ -255,6 +259,37 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi
embeddings = [item.embedding for item in r.data]
return EmbeddingsResponse(embeddings=embeddings)
async def list_models(self) -> list[Model] | None:
self._model_cache = {}
# Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
for m in await self._get_client().models.list():
if m.type == "embedding":
if m.id not in EMBEDDING_MODEL_ENTRIES:
logger.warning(f"Unknown embedding dimension for model {m.id}, skipping.")
continue
self._model_cache[m.id] = Model(
provider_id=self.__provider_id__,
provider_resource_id=EMBEDDING_MODEL_ENTRIES[m.id].provider_model_id,
identifier=m.id,
model_type=ModelType.embedding,
metadata=EMBEDDING_MODEL_ENTRIES[m.id].metadata,
)
else:
self._model_cache[m.id] = Model(
provider_id=self.__provider_id__,
provider_resource_id=m.id,
identifier=m.id,
model_type=ModelType.llm,
)
return list(self._model_cache.values())
async def should_refresh_models(self) -> bool:
return True
async def check_model_availability(self, model: str) -> bool:
return model in self._model_cache
async def openai_embeddings(
self,
model: str,
@ -263,125 +298,39 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi
dimensions: int | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
raise NotImplementedError()
"""
Together's OpenAI-compatible embeddings endpoint is not compatible with
the standard OpenAI embeddings endpoint.
async def openai_completion(
self,
model: str,
prompt: str | list[str] | list[int] | list[list[int]],
best_of: int | None = None,
echo: bool | None = None,
frequency_penalty: float | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_tokens: int | None = None,
n: int | None = None,
presence_penalty: float | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
top_p: float | None = None,
user: str | None = None,
guided_choice: list[str] | None = None,
prompt_logprobs: int | None = None,
suffix: str | None = None,
) -> OpenAICompletion:
model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
prompt=prompt,
best_of=best_of,
echo=echo,
frequency_penalty=frequency_penalty,
logit_bias=logit_bias,
logprobs=logprobs,
max_tokens=max_tokens,
n=n,
presence_penalty=presence_penalty,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
top_p=top_p,
user=user,
The endpoint -
- does not return usage information
- does not support user param, returns 400 Unrecognized request arguments supplied: user
- does not support dimensions param, returns 400 Unrecognized request arguments supplied: dimensions
- does not support encoding_format param, always returns floats, never base64
"""
# Together support ticket #13332 -> will not fix
if user is not None:
raise ValueError("Together's embeddings endpoint does not support user param.")
# Together support ticket #13333 -> escalated
if dimensions is not None:
raise ValueError("Together's embeddings endpoint does not support dimensions param.")
# Together support ticket #13331 -> will not fix, compute client side
if encoding_format not in (None, NOT_GIVEN, "float"):
raise ValueError("Together's embeddings endpoint only supports encoding_format='float'.")
response = await self.client.embeddings.create(
model=await self._get_provider_model_id(model),
input=input,
)
return await self._get_openai_client().completions.create(**params) # type: ignore
async def openai_chat_completion(
self,
model: str,
messages: list[OpenAIMessageParam],
frequency_penalty: float | None = None,
function_call: str | dict[str, Any] | None = None,
functions: list[dict[str, Any]] | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_completion_tokens: int | None = None,
max_tokens: int | None = None,
n: int | None = None,
parallel_tool_calls: bool | None = None,
presence_penalty: float | None = None,
response_format: OpenAIResponseFormatParam | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
tool_choice: str | dict[str, Any] | None = None,
tools: list[dict[str, Any]] | None = None,
top_logprobs: int | None = None,
top_p: float | None = None,
user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
model_obj = await self.model_store.get_model(model)
params = await prepare_openai_completion_params(
model=model_obj.provider_resource_id,
messages=messages,
frequency_penalty=frequency_penalty,
function_call=function_call,
functions=functions,
logit_bias=logit_bias,
logprobs=logprobs,
max_completion_tokens=max_completion_tokens,
max_tokens=max_tokens,
n=n,
parallel_tool_calls=parallel_tool_calls,
presence_penalty=presence_penalty,
response_format=response_format,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
tool_choice=tool_choice,
tools=tools,
top_logprobs=top_logprobs,
top_p=top_p,
user=user,
response.model = model # return the same model id the user provided, avoiding exposure of the provider model id
# Together support ticket #13330 -> escalated
# - togethercomputer/m2-bert-80M-32k-retrieval *does not* return usage information
if not hasattr(response, "usage") or response.usage is None:
logger.warning(
f"Together's embedding endpoint for {model} did not return usage information, substituting -1s."
)
if params.get("stream", False):
return self._stream_openai_chat_completion(params)
return await self._get_openai_client().chat.completions.create(**params) # type: ignore
response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator:
# together.ai sometimes adds usage data to the stream, even if include_usage is False
# This causes an unexpected final chunk with empty choices array to be sent
# to clients that may not handle it gracefully.
include_usage = False
if params.get("stream_options", None):
include_usage = params["stream_options"].get("include_usage", False)
stream = await self._get_openai_client().chat.completions.create(**params)
seen_finish_reason = False
async for chunk in stream:
# Final usage chunk with no choices that the user didn't request, so discard
if not include_usage and seen_finish_reason and len(chunk.choices) == 0:
break
yield chunk
for choice in chunk.choices:
if choice.finish_reason:
seen_finish_reason = True
break
return response
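Since Together always returns float embeddings, a client that needs `encoding_format='base64'` has to convert on its side, as the ticket note above suggests. A minimal sketch of that conversion (OpenAI-style base64 embeddings are the raw little-endian float32 bytes, base64-encoded):

```python
import base64
import struct


def floats_to_base64(vec: list[float]) -> str:
    # Pack as little-endian float32, then base64-encode, matching
    # what OpenAI returns for encoding_format="base64".
    return base64.b64encode(struct.pack(f"<{len(vec)}f", *vec)).decode("ascii")
```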

View file

@ -26,11 +26,11 @@ class WatsonXConfig(BaseModel):
)
api_key: SecretStr | None = Field(
default_factory=lambda: os.getenv("WATSONX_API_KEY"),
description="The watsonx API key, only needed of using the hosted service",
description="The watsonx API key",
)
project_id: str | None = Field(
default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"),
description="The Project ID key, only needed of using the hosted service",
description="The Project ID key",
)
timeout: int = Field(
default=60,

View file

@ -38,6 +38,7 @@ from llama_stack.apis.inference import (
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
@ -57,14 +58,29 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from . import WatsonXConfig
from .models import MODEL_ENTRIES
logger = get_logger(name=__name__, category="inference::watsonx")
# Note on structured output
# WatsonX returns responses with a json embedded into a string.
# Examples:
# ChatCompletionResponse(completion_message=CompletionMessage(content='```json\n{\n
# "first_name": "Michael",\n "last_name": "Jordan",\n'...)
# Not even valid JSON, but we can still extract the JSON from the content
# CompletionResponse(content=' \nThe best answer is $\\boxed{\\{"name": "Michael Jordan",
# "year_born": "1963", "year_retired": "2003"\\}}$')
# Find the start of the boxed content
class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
def __init__(self, config: WatsonXConfig) -> None:
ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
print(f"Initializing watsonx InferenceAdapter({config.url})...")
logger.info(f"Initializing watsonx InferenceAdapter({config.url})...")
self._config = config
self._openai_client: AsyncOpenAI | None = None
self._project_id = self._config.project_id

View file

@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import uuid
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator
from typing import Any
@ -43,6 +44,12 @@ class OpenAIMixin(ABC):
The model_store is set in routing_tables/common.py during provider initialization.
"""
# Allow subclasses to control whether the 'id' field in OpenAI responses
# is overwritten with a client-side generated id.
#
# This is useful for providers that do not return a unique id in the response.
overwrite_completion_id: bool = False
@abstractmethod
def get_api_key(self) -> str:
"""
@ -110,6 +117,23 @@ class OpenAIMixin(ABC):
raise ValueError(f"Model {model} has no provider_resource_id")
return model_obj.provider_resource_id
async def _maybe_overwrite_id(self, resp: Any, stream: bool | None) -> Any:
if not self.overwrite_completion_id:
return resp
new_id = f"cltsd-{uuid.uuid4()}"
if stream:
async def _gen():
async for chunk in resp:
chunk.id = new_id
yield chunk
return _gen()
else:
resp.id = new_id
return resp
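A sketch of how a provider opts in to client-side IDs (the TGI adapter above does exactly this, since TGI always returns `id=""`); the class name is invented:

```python
class MyAdapter(OpenAIMixin):  # hypothetical provider adapter
    overwrite_completion_id = True  # responses come back with id="cltsd-<uuid4>"
```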
async def openai_completion(
self,
model: str,
@ -147,7 +171,7 @@ class OpenAIMixin(ABC):
extra_body["guided_choice"] = guided_choice
# TODO: fix openai_completion to return type compatible with OpenAI's API response
return await self.client.completions.create( # type: ignore[no-any-return]
resp = await self.client.completions.create(
**await prepare_openai_completion_params(
model=await self._get_provider_model_id(model),
prompt=prompt,
@ -171,6 +195,8 @@ class OpenAIMixin(ABC):
extra_body=extra_body,
)
return await self._maybe_overwrite_id(resp, stream) # type: ignore[no-any-return]
async def openai_chat_completion(
self,
model: str,
@ -200,8 +226,7 @@ class OpenAIMixin(ABC):
"""
Direct OpenAI chat completion API call.
"""
# Type ignore because return types are compatible
return await self.client.chat.completions.create( # type: ignore[no-any-return]
resp = await self.client.chat.completions.create(
**await prepare_openai_completion_params(
model=await self._get_provider_model_id(model),
messages=messages,
@ -229,6 +254,8 @@ class OpenAIMixin(ABC):
)
)
return await self._maybe_overwrite_id(resp, stream) # type: ignore[no-any-return]
async def openai_embeddings(
self,
model: str,

View file

@ -7,7 +7,6 @@
from __future__ import annotations # for forward references
import hashlib
import inspect
import json
import os
from collections.abc import Generator
@ -243,11 +242,10 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
global _current_mode, _current_storage
if _current_mode == InferenceMode.LIVE or _current_storage is None:
# Normal operation
if inspect.iscoroutinefunction(original_method):
return await original_method(self, *args, **kwargs)
else:
if endpoint == "/v1/models":
return original_method(self, *args, **kwargs)
else:
return await original_method(self, *args, **kwargs)
# Get base URL based on client type
if client_type == "openai":
@ -298,10 +296,10 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
)
elif _current_mode == InferenceMode.RECORD:
if inspect.iscoroutinefunction(original_method):
response = await original_method(self, *args, **kwargs)
else:
if endpoint == "/v1/models":
response = original_method(self, *args, **kwargs)
else:
response = await original_method(self, *args, **kwargs)
# we want to store the result of the iterator, not the iterator itself
if endpoint == "/v1/models":

View file

@ -18,7 +18,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^12.23.12",
"llama-stack-client": "^0.2.21",
"llama-stack-client": "^0.2.22",
"lucide-react": "^0.542.0",
"next": "15.5.3",
"next-auth": "^4.24.11",
@ -10314,9 +10314,9 @@
"license": "MIT"
},
"node_modules/llama-stack-client": {
"version": "0.2.21",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.21.tgz",
"integrity": "sha512-rjU2Vx5xStxDYavU8K1An/SYXiQQjroLcK98B+p0Paz/a7OgRao2S0YwvThJjPUyChY4fO03UIXP9LpmHqlXWQ==",
"version": "0.2.22",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.22.tgz",
"integrity": "sha512-7aW3UQj5MwjV73Brd+yQ1e4W1W33nhozyeHM5tzOgbsVZ88tL78JNiNvyFqDR5w6V9XO4/uSGGiQVG6v83yR4w==",
"license": "MIT",
"dependencies": {
"@types/node": "^18.11.18",

View file

@ -23,7 +23,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^12.23.12",
"llama-stack-client": "^0.2.21",
"llama-stack-client": "^0.2.22",
"lucide-react": "^0.542.0",
"next": "15.5.3",
"next-auth": "^4.24.11",

View file

@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
version = "0.2.21"
version = "0.2.22"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@ -31,12 +31,12 @@ dependencies = [
"huggingface-hub>=0.34.0,<1.0",
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-client>=0.2.21",
"llama-stack-client>=0.2.22",
"openai>=1.100.0", # for expires_after support
"prompt-toolkit",
"python-dotenv",
"python-jose[cryptography]",
"pydantic>=2",
"pydantic>=2.11.9",
"rich",
"starlette",
"termcolor",
@ -55,7 +55,7 @@ dependencies = [
ui = [
"streamlit",
"pandas",
"llama-stack-client>=0.2.21",
"llama-stack-client>=0.2.22",
"streamlit-option-menu",
]
@ -141,7 +141,7 @@ docs = [
"sphinxcontrib.openapi",
"requests",
]
codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
codegen = ["rich", "pydantic>=2.11.9", "jinja2>=3.1.6"]
benchmark = [
"locust>=2.39.1",
]

View file

@ -48,7 +48,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
"remote::nvidia",
"remote::runpod",
"remote::sambanova",
"remote::tgi",
"remote::vertexai",
# {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
# or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
@ -59,6 +58,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
# does not work with the specified model, gpt-5-mini. Please choose different model and try
# again. You can learn more about which models can be used with each operation here:
# https://go.microsoft.com/fwlink/?linkid=2197993.'}}"}
"remote::watsonx", # return 404 when hitting the /openai/v1 endpoint
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
@ -96,6 +96,8 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
"remote::vertexai",
# Error code: 400 - [{'error': {'code': 400, 'message': 'Unable to submit request because candidateCount must be 1 but
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
"remote::tgi", # TGI ignores n param silently
"remote::together", # `n` > 1 is not supported when streaming tokens. Please disable `stream`
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
@ -110,7 +112,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
"remote::cerebras",
"remote::databricks",
"remote::runpod",
"remote::tgi",
"remote::watsonx", # watsonx returns 404 when hitting the /openai/v1 endpoint
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.")

View file

@ -29,9 +29,35 @@ def provider_from_model(client_with_models, model_id):
return providers[provider_id]
def skip_if_model_doesnt_support_variable_dimensions(model_id):
if "text-embedding-3" not in model_id:
pytest.skip("{model_id} does not support variable output embedding dimensions")
def skip_if_model_doesnt_support_user_param(client, model_id):
provider = provider_from_model(client, model_id)
if provider.provider_type in (
"remote::together", # service returns 400
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} does not support user param.")
def skip_if_model_doesnt_support_encoding_format_base64(client, model_id):
provider = provider_from_model(client, model_id)
if provider.provider_type in (
"remote::together", # param silently ignored, always returns floats
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} does not support encoding_format='base64'.")
def skip_if_model_doesnt_support_variable_dimensions(client_with_models, model_id):
provider = provider_from_model(client_with_models, model_id)
if provider.provider_type in (
"remote::together", # returns 400
"inline::sentence-transformers",
):
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions."
)
if provider.provider_type == "remote::openai" and "text-embedding-3" not in model_id:
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions."
)
@pytest.fixture(params=["openai_client", "llama_stack_client"])
@ -92,6 +118,7 @@ def test_openai_embeddings_multiple_strings(compat_client, client_with_models, e
response = compat_client.embeddings.create(
model=embedding_model_id,
input=input_texts,
encoding_format="float",
)
assert response.object == "list"
@ -127,7 +154,7 @@ def test_openai_embeddings_with_encoding_format_float(compat_client, client_with
def test_openai_embeddings_with_dimensions(compat_client, client_with_models, embedding_model_id):
"""Test OpenAI embeddings endpoint with custom dimensions parameter."""
skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
skip_if_model_doesnt_support_variable_dimensions(embedding_model_id)
skip_if_model_doesnt_support_variable_dimensions(client_with_models, embedding_model_id)
input_text = "Test dimensions parameter"
dimensions = 16
@ -148,6 +175,7 @@ def test_openai_embeddings_with_dimensions(compat_client, client_with_models, em
def test_openai_embeddings_with_user_parameter(compat_client, client_with_models, embedding_model_id):
"""Test OpenAI embeddings endpoint with user parameter."""
skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
skip_if_model_doesnt_support_user_param(client_with_models, embedding_model_id)
input_text = "Test user parameter"
user_id = "test-user-123"
@ -196,11 +224,13 @@ def test_openai_embeddings_different_inputs_different_outputs(compat_client, cli
response1 = compat_client.embeddings.create(
model=embedding_model_id,
input=input_text1,
encoding_format="float",
)
response2 = compat_client.embeddings.create(
model=embedding_model_id,
input=input_text2,
encoding_format="float",
)
embedding1 = response1.data[0].embedding
@ -214,7 +244,8 @@ def test_openai_embeddings_different_inputs_different_outputs(compat_client, cli
def test_openai_embeddings_with_encoding_format_base64(compat_client, client_with_models, embedding_model_id):
"""Test OpenAI embeddings endpoint with base64 encoding format."""
skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
skip_if_model_doesnt_support_variable_dimensions(embedding_model_id)
skip_if_model_doesnt_support_encoding_format_base64(client_with_models, embedding_model_id)
skip_if_model_doesnt_support_variable_dimensions(client_with_models, embedding_model_id)
input_text = "Test base64 encoding format"
dimensions = 12
@ -247,6 +278,7 @@ def test_openai_embeddings_with_encoding_format_base64(compat_client, client_wit
def test_openai_embeddings_base64_batch_processing(compat_client, client_with_models, embedding_model_id):
"""Test OpenAI embeddings endpoint with base64 encoding for batch processing."""
skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
skip_if_model_doesnt_support_encoding_format_base64(client_with_models, embedding_model_id)
input_texts = ["First text for base64", "Second text for base64", "Third text for base64"]

View file

@ -45,7 +45,7 @@ def skip_if_model_doesnt_support_json_schema_structured_output(client_with_model
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if provider.provider_type in ("remote::sambanova", "remote::azure"):
if provider.provider_type in ("remote::sambanova", "remote::azure", "remote::watsonx"):
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
)
@ -211,6 +211,7 @@ def test_text_completion_log_probs_streaming(client_with_models, text_model_id,
)
def test_text_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class AnswerFormat(BaseModel):
name: str

View file

@ -0,0 +1,800 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/embeddings",
"headers": {},
"body": {
"model": "togethercomputer/m2-bert-80M-32k-retrieval",
"input": "Test encoding format"
},
"endpoint": "/v1/embeddings",
"model": "togethercomputer/m2-bert-80M-32k-retrieval"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.011256923,
0.0037174695,
0.047607094,
-0.03605117,
0.022678856,
0.0022196341,
0.008172763,
-0.07876377,
-0.012652523,
-0.124776885,
-0.07201225,
0.011470616,
0.020233244,
-0.03953407,
0.017867543,
-0.07615726,
0.015161683,
0.01493531,
0.0021282644,
0.02805457,
0.0008320583,
0.022922216,
0.049158294,
-0.03197842,
0.020910429,
0.03798574,
0.032469492,
0.009267314,
0.0883011,
0.0032435523,
0.013633923,
0.0457091,
-0.022143621,
-0.0007423012,
-0.03613117,
0.052107,
0.02962152,
0.045084383,
0.044733327,
0.11753868,
0.05730107,
0.026509244,
-0.056454167,
-0.017637681,
0.030301955,
0.04790331,
-0.025398305,
-0.019705286,
0.11366949,
0.05800383,
-0.0072742635,
0.100181706,
0.1609472,
0.0053162435,
0.01714287,
-0.023215268,
0.042824704,
0.04082185,
0.030668061,
-0.06529372,
0.008288249,
0.0325246,
0.009664108,
-0.031153189,
0.044064675,
0.10059426,
0.036557477,
0.009674479,
0.016028037,
0.02236809,
0.056538712,
-0.12828006,
0.016760435,
0.015355689,
-0.00070172164,
-0.0076741586,
-0.02880062,
-0.011680436,
-0.036522433,
-0.030315956,
0.023295958,
0.031333964,
0.042397793,
-0.063102156,
0.0669075,
-0.07678097,
0.0616129,
-0.0071245604,
-0.021313114,
0.0040440215,
0.04436404,
0.05289292,
0.05803014,
0.032691576,
0.037537806,
-0.09712317,
-0.0061692744,
0.008186577,
-0.0151672475,
-0.05499382,
-0.11011894,
-0.017255861,
0.061501417,
0.03551128,
0.056205165,
0.07500363,
0.023062926,
0.10787879,
0.063290246,
-0.021196125,
-0.005724647,
0.019805718,
-0.0063712946,
-0.049270064,
-0.024442751,
0.018587058,
-0.082689136,
-0.019034613,
0.005483609,
0.03418548,
-0.008317338,
0.06888298,
-0.037655607,
-0.05362105,
-0.010807861,
0.069666155,
-0.01777964,
-0.015136251,
-0.026567455,
-0.08084807,
-0.078372054,
0.039493512,
0.013156698,
0.07340631,
0.12035369,
-0.05765069,
0.025966862,
-0.0045753582,
-0.030865112,
0.039448086,
-0.037273232,
0.047059145,
-0.029127738,
-0.024217308,
0.02748501,
-0.048555836,
0.017913114,
-0.055981673,
-0.005601368,
-0.04045025,
-0.017308103,
0.06272273,
0.012256746,
0.01575095,
-0.026737463,
0.04115108,
0.07562276,
-0.01140116,
0.022552952,
0.0443809,
-0.030472409,
-0.021670958,
-0.037897367,
0.017250286,
-0.033001736,
-0.048738975,
-0.06429833,
-0.015412785,
0.0036735258,
0.023700202,
0.035861194,
-0.05393875,
0.048050668,
0.032297045,
0.021352977,
-0.05701748,
0.0008330949,
-0.006661303,
-0.0070953164,
-0.043984424,
0.052504774,
0.027689766,
0.031661708,
-0.050054867,
-0.015419155,
-0.013700429,
-0.03579233,
-0.08926211,
-0.034341693,
-0.01738188,
-0.0065487004,
-0.051955026,
0.0019674778,
0.0015172043,
0.024915336,
0.010987228,
0.061529815,
0.09077649,
0.04394813,
-0.07503514,
0.043345768,
-0.028357483,
0.06312762,
0.025069924,
0.028561853,
0.043048594,
0.017411513,
-0.025240859,
-0.0056393985,
0.054039005,
0.008721963,
-0.039967448,
0.0012871448,
0.0052062417,
0.005563228,
0.042596456,
-0.008794862,
-0.044669237,
0.04184779,
0.008726271,
0.10136058,
0.040724736,
0.14168875,
-0.017516509,
-0.11203568,
0.0010548063,
-0.058536656,
0.01673066,
0.007502946,
-0.035662595,
0.034719367,
-0.0060368567,
0.13295838,
0.026423598,
0.056147255,
0.04473965,
0.045232397,
0.07171366,
0.009358642,
-0.021109166,
0.033915937,
0.0380073,
-0.01451498,
-0.021589639,
0.062518574,
-0.017531183,
-0.030811403,
0.024500312,
0.05383414,
-0.1335839,
0.01834579,
-0.051048376,
0.07460228,
0.03231806,
0.00962887,
0.05156732,
0.016169788,
0.0062234807,
-0.09062714,
-0.08959952,
0.025153147,
-0.030351512,
-0.04339584,
0.007234872,
0.014588551,
0.022614833,
-0.08844599,
-0.009002514,
-0.114522785,
0.08118862,
-0.03023919,
0.007820294,
0.043863248,
-0.043678157,
-0.036323708,
0.006777855,
-0.019326974,
-0.0664114,
-0.019019991,
0.073445216,
-0.039277073,
-0.0157583,
-0.01931436,
-0.027121417,
-0.028259363,
-0.107222356,
0.11150329,
-0.012612926,
-0.025338905,
0.029330198,
0.011753977,
0.009784897,
0.042475123,
-0.004051051,
-0.014803267,
-0.04530689,
-0.01848677,
-0.050840423,
0.01814009,
0.0051442874,
-0.033988528,
0.0033705293,
-0.05515113,
-0.023601055,
-0.06183089,
0.012501645,
-0.08027637,
0.022573682,
0.079796925,
-0.00926268,
-0.02180816,
0.0059841494,
-0.018863965,
-0.011257763,
0.055679787,
-0.018714463,
-0.04081558,
-0.017017504,
0.026006198,
-0.03687599,
-0.05399378,
0.042955294,
0.00079697353,
-0.0015601065,
0.026138263,
-0.01198548,
0.07594801,
-0.0049053924,
-0.001241132,
0.022863775,
0.025632044,
-0.023908222,
-0.02252925,
0.042020634,
-0.060588334,
0.05498828,
-0.03466166,
0.003202133,
-0.015508297,
-0.021138275,
0.007791096,
0.052594397,
-0.08649948,
0.038542755,
0.011088168,
0.049710445,
-0.015898548,
0.013559725,
-0.0012927915,
-0.078937665,
-0.0470789,
0.02421941,
0.0050838543,
-0.051634457,
0.014016644,
0.059073824,
-0.01279741,
0.006315097,
0.028651753,
-0.023221422,
-0.049021006,
-0.08123552,
-0.027243393,
-0.026543872,
0.040068373,
0.01465917,
0.01366034,
-0.07191417,
-0.007906117,
-0.06743931,
-0.040284913,
0.046346053,
-0.015108051,
-0.067285545,
0.020757562,
-0.03144588,
-0.02684228,
-0.030008601,
0.0008360872,
-0.012667347,
-0.0782403,
0.02436115,
-0.054881096,
-0.010856299,
-0.07653927,
-0.044655506,
-0.02075821,
0.023765713,
0.0083463555,
0.026002545,
-0.003060633,
0.060491852,
0.032562606,
0.029937308,
-0.022013078,
0.07388013,
0.017152807,
-0.07095613,
-0.03923808,
0.0017680842,
0.0038672008,
-0.053012144,
-0.016951663,
0.027642388,
0.016483316,
-0.015618807,
-0.11136081,
0.006826955,
-0.010586094,
-0.05052998,
-0.04226535,
-0.031801827,
-0.020531418,
-0.06278464,
-0.062224947,
0.0769673,
-0.0706861,
0.026174366,
-0.041260213,
0.058052614,
-0.046227556,
-0.05443509,
0.007650712,
-0.061986744,
-0.00546975,
-0.042977307,
-0.0147894155,
0.045748055,
-0.01602859,
0.018538997,
0.073324144,
-0.105757244,
-0.010215157,
0.0069961487,
-0.010474333,
0.007267861,
-0.043416463,
0.04171331,
0.012246647,
-0.024870023,
0.0067938967,
0.023995718,
0.037606664,
-0.034879085,
0.107255146,
0.019311333,
0.008084773,
0.015113109,
0.04807634,
-0.011898967,
0.0028230203,
0.004201883,
-0.019952193,
-0.083809994,
0.025964422,
0.010652608,
0.021981532,
-0.029947964,
0.10096241,
-0.0018155909,
-0.078443065,
0.035357803,
0.030101022,
0.08652985,
-0.020698488,
0.06619985,
0.011043828,
0.022531942,
0.059432585,
-0.08669654,
0.023926888,
0.006353244,
-0.046637908,
-0.072916985,
-0.04355625,
-0.010734682,
-0.06298886,
0.11202974,
-0.008399903,
0.04045217,
-0.049840588,
-0.051897135,
0.04921834,
0.018730633,
0.07189677,
-0.020521715,
0.10433443,
-0.0035553537,
0.015335822,
-0.03326729,
-0.05246277,
-0.038786076,
0.04000599,
-0.028919725,
-0.017996594,
-0.007428113,
-0.003258321,
0.0127034895,
-0.0062633064,
0.0007574967,
-0.060385525,
-0.018971093,
0.062526286,
-0.025764955,
0.05286283,
0.043842334,
0.044092383,
-0.037126385,
-0.018775577,
0.007996275,
-0.00028039515,
-0.06591952,
0.039109394,
0.022268493,
0.033030964,
0.010780152,
0.051087722,
-0.07398754,
0.02156791,
-0.03391487,
0.01900175,
-0.03438655,
-0.050286565,
-0.029407075,
0.013486627,
0.006069821,
0.03566702,
-0.046612754,
0.030740444,
-0.0637836,
0.020758858,
0.013579259,
0.015677635,
0.07067559,
-0.03354964,
-0.09833861,
-0.045598283,
0.046094477,
-0.018735003,
0.0013117951,
0.020225674,
-0.025771514,
-0.011772435,
0.020403381,
0.048393097,
-0.001137191,
-0.008214463,
-0.024194324,
0.012559411,
0.028170707,
-0.038262583,
-0.010594243,
0.008866333,
0.02652175,
0.010765866,
0.02152175,
0.007194773,
-0.021046689,
-0.047594506,
-0.05342931,
0.044459403,
-0.00075621146,
0.021768885,
0.061362576,
0.03243972,
0.023200674,
0.012056035,
-0.010374278,
-0.06796502,
-0.0056832493,
0.048799623,
-0.035878677,
-0.020508701,
0.03527651,
0.096402384,
-0.027735645,
0.11728837,
0.022490505,
-0.08394513,
-0.010033967,
0.024851669,
-0.019062884,
0.00039440763,
-0.10133529,
0.011722217,
-0.04434193,
-0.030069547,
0.030103652,
-0.017366616,
0.046203658,
-0.04393208,
-0.05095759,
-0.04554081,
-0.029142734,
0.01689045,
0.008356038,
-0.035321265,
-0.02382173,
-0.0015672153,
0.06304823,
-0.008137697,
-0.014463008,
0.045292154,
-0.06497864,
0.015265712,
0.008239593,
-0.08195689,
0.037012544,
0.04680898,
0.007484248,
0.02335733,
-0.06787198,
-0.062197443,
-0.06841327,
-0.039720036,
-0.0105394935,
-0.057220835,
-0.039479975,
0.029730098,
0.0697698,
0.0280752,
0.0137115335,
-0.0045632124,
-0.01313052,
0.07553262,
-0.04117193,
-0.14872926,
0.028015105,
-0.047134113,
-0.016151398,
-0.081647106,
-0.02221662,
-0.036281105,
-0.023036504,
0.0612415,
-0.018361837,
-0.0238258,
-0.0022532772,
0.1537845,
0.006872191,
-0.044352733,
-0.0026320857,
-0.08600976,
0.005572628,
0.053448226,
-0.015072955,
-0.029777542,
-0.019132927,
0.053970527,
0.005238485,
-0.02418231,
-0.12369688,
0.0014781327,
0.059662092,
-0.011181213,
0.01400666,
0.023866476,
-0.059490796,
-0.054530527,
-0.011234197,
0.013823349,
-0.012150345,
-0.09948839,
0.023659766,
0.014326883,
-0.02229736,
-0.0024076505,
-0.10091382,
0.08174192,
-0.024408998,
-0.023222951,
0.011201234,
0.013236311,
0.04317295,
0.051764306,
0.07648576,
-0.00061111146,
-0.088623054,
-0.037177067,
0.038964123,
-0.029959839,
0.033466227,
-0.08635276,
0.04128183,
-0.020397836,
0.056285754,
-0.02570748,
0.05911732,
0.0061064134,
-0.01733281,
-0.0875996,
-0.0127257295,
-0.013593507,
-0.04925175,
0.01888016,
-0.032455195,
-0.023753202,
0.052025676,
0.06000905,
0.04137704,
0.004952635,
-0.02542677,
0.00017748028,
-0.041987997,
0.04760188,
0.068178274,
-0.060950078,
-0.05742421,
0.054274186,
-0.048096504,
0.034568857,
0.0012921172,
0.0705816,
-0.014679933,
-0.001761971,
-0.029119784,
0.008006632,
0.018063113,
-0.05880496,
-0.052486468,
0.010976936,
0.03688557,
0.061141517,
-0.009467033,
-0.035062946,
-0.06794524,
-0.0609979,
0.015924038,
-0.03805085,
0.03977454,
-0.015656536,
0.014254484,
-0.030620195,
-0.038830906,
-0.013730216,
-0.070247106,
-0.074514836,
0.037831023,
0.027780455,
0.0073002693,
-0.050368425,
0.040389538,
0.035920046,
0.025425838,
0.006255748,
-0.017454483,
-0.02307413,
0.05788845,
0.018672187,
0.033335716,
0.01855402,
0.07957198,
-0.0029801806,
-0.057038378,
0.010123766,
0.038190138,
0.0333764,
0.075057626,
0.00592374,
0.06380629,
-0.028154025,
0.07188246,
-0.056649268,
-0.019166004,
0.053392358,
0.13961181,
-0.08459373,
0.03255955
],
"index": 0,
"object": "embedding"
}
],
"model": "togethercomputer/m2-bert-80M-32k-retrieval",
"object": "list",
"usage": null
}
},
"is_streaming": false
}
}


@ -0,0 +1,59 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "oBUtgGr-4Yz4kd-9801a2f00b2b42e8",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": []
},
"seed": 1098425109146507500
}
],
"created": 1758039052,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 25,
"prompt_tokens": 39,
"total_tokens": 64,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
},
"prompt": []
}
},
"is_streaming": false
}
}


@ -0,0 +1,800 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/embeddings",
"headers": {},
"body": {
"model": "togethercomputer/m2-bert-80M-32k-retrieval",
"input": "This is completely different content"
},
"endpoint": "/v1/embeddings",
"model": "togethercomputer/m2-bert-80M-32k-retrieval"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.020581583,
0.03996682,
0.06342483,
-0.046694994,
-0.07684763,
-0.05265455,
-0.053058416,
-0.008007386,
-0.04512141,
0.03718547,
-0.026790882,
0.039592147,
0.08868821,
-0.054975007,
0.022950895,
-0.03249339,
0.05376096,
0.04878751,
0.06144113,
0.08925032,
-0.06345507,
-0.0008829904,
0.07914291,
-0.028592229,
-0.048433058,
-0.0351529,
0.028880889,
-0.08001268,
-0.04552556,
-0.080687605,
0.1400234,
0.14326853,
0.02891313,
-0.05588759,
0.007262874,
0.026984219,
0.09121335,
0.050748702,
0.017702162,
-0.035733465,
0.1328057,
-0.08973662,
-0.050988093,
-0.009071953,
0.00674055,
0.0138731655,
-0.024637444,
-0.0019375099,
0.019351467,
0.041681487,
0.09368255,
0.0052818935,
0.027539922,
-0.031472813,
0.042352878,
0.07326235,
0.010973438,
0.06776053,
0.06473745,
0.031266563,
0.00057834754,
-0.002110916,
0.16004054,
-0.0535361,
0.04453045,
0.050499436,
0.03501775,
-0.003733677,
0.020598825,
-0.079224035,
0.07070447,
-0.060201976,
0.006393084,
-0.003781692,
0.070510566,
-0.047214407,
0.06080987,
-0.0877733,
-0.08569845,
-0.018021964,
0.06378409,
0.027565937,
0.038700324,
-0.1248613,
0.00903349,
-0.08429076,
0.016536232,
0.025240825,
0.00043874417,
-0.004602262,
0.0457946,
-0.03598806,
0.056914188,
0.044693712,
0.011178773,
-0.020428436,
0.036093723,
0.031189999,
0.07220326,
-0.066868156,
-0.020061923,
-0.0563857,
-0.013928966,
-0.034524415,
0.0041604545,
-0.047119446,
0.033624567,
0.06970587,
-0.033320673,
-0.0413748,
0.01094969,
-0.0100499755,
0.004480598,
0.02067311,
-0.021157527,
0.022485765,
0.03633523,
0.0049809627,
0.02181411,
0.049156368,
0.06253565,
0.059981186,
-0.031591866,
-0.049331754,
0.033537455,
0.021542493,
0.009435254,
0.025516914,
0.025417773,
-0.07066102,
0.011794456,
0.06311989,
0.011093616,
0.08549021,
-0.04281618,
0.011115061,
0.07443118,
0.021961706,
-0.02724888,
-0.00047235374,
0.016601468,
0.043411057,
0.03835865,
0.01029931,
0.008437206,
-0.057274926,
-0.045377273,
-0.09733081,
-0.009755395,
0.028172465,
0.043972567,
0.0968819,
0.052496422,
0.031553026,
-0.019291716,
0.034150966,
0.1310106,
0.02864821,
-0.047452684,
0.016342362,
-0.06591784,
-0.064888336,
-0.03380424,
-0.08384223,
0.023302404,
-0.020427782,
0.019540966,
0.02240307,
0.026848866,
-0.0018868797,
-0.031800512,
-0.073483676,
0.08840526,
-0.02696041,
-0.042041607,
0.030633071,
0.020918656,
0.06119309,
-0.048348967,
0.036555305,
0.033583682,
0.019630525,
-0.03500669,
-0.020821452,
0.012256841,
0.06733756,
0.036884613,
-0.080063485,
0.019956889,
-0.01994667,
0.0011630546,
-0.08307688,
-0.040326167,
-0.03293244,
-0.014897417,
0.03977495,
0.036790676,
0.020645684,
0.015943283,
-0.05961047,
0.036905374,
0.006005009,
0.033375766,
-0.015491932,
-0.07008363,
-0.031575754,
-0.0065630106,
-0.013962699,
-0.012629252,
0.046026245,
0.007901817,
-0.117550366,
-0.06314231,
0.05348636,
0.10863247,
0.053361807,
0.055756297,
-0.026388792,
-0.011777907,
-0.07197253,
0.010918023,
0.020021347,
0.14850953,
-0.043404948,
-0.04262303,
-0.04904758,
-0.014644666,
-0.0018742547,
-0.0054880613,
-0.015058903,
-0.03137978,
-0.09884002,
0.048087206,
-0.00044948232,
-0.059237186,
0.01681299,
0.06357592,
0.09665662,
-0.032431144,
-0.021346267,
-0.03630939,
0.108024776,
0.011421504,
0.00090062595,
0.09738569,
0.07588425,
-0.038476508,
0.008637763,
0.03942589,
0.03673421,
-0.008536316,
-0.035427485,
-0.0571462,
0.077514425,
-0.014574157,
-0.06636753,
0.0356625,
0.00055575924,
-0.008948914,
0.00082343427,
0.0511982,
0.03143358,
-0.03388075,
-0.013724427,
0.0551338,
-0.007191376,
-0.05363105,
-0.07718383,
-0.008230843,
0.10335533,
0.013668598,
-0.08284561,
0.05179483,
-0.08437943,
-0.017510848,
-0.05778264,
0.044004828,
-0.02612715,
-0.0058190715,
0.013293448,
-0.005663543,
0.0037016177,
-0.020699238,
0.00277368,
0.041328322,
-0.052624915,
0.020320976,
0.0033441507,
-0.11465616,
-0.059619453,
-0.029252917,
0.014145012,
-0.049234822,
0.025969574,
0.04118447,
0.017938918,
-0.009885965,
0.012801603,
-0.0007332413,
-0.0012993023,
-0.052635074,
0.064850755,
0.004576457,
-0.018446025,
-0.069130346,
0.018532049,
0.006330208,
0.039377607,
0.11237417,
0.055357743,
-0.0038629018,
0.048188694,
0.052925084,
-0.011272187,
-0.012422014,
0.005874242,
-0.0007749841,
-0.058404274,
-0.022589723,
0.031956926,
0.0470711,
0.027993023,
-0.06112344,
-0.0119517995,
-0.09797626,
-0.073644884,
0.07465703,
0.09884925,
-0.035564825,
-0.040369682,
0.014445328,
-0.052219898,
-0.027498178,
0.036846854,
-0.09408649,
-0.00027856976,
0.028489627,
0.002446708,
-0.043065134,
-0.030562297,
0.07565528,
-0.0256914,
-0.12143018,
0.09360902,
0.015026368,
0.058814585,
-0.01885037,
0.04901136,
0.009521308,
-0.0067844316,
-0.06265128,
0.029733902,
0.019703392,
-0.029863501,
0.033668272,
-0.015967827,
-0.024716265,
0.07095029,
0.07264489,
-0.021480447,
-0.040650267,
-0.11752601,
0.019378915,
-0.042310815,
0.05690114,
-0.01413233,
0.058113046,
-0.073345415,
-0.059576523,
-0.09720947,
0.012149926,
0.057291746,
-0.03505685,
-0.038375836,
0.0149342865,
-0.001562935,
-0.023513826,
0.00014910847,
0.022598296,
-0.071317434,
-0.06260575,
4.0522777e-05,
-0.086758316,
-0.013101295,
-0.02990748,
-0.08461068,
0.016139807,
0.06101953,
-0.08451055,
-0.046145856,
-0.048467644,
0.060105037,
0.024200678,
0.052542347,
0.041119967,
-0.0068898834,
0.09487794,
0.012641435,
-0.13026047,
0.06284531,
0.018659385,
-0.07564698,
0.006965884,
-0.036618453,
0.118192144,
-0.04771263,
0.023280941,
0.054039616,
-0.114724584,
-0.0918062,
0.038803104,
-0.09954885,
0.008216844,
-0.030975524,
-0.030176945,
0.0397766,
-0.0061745024,
0.071971394,
-0.041089423,
0.033857126,
0.03961017,
-0.03826589,
0.038435444,
-0.0860421,
0.08869605,
-0.028628873,
-0.05565758,
0.056920726,
0.020458337,
0.05994542,
0.08241441,
0.0400861,
-0.0045191804,
0.0030094406,
-0.007466077,
-0.02953672,
-0.068642505,
0.060889505,
-0.029501854,
-0.048823155,
0.015409609,
0.018862283,
-0.016425489,
-0.087497436,
0.067643866,
-0.033761434,
-0.054749027,
-0.03657711,
0.038102675,
-0.06197178,
0.045409728,
-0.02127562,
0.064449035,
-0.0056471447,
0.067553245,
-0.07137091,
0.017407946,
-0.09813906,
-0.046500444,
-0.058283363,
-0.018302118,
-0.025382183,
-0.04259567,
0.022398086,
-0.09098867,
0.043438766,
-0.07656342,
0.0028111413,
0.030880956,
-0.07750997,
0.07084878,
0.05344556,
0.0052658613,
-0.025303314,
-0.04759683,
-0.017034022,
0.02855913,
-0.04999449,
0.01974624,
0.07708244,
-0.011766297,
0.057390995,
-0.04652422,
0.023833811,
0.05608237,
0.05765577,
0.05078112,
0.046039928,
-0.055372067,
-0.044933185,
-0.08522771,
-0.09142792,
0.012817157,
-0.026148932,
-0.07331254,
0.11312438,
0.055893615,
-0.013500698,
0.008603385,
0.00057156937,
-0.091709465,
0.08057745,
-0.011340835,
-0.016915537,
0.0011427286,
0.09740327,
-0.029696029,
-0.047760956,
0.015541391,
0.0955123,
0.021890407,
-0.02908531,
0.030994056,
0.03820344,
-0.062488347,
0.015730608,
0.021182666,
-0.043783836,
0.02782434,
0.11151618,
0.052450567,
0.00037089732,
0.03351987,
-0.0054050605,
-0.033424556,
0.10350312,
0.065157756,
0.03392563,
0.010131469,
-0.053846426,
-0.0022781377,
0.0014610494,
0.005763698,
0.0426489,
-0.08206464,
-0.07099776,
-0.04228286,
0.07337842,
0.047744617,
0.04284143,
0.06959166,
0.013133698,
-0.030711556,
0.009055728,
0.06162162,
0.017240932,
-0.039795205,
-0.10877084,
0.024329182,
-0.0049141976,
-0.038892467,
-0.012901915,
-0.095080145,
0.05290344,
0.021141307,
0.03017632,
-0.0044154925,
-0.10163907,
-0.08186605,
-0.023801327,
0.035552323,
0.039041802,
-0.032427292,
0.07541,
0.10233232,
0.018622704,
-0.013646388,
-0.008619573,
0.020216271,
-0.07897946,
0.063637026,
-0.08652915,
-0.0100032855,
0.046902858,
0.076707095,
0.02531022,
0.05425257,
0.015954422,
-0.033368777,
-0.025112148,
-0.01394599,
-0.04062625,
0.056534503,
-0.04304168,
-0.060214523,
0.016551849,
-0.006314451,
0.060458317,
0.027808908,
0.040655438,
-0.031415448,
-0.120496035,
-0.04355332,
0.002170874,
0.013876282,
-0.011508199,
-0.046841078,
0.076444104,
0.08982719,
0.0846208,
0.029678846,
-0.086331986,
0.14421903,
-0.0030989156,
0.01598773,
0.059804816,
-0.0464971,
-0.0058899643,
0.02542227,
-0.020552263,
0.10621325,
-0.023809364,
-0.13324538,
-0.075492345,
0.06716611,
-0.040477127,
-0.046582364,
-0.07376809,
0.024235222,
0.070477486,
0.11006968,
-0.04869493,
0.078016356,
-0.07615679,
0.08063025,
-0.016255612,
-0.051746953,
0.08059405,
-0.0025989392,
-0.073428795,
-0.03987752,
0.098251894,
-0.006217126,
-0.028130062,
-0.051326722,
-0.0470711,
-0.016759045,
-0.039230157,
-0.020525763,
0.07148479,
-0.05419997,
-0.025775867,
0.0070432695,
-0.006410803,
0.027631486,
0.037966132,
-0.025654731,
-0.023324372,
0.026257442,
-0.034822363,
-0.010826962,
0.020623349,
0.0523646,
-0.022230538,
0.028196862,
0.023292363,
0.12025986,
-0.022648653,
-0.061013527,
-0.040045265,
0.022293845,
-0.016287014,
-0.08896512,
-0.021426601,
0.05109808,
0.038455352,
0.055882193,
0.10342665,
0.06503611,
0.07195616,
-0.013601524,
0.028618002,
0.03990776,
0.03236452,
0.07085622,
0.0055737793,
0.013130723,
-0.066394895,
0.021342268,
0.0026651763,
-0.012577644,
0.049445108,
0.049437333,
0.0047207237,
-0.02006381,
0.02022424,
0.05142978,
0.01725655,
0.00037797724,
0.039846063,
-0.11509461,
-0.013602717,
-0.066661686,
-0.020612884,
0.012832718,
-0.091352694,
-0.09389515,
0.07369748,
0.056452867,
0.10581744,
-0.06383743,
0.036662158,
-0.07204409,
0.012689036,
-0.025724197,
0.040817674,
-0.06890574,
0.0055584335,
0.031956017,
0.0014588524,
0.098465145,
0.0054196557,
0.056656968,
0.03322914,
-0.040962957,
-0.015689995,
-0.034545593,
-0.052660752,
-0.044768244,
-0.04419147,
-0.11039146,
0.015522225,
0.0052053384,
-0.08471112,
0.025280464,
-0.03353502,
-0.018717872,
-0.020738749,
0.0021664763,
-0.011238148,
0.02322494,
0.010894536,
-0.09676859,
0.01013113,
0.0035604087,
-0.0060942546,
-0.027839229,
-0.0037214137,
0.053193003,
-0.070640355,
-0.07783396,
0.005814805,
0.0064411093,
-0.023913933,
0.030543711,
-0.07979223,
-0.008982119,
0.043360766,
-0.048063844,
0.0017047173,
0.06882568,
-0.03443207,
0.015080402,
-0.049461022,
0.045471057,
-0.031460688,
-0.0028212033,
0.044725604,
0.0026248703,
-0.0329393,
-0.034404054,
0.024516258,
0.002614168,
-0.047855787,
-0.03149,
0.14646776,
-0.047660008,
0.021453902
],
"index": 0,
"object": "embedding"
}
],
"model": "togethercomputer/m2-bert-80M-32k-retrieval",
"object": "list",
"usage": null
}
},
"is_streaming": false
}
}


@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "http://localhost:8080/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user just said \"Hello, world!\" so I need to respond in a friendly way. My prompt says to respond in the same style, so I should start with \"Hello, world!\" but maybe add some helpful information. Let me think. Since the user is probably testing or just sharing, a simple \"Hello, world!\" with a question would be best for user interaction. I'll make sure to keep it positive and open-ended.\n</think>\n\nHello, world! \ud83d\ude0a What do you need today?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1757550395,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": {
"completion_tokens": 108,
"prompt_tokens": 12,
"total_tokens": 120,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large


@ -0,0 +1,59 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"messages": [
{
"role": "user",
"content": "Which planet has rings around it with a name starting with letter S?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "oBUtaEp-62bZhn-9801a2718d0ed123",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "The planet with rings around it that starts with the letter S is Saturn. Saturn's ring system is one of the most prominent and well-known in our solar system.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": []
},
"seed": 2387155844510162400
}
],
"created": 1758039032,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 34,
"prompt_tokens": 49,
"total_tokens": 83,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
},
"prompt": []
}
},
"is_streaming": false
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large


@ -0,0 +1,46 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "oBUswCe-62bZhn-98019f663cac0f68",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": " _______________________. \n\n## Step 1: Identify the traditional completion of the sentence.\nThe traditional completion of the sentence \"Roses are red, violets are...\" is based on a well-known poem.\n\n## Step 2: Recall the poem.\nThe poem states, \"Roses are red, violets are blue...\"\n\n## Step 3: Determine the word that completes the sentence.\nBased on the poem, the word that completes the sentence is \"blue\".\n\nThe final answer is: $\\boxed{blue}$",
"seed": 4892505926413923000
}
],
"created": 1758038908,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "text.completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 106,
"prompt_tokens": 25,
"total_tokens": 131,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
},
"prompt": []
}
},
"is_streaming": false
}
}


@ -0,0 +1,756 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": "Hello",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 9906
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "Hello",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": null
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "!",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " It",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 1102
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " It",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": "'s",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 596
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "'s",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " nice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 6555
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " nice",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 311
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " to",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " meet",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 3449
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " meet",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 499
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " you",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 13
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " Is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 2209
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Is",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " there",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 1070
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " there",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " something",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 2555
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " something",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " I",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 358
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " I",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " can",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 649
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " can",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " help",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 1520
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " help",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 499
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " you",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 449
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " with",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " or",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 477
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " or",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " would",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 1053
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " would",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 499
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " you",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " like",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 1093
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " like",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 311
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " to",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": " chat",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 6369
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " chat",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 30
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "?",
"seed": null
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtTzC-62bZhn-9801a1ee1bea25d8",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 128009
},
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "",
"seed": 16158686754257986000
}
],
"created": 1758039011,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 25,
"prompt_tokens": 39,
"total_tokens": 64,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
}
}
}
],
"is_streaming": true
}
}


@ -0,0 +1,87 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "oBUth9w-62bZhn-9801a3026bd20c8a",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": null,
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "call_8prwkicthj6bjfqa9ye64y2b",
"function": {
"arguments": "{\"city\":\"Tokyo\"}",
"name": "get_weather"
},
"type": "function",
"index": 0
}
]
},
"seed": 977986247412336500
}
],
"created": 1758039055,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 24,
"prompt_tokens": 193,
"total_tokens": 217,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
},
"prompt": []
}
},
"is_streaming": false
}
}


@ -0,0 +1,59 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"messages": [
{
"role": "user",
"content": "Which planet do humans live on?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "oBUtMpf-62bZhn-9801a16bc8d642d3",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Humans live on Earth.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": []
},
"seed": 14150443913665712000
}
],
"created": 1758038990,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 6,
"prompt_tokens": 42,
"total_tokens": 48,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
},
"prompt": []
}
},
"is_streaming": false
}
}


@ -0,0 +1,800 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/embeddings",
"headers": {},
"body": {
"model": "togethercomputer/m2-bert-80M-32k-retrieval",
"input": "Hello, world!"
},
"endpoint": "/v1/embeddings",
"model": "togethercomputer/m2-bert-80M-32k-retrieval"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.017041557,
-0.07436493,
0.02897635,
-0.032216743,
0.0056444216,
-0.029015187,
0.06512343,
-0.040310342,
0.05263593,
0.0068842396,
0.019191971,
-0.0064884443,
-0.01664521,
0.014244285,
0.036390014,
-0.040292,
0.031780273,
0.0039553884,
-0.055303488,
-0.028992416,
-0.02059435,
0.05677091,
-0.043668333,
-0.014273451,
0.15328151,
-0.023603301,
-0.049825363,
0.007869072,
-0.010882995,
-0.033912696,
0.053697765,
-0.00093928695,
0.0017799847,
0.038871024,
-0.069678165,
-0.067093275,
0.025772842,
-0.057590123,
-0.015825877,
0.020131286,
0.020742312,
0.003915491,
-0.018451879,
0.020440312,
-0.023613403,
-0.039568678,
-0.013152008,
-0.01871725,
0.021348018,
-0.019964654,
0.038607903,
0.018397795,
-0.0063561443,
-0.018936336,
-0.060981557,
-0.02152846,
0.027057847,
0.0014626224,
-0.018241309,
-0.07473041,
-0.02377323,
-0.033910733,
0.02569418,
-0.024951216,
-0.0076659806,
-0.015425462,
0.006604636,
0.09833969,
-0.005054596,
0.008841989,
-0.01836461,
-0.018554095,
0.011605144,
-0.016599955,
-0.062196333,
-0.0037542647,
-0.025220644,
-0.027834827,
-0.020460974,
-0.050503097,
0.032119684,
-0.023387104,
0.050067227,
-0.05834235,
0.023189448,
-0.021862485,
0.023831544,
-0.016663097,
-0.041609522,
0.025361128,
0.002924296,
0.01852158,
0.08960255,
-0.003265466,
-0.058762494,
-0.06428431,
-0.014671485,
-0.046800107,
0.02691456,
-0.0059303525,
-0.015431455,
0.022179665,
0.014044907,
0.012218545,
0.0053836405,
-0.025096457,
0.009438382,
0.032498095,
0.06879721,
0.056900814,
0.019497631,
-0.122159146,
-0.106994465,
-0.017456975,
0.047223866,
0.06569824,
0.04780035,
0.018039258,
-0.0011028647,
-0.05067006,
0.0106863845,
0.027489506,
-0.014593985,
-0.039851535,
-0.09175489,
0.037555773,
-0.060439512,
0.008525801,
0.0071557434,
-0.057973035,
-0.054225244,
0.051505033,
-0.0008626373,
0.069083415,
0.064380065,
0.09843996,
0.0062191207,
-0.041505292,
-0.05381256,
-0.0073601264,
-0.03288613,
0.011711341,
-0.09244605,
0.0069717136,
-0.05722877,
0.041075893,
0.06521969,
-0.0018537377,
0.016272636,
0.008761483,
-0.029342752,
0.020412564,
-0.07015791,
0.033616304,
0.039998446,
0.01602917,
0.044467725,
-0.08176377,
-0.036885373,
0.03468746,
0.0024068495,
0.00056306267,
0.02546511,
-0.053339135,
-0.027220095,
-0.021510394,
0.054806393,
-0.005447777,
-0.05690438,
-0.028497366,
0.01873974,
-0.035461064,
-0.00019089226,
-0.04914238,
0.030303763,
0.013396073,
0.015789565,
-0.07714792,
-0.062155712,
-0.00677417,
0.02850476,
0.031491462,
0.014566345,
0.012163924,
0.11814501,
-0.0043511004,
-0.017920421,
0.004205825,
-0.0015928322,
-0.012145554,
0.01663168,
-0.071173735,
0.0029570858,
0.12899451,
0.004157568,
0.010501232,
0.07710632,
0.062119417,
0.021002673,
-0.023212241,
-0.04327007,
-0.0567023,
0.04590105,
0.0019161925,
0.02637205,
0.029331107,
-0.029769177,
-0.050466795,
-0.08057371,
0.007419741,
-0.008777471,
0.02217743,
0.013535721,
0.03426775,
0.04592361,
0.009423588,
-0.023030678,
-0.024462381,
0.054334357,
0.06710402,
0.077300854,
0.0300022,
-0.0035417816,
-0.0046773576,
-0.0927158,
-0.0218652,
-0.043468982,
-0.035734102,
-0.038873542,
-0.0412869,
-0.016015923,
0.0038303286,
0.08523618,
-0.05200533,
-0.014904317,
-0.016793448,
0.04478206,
-0.017161047,
0.02638292,
0.007849463,
-0.040533304,
-0.017599737,
0.047704253,
0.034988616,
-0.013908102,
0.044121094,
0.040395457,
-0.010402818,
0.0063570403,
-0.014962749,
0.025776524,
0.023681043,
0.006042675,
0.017647373,
0.016301101,
-0.07793374,
-0.004771094,
0.012728924,
-0.00047885205,
-0.051591527,
0.03612118,
-0.02209703,
0.052075963,
-0.021613466,
-0.026258182,
0.008102769,
-0.04963262,
0.00062747014,
-0.012579783,
0.076374784,
-0.047350414,
-0.007680664,
0.062471915,
-0.0061351187,
-0.043617643,
0.023878522,
-0.09653609,
0.018392054,
-0.039719462,
0.065271765,
0.034548305,
0.004219043,
-0.003628092,
0.0047836183,
0.0132732885,
-0.028140727,
-0.015683327,
-0.052812085,
-0.019410037,
0.06812139,
-0.041178964,
0.014646207,
-0.0037439142,
0.0003088275,
-0.04985693,
0.0223661,
0.008887433,
0.0049061268,
0.042707395,
-0.021471359,
-0.06471383,
0.0022036259,
0.030178884,
-0.002764245,
-0.0063233464,
-0.04146522,
-0.008236624,
0.0037351896,
-0.027550086,
-0.0137326885,
0.0055276263,
0.0016785853,
0.050191414,
0.02629574,
-0.009129228,
0.06351977,
-0.037435655,
0.0467174,
-0.012987377,
-0.007550927,
-0.004503205,
0.010520655,
0.064984836,
0.009879768,
0.055787366,
-0.042653065,
0.024189176,
0.0378726,
-0.032453574,
0.043519154,
0.020133087,
-0.055212636,
-0.016188117,
0.03764466,
-0.022142444,
0.11164031,
0.019020407,
-0.008950892,
0.0517199,
0.0014494535,
0.041113462,
-0.0912906,
-0.04723132,
0.008548748,
0.028231544,
0.023689618,
-0.039103802,
-0.034011997,
-0.04731894,
0.03309799,
-0.044572156,
-0.116778485,
-0.028786778,
0.05798776,
0.05287191,
-0.0039562676,
-0.08213019,
-0.01224603,
-0.012757768,
0.035721667,
0.012440343,
0.0053813523,
-0.072770126,
0.0066190604,
0.038976185,
-0.037760906,
-0.0031381482,
-0.052277293,
-0.016870236,
-0.053451907,
-0.05629483,
-0.034493946,
-0.0048654405,
0.022051724,
0.028501945,
0.025858566,
-0.023936177,
-0.098391004,
-0.030646492,
-0.049461726,
-0.00086931954,
0.03593346,
0.015843417,
-0.03276966,
0.008957432,
-0.022735167,
-0.012159252,
0.07607085,
-0.059834506,
0.004478244,
0.03439635,
0.03683821,
0.062883355,
0.054430448,
-0.029807799,
0.0032295138,
0.08891875,
-0.026941199,
-0.00618463,
-0.022683868,
-0.024138795,
-0.036633875,
0.02097464,
-0.003001584,
0.020455033,
0.043717608,
0.06566654,
-0.029039463,
-0.0066977167,
-0.04504434,
0.022257777,
0.054422457,
0.029796708,
0.009008146,
0.028205348,
0.06255052,
-0.004475601,
0.059329458,
-0.038065027,
-0.027933009,
-0.07060949,
0.013978787,
-0.051300917,
0.02945564,
-0.008552103,
-0.009436655,
0.039747514,
-0.016741823,
0.04740887,
0.03521937,
-0.012574282,
-0.089222826,
-0.043515395,
-0.04158566,
0.0016020355,
0.02684753,
-0.019394692,
-0.02156877,
0.06316388,
0.01663444,
0.015482924,
0.047349654,
-0.028341234,
0.013805591,
-0.010708488,
-0.07627738,
0.08611209,
0.0089956885,
0.034438204,
0.016312746,
-0.03412846,
0.0770598,
-0.06790466,
0.036359854,
0.08038976,
0.023465984,
-0.019832904,
-0.0011524013,
-0.03804293,
0.04106918,
-0.028220456,
0.032340813,
-0.030669356,
-0.004353358,
-0.019439798,
0.0020563425,
0.03015629,
-0.06430176,
0.0034439075,
-0.045720384,
-0.06526568,
-0.0004192516,
-0.016580455,
-0.012596616,
0.039126,
-0.04699455,
-0.008973794,
0.015056125,
0.018929023,
-0.07840811,
-0.014792519,
-0.0044317124,
0.019588342,
0.035912346,
-0.035739247,
0.058755044,
-0.01856197,
0.021155646,
-0.073580906,
-0.04310776,
-0.023147091,
-0.010232029,
0.06352039,
0.039570276,
0.020424508,
0.051613245,
0.013395984,
-0.003908009,
-0.04643392,
0.019592889,
-0.008484923,
0.0031434586,
-0.046069775,
-0.01765311,
-0.041277196,
-0.070297986,
0.012561737,
-0.003500738,
-0.01729488,
-0.0033254062,
0.053035453,
-0.054218896,
-0.029708259,
-0.0047281524,
0.019236762,
-0.12249525,
0.03018237,
-0.028753102,
-0.031858314,
0.0811298,
-0.005711499,
-0.057587985,
0.014153141,
0.0006705577,
-0.024263157,
0.016729265,
-0.03195949,
-0.007259763,
-0.0035231581,
-0.03890975,
0.011460382,
-0.06591321,
-0.023756726,
-0.023958001,
0.030074941,
-0.0040949634,
-0.048368257,
-0.029692868,
0.027246583,
-0.024747347,
0.014442731,
-0.00832639,
-0.0002390868,
-0.013635633,
0.0035843733,
0.02354072,
-0.012829061,
-0.0060750768,
-0.044952527,
-0.05725624,
0.031746052,
-0.024419094,
0.032444403,
-0.029308707,
0.034302235,
-0.022495607,
0.015296428,
-0.0057196384,
-7.8588724e-05,
0.060303975,
0.06299601,
0.028222265,
-0.0071411408,
0.015196491,
0.02031155,
0.039635558,
0.079736926,
0.008736669,
-0.023079613,
-0.04490686,
-0.021764707,
-0.015199573,
0.036019534,
-0.0046079857,
0.04429082,
-0.04291344,
-0.05991891,
-0.006501417,
0.010603077,
0.03435066,
-0.065568395,
-0.04424192,
0.035055783,
0.019717937,
0.032764338,
0.021240309,
-0.01646063,
0.007835414,
0.06857148,
-0.013750999,
0.028333688,
-0.078255735,
-0.047899257,
-0.0006370693,
0.012606231,
0.012178417,
-0.013057751,
-0.008095854,
-0.013466724,
0.019036459,
-0.025450038,
0.021131655,
-0.02505666,
0.012961284,
0.0004236046,
-0.023920864,
-0.055114083,
0.082351916,
0.028973032,
0.025259241,
0.098259576,
-0.007385416,
0.003546012,
-0.05316339,
-0.04186183,
0.043638214,
-0.069299474,
-0.013284585,
-0.010019175,
0.012883975,
0.014200739,
-0.013508286,
0.0086570075,
-0.020393575,
0.10617594,
0.028786503,
-0.018674662,
0.026763268,
-0.0062548965,
-0.07215284,
0.055464335,
0.0029595464,
-0.009364344,
-0.096402094,
0.02823341,
-0.022853011,
0.04750492,
0.008378555,
0.016491622,
0.01860681,
0.048116222,
0.106049344,
-0.028929656,
-0.008896546,
0.033615295,
-0.0070807124,
-0.05684197,
-0.061439563,
0.0060220268,
0.046171866,
-0.01574131,
-0.07562956,
0.0024098414,
0.0006304895,
-0.07831614,
0.060869616,
0.00076000375,
-0.008209363,
-0.04139266,
-0.085268535,
-0.028194478,
-0.024567788,
-0.04218179,
0.023546752,
0.036236234,
0.017199656,
-0.03315456,
-0.023814544,
0.038755447,
-0.023165299,
-0.049283065,
-0.006907019,
0.040826146,
0.017533792,
-0.036849793,
-0.015506943,
-0.010768763,
-0.08758806,
-0.0295733,
0.055843282,
-0.012555046,
0.0076235603,
0.008802991,
0.026661193,
-0.023899797,
0.043548774,
-0.034339137,
-0.027354732,
-0.07583677,
0.020500224,
0.036802996,
0.031019075,
0.04605757,
-0.004433706,
0.0108612785,
0.050121468,
-0.07816735,
-0.014776514,
-0.04565195,
-0.0036854912,
0.0075577567,
-0.017044865,
0.030597543,
-0.013623054,
-0.0648466,
-0.0318741,
-0.059455115,
-0.024783187,
-0.0088010235,
0.11127796,
0.03429834,
-0.010424589,
-0.06355135,
0.034265812,
0.02680333,
-0.007930513,
0.030092249,
0.008321974,
0.03125566,
-0.06832331,
-0.0076806936,
0.034010306,
-0.087202646,
-0.047684345,
0.06384632,
-0.026591811,
-0.0016003181,
0.05721666,
-0.0024700803,
-0.029714238,
0.07761957,
-0.04561395,
-0.053199258,
0.030417573,
-0.01958724,
0.0012449475,
-0.04003076,
0.08825553,
-0.023196172,
-0.08629044,
-0.049815316,
0.027229005,
0.0021765123,
0.03438692,
-0.09314263,
-0.019655729,
0.018762926,
0.025670087,
-0.017116003,
0.031716976,
-0.05509443,
0.032953184,
-0.02264915,
0.04861606,
-0.050201602,
0.033154316,
0.009971947,
-0.037610047,
0.016600395,
-0.031037569,
-0.015495428,
0.026365642,
-0.043527953,
0.055781424,
0.06780075,
-0.015966192,
0.03201043,
0.028026119
],
"index": 0,
"object": "embedding"
}
],
"model": "togethercomputer/m2-bert-80M-32k-retrieval",
"object": "list",
"usage": null
}
},
"is_streaming": false
}
}


@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "http://localhost:8080/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Which planet has rings around it with a name starting with letter S?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, so the user is asking which planet has rings around it and its name starts with the letter S. Let me think... I know that the Sun is a star, not a planet. So the Moon is a natural satellite, which has the Moon's name and rings. But the Moon's name starts with M, not S. The Earth has the name Earth, but the rings aren't really around the Earth in any real sense. Mars has a thin ring of dust. Venus and Mercury don't have rings in the sense of planetary rings as we know. Wait, maybe the answer is the Moon, even though it's not the same as the name starting with S. But the question says a planet, so if there's a planet named S, that would be it. But actually, the only planet with rings is Jupiter. Wait, Jupiter has a famous system of rings. But why does the question mention a planet with a name starting with S? Maybe there's a trick. Let me double-check. Jupiter's name starts with J, so maybe the answer is Venus? But Venus doesn't have rings. Mercury, too, doesn't. The Moon, as a planet, a dwarf planet, and has rings. Despite the name, the rings are around it. So the answer would be the Moon. Therefore, the planet with rings and name starting with S is the Moon.\n</think>\n\nThe planet with rings around it and a name starting with the letter **S** is the **Moon**. Though its name doesn't start with an **S**, it is technically a dwarf planet and has the rings in its orbit. Oops Saturn!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1757550394,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": {
"completion_tokens": 336,
"prompt_tokens": 22,
"total_tokens": 358,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}


@ -0,0 +1,176 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": true,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtX7R-62bZhn-9801a22f6ad243dc",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758039022,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtX7R-62bZhn-9801a22f6ad243dc",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": "call_jy63yt7kp8hfof3sy4pim94o",
"function": {
"arguments": "",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758039022,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtX7R-62bZhn-9801a22f6ad243dc",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"city\":\"Tokyo\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758039022,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtX7R-62bZhn-9801a22f6ad243dc",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 128008
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"text": "",
"seed": 1489065696184500700
}
],
"created": 1758039022,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 24,
"prompt_tokens": 193,
"total_tokens": 217,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
}
}
}
],
"is_streaming": true
}
}

File diff suppressed because it is too large


@ -0,0 +1,366 @@
{
"request": {
"method": "POST",
"url": "http://localhost:8080/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": true,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "{",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": " \"",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "c",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "ity",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "\":",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": " \"",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "Tok",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "yo",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "\"}",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
}
],
"is_streaming": true
}
}
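The chunks above stream the tool call's JSON arguments one fragment at a time ("{", " \"", "c", "ity", ..., "\"}"), keyed by tool-call index, so a client has to concatenate the deltas before parsing. A minimal reassembly sketch, assuming chunk objects shaped like this recording (the function and variable names are illustrative, not part of the fixture):

```python
import json

def accumulate_tool_calls(chunks):
    """Concatenate streamed tool-call argument fragments by index."""
    calls = {}  # index -> {"name": ..., "arguments": ...}
    for chunk in chunks:
        for tc in chunk.choices[0].delta.tool_calls or []:
            entry = calls.setdefault(tc.index, {"name": None, "arguments": ""})
            if tc.function.name:
                entry["name"] = tc.function.name
            entry["arguments"] += tc.function.arguments or ""
    # Parse each reassembled JSON string once the stream is exhausted.
    return {i: (c["name"], json.loads(c["arguments"])) for i, c in calls.items()}

# For the chunks recorded above this yields {0: ("get_weather", {"city": "Tokyo"})}.
```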

File diff suppressed because it is too large.

View file

@@ -0,0 +1,976 @@
{
"request": {
"method": "POST",
"url": "http://localhost:8080/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"max_tokens": 50,
"stream": true
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": [
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " several"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " several"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " times"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " more"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " popular"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " than"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " ____"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "Answer"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": ":\n\n"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "The"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " roses"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " red"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": ","
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " v"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " several"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " several"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " times"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " more"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " popular"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " than"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " **"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "numbers"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "**"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": ".\n\n"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "Explanation"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " \""
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "se"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "veral"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " several"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " times"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " more"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " popular"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " than"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "\""
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " can"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " be"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " replaced"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " with"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " \""
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "numbers"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": "\""
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " as"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "",
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " number"
}
],
"created": 1757550367,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": {
"completion_tokens": 50,
"prompt_tokens": 25,
"total_tokens": 75,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
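Each chunk above carries a single text fragment with an empty finish_reason; only the final chunk reports "length" plus a usage block. A minimal consumer sketch against the same endpoint and model (the api_key is a placeholder, and a plain /v1 base URL is assumed here since the doubled /v1/v1 in the recorded URL appears to come from how the recorder joins base URL and endpoint):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="placeholder")

stream = client.completions.create(
    model="Qwen/Qwen3-0.6B",
    prompt="Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
    max_tokens=50,
    stream=True,
)

pieces = []
for chunk in stream:
    choice = chunk.choices[0]
    pieces.append(choice.text)
    if choice.finish_reason:  # empty string until the final chunk, then e.g. "length"
        print("usage:", chunk.usage)

print("".join(pieces))
```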

View file

@@ -0,0 +1,800 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/embeddings",
"headers": {},
"body": {
"model": "togethercomputer/m2-bert-80M-32k-retrieval",
"input": "This is the first text"
},
"endpoint": "/v1/embeddings",
"model": "togethercomputer/m2-bert-80M-32k-retrieval"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.039021637,
0.022414008,
0.060316082,
0.010932758,
0.018470073,
-0.038455445,
0.013484707,
-0.038724504,
-0.025575833,
-0.07131675,
0.03463345,
-0.025232196,
0.020823235,
0.03832292,
-0.006293115,
-0.088807434,
0.0063370736,
-0.002888027,
0.02621656,
0.055453233,
0.102450415,
0.03387425,
-0.005548249,
0.06926162,
0.036552645,
-0.027929714,
0.05147974,
-0.084861636,
-0.05467612,
0.0061274734,
0.01355064,
-0.027067322,
0.099598646,
-0.05280082,
-0.03848137,
-0.0138273295,
0.00055626774,
-0.062084854,
-0.026424624,
-0.004740091,
0.06750933,
-0.05090067,
0.06227124,
-0.01807564,
0.0048294156,
0.013328212,
0.004276883,
-0.034934912,
-0.036818415,
0.0185289,
0.0048565175,
0.016870664,
-0.040981345,
-0.035420854,
-0.091292314,
-0.08983982,
-0.048739515,
0.12078825,
0.04027495,
0.088196404,
0.082896,
-0.08266004,
-0.00082181377,
-0.050194185,
0.024180485,
-0.027468672,
-0.08769602,
0.047489725,
-0.03834715,
0.07631481,
-0.06501303,
-0.03695376,
0.067694835,
0.027814003,
-0.051688053,
-0.032236356,
0.039202936,
0.03445711,
0.009532945,
-0.034482885,
-0.08042295,
0.008322418,
0.05848545,
-0.064453684,
-0.17329726,
-0.047616575,
0.045936666,
0.023837132,
-0.015925486,
-0.0857517,
-0.0001586331,
-0.044116773,
-0.029393503,
0.009738323,
0.03763726,
-0.11253048,
0.019114532,
0.07549436,
-0.1030746,
-0.038988255,
0.011407976,
-0.037570667,
0.05159809,
0.007962588,
0.01113923,
0.003076782,
0.15470116,
0.0043370854,
0.030429134,
-0.027383734,
-0.030138142,
-0.079299994,
0.12148583,
0.034556936,
-0.0064313645,
0.048751578,
-0.05864567,
0.026685659,
-0.09871483,
-0.046130598,
0.019625148,
-0.072314,
0.03352563,
0.01364348,
-0.085728094,
0.06642468,
-0.094013095,
-0.037293892,
0.0076811705,
0.0052874135,
0.018115167,
-0.055315576,
-0.052764144,
-0.034311842,
0.015955461,
-0.07966574,
-0.028749859,
0.03149985,
-0.047564246,
0.008608991,
-0.021272784,
0.030198015,
-0.0107804965,
0.017173572,
-0.011607755,
-0.050619457,
0.030204969,
0.10163846,
-0.0056075957,
0.06950345,
0.04063133,
-0.03608383,
0.023170248,
-0.014745303,
-0.014478895,
0.10499135,
-0.038678814,
-0.0075368164,
0.08199838,
-0.09530577,
0.020091686,
0.10653022,
0.08388272,
-0.0045513124,
-0.04053859,
-0.0025074913,
0.017358577,
-0.03037232,
0.04310344,
-0.04824635,
0.055064622,
-0.019335788,
-0.0674805,
0.024816237,
0.019295547,
0.0007229409,
0.04357454,
0.021688526,
0.08630486,
-0.011211191,
-0.039039955,
0.17257652,
-0.007145191,
0.006575071,
-0.0139306225,
-0.014735097,
-0.044341516,
-0.11539079,
0.033123154,
-0.011538915,
-0.024190484,
-0.018813878,
0.03229297,
-0.04379363,
0.03185381,
-0.035783295,
0.06494934,
0.05133508,
0.00010083616,
0.007334995,
0.06611978,
-0.062722,
0.045553267,
-0.011721417,
0.020822436,
-0.04873414,
0.03926427,
0.007051802,
-0.05594363,
0.03565722,
-0.12122127,
0.027855415,
-0.016186016,
-0.041470908,
-0.08864265,
-0.0036498592,
0.010997135,
-0.012785444,
-0.06519897,
0.027590077,
0.067321666,
-0.05896251,
0.008983399,
-0.095143765,
0.011621533,
-0.06121848,
0.050336383,
0.0019902636,
0.053377967,
-0.045287643,
0.09474427,
-0.053598337,
0.08048404,
-0.08297755,
0.08607313,
0.004596277,
0.0204861,
0.0132703995,
0.0492952,
0.003006371,
0.024936337,
-0.021873668,
0.11727927,
-0.043151148,
-0.0846394,
-0.048050277,
0.0012273242,
0.16534594,
0.07620599,
0.0144042745,
0.09004986,
0.06599925,
0.050307803,
-0.014542778,
-0.06923349,
0.08603958,
-0.003079753,
-0.08008583,
-0.04276064,
0.07779741,
-0.04970902,
0.024014566,
0.026120175,
-0.007566401,
-0.06362058,
0.0075124875,
-0.025173014,
0.06797637,
0.064056545,
-0.12027379,
-0.030917957,
0.009303285,
0.1108725,
0.048372857,
-0.025575588,
-0.0063446634,
0.011040862,
-0.03459656,
-0.0144168,
0.048665646,
-0.009920939,
-0.0061537125,
-0.10304914,
0.014452626,
0.016036827,
0.012599703,
0.016684191,
-0.039659906,
0.010836161,
-0.029463075,
0.0011919601,
0.06632273,
-0.05316992,
0.039452244,
-0.021640282,
-0.05948179,
-0.015061293,
-0.015513855,
0.04358236,
-0.0029279767,
0.0860453,
-0.012484551,
-0.013506936,
0.016622225,
0.03162366,
-0.09996153,
-0.05663382,
-0.015155038,
0.00578972,
0.025347538,
-0.06958232,
0.10877864,
-0.036945637,
0.03478135,
0.13662694,
-0.020611005,
0.07592442,
0.0036063113,
-0.09048903,
0.016554832,
-0.04288513,
-0.027900286,
-0.07563455,
0.030791664,
-0.033230122,
0.018658046,
-0.043807156,
0.029736735,
0.10202865,
0.009116146,
-0.09378922,
0.099590845,
0.0642359,
0.0589953,
0.05296719,
-0.07642986,
-0.11738337,
-0.05376279,
0.09199399,
-0.0627918,
0.03704901,
-0.037008967,
-0.05638905,
0.009441371,
0.04416073,
-0.03527975,
-0.03531018,
0.07021692,
0.05659684,
0.099865966,
0.076215744,
0.043112382,
0.007842607,
-0.039226923,
0.006264895,
-0.03105526,
0.060152344,
0.040446483,
0.10218391,
-0.07178106,
0.015407178,
-0.06229486,
0.0043686125,
0.09733845,
-0.09527866,
0.041407365,
0.06550996,
0.08803008,
0.09149921,
0.04229226,
0.052133556,
0.047242433,
0.014378367,
0.03682277,
0.06764445,
0.066040926,
0.021740213,
0.04180941,
-0.00519632,
-0.0111550195,
0.017352529,
-0.00943155,
0.11390086,
0.05582122,
0.035394136,
0.0024461604,
0.04081662,
-0.0007266066,
0.06292638,
0.0052844593,
0.05790997,
-0.09407522,
-0.05039574,
0.07852171,
-0.08000922,
0.13302545,
0.10419625,
0.039512042,
-0.09167407,
0.010040825,
0.013924355,
0.027515184,
0.079743214,
0.09399837,
0.0151610905,
0.004694856,
-0.0536953,
0.06531984,
0.027906924,
-0.0012715638,
0.09168681,
-0.00026439782,
-0.0041136686,
0.033571295,
-0.01907176,
0.11883433,
-0.0065728375,
-0.0062215794,
-0.1049895,
-0.03321981,
-0.026450735,
0.072518945,
-0.11240429,
-0.022515744,
-0.048495665,
-0.037087325,
0.00032197312,
0.051534563,
0.046150282,
-0.08213623,
0.09886837,
0.041117694,
0.05323094,
-0.05427183,
-0.022201112,
-0.024121372,
0.012735752,
0.1397762,
-0.007587272,
0.05582085,
0.06499377,
-0.018458825,
-0.021883465,
0.032667745,
0.02018645,
0.040008776,
0.07482824,
-0.024819402,
0.045242358,
-0.06036402,
0.025522556,
-0.025958247,
0.018367121,
0.029390294,
-0.031080022,
-0.010285386,
-0.007700369,
0.045184247,
0.044544965,
0.029447366,
0.014604208,
-0.09001254,
-0.09150779,
0.048845917,
-0.005016622,
-0.030419605,
-0.021073101,
-0.028362123,
0.04180255,
0.011223455,
0.026317155,
0.07052029,
0.04195792,
-0.010761702,
-0.054835323,
0.047067013,
0.04737349,
0.09244638,
0.096748084,
-0.03332587,
-0.009952178,
-0.0030183739,
0.07009167,
0.05392541,
0.024944762,
0.0061005787,
0.028459419,
-0.05767917,
-0.051464006,
0.08488547,
-0.016385203,
-0.04579279,
-0.084523976,
-0.032011546,
-0.007594041,
-0.06051386,
-0.046265714,
-0.027389096,
-0.044890895,
-0.0022862924,
-0.1268961,
-0.037864592,
0.024412185,
-0.07392371,
-0.014362709,
0.07425692,
0.022583768,
0.011156761,
-0.057216533,
-0.039548866,
-0.018076254,
-0.05556914,
-0.057198036,
-0.03188685,
0.090208404,
0.10571588,
0.01070536,
0.08128956,
0.017667988,
-0.10340015,
0.07804198,
-0.019781966,
0.06535109,
-0.07777538,
-0.025819557,
-0.08128869,
-0.034394037,
0.019422948,
-0.039221227,
-0.08033355,
-0.02329798,
-0.0962552,
-0.016624983,
0.038193095,
-0.06870783,
-0.033954047,
-0.0025311739,
-0.114151455,
-0.00511124,
-0.06920173,
0.044555113,
0.10051683,
0.04055453,
-0.06167893,
-0.01584111,
0.0030792183,
4.6655536e-05,
-0.026384909,
-0.012856535,
-0.06174471,
0.0024448705,
-0.022707395,
0.066114195,
-0.010608763,
-0.01576041,
-0.0010933182,
0.03396316,
0.008329627,
-0.060327142,
-0.05505636,
-0.028406821,
-0.025708841,
0.016102789,
0.03405433,
0.007868113,
0.13327968,
0.072789304,
-0.08000951,
-0.050192088,
-0.05803803,
-0.050078847,
-0.01996999,
0.043255676,
-0.04441973,
0.08783117,
0.002935635,
0.040976398,
-0.01976899,
0.018852778,
-0.03215457,
-0.04958742,
0.015443288,
0.010633601,
-0.074571095,
0.053966194,
-0.01581196,
-0.04183213,
-0.04719714,
0.033312585,
0.011825424,
-0.029853545,
-0.050666492,
-0.08864941,
-0.022672195,
0.0724055,
0.0037794008,
0.055587664,
-0.13644798,
0.022921626,
0.1152114,
0.07047247,
0.030930748,
-0.0052061337,
0.044788003,
-0.08634308,
-0.10505402,
-0.025340958,
-0.08207144,
0.059532717,
-0.0062416205,
0.1022889,
0.010608143,
0.041661825,
-0.097806565,
0.0038305484,
0.05404457,
0.032105837,
0.06415997,
-0.049071103,
-0.03720757,
-0.023321476,
0.12579422,
0.043440778,
-0.011532883,
-0.05620173,
0.005197981,
-0.12449035,
0.008241525,
-0.10594952,
0.102292866,
-0.0699,
-0.11592147,
0.06966665,
-0.027437769,
-0.014774349,
0.018875254,
-0.017957961,
0.091627896,
0.04989476,
0.0798358,
0.04239699,
-0.007844917,
-0.06630319,
0.052326147,
0.02648383,
0.044119354,
-0.06851671,
0.15443392,
-0.020682698,
-0.03766801,
0.0155308945,
-0.063717306,
0.0006521008,
-0.05569479,
-0.043325484,
-0.014842672,
-0.025855135,
0.017403143,
-0.011325402,
0.054577086,
0.02011184,
-0.09925977,
-0.0069759586,
-0.03428202,
0.0034359726,
-0.15824135,
0.000930797,
-0.113140985,
-0.044972613,
-0.02884488,
-0.06731342,
0.04106218,
0.028871017,
-0.011909599,
0.03274342,
0.018106263,
-0.020201381,
0.1281747,
0.020703837,
0.024401633,
0.042717557,
0.014739593,
0.07050051,
0.038078446,
-0.022462513,
-0.004700358,
-0.014908828,
0.037429586,
0.021075286,
-0.047952563,
-0.010115325,
0.011719644,
0.052587837,
-0.026325963,
0.06416419,
0.04302814,
-0.032076415,
0.03226265,
0.047885012,
-0.08571586,
0.13789223,
-0.039638847,
0.08949073,
0.0019859069,
0.054476757,
-0.04336167,
-0.12529649,
0.013598417,
-0.046129137,
0.0031463325,
-0.10019061,
0.02212261,
-0.024540763,
-0.020073807,
-0.015366339,
-0.04205672,
-0.004573892,
0.04018059,
-0.06835582,
0.0762453,
-0.07784769,
-0.03393797,
-0.084803775,
0.028064115,
0.06559264,
-0.10455632,
0.039434727,
-0.038992915,
-0.09218861,
0.013562555,
-0.06523423,
0.10188195,
0.05163541,
0.02234651,
0.01926983,
0.0017454309,
0.030410308,
0.025801515,
-0.0333776,
0.0030322578,
0.055338234,
-0.017410548,
0.07205084,
0.04127999,
0.0026357244,
0.00054674776,
-0.018812224,
0.051227525,
2.2485852e-05,
-0.04581609,
-0.106634825,
0.018237107,
0.048612136,
-0.018699843,
-0.035245672,
-0.0367398,
-0.09525288,
0.05530859,
0.023024498,
-0.05791263,
-0.011325011,
-0.055147734,
0.02724777,
-0.10974393,
0.015870394,
0.053438365,
0.032307543,
0.055390432
],
"index": 0,
"object": "embedding"
}
],
"model": "togethercomputer/m2-bert-80M-32k-retrieval",
"object": "list",
"usage": null
}
},
"is_streaming": false
}
}
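The embeddings recording returns one dense vector per input under data[0].embedding. A short sketch of putting such vectors to work with cosine similarity (the second input string and the placeholder key are invented for illustration):

```python
import math
from openai import OpenAI

client = OpenAI(base_url="https://api.together.xyz/v1", api_key="placeholder")

def embed(text: str) -> list[float]:
    resp = client.embeddings.create(
        model="togethercomputer/m2-bert-80M-32k-retrieval",
        input=text,
    )
    return resp.data[0].embedding

def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm

print(cosine(embed("This is the first text"), embed("This is a second text")))
```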

View file

@@ -0,0 +1,84 @@
{
"request": {
"method": "POST",
"url": "http://localhost:8080/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": null,
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "0",
"function": {
"arguments": "{\"city\":\"Tokyo\"}",
"name": "get_weather",
"description": null
},
"type": "function"
}
]
}
}
],
"created": 1757550396,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": {
"completion_tokens": 19,
"prompt_tokens": 239,
"total_tokens": 258,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
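In this non-streaming variant the same tool call arrives fully assembled, so handling it reduces to a single json.loads over message.tool_calls[i].function.arguments. A minimal dispatch sketch (the handler mapping is hypothetical):

```python
import json

def dispatch_tool_calls(completion, handlers):
    """Route assembled tool calls from a ChatCompletion to local handlers."""
    results = []
    for tc in completion.choices[0].message.tool_calls or []:
        args = json.loads(tc.function.arguments)  # e.g. {"city": "Tokyo"}
        results.append(handlers[tc.function.name](**args))
    return results

# Usage with a stub handler for the recording above:
# dispatch_tool_calls(completion, {"get_weather": lambda city: f"Sunny in {city}"})
```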

View file

@@ -0,0 +1,350 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"messages": [
{
"role": "user",
"content": "What's the name of the Sun in latin?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 791
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "The",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": " Latin",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 20023
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Latin",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": " name",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 836
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " name",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 369
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " for",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 279
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": " Sun",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 8219
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Sun",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 374
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " is",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": " \"",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 330
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " \"",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": "Sol",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 49912
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "Sol",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": "\".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 3343
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "\".",
"seed": null
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtQrM-62bZhn-9801a1ac2a5f9b29",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 128009
},
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "",
"seed": 10870795372179526000
}
],
"created": 1758039001,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 11,
"prompt_tokens": 45,
"total_tokens": 56,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
}
}
}
],
"is_streaming": true
}
}
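Note that in this Together stream the usage block (and the seed) rides only on the final chunk with finish_reason "stop"; intermediate chunks carry usage: null. A sketch of collecting both the text and the trailing usage, assuming chunks shaped like the recording:

```python
def drain_stream(stream):
    """Join streamed content and capture the usage block from the final chunk."""
    pieces, usage = [], None
    for chunk in stream:
        delta = chunk.choices[0].delta
        if delta.content:
            pieces.append(delta.content)
        if chunk.usage is not None:  # only populated on the last chunk
            usage = chunk.usage
    return "".join(pieces), usage

# For the recording above this returns
# ('The Latin name for the Sun is "Sol".', <usage: 11 completion / 45 prompt tokens>)
```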

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "http://localhost:8080/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Which planet do humans live on?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking which planet humans live on. I need to make sure I answer this accurately. First, I should recall what I know about our solar system. The Earth is our home, and it's in our solar system. There are eight planets in total, right? Let me check that. Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Yep, that's the list.\n\nBut wait, the user might be confusing Earth with Mars. I should clarify that Earth is the only planet known to support life. The other planets are mostly gas giants and have no liquid water, so they don't support life as Earth does. So the answer should be Earth. I should also mention that although there are other planets, none have liquid water, which makes the answer more complete.\n\nI need to make sure there are no alternatives. Maybe some people might think Mars, but I know that's not the case. Also, it's good to mention that life on Earth is closely linked to the presence of water, which is why Earth is our only planet with that characteristic. That way, the answer is not only accurate but also informative.\n</think>\n\nHumans live on **Earth**, the planet that supports life as we know it. The Earth is the only known planet in our solar system where liquid water exists and where life can occur. Other planets are considered \"gas giants\" or \"ice giants\" due to their extreme conditions and lack of liquid water, making them inhospitable for life.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1757550390,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": {
"completion_tokens": 312,
"prompt_tokens": 15,
"total_tokens": 327,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,611 @@
{
"request": {
"method": "POST",
"url": "https://api.together.xyz/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"messages": [
{
"role": "user",
"content": "What is the name of the US captial?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 791
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "The",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " name",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 836
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " name",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 315
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 279
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 2326
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " US",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " capital",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 6864
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " capital",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 374
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " is",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " Washington",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 6652
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Washington",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 11
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ",",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " D",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 423
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " D",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": ".C",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 732
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".C",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 13
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " (",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 320
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " (",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": "short",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 8846
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "short",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 369
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " for",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " District",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 11182
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " District",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 315
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": " Columbia",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 19326
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Columbia",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": ").",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 570
},
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ").",
"seed": null
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "oBUtdGc-62bZhn-9801a2b11e77499b",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null,
"token_id": 128009
},
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "",
"seed": 10296991816860367000
}
],
"created": 1758039042,
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 20,
"prompt_tokens": 45,
"total_tokens": 65,
"completion_tokens_details": null,
"prompt_tokens_details": null,
"cached_tokens": 0
}
}
}
],
"is_streaming": true
}
}

File diff suppressed because it is too large.

View file

@@ -9,6 +9,7 @@ from pathlib import Path
import pandas as pd
import pytest
import requests
@pytest.fixture
@@ -77,7 +78,46 @@ def test_scoring_functions_register(
assert len(list_response) > 0
assert any(x.identifier == sample_scoring_fn_id for x in list_response)
# TODO: add unregister api for scoring functions
def test_scoring_functions_unregister(
llama_stack_client,
sample_scoring_fn_id,
judge_model_id,
sample_judge_prompt_template,
):
llm_as_judge_provider = [
x
for x in llama_stack_client.providers.list()
if x.api == "scoring" and x.provider_type == "inline::llm-as-judge"
]
if len(llm_as_judge_provider) == 0:
pytest.skip("No llm-as-judge provider found, cannot test unregister")
llm_as_judge_provider_id = llm_as_judge_provider[0].provider_id
# Register first
register_scoring_function(
llama_stack_client,
llm_as_judge_provider_id,
sample_scoring_fn_id,
judge_model_id,
sample_judge_prompt_template,
)
# Ensure it is present
list_response = llama_stack_client.scoring_functions.list()
assert any(x.identifier == sample_scoring_fn_id for x in list_response)
# Unregister scoring fn
try:
base_url = llama_stack_client.base_url
except AttributeError:
pytest.skip("No server base_url available; cannot test HTTP unregister in library mode")
resp = requests.delete(f"{base_url}/v1/scoring-functions/{sample_scoring_fn_id}", timeout=30)
assert resp.status_code in (200, 204)
list_after = llama_stack_client.scoring_functions.list()
assert all(x.identifier != sample_scoring_fn_id for x in list_after)
@pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])

View file

@@ -90,6 +90,24 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
),
"tgi": Setup(
name="tgi",
description="Text Generation Inference (TGI) provider with a text model",
env={
"TGI_URL": "http://localhost:8080",
},
defaults={
"text_model": "tgi/Qwen/Qwen3-0.6B",
},
),
"together": Setup(
name="together",
description="Together computer models",
defaults={
"text_model": "together/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
"embedding_model": "together/togethercomputer/m2-bert-80M-32k-retrieval",
},
),
}
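The two new setups bundle provider endpoints and default model IDs; the tgi entry also pins TGI_URL to the local server used in the recordings above. A sketch of how a test run might resolve one of these by name (the Setup fields come from the diff above; the resolver itself is illustrative):

```python
import os

def apply_setup(name: str) -> dict[str, str]:
    """Export a setup's env vars and return its model defaults."""
    setup = SETUP_DEFINITIONS[name]
    for key, value in getattr(setup, "env", {}).items():
        os.environ.setdefault(key, value)  # e.g. TGI_URL=http://localhost:8080
    return setup.defaults

# apply_setup("tgi") -> {"text_model": "tgi/Qwen/Qwen3-0.6B"}
```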

View file

@@ -105,6 +105,9 @@ class ScoringFunctionsImpl(Impl):
async def register_scoring_function(self, scoring_fn):
return scoring_fn
async def unregister_scoring_function(self, scoring_fn_id: str):
return scoring_fn_id
class BenchmarksImpl(Impl):
def __init__(self):
@@ -113,6 +116,9 @@ class BenchmarksImpl(Impl):
async def register_benchmark(self, benchmark):
return benchmark
async def unregister_benchmark(self, benchmark_id: str):
return benchmark_id
class ToolGroupsImpl(Impl):
def __init__(self):
@@ -330,6 +336,13 @@ async def test_scoring_functions_routing_table(cached_disk_dist_registry):
assert "test-scoring-fn" in scoring_fn_ids
assert "test-scoring-fn-2" in scoring_fn_ids
# Unregister scoring functions and verify listing
for scoring_fn in scoring_functions.data:
    await table.unregister_scoring_function(scoring_fn.scoring_fn_id)
scoring_functions_list_after_deletion = await table.list_scoring_functions()
assert len(scoring_functions_list_after_deletion.data) == 0
async def test_benchmarks_routing_table(cached_disk_dist_registry):
table = BenchmarksRoutingTable({"test_provider": BenchmarksImpl()}, cached_disk_dist_registry, {})
@@ -347,6 +360,15 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry):
benchmark_ids = {b.identifier for b in benchmarks.data}
assert "test-benchmark" in benchmark_ids
# Unregister the benchmark and verify removal
await table.unregister_benchmark(benchmark_id="test-benchmark")
benchmarks_after = await table.list_benchmarks()
assert len(benchmarks_after.data) == 0
# Unregistering a non-existent benchmark should raise a clear error
with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"):
await table.unregister_benchmark(benchmark_id="dummy_benchmark")
async def test_tool_groups_routing_table(cached_disk_dist_registry):
table = ToolGroupsRoutingTable({"test_provider": ToolGroupsImpl()}, cached_disk_dist_registry, {})
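Together these assertions pin down the delete semantics: unregistering every registered object empties the listing, and unregistering an unknown ID must fail with a descriptive ValueError rather than silently succeeding. A condensed sketch of the same contract (table and method names as in the tests above):

```python
import pytest

async def assert_unregister_contract(table):
    """Drain all benchmarks, then check the not-found error path."""
    for benchmark in (await table.list_benchmarks()).data:
        await table.unregister_benchmark(benchmark_id=benchmark.identifier)
    assert len((await table.list_benchmarks()).data) == 0

    # A second unregister of a now-missing ID should hit the error path.
    with pytest.raises(ValueError, match="not found"):
        await table.unregister_benchmark(benchmark_id="dummy_benchmark")
```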

View file

@@ -155,16 +155,10 @@ class TestInferenceRecording:
async def test_recording_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that recording mode captures and stores responses."""
async def mock_create(*args, **kwargs):
return real_openai_chat_response
temp_storage_dir = temp_storage_dir / "test_recording_mode"
with patch(
"openai.resources.chat.completions.AsyncCompletions.create", new_callable=AsyncMock, side_effect=mock_create
):
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
response = await client.chat.completions.create(
model="llama3.2:3b",
@@ -176,6 +170,7 @@ class TestInferenceRecording:
# Verify the response was returned correctly
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
client.chat.completions._post.assert_called_once()
# Verify recording was stored
storage = ResponseStorage(temp_storage_dir)
@@ -183,17 +178,11 @@
async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that replay mode returns stored responses without making real calls."""
async def mock_create(*args, **kwargs):
return real_openai_chat_response
temp_storage_dir = temp_storage_dir / "test_replay_mode"
# First, record a response
with patch(
"openai.resources.chat.completions.AsyncCompletions.create", new_callable=AsyncMock, side_effect=mock_create
):
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
response = await client.chat.completions.create(
model="llama3.2:3b",
@@ -202,11 +191,12 @@
max_tokens=50,
user=NOT_GIVEN,
)
client.chat.completions._post.assert_called_once()
# Now test replay mode - should not call the original method
with patch("openai.resources.chat.completions.AsyncCompletions.create") as mock_create_patch:
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
response = await client.chat.completions.create(
model="llama3.2:3b",
@@ -219,7 +209,7 @@ class TestInferenceRecording:
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
# Verify the original method was NOT called
mock_create_patch.assert_not_called()
client.chat.completions._post.assert_not_called()
async def test_replay_mode_models(self, temp_storage_dir):
"""Test that replay mode returns stored responses without making real model listing calls."""
@@ -272,16 +262,23 @@ class TestInferenceRecording:
async def test_embeddings_recording(self, temp_storage_dir, real_embeddings_response):
"""Test recording and replay of embeddings calls."""
async def mock_create(*args, **kwargs):
return real_embeddings_response
# baseline - mock works without recording
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
response = await client.embeddings.create(
model=real_embeddings_response.model,
input=["Hello world", "Test embedding"],
encoding_format=NOT_GIVEN,
)
assert len(response.data) == 2
assert response.data[0].embedding == [0.1, 0.2, 0.3]
client.embeddings._post.assert_called_once()
temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
# Record
with patch(
"openai.resources.embeddings.AsyncEmbeddings.create", new_callable=AsyncMock, side_effect=mock_create
):
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
response = await client.embeddings.create(
model=real_embeddings_response.model,
@@ -294,9 +291,9 @@ class TestInferenceRecording:
assert len(response.data) == 2
# Replay
with patch("openai.resources.embeddings.AsyncEmbeddings.create") as mock_create_patch:
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
response = await client.embeddings.create(
model=real_embeddings_response.model,
@@ -308,7 +305,7 @@ class TestInferenceRecording:
assert response.data[0].embedding == [0.1, 0.2, 0.3]
# Verify original method was not called
mock_create_patch.assert_not_called()
client.embeddings._post.assert_not_called()
async def test_completions_recording(self, temp_storage_dir):
real_completions_response = OpenAICompletion(
@@ -326,17 +323,25 @@ class TestInferenceRecording:
],
)
async def mock_create(*args, **kwargs):
return real_completions_response
temp_storage_dir = temp_storage_dir / "test_completions_recording"
# baseline - mock works without recording
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.completions._post = AsyncMock(return_value=real_completions_response)
response = await client.completions.create(
model=real_completions_response.model,
prompt="Hello, how are you?",
temperature=0.7,
max_tokens=50,
user=NOT_GIVEN,
)
assert response.choices[0].text == real_completions_response.choices[0].text
client.completions._post.assert_called_once()
# Record
with patch(
"openai.resources.completions.AsyncCompletions.create", new_callable=AsyncMock, side_effect=mock_create
):
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.completions._post = AsyncMock(return_value=real_completions_response)
response = await client.completions.create(
model=real_completions_response.model,
@@ -347,11 +352,12 @@ class TestInferenceRecording:
)
assert response.choices[0].text == real_completions_response.choices[0].text
client.completions._post.assert_called_once()
# Replay
with patch("openai.resources.completions.AsyncCompletions.create") as mock_create_patch:
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
client.completions._post = AsyncMock(return_value=real_completions_response)
response = await client.completions.create(
model=real_completions_response.model,
prompt="Hello, how are you?",
@@ -359,7 +365,7 @@ class TestInferenceRecording:
max_tokens=50,
)
assert response.choices[0].text == real_completions_response.choices[0].text
mock_create_patch.assert_not_called()
client.completions._post.assert_not_called()
async def test_live_mode(self, real_openai_chat_response):
"""Test that live mode passes through to original methods."""

View file

@@ -52,14 +52,19 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
self.evaluator_post_patcher = patch(
"llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_post"
)
self.evaluator_delete_patcher = patch(
"llama_stack.providers.remote.eval.nvidia.eval.NVIDIAEvalImpl._evaluator_delete"
)
self.mock_evaluator_get = self.evaluator_get_patcher.start()
self.mock_evaluator_post = self.evaluator_post_patcher.start()
self.mock_evaluator_delete = self.evaluator_delete_patcher.start()
def tearDown(self):
"""Clean up after each test."""
self.evaluator_get_patcher.stop()
self.evaluator_post_patcher.stop()
self.evaluator_delete_patcher.stop()
def _assert_request_body(self, expected_json):
"""Helper method to verify request body in Evaluator POST request is correct"""
@@ -115,6 +120,13 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
self.mock_evaluator_post.assert_called_once()
self._assert_request_body({"namespace": benchmark.provider_id, "name": benchmark.identifier, **eval_config})
def test_unregister_benchmark(self):
# Unregister the benchmark
self.run_async(self.eval_impl.unregister_benchmark(benchmark_id=MOCK_BENCHMARK_ID))
# Verify the Evaluator API was called correctly
self.mock_evaluator_delete.assert_called_once_with(f"/v1/evaluation/configs/nvidia/{MOCK_BENCHMARK_ID}")
def test_run_eval(self):
benchmark_config = BenchmarkConfig(
eval_candidate=ModelCandidate(

uv.lock generated
View file

@@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.12"
resolution-markers = [
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@@ -1749,7 +1749,7 @@ wheels = [
[[package]]
name = "llama-stack"
version = "0.2.21"
version = "0.2.22"
source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
@@ -1885,15 +1885,15 @@ requires-dist = [
{ name = "huggingface-hub", specifier = ">=0.34.0,<1.0" },
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "jsonschema" },
{ name = "llama-stack-client", specifier = ">=0.2.21" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.21" },
{ name = "llama-stack-client", specifier = ">=0.2.22" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.22" },
{ name = "openai", specifier = ">=1.100.0" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" },
{ name = "pandas", marker = "extra == 'ui'" },
{ name = "pillow" },
{ name = "prompt-toolkit" },
{ name = "pydantic", specifier = ">=2" },
{ name = "pydantic", specifier = ">=2.11.9" },
{ name = "python-dotenv" },
{ name = "python-jose", extras = ["cryptography"] },
{ name = "python-multipart", specifier = ">=0.0.20" },
@@ -1911,7 +1911,7 @@ provides-extras = ["ui"]
benchmark = [{ name = "locust", specifier = ">=2.39.1" }]
codegen = [
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "pydantic" },
{ name = "pydantic", specifier = ">=2.11.9" },
{ name = "rich" },
]
dev = [
@@ -1993,7 +1993,7 @@ unit = [
[[package]]
name = "llama-stack-client"
version = "0.2.21"
version = "0.2.22"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -2012,9 +2012,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f1/d3/8c50561d167f1e9b601b8fffe852b44c1ff97aaa6db6cdedd611d9e02a65/llama_stack_client-0.2.21.tar.gz", hash = "sha256:bd931fdcadedec5ccdbaa3c54d0c17761af1c227711ad6150dc0dd33d7b66ce2", size = 318319, upload-time = "2025-09-08T22:26:57.668Z" }
sdist = { url = "https://files.pythonhosted.org/packages/60/80/4260816bfaaa889d515206c9df4906d08d405bf94c9b4d1be399b1923e46/llama_stack_client-0.2.22.tar.gz", hash = "sha256:9a0bc756b91ebd539858eeaf1f231c5e5c6900e1ea4fcced726c6717f3d27ca7", size = 318309, upload-time = "2025-09-16T19:43:33.212Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/02/77/dadc682046a2c7ad68be8d2d2afac7007bf4d22efb0d3929d85ab9706ffe/llama_stack_client-0.2.21-py3-none-any.whl", hash = "sha256:adba82fdf18ab3b8ac218cedba4927bd5d26c23c2318e75c8763a44bb6b40693", size = 369902, upload-time = "2025-09-08T22:26:56.308Z" },
{ url = "https://files.pythonhosted.org/packages/d1/8e/1ebf6ac0dbb62b81038e856ed00768e283d927b14fcd614e3018a227092b/llama_stack_client-0.2.22-py3-none-any.whl", hash = "sha256:b260d73aec56fcfd8fa601b3b34c2f83c4fbcfb7261a246b02bbdf6c2da184fe", size = 369901, upload-time = "2025-09-16T19:43:32.089Z" },
]
[[package]]
@@ -3393,7 +3393,7 @@ wheels = [
[[package]]
name = "pydantic"
version = "2.11.7"
version = "2.11.9"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "annotated-types" },
@@ -3401,9 +3401,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "typing-inspection" },
]
sdist = { url = "https://files.pythonhosted.org/packages/00/dd/4325abf92c39ba8623b5af936ddb36ffcfe0beae70405d456ab1fb2f5b8c/pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db", size = 788350, upload-time = "2025-06-14T08:33:17.137Z" }
sdist = { url = "https://files.pythonhosted.org/packages/ff/5d/09a551ba512d7ca404d785072700d3f6727a02f6f3c24ecfd081c7cf0aa8/pydantic-2.11.9.tar.gz", hash = "sha256:6b8ffda597a14812a7975c90b82a8a2e777d9257aba3453f973acd3c032a18e2", size = 788495, upload-time = "2025-09-13T11:26:39.325Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6a/c0/ec2b1c8712ca690e5d61979dee872603e92b8a32f94cc1b72d53beab008a/pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b", size = 444782, upload-time = "2025-06-14T08:33:14.905Z" },
{ url = "https://files.pythonhosted.org/packages/3e/d3/108f2006987c58e76691d5ae5d200dd3e0f532cb4e5fa3560751c3a1feba/pydantic-2.11.9-py3-none-any.whl", hash = "sha256:c42dd626f5cfc1c6950ce6205ea58c93efa406da65f479dcb4029d5934857da2", size = 444855, upload-time = "2025-09-13T11:26:36.909Z" },
]
[[package]]