Enable streaming usage metrics for OpenAI providers

Inject stream_options for telemetry, add completion streaming metrics,
fix params mutation, remove duplicate provider logic. Add unit tests.
This commit is contained in:
skamenan7 2025-11-19 14:48:46 -05:00 committed by Sumanth Kamenani
parent a7c7c72467
commit 606b9f0ca4
6 changed files with 211 additions and 53 deletions

View file

@@ -271,6 +271,16 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
"""
Direct OpenAI completion API call.
"""
from llama_stack.core.telemetry.tracing import get_current_span
# inject if streaming AND telemetry active
if params.stream and get_current_span() is not None:
params = params.model_copy()
if params.stream_options is None:
params.stream_options = {"include_usage": True}
elif "include_usage" not in params.stream_options:
params.stream_options = {**params.stream_options, "include_usage": True}
# TODO: fix openai_completion to return type compatible with OpenAI's API response
provider_model_id = await self._get_provider_model_id(params.model)
self._validate_model_allowed(provider_model_id)
@@ -308,6 +318,16 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
"""
Direct OpenAI chat completion API call.
"""
from llama_stack.core.telemetry.tracing import get_current_span
# inject if streaming AND telemetry active
if params.stream and get_current_span() is not None:
params = params.model_copy()
if params.stream_options is None:
params.stream_options = {"include_usage": True}
elif "include_usage" not in params.stream_options:
params.stream_options = {**params.stream_options, "include_usage": True}
provider_model_id = await self._get_provider_model_id(params.model)
self._validate_model_allowed(provider_model_id)