Enable streaming usage metrics for OpenAI providers

Inject stream_options for telemetry, add completion streaming metrics,
fix params mutation, remove duplicate provider logic. Add unit tests.
This commit is contained in:
skamenan7 2025-11-19 14:48:46 -05:00 committed by Sumanth Kamenani
parent a7c7c72467
commit 606b9f0ca4
6 changed files with 211 additions and 53 deletions

View file

@@ -271,6 +271,16 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
"""
Direct OpenAI completion API call.
"""
from llama_stack.core.telemetry.tracing import get_current_span
# inject if streaming AND telemetry active
if params.stream and get_current_span() is not None:
params = params.model_copy()
if params.stream_options is None:
params.stream_options = {"include_usage": True}
elif "include_usage" not in params.stream_options:
params.stream_options = {**params.stream_options, "include_usage": True}
# TODO: fix openai_completion to return type compatible with OpenAI's API response
provider_model_id = await self._get_provider_model_id(params.model)
self._validate_model_allowed(provider_model_id)
@@ -308,6 +318,16 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
"""
Direct OpenAI chat completion API call.
"""
from llama_stack.core.telemetry.tracing import get_current_span
# inject if streaming AND telemetry active
if params.stream and get_current_span() is not None:
params = params.model_copy()
if params.stream_options is None:
params.stream_options = {"include_usage": True}
elif "include_usage" not in params.stream_options:
params.stream_options = {**params.stream_options, "include_usage": True}
provider_model_id = await self._get_provider_model_id(params.model)
self._validate_model_allowed(provider_model_id)