From 935b8e28de29400a4b42d8b54169341c5244fec7 Mon Sep 17 00:00:00 2001
From: slekkala1
Date: Wed, 10 Sep 2025 08:48:01 -0700
Subject: [PATCH] fix: Fireworks chat completion broken due to telemetry
 (#3392)

# What does this PR do?
Fix Fireworks chat completion, which was broken because telemetry expected
`response.usage` to be present on every response.

Closes https://github.com/llamastack/llama-stack/issues/3391

## Test Plan
1. `uv run --with llama-stack llama stack build --distro starter --image-type venv --run`

Try:
```
curl -X POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```

Expected response:
```
{"id":"chatcmpl-ee922a08-0df0-4974-b0d3-b322113e8bc0","choices":[{"message":{"role":"assistant","content":"Hello! How can I assist you today?","name":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null}],"object":"chat.completion","created":1757456375,"model":"fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct"}
```

Without the fix, the request fails as described in
https://github.com/llamastack/llama-stack/issues/3391.

Co-authored-by: Francisco Arceo
---
 llama_stack/core/routers/inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py
index 045093fe0..23972deb5 100644
--- a/llama_stack/core/routers/inference.py
+++ b/llama_stack/core/routers/inference.py
@@ -423,7 +423,7 @@ class InferenceRouter(Inference):
         # response_stream = await provider.openai_completion(**params)
         response = await provider.openai_completion(**params)
 
-        if self.telemetry:
+        if self.telemetry and getattr(response, "usage", None):
             metrics = self._construct_metrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
@@ -529,7 +529,7 @@ class InferenceRouter(Inference):
         if self.store:
             asyncio.create_task(self.store.store_chat_completion(response, messages))
 
-        if self.telemetry:
+        if self.telemetry and getattr(response, "usage", None):
             metrics = self._construct_metrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
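
For context on why the guard works, below is a minimal, self-contained sketch of the `getattr(response, "usage", None)` pattern the patch introduces. The `Usage`, `Response`, and `record_usage` names here are illustrative stand-ins, not the actual llama-stack router types:

```python
# Sketch only: hypothetical stand-ins for the provider response types.
# In llama-stack, some providers (e.g. Fireworks, per issue #3391) can
# return a response whose `usage` field is missing or None.
from dataclasses import dataclass

@dataclass
class Usage:
    prompt_tokens: int
    completion_tokens: int

@dataclass
class Response:
    usage: Usage | None = None

def record_usage(response: Response, telemetry: bool) -> None:
    # getattr(..., None) is falsy both when the attribute is absent and
    # when it is None, so the telemetry branch is skipped safely instead
    # of raising AttributeError on response.usage.prompt_tokens.
    if telemetry and getattr(response, "usage", None):
        print(response.usage.prompt_tokens, response.usage.completion_tokens)

record_usage(Response(), telemetry=True)             # skipped: no usage data
record_usage(Response(Usage(3, 9)), telemetry=True)  # prints "3 9"
```

The design choice matches the diff: rather than requiring every provider to populate `usage`, the router simply skips metric construction when usage data is unavailable.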