feat!: Implement include parameter specifically for adding logprobs in the output message (#4261)

# Problem
As an Application Developer, I want to use the `include` parameter with
the value `message.output_text.logprobs`, so that I can receive log
probabilities for output tokens and assess the model's confidence in its
response.
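For context, here is a minimal client-side sketch of the requested behavior against the OpenAI-compatible Responses API. The base URL and model id are placeholders, not values from this PR:

```python
from openai import OpenAI

# Point the OpenAI client at a running Llama Stack server (URL is a placeholder).
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",  # placeholder model id
    input="What is the capital of France?",
    include=["message.output_text.logprobs"],
)
```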

# What does this PR do?

- Updates the `include` parameter in the relevant resource definitions
- Updates the inline provider to return logprobs when
`message.output_text.logprobs` is passed in the `include` parameter
- Converts the logprobs returned by the inference provider from chat
completion format to Responses format (see the sketch after this list)
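In practice this means a client can read per-token log probabilities off the output text content part. The sketch below follows the field names of the OpenAI Responses API; the exact response shape is an assumption, not copied from this PR:

```python
# Assuming `response` came from responses.create(..., include=["message.output_text.logprobs"]):
for item in response.output:
    if item.type != "message":
        continue
    for part in item.content:
        if part.type == "output_text" and part.logprobs:
            for lp in part.logprobs:
                # Each entry carries the token, its log probability, and optional alternatives.
                print(lp.token, lp.logprob)
```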

Closes [#4260](https://github.com/llamastack/llama-stack/issues/4260)

## Test Plan

- Created a script to explore OpenAI behavior:
https://github.com/s-akhtar-baig/llama-stack-examples/blob/main/responses/src/include.py
- Added integration tests and new recordings (the sketch below shows the
kind of assertion involved)
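A hypothetical sketch of such an integration test; this is not the test added in this PR, and the `openai_client` fixture and `text_model_id` are assumptions:

```python
def test_response_includes_output_text_logprobs(openai_client, text_model_id):
    # Hypothetical test: request logprobs via the include parameter and
    # verify that each output text part carries a non-empty logprobs list.
    response = openai_client.responses.create(
        model=text_model_id,
        input="Say hello in one word.",
        include=["message.output_text.logprobs"],
    )
    message = next(item for item in response.output if item.type == "message")
    text_part = next(part for part in message.content if part.type == "output_text")
    assert text_part.logprobs, "expected per-token logprobs in the output text part"
    for lp in text_part.logprobs:
        assert lp.logprob <= 0.0  # log probabilities are never positive
```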

---------

Co-authored-by: Matthew Farrellee <matt@cs.wisc.edu>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>

```diff
@@ -43,6 +43,8 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIMessageParam,
+    OpenAITokenLogProb,
+    OpenAITopLogProb,
     Order,
     RerankResponse,
     RoutingTable,
@@ -342,8 +344,34 @@ class InferenceRouter(Inference):
                     )
                 if choice_delta.finish_reason:
                     current_choice_data["finish_reason"] = choice_delta.finish_reason
+                # Convert logprobs from chat completion format to Responses format.
+                # Chat completion returns a list of ChatCompletionTokenLogprob, but
+                # OpenAIChoice expects a list of OpenAITokenLogProb.
                 if choice_delta.logprobs and choice_delta.logprobs.content:
-                    current_choice_data["logprobs_content_parts"].extend(choice_delta.logprobs.content)
+                    converted_logprobs = []
+                    for token_logprob in choice_delta.logprobs.content:
+                        top_logprobs = None
+                        if token_logprob.top_logprobs:
+                            top_logprobs = [
+                                OpenAITopLogProb(
+                                    token=tlp.token,
+                                    bytes=tlp.bytes,
+                                    logprob=tlp.logprob,
+                                )
+                                for tlp in token_logprob.top_logprobs
+                            ]
+                        converted_logprobs.append(
+                            OpenAITokenLogProb(
+                                token=token_logprob.token,
+                                bytes=token_logprob.bytes,
+                                logprob=token_logprob.logprob,
+                                top_logprobs=top_logprobs,
+                            )
+                        )
+                    # Update the choice delta with the newly formatted logprobs object.
+                    choice_delta.logprobs.content = converted_logprobs
+                    current_choice_data["logprobs_content_parts"].extend(converted_logprobs)
             # Compute metrics on final chunk
             if chunk.choices and chunk.choices[0].finish_reason:
```
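In isolation, the conversion amounts to a per-token re-wrapping of the logprob payload. A minimal standalone sketch, using simplified stand-in classes rather than the actual `llama_stack_api` models:

```python
from dataclasses import dataclass

# Simplified stand-ins for the chat-completion and Responses logprob models.
@dataclass
class ChatTokenLogprob:
    token: str
    logprob: float
    bytes: list[int] | None = None
    top_logprobs: list["ChatTokenLogprob"] | None = None

@dataclass
class ResponsesTokenLogProb:
    token: str
    logprob: float
    bytes: list[int] | None = None
    top_logprobs: list["ResponsesTokenLogProb"] | None = None

def convert_logprobs(content: list[ChatTokenLogprob]) -> list[ResponsesTokenLogProb]:
    """Re-wrap chat-completion token logprobs into the Responses shape."""
    converted = []
    for tl in content:
        tops = None
        if tl.top_logprobs:
            tops = [
                ResponsesTokenLogProb(token=t.token, bytes=t.bytes, logprob=t.logprob)
                for t in tl.top_logprobs
            ]
        converted.append(
            ResponsesTokenLogProb(
                token=tl.token, bytes=tl.bytes, logprob=tl.logprob, top_logprobs=tops
            )
        )
    return converted

# Example: one token with a single lower-probability alternative.
sample = [
    ChatTokenLogprob(
        token="Paris",
        logprob=-0.02,
        top_logprobs=[ChatTokenLogprob(token="London", logprob=-4.1)],
    )
]
print(convert_logprobs(sample)[0].token)  # -> "Paris"
```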