From f3aac253528608fa9d2a8d7b388200d80d31e06f Mon Sep 17 00:00:00 2001 From: Abhishek Bongale Date: Wed, 12 Nov 2025 14:38:05 +0000 Subject: [PATCH] feat: Add metadata field to request and response This changes adds Optional metadata field to OpenAI compatible request and response object. fixes: #3564 Signed-off-by: Abhishek Bongale --- client-sdks/stainless/openapi.yml | 114 ++++++++++++++++++++ docs/static/llama-stack-spec.yaml | 114 ++++++++++++++++++++ docs/static/stainless-llama-stack-spec.yaml | 114 ++++++++++++++++++++ src/llama_stack/apis/inference/inference.py | 14 +++ src/llama_stack/core/routers/inference.py | 11 ++ 5 files changed, 367 insertions(+) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 9f3ef15b5..cd5dab53f 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -3909,6 +3909,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. + This metadata is copied from the request. input_messages: type: array items: @@ -4619,6 +4632,19 @@ components: user: type: string description: (Optional) The user to use. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4655,6 +4681,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4694,6 +4733,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information (typically included in final chunk with stream_options) + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4783,6 +4835,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. input_messages: type: array items: @@ -4888,6 +4953,19 @@ components: type: string description: >- (Optional) The suffix that should be appended to the completion. 
+ metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4912,6 +4990,16 @@ components: type: string const: text_completion default: text_completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - id @@ -5744,6 +5832,19 @@ components: description: >- (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -5817,6 +5918,19 @@ components: usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' description: Usage information + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - object diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ce8708b68..529f1dfa4 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3193,6 +3193,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. + This metadata is copied from the request. input_messages: type: array items: @@ -3903,6 +3916,19 @@ components: user: type: string description: (Optional) The user to use. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -3939,6 +3965,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. 
additionalProperties: false required: - id @@ -3978,6 +4017,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information (typically included in final chunk with stream_options) + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4067,6 +4119,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. input_messages: type: array items: @@ -4172,6 +4237,19 @@ components: type: string description: >- (Optional) The suffix that should be appended to the completion. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4196,6 +4274,16 @@ components: type: string const: text_completion default: text_completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - id @@ -5028,6 +5116,19 @@ components: description: >- (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -5101,6 +5202,19 @@ components: usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' description: Usage information + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - object diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9f3ef15b5..cd5dab53f 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -3909,6 +3909,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. + This metadata is copied from the request. 
input_messages: type: array items: @@ -4619,6 +4632,19 @@ components: user: type: string description: (Optional) The user to use. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4655,6 +4681,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4694,6 +4733,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information (typically included in final chunk with stream_options) + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4783,6 +4835,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. input_messages: type: array items: @@ -4888,6 +4953,19 @@ components: type: string description: >- (Optional) The suffix that should be appended to the completion. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4912,6 +4990,16 @@ components: type: string const: text_completion default: text_completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - id @@ -5744,6 +5832,19 @@ components: description: >- (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. 
additionalProperties: false required: - model @@ -5817,6 +5918,19 @@ components: usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' description: Usage information + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - object diff --git a/src/llama_stack/apis/inference/inference.py b/src/llama_stack/apis/inference/inference.py index 9f04917c9..8d418d53d 100644 --- a/src/llama_stack/apis/inference/inference.py +++ b/src/llama_stack/apis/inference/inference.py @@ -694,6 +694,7 @@ class OpenAIChatCompletion(BaseModel): :param created: The Unix timestamp in seconds when the chat completion was created :param model: The model that was used to generate the chat completion :param usage: Token usage information for the completion + :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request. """ id: str @@ -702,6 +703,7 @@ class OpenAIChatCompletion(BaseModel): created: int model: str usage: OpenAIChatCompletionUsage | None = None + metadata: dict[str, Any] | None = None @json_schema_type @@ -714,6 +716,7 @@ class OpenAIChatCompletionChunk(BaseModel): :param created: The Unix timestamp in seconds when the chat completion was created :param model: The model that was used to generate the chat completion :param usage: Token usage information (typically included in final chunk with stream_options) + :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request. """ id: str @@ -722,6 +725,7 @@ class OpenAIChatCompletionChunk(BaseModel): created: int model: str usage: OpenAIChatCompletionUsage | None = None + metadata: dict[str, Any] | None = None @json_schema_type @@ -765,6 +769,7 @@ class OpenAICompletion(BaseModel): :created: The Unix timestamp in seconds when the completion was created :model: The model that was used to generate the completion :object: The object type, which will be "text_completion" + :metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request. """ id: str @@ -772,6 +777,7 @@ class OpenAICompletion(BaseModel): created: int model: str object: Literal["text_completion"] = "text_completion" + metadata: dict[str, Any] | None = None @json_schema_type @@ -809,12 +815,14 @@ class OpenAIEmbeddingsResponse(BaseModel): :param data: List of embedding data objects :param model: The model that was used to generate the embeddings :param usage: Usage information + :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request. """ object: Literal["list"] = "list" data: list[OpenAIEmbeddingData] model: str usage: OpenAIEmbeddingUsage + metadata: dict[str, Any] | None = None class ModelStore(Protocol): @@ -890,6 +898,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"): :param top_p: (Optional) The top p to use. :param user: (Optional) The user to use. :param suffix: (Optional) The suffix that should be appended to the completion. + :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object. 
""" # Standard OpenAI completion parameters @@ -911,6 +920,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"): top_p: float | None = None user: str | None = None suffix: str | None = None + metadata: dict[str, Any] | None = None # extra_body can be accessed via .model_extra @@ -941,6 +951,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"): :param top_logprobs: (Optional) The top log probabilities to use. :param top_p: (Optional) The top p to use. :param user: (Optional) The user to use. + :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object. """ # Standard OpenAI chat completion parameters @@ -967,6 +978,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"): top_logprobs: int | None = None top_p: float | None = None user: str | None = None + metadata: dict[str, Any] | None = None # extra_body can be accessed via .model_extra @@ -979,6 +991,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object. """ model: str @@ -986,6 +999,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): encoding_format: str | None = "float" dimensions: int | None = None user: str | None = None + metadata: dict[str, Any] | None = None @runtime_checkable diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index d6270d428..c31dd6765 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -190,6 +190,8 @@ class InferenceRouter(Inference): response = await provider.openai_completion(params) response.model = request_model_id + # Copy metadata from request to response + response.metadata = params.metadata if self.telemetry_enabled and response.usage is not None: metrics = self._construct_metrics( prompt_tokens=response.usage.prompt_tokens, @@ -244,10 +246,13 @@ class InferenceRouter(Inference): fully_qualified_model_id=request_model_id, provider_id=provider.__provider_id__, messages=params.messages, + metadata=params.metadata, ) response = await self._nonstream_openai_chat_completion(provider, params) response.model = request_model_id + # Copy metadata from request to response + response.metadata = params.metadata # Store the response with the ID that will be returned to the client if self.store: @@ -282,6 +287,8 @@ class InferenceRouter(Inference): response = await provider.openai_embeddings(params) response.model = request_model_id + # Copy metadata from request to response + response.metadata = params.metadata return response async def list_chat_completions( @@ -340,6 +347,7 @@ class InferenceRouter(Inference): fully_qualified_model_id: str, provider_id: str, messages: list[OpenAIMessageParam] | None = None, + metadata: dict[str, Any] | None = None, ) -> AsyncIterator[OpenAIChatCompletionChunk]: """Stream OpenAI chat completion chunks, compute metrics, and store the final 
completion.""" id = None @@ -359,6 +367,8 @@ class InferenceRouter(Inference): created = chunk.created chunk.model = fully_qualified_model_id + # Copy metadata from request to each chunk + chunk.metadata = metadata # Accumulate choice data for final assembly if chunk.choices: @@ -467,6 +477,7 @@ class InferenceRouter(Inference): created=created or int(time.time()), model=fully_qualified_model_id, object="chat.completion", + metadata=metadata, ) logger.debug(f"InferenceRouter.completion_response: {final_response}") asyncio.create_task(self.store.store_chat_completion(final_response, messages))