feat: Add metadata field to request and response

This change adds an optional metadata field to the OpenAI-compatible request and response objects. fixes: #3564 Signed-off-by: Abhishek Bongale <abhishekbongale@outlook.com>
2025-12-03 09:53:45 +00:00 · 2025-11-12 14:38:05 +00:00 · 2025-11-12 14:38:05 +00:00 · f3aac25352
commit f3aac25352
parent 539b9c08f3
5 changed files with 367 additions and 0 deletions
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@ -3909,6 +3909,19 @@ components:
                $ref: '#/components/schemas/OpenAIChatCompletionUsage'
                description: >-
                  Token usage information for the completion
              metadata:
                type: object
                additionalProperties:
                  oneOf:
                    - type: 'null'
                    - type: boolean
                    - type: number
                    - type: string
                    - type: array
                    - type: object
                description: >-
                  (Optional) Set of key-value pairs that were attached to the request.
                  This metadata is copied from the request.
              input_messages:
                type: array
                items:
@ -4619,6 +4632,19 @@ components:
        user:
          type: string
          description: (Optional) The user to use.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -4655,6 +4681,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information for the completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - id
@ -4694,6 +4733,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information (typically included in final chunk with stream_options)
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - id
@ -4783,6 +4835,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information for the completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
        input_messages:
          type: array
          items:
@ -4888,6 +4953,19 @@ components:
          type: string
          description: >-
            (Optional) The suffix that should be appended to the completion.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -4912,6 +4990,16 @@ components:
          type: string
          const: text_completion
          default: text_completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
      additionalProperties: false
      required:
        - id
@ -5744,6 +5832,19 @@ components:
          description: >-
            (Optional) A unique identifier representing your end-user, which can help
            OpenAI to monitor and detect abuse.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -5817,6 +5918,19 @@ components:
        usage:
          $ref: '#/components/schemas/OpenAIEmbeddingUsage'
          description: Usage information
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - object
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -3193,6 +3193,19 @@ components:
                $ref: '#/components/schemas/OpenAIChatCompletionUsage'
                description: >-
                  Token usage information for the completion
              metadata:
                type: object
                additionalProperties:
                  oneOf:
                    - type: 'null'
                    - type: boolean
                    - type: number
                    - type: string
                    - type: array
                    - type: object
                description: >-
                  (Optional) Set of key-value pairs that were attached to the request.
                  This metadata is copied from the request.
              input_messages:
                type: array
                items:
@ -3903,6 +3916,19 @@ components:
        user:
          type: string
          description: (Optional) The user to use.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -3939,6 +3965,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information for the completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - id
@ -3978,6 +4017,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information (typically included in final chunk with stream_options)
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - id
@ -4067,6 +4119,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information for the completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
        input_messages:
          type: array
          items:
@ -4172,6 +4237,19 @@ components:
          type: string
          description: >-
            (Optional) The suffix that should be appended to the completion.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -4196,6 +4274,16 @@ components:
          type: string
          const: text_completion
          default: text_completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
      additionalProperties: false
      required:
        - id
@ -5028,6 +5116,19 @@ components:
          description: >-
            (Optional) A unique identifier representing your end-user, which can help
            OpenAI to monitor and detect abuse.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -5101,6 +5202,19 @@ components:
        usage:
          $ref: '#/components/schemas/OpenAIEmbeddingUsage'
          description: Usage information
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - object
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -3909,6 +3909,19 @@ components:
                $ref: '#/components/schemas/OpenAIChatCompletionUsage'
                description: >-
                  Token usage information for the completion
              metadata:
                type: object
                additionalProperties:
                  oneOf:
                    - type: 'null'
                    - type: boolean
                    - type: number
                    - type: string
                    - type: array
                    - type: object
                description: >-
                  (Optional) Set of key-value pairs that were attached to the request.
                  This metadata is copied from the request.
              input_messages:
                type: array
                items:
@ -4619,6 +4632,19 @@ components:
        user:
          type: string
          description: (Optional) The user to use.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -4655,6 +4681,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information for the completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - id
@ -4694,6 +4733,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information (typically included in final chunk with stream_options)
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - id
@ -4783,6 +4835,19 @@ components:
          $ref: '#/components/schemas/OpenAIChatCompletionUsage'
          description: >-
            Token usage information for the completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
        input_messages:
          type: array
          items:
@ -4888,6 +4953,19 @@ components:
          type: string
          description: >-
            (Optional) The suffix that should be appended to the completion.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -4912,6 +4990,16 @@ components:
          type: string
          const: text_completion
          default: text_completion
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
      additionalProperties: false
      required:
        - id
@ -5744,6 +5832,19 @@ components:
          description: >-
            (Optional) A unique identifier representing your end-user, which can help
            OpenAI to monitor and detect abuse.
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that can be attached to the request.
            This metadata will be included in the response object.
      additionalProperties: false
      required:
        - model
@ -5817,6 +5918,19 @@ components:
        usage:
          $ref: '#/components/schemas/OpenAIEmbeddingUsage'
          description: Usage information
        metadata:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Set of key-value pairs that were attached to the request. This
            metadata is copied from the request.
      additionalProperties: false
      required:
        - object
--- a/src/llama_stack/apis/inference/inference.py
+++ b/src/llama_stack/apis/inference/inference.py
@ -694,6 +694,7 @@ class OpenAIChatCompletion(BaseModel):
    :param created: The Unix timestamp in seconds when the chat completion was created
    :param model: The model that was used to generate the chat completion
    :param usage: Token usage information for the completion
    :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
    """
    id: str
@ -702,6 +703,7 @@ class OpenAIChatCompletion(BaseModel):
    created: int
    model: str
    usage: OpenAIChatCompletionUsage | None = None
    metadata: dict[str, Any] | None = None
@json_schema_type
@ -714,6 +716,7 @@ class OpenAIChatCompletionChunk(BaseModel):
    :param created: The Unix timestamp in seconds when the chat completion was created
    :param model: The model that was used to generate the chat completion
    :param usage: Token usage information (typically included in final chunk with stream_options)
    :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
    """
    id: str
@ -722,6 +725,7 @@ class OpenAIChatCompletionChunk(BaseModel):
    created: int
    model: str
    usage: OpenAIChatCompletionUsage | None = None
    metadata: dict[str, Any] | None = None
@json_schema_type
@ -765,6 +769,7 @@ class OpenAICompletion(BaseModel):
    :created: The Unix timestamp in seconds when the completion was created
    :model: The model that was used to generate the completion
    :object: The object type, which will be "text_completion"
    :metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
    """
    id: str
@ -772,6 +777,7 @@ class OpenAICompletion(BaseModel):
    created: int
    model: str
    object: Literal["text_completion"] = "text_completion"
    metadata: dict[str, Any] | None = None
@json_schema_type
@ -809,12 +815,14 @@ class OpenAIEmbeddingsResponse(BaseModel):
    :param data: List of embedding data objects
    :param model: The model that was used to generate the embeddings
    :param usage: Usage information
    :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
    """
    object: Literal["list"] = "list"
    data: list[OpenAIEmbeddingData]
    model: str
    usage: OpenAIEmbeddingUsage
    metadata: dict[str, Any] | None = None
 class ModelStore(Protocol):
@ -890,6 +898,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
    :param top_p: (Optional) The top p to use.
    :param user: (Optional) The user to use.
    :param suffix: (Optional) The suffix that should be appended to the completion.
    :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
    """
    # Standard OpenAI completion parameters
@ -911,6 +920,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
    top_p: float | None = None
    user: str | None = None
    suffix: str | None = None
    metadata: dict[str, Any] | None = None
 # extra_body can be accessed via .model_extra
@ -941,6 +951,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
    :param top_logprobs: (Optional) The top log probabilities to use.
    :param top_p: (Optional) The top p to use.
    :param user: (Optional) The user to use.
    :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
    """
    # Standard OpenAI chat completion parameters
@ -967,6 +978,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
    top_logprobs: int | None = None
    top_p: float | None = None
    user: str | None = None
    metadata: dict[str, Any] | None = None
 # extra_body can be accessed via .model_extra
@ -979,6 +991,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
    :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
    :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
    :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
    :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
    """
    model: str
@ -986,6 +999,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
    encoding_format: str | None = "float"
    dimensions: int | None = None
    user: str | None = None
    metadata: dict[str, Any] | None = None
@runtime_checkable
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@ -190,6 +190,8 @@ class InferenceRouter(Inference):
        response = await provider.openai_completion(params)
        response.model = request_model_id
        # Copy metadata from request to response
        response.metadata = params.metadata
        if self.telemetry_enabled and response.usage is not None:
            metrics = self._construct_metrics(
                prompt_tokens=response.usage.prompt_tokens,
@ -244,10 +246,13 @@ class InferenceRouter(Inference):
                fully_qualified_model_id=request_model_id,
                provider_id=provider.__provider_id__,
                messages=params.messages,
                metadata=params.metadata,
            )
        response = await self._nonstream_openai_chat_completion(provider, params)
        response.model = request_model_id
        # Copy metadata from request to response
        response.metadata = params.metadata
        # Store the response with the ID that will be returned to the client
        if self.store:
@ -282,6 +287,8 @@ class InferenceRouter(Inference):
        response = await provider.openai_embeddings(params)
        response.model = request_model_id
        # Copy metadata from request to response
        response.metadata = params.metadata
        return response
    async def list_chat_completions(
@ -340,6 +347,7 @@ class InferenceRouter(Inference):
        fully_qualified_model_id: str,
        provider_id: str,
        messages: list[OpenAIMessageParam] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> AsyncIterator[OpenAIChatCompletionChunk]:
        """Stream OpenAI chat completion chunks, compute metrics, and store the final completion."""
        id = None
@ -359,6 +367,8 @@ class InferenceRouter(Inference):
                    created = chunk.created
                chunk.model = fully_qualified_model_id
                # Copy metadata from request to each chunk
                chunk.metadata = metadata
                # Accumulate choice data for final assembly
                if chunk.choices:
@ -467,6 +477,7 @@ class InferenceRouter(Inference):
                    created=created or int(time.time()),
                    model=fully_qualified_model_id,
                    object="chat.completion",
                    metadata=metadata,
                )
                logger.debug(f"InferenceRouter.completion_response: {final_response}")
                asyncio.create_task(self.store.store_chat_completion(final_response, messages))