feat: Add metadata field to request and response

This change adds an optional `metadata` field to the OpenAI-compatible
request and response objects.

fixes: #3564
Signed-off-by: Abhishek Bongale <abhishekbongale@outlook.com>
This commit is contained in:
Abhishek Bongale 2025-11-12 14:38:05 +00:00
parent 539b9c08f3
commit f3aac25352
5 changed files with 367 additions and 0 deletions

View file

@ -3909,6 +3909,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request.
This metadata is copied from the request.
input_messages: input_messages:
type: array type: array
items: items:
@ -4619,6 +4632,19 @@ components:
user: user:
type: string type: string
description: (Optional) The user to use. description: (Optional) The user to use.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -4655,6 +4681,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -4694,6 +4733,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information (typically included in final chunk with stream_options) Token usage information (typically included in final chunk with stream_options)
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -4783,6 +4835,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
input_messages: input_messages:
type: array type: array
items: items:
@ -4888,6 +4953,19 @@ components:
type: string type: string
description: >- description: >-
(Optional) The suffix that should be appended to the completion. (Optional) The suffix that should be appended to the completion.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -4912,6 +4990,16 @@ components:
type: string type: string
const: text_completion const: text_completion
default: text_completion default: text_completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -5744,6 +5832,19 @@ components:
description: >- description: >-
(Optional) A unique identifier representing your end-user, which can help (Optional) A unique identifier representing your end-user, which can help
OpenAI to monitor and detect abuse. OpenAI to monitor and detect abuse.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -5817,6 +5918,19 @@ components:
usage: usage:
$ref: '#/components/schemas/OpenAIEmbeddingUsage' $ref: '#/components/schemas/OpenAIEmbeddingUsage'
description: Usage information description: Usage information
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- object - object

View file

@ -3193,6 +3193,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request.
This metadata is copied from the request.
input_messages: input_messages:
type: array type: array
items: items:
@ -3903,6 +3916,19 @@ components:
user: user:
type: string type: string
description: (Optional) The user to use. description: (Optional) The user to use.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -3939,6 +3965,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -3978,6 +4017,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information (typically included in final chunk with stream_options) Token usage information (typically included in final chunk with stream_options)
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -4067,6 +4119,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
input_messages: input_messages:
type: array type: array
items: items:
@ -4172,6 +4237,19 @@ components:
type: string type: string
description: >- description: >-
(Optional) The suffix that should be appended to the completion. (Optional) The suffix that should be appended to the completion.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -4196,6 +4274,16 @@ components:
type: string type: string
const: text_completion const: text_completion
default: text_completion default: text_completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -5028,6 +5116,19 @@ components:
description: >- description: >-
(Optional) A unique identifier representing your end-user, which can help (Optional) A unique identifier representing your end-user, which can help
OpenAI to monitor and detect abuse. OpenAI to monitor and detect abuse.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -5101,6 +5202,19 @@ components:
usage: usage:
$ref: '#/components/schemas/OpenAIEmbeddingUsage' $ref: '#/components/schemas/OpenAIEmbeddingUsage'
description: Usage information description: Usage information
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- object - object

View file

@ -3909,6 +3909,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request.
This metadata is copied from the request.
input_messages: input_messages:
type: array type: array
items: items:
@ -4619,6 +4632,19 @@ components:
user: user:
type: string type: string
description: (Optional) The user to use. description: (Optional) The user to use.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -4655,6 +4681,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -4694,6 +4733,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information (typically included in final chunk with stream_options) Token usage information (typically included in final chunk with stream_options)
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -4783,6 +4835,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage' $ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >- description: >-
Token usage information for the completion Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
input_messages: input_messages:
type: array type: array
items: items:
@ -4888,6 +4953,19 @@ components:
type: string type: string
description: >- description: >-
(Optional) The suffix that should be appended to the completion. (Optional) The suffix that should be appended to the completion.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -4912,6 +4990,16 @@ components:
type: string type: string
const: text_completion const: text_completion
default: text_completion default: text_completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -5744,6 +5832,19 @@ components:
description: >- description: >-
(Optional) A unique identifier representing your end-user, which can help (Optional) A unique identifier representing your end-user, which can help
OpenAI to monitor and detect abuse. OpenAI to monitor and detect abuse.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false additionalProperties: false
required: required:
- model - model
@ -5817,6 +5918,19 @@ components:
usage: usage:
$ref: '#/components/schemas/OpenAIEmbeddingUsage' $ref: '#/components/schemas/OpenAIEmbeddingUsage'
description: Usage information description: Usage information
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false additionalProperties: false
required: required:
- object - object

View file

@ -694,6 +694,7 @@ class OpenAIChatCompletion(BaseModel):
:param created: The Unix timestamp in seconds when the chat completion was created :param created: The Unix timestamp in seconds when the chat completion was created
:param model: The model that was used to generate the chat completion :param model: The model that was used to generate the chat completion
:param usage: Token usage information for the completion :param usage: Token usage information for the completion
:param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
""" """
id: str id: str
@ -702,6 +703,7 @@ class OpenAIChatCompletion(BaseModel):
created: int created: int
model: str model: str
usage: OpenAIChatCompletionUsage | None = None usage: OpenAIChatCompletionUsage | None = None
metadata: dict[str, Any] | None = None
@json_schema_type @json_schema_type
@ -714,6 +716,7 @@ class OpenAIChatCompletionChunk(BaseModel):
:param created: The Unix timestamp in seconds when the chat completion was created :param created: The Unix timestamp in seconds when the chat completion was created
:param model: The model that was used to generate the chat completion :param model: The model that was used to generate the chat completion
:param usage: Token usage information (typically included in final chunk with stream_options) :param usage: Token usage information (typically included in final chunk with stream_options)
:param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
""" """
id: str id: str
@ -722,6 +725,7 @@ class OpenAIChatCompletionChunk(BaseModel):
created: int created: int
model: str model: str
usage: OpenAIChatCompletionUsage | None = None usage: OpenAIChatCompletionUsage | None = None
metadata: dict[str, Any] | None = None
@json_schema_type @json_schema_type
@ -765,6 +769,7 @@ class OpenAICompletion(BaseModel):
:created: The Unix timestamp in seconds when the completion was created :created: The Unix timestamp in seconds when the completion was created
:model: The model that was used to generate the completion :model: The model that was used to generate the completion
:object: The object type, which will be "text_completion" :object: The object type, which will be "text_completion"
:metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
""" """
id: str id: str
@ -772,6 +777,7 @@ class OpenAICompletion(BaseModel):
created: int created: int
model: str model: str
object: Literal["text_completion"] = "text_completion" object: Literal["text_completion"] = "text_completion"
metadata: dict[str, Any] | None = None
@json_schema_type @json_schema_type
@ -809,12 +815,14 @@ class OpenAIEmbeddingsResponse(BaseModel):
:param data: List of embedding data objects :param data: List of embedding data objects
:param model: The model that was used to generate the embeddings :param model: The model that was used to generate the embeddings
:param usage: Usage information :param usage: Usage information
:param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
""" """
object: Literal["list"] = "list" object: Literal["list"] = "list"
data: list[OpenAIEmbeddingData] data: list[OpenAIEmbeddingData]
model: str model: str
usage: OpenAIEmbeddingUsage usage: OpenAIEmbeddingUsage
metadata: dict[str, Any] | None = None
class ModelStore(Protocol): class ModelStore(Protocol):
@ -890,6 +898,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
:param top_p: (Optional) The top p to use. :param top_p: (Optional) The top p to use.
:param user: (Optional) The user to use. :param user: (Optional) The user to use.
:param suffix: (Optional) The suffix that should be appended to the completion. :param suffix: (Optional) The suffix that should be appended to the completion.
:param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
""" """
# Standard OpenAI completion parameters # Standard OpenAI completion parameters
@ -911,6 +920,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
top_p: float | None = None top_p: float | None = None
user: str | None = None user: str | None = None
suffix: str | None = None suffix: str | None = None
metadata: dict[str, Any] | None = None
# extra_body can be accessed via .model_extra # extra_body can be accessed via .model_extra
@ -941,6 +951,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
:param top_logprobs: (Optional) The top log probabilities to use. :param top_logprobs: (Optional) The top log probabilities to use.
:param top_p: (Optional) The top p to use. :param top_p: (Optional) The top p to use.
:param user: (Optional) The user to use. :param user: (Optional) The user to use.
:param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
""" """
# Standard OpenAI chat completion parameters # Standard OpenAI chat completion parameters
@ -967,6 +978,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
top_logprobs: int | None = None top_logprobs: int | None = None
top_p: float | None = None top_p: float | None = None
user: str | None = None user: str | None = None
metadata: dict[str, Any] | None = None
# extra_body can be accessed via .model_extra # extra_body can be accessed via .model_extra
@ -979,6 +991,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
:param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
:param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
:param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
:param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
""" """
model: str model: str
@ -986,6 +999,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
encoding_format: str | None = "float" encoding_format: str | None = "float"
dimensions: int | None = None dimensions: int | None = None
user: str | None = None user: str | None = None
metadata: dict[str, Any] | None = None
@runtime_checkable @runtime_checkable

View file

@ -190,6 +190,8 @@ class InferenceRouter(Inference):
response = await provider.openai_completion(params) response = await provider.openai_completion(params)
response.model = request_model_id response.model = request_model_id
# Copy metadata from request to response
response.metadata = params.metadata
if self.telemetry_enabled and response.usage is not None: if self.telemetry_enabled and response.usage is not None:
metrics = self._construct_metrics( metrics = self._construct_metrics(
prompt_tokens=response.usage.prompt_tokens, prompt_tokens=response.usage.prompt_tokens,
@ -244,10 +246,13 @@ class InferenceRouter(Inference):
fully_qualified_model_id=request_model_id, fully_qualified_model_id=request_model_id,
provider_id=provider.__provider_id__, provider_id=provider.__provider_id__,
messages=params.messages, messages=params.messages,
metadata=params.metadata,
) )
response = await self._nonstream_openai_chat_completion(provider, params) response = await self._nonstream_openai_chat_completion(provider, params)
response.model = request_model_id response.model = request_model_id
# Copy metadata from request to response
response.metadata = params.metadata
# Store the response with the ID that will be returned to the client # Store the response with the ID that will be returned to the client
if self.store: if self.store:
@ -282,6 +287,8 @@ class InferenceRouter(Inference):
response = await provider.openai_embeddings(params) response = await provider.openai_embeddings(params)
response.model = request_model_id response.model = request_model_id
# Copy metadata from request to response
response.metadata = params.metadata
return response return response
async def list_chat_completions( async def list_chat_completions(
@ -340,6 +347,7 @@ class InferenceRouter(Inference):
fully_qualified_model_id: str, fully_qualified_model_id: str,
provider_id: str, provider_id: str,
messages: list[OpenAIMessageParam] | None = None, messages: list[OpenAIMessageParam] | None = None,
metadata: dict[str, Any] | None = None,
) -> AsyncIterator[OpenAIChatCompletionChunk]: ) -> AsyncIterator[OpenAIChatCompletionChunk]:
"""Stream OpenAI chat completion chunks, compute metrics, and store the final completion.""" """Stream OpenAI chat completion chunks, compute metrics, and store the final completion."""
id = None id = None
@ -359,6 +367,8 @@ class InferenceRouter(Inference):
created = chunk.created created = chunk.created
chunk.model = fully_qualified_model_id chunk.model = fully_qualified_model_id
# Copy metadata from request to each chunk
chunk.metadata = metadata
# Accumulate choice data for final assembly # Accumulate choice data for final assembly
if chunk.choices: if chunk.choices:
@ -467,6 +477,7 @@ class InferenceRouter(Inference):
created=created or int(time.time()), created=created or int(time.time()),
model=fully_qualified_model_id, model=fully_qualified_model_id,
object="chat.completion", object="chat.completion",
metadata=metadata,
) )
logger.debug(f"InferenceRouter.completion_response: {final_response}") logger.debug(f"InferenceRouter.completion_response: {final_response}")
asyncio.create_task(self.store.store_chat_completion(final_response, messages)) asyncio.create_task(self.store.store_chat_completion(final_response, messages))