feat: Add metadata field to request and response

This change adds an optional metadata field to the OpenAI-compatible
request and response objects.

fixes: #3564
Signed-off-by: Abhishek Bongale <abhishekbongale@outlook.com>
This commit is contained in:
Abhishek Bongale 2025-11-12 14:38:05 +00:00
parent 539b9c08f3
commit f3aac25352
5 changed files with 367 additions and 0 deletions

View file

@ -3909,6 +3909,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request.
This metadata is copied from the request.
input_messages:
type: array
items:
@ -4619,6 +4632,19 @@ components:
user:
type: string
description: (Optional) The user to use.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -4655,6 +4681,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- id
@ -4694,6 +4733,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information (typically included in final chunk with stream_options)
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- id
@ -4783,6 +4835,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
input_messages:
type: array
items:
@ -4888,6 +4953,19 @@ components:
type: string
description: >-
(Optional) The suffix that should be appended to the completion.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -4912,6 +4990,16 @@ components:
type: string
const: text_completion
default: text_completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
@ -5744,6 +5832,19 @@ components:
description: >-
(Optional) A unique identifier representing your end-user, which can help
OpenAI to monitor and detect abuse.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -5817,6 +5918,19 @@ components:
usage:
$ref: '#/components/schemas/OpenAIEmbeddingUsage'
description: Usage information
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- object

View file

@ -3193,6 +3193,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request.
This metadata is copied from the request.
input_messages:
type: array
items:
@ -3903,6 +3916,19 @@ components:
user:
type: string
description: (Optional) The user to use.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -3939,6 +3965,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- id
@ -3978,6 +4017,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information (typically included in final chunk with stream_options)
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- id
@ -4067,6 +4119,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
input_messages:
type: array
items:
@ -4172,6 +4237,19 @@ components:
type: string
description: >-
(Optional) The suffix that should be appended to the completion.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -4196,6 +4274,16 @@ components:
type: string
const: text_completion
default: text_completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
@ -5028,6 +5116,19 @@ components:
description: >-
(Optional) A unique identifier representing your end-user, which can help
OpenAI to monitor and detect abuse.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -5101,6 +5202,19 @@ components:
usage:
$ref: '#/components/schemas/OpenAIEmbeddingUsage'
description: Usage information
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- object

View file

@ -3909,6 +3909,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request.
This metadata is copied from the request.
input_messages:
type: array
items:
@ -4619,6 +4632,19 @@ components:
user:
type: string
description: (Optional) The user to use.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -4655,6 +4681,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- id
@ -4694,6 +4733,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information (typically included in final chunk with stream_options)
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- id
@ -4783,6 +4835,19 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
input_messages:
type: array
items:
@ -4888,6 +4953,19 @@ components:
type: string
description: >-
(Optional) The suffix that should be appended to the completion.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -4912,6 +4990,16 @@ components:
type: string
const: text_completion
default: text_completion
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
@ -5744,6 +5832,19 @@ components:
description: >-
(Optional) A unique identifier representing your end-user, which can help
OpenAI to monitor and detect abuse.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that can be attached to the request.
This metadata will be included in the response object.
additionalProperties: false
required:
- model
@ -5817,6 +5918,19 @@ components:
usage:
$ref: '#/components/schemas/OpenAIEmbeddingUsage'
description: Usage information
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Set of key-value pairs that were attached to the request. This
metadata is copied from the request.
additionalProperties: false
required:
- object

View file

@ -694,6 +694,7 @@ class OpenAIChatCompletion(BaseModel):
:param created: The Unix timestamp in seconds when the chat completion was created
:param model: The model that was used to generate the chat completion
:param usage: Token usage information for the completion
:param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
"""
id: str
@ -702,6 +703,7 @@ class OpenAIChatCompletion(BaseModel):
created: int
model: str
usage: OpenAIChatCompletionUsage | None = None
metadata: dict[str, Any] | None = None
@json_schema_type
@ -714,6 +716,7 @@ class OpenAIChatCompletionChunk(BaseModel):
:param created: The Unix timestamp in seconds when the chat completion was created
:param model: The model that was used to generate the chat completion
:param usage: Token usage information (typically included in final chunk with stream_options)
:param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
"""
id: str
@ -722,6 +725,7 @@ class OpenAIChatCompletionChunk(BaseModel):
created: int
model: str
usage: OpenAIChatCompletionUsage | None = None
metadata: dict[str, Any] | None = None
@json_schema_type
@ -765,6 +769,7 @@ class OpenAICompletion(BaseModel):
:created: The Unix timestamp in seconds when the completion was created
:model: The model that was used to generate the completion
:object: The object type, which will be "text_completion"
:metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
"""
id: str
@ -772,6 +777,7 @@ class OpenAICompletion(BaseModel):
created: int
model: str
object: Literal["text_completion"] = "text_completion"
metadata: dict[str, Any] | None = None
@json_schema_type
@ -809,12 +815,14 @@ class OpenAIEmbeddingsResponse(BaseModel):
:param data: List of embedding data objects
:param model: The model that was used to generate the embeddings
:param usage: Usage information
:param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request.
"""
object: Literal["list"] = "list"
data: list[OpenAIEmbeddingData]
model: str
usage: OpenAIEmbeddingUsage
metadata: dict[str, Any] | None = None
class ModelStore(Protocol):
@ -890,6 +898,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
:param top_p: (Optional) The top p to use.
:param user: (Optional) The user to use.
:param suffix: (Optional) The suffix that should be appended to the completion.
:param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
"""
# Standard OpenAI completion parameters
@ -911,6 +920,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
top_p: float | None = None
user: str | None = None
suffix: str | None = None
metadata: dict[str, Any] | None = None
# extra_body can be accessed via .model_extra
@ -941,6 +951,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
:param top_logprobs: (Optional) The top log probabilities to use.
:param top_p: (Optional) The top p to use.
:param user: (Optional) The user to use.
:param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
"""
# Standard OpenAI chat completion parameters
@ -967,6 +978,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
top_logprobs: int | None = None
top_p: float | None = None
user: str | None = None
metadata: dict[str, Any] | None = None
# extra_body can be accessed via .model_extra
@ -979,6 +991,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
:param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float".
:param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
:param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
:param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object.
"""
model: str
@ -986,6 +999,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
encoding_format: str | None = "float"
dimensions: int | None = None
user: str | None = None
metadata: dict[str, Any] | None = None
@runtime_checkable

View file

@ -190,6 +190,8 @@ class InferenceRouter(Inference):
response = await provider.openai_completion(params)
response.model = request_model_id
# Copy metadata from request to response
response.metadata = params.metadata
if self.telemetry_enabled and response.usage is not None:
metrics = self._construct_metrics(
prompt_tokens=response.usage.prompt_tokens,
@ -244,10 +246,13 @@ class InferenceRouter(Inference):
fully_qualified_model_id=request_model_id,
provider_id=provider.__provider_id__,
messages=params.messages,
metadata=params.metadata,
)
response = await self._nonstream_openai_chat_completion(provider, params)
response.model = request_model_id
# Copy metadata from request to response
response.metadata = params.metadata
# Store the response with the ID that will be returned to the client
if self.store:
@ -282,6 +287,8 @@ class InferenceRouter(Inference):
response = await provider.openai_embeddings(params)
response.model = request_model_id
# Copy metadata from request to response
response.metadata = params.metadata
return response
async def list_chat_completions(
@ -340,6 +347,7 @@ class InferenceRouter(Inference):
fully_qualified_model_id: str,
provider_id: str,
messages: list[OpenAIMessageParam] | None = None,
metadata: dict[str, Any] | None = None,
) -> AsyncIterator[OpenAIChatCompletionChunk]:
"""Stream OpenAI chat completion chunks, compute metrics, and store the final completion."""
id = None
@ -359,6 +367,8 @@ class InferenceRouter(Inference):
created = chunk.created
chunk.model = fully_qualified_model_id
# Copy metadata from request to each chunk
chunk.metadata = metadata
# Accumulate choice data for final assembly
if chunk.choices:
@ -467,6 +477,7 @@ class InferenceRouter(Inference):
created=created or int(time.time()),
model=fully_qualified_model_id,
object="chat.completion",
metadata=metadata,
)
logger.debug(f"InferenceRouter.completion_response: {final_response}")
asyncio.create_task(self.store.store_chat_completion(final_response, messages))