From f3aac253528608fa9d2a8d7b388200d80d31e06f Mon Sep 17 00:00:00 2001 From: Abhishek Bongale Date: Wed, 12 Nov 2025 14:38:05 +0000 Subject: [PATCH] feat: Add metadata field to request and response This changes adds Optional metadata field to OpenAI compatible request and response object. fixes: #3564 Signed-off-by: Abhishek Bongale --- client-sdks/stainless/openapi.yml | 114 ++++++++++++++++++++ docs/static/llama-stack-spec.yaml | 114 ++++++++++++++++++++ docs/static/stainless-llama-stack-spec.yaml | 114 ++++++++++++++++++++ src/llama_stack/apis/inference/inference.py | 14 +++ src/llama_stack/core/routers/inference.py | 11 ++ 5 files changed, 367 insertions(+) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 9f3ef15b5..cd5dab53f 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -3909,6 +3909,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. + This metadata is copied from the request. input_messages: type: array items: @@ -4619,6 +4632,19 @@ components: user: type: string description: (Optional) The user to use. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4655,6 +4681,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4694,6 +4733,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information (typically included in final chunk with stream_options) + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4783,6 +4835,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. input_messages: type: array items: @@ -4888,6 +4953,19 @@ components: type: string description: >- (Optional) The suffix that should be appended to the completion. 
+ metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4912,6 +4990,16 @@ components: type: string const: text_completion default: text_completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - id @@ -5744,6 +5832,19 @@ components: description: >- (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -5817,6 +5918,19 @@ components: usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' description: Usage information + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - object diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ce8708b68..529f1dfa4 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3193,6 +3193,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. + This metadata is copied from the request. input_messages: type: array items: @@ -3903,6 +3916,19 @@ components: user: type: string description: (Optional) The user to use. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -3939,6 +3965,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. 
additionalProperties: false required: - id @@ -3978,6 +4017,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information (typically included in final chunk with stream_options) + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4067,6 +4119,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. input_messages: type: array items: @@ -4172,6 +4237,19 @@ components: type: string description: >- (Optional) The suffix that should be appended to the completion. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4196,6 +4274,16 @@ components: type: string const: text_completion default: text_completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - id @@ -5028,6 +5116,19 @@ components: description: >- (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -5101,6 +5202,19 @@ components: usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' description: Usage information + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - object diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9f3ef15b5..cd5dab53f 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -3909,6 +3909,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. + This metadata is copied from the request. 
input_messages: type: array items: @@ -4619,6 +4632,19 @@ components: user: type: string description: (Optional) The user to use. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4655,6 +4681,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4694,6 +4733,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information (typically included in final chunk with stream_options) + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - id @@ -4783,6 +4835,19 @@ components: $ref: '#/components/schemas/OpenAIChatCompletionUsage' description: >- Token usage information for the completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. input_messages: type: array items: @@ -4888,6 +4953,19 @@ components: type: string description: >- (Optional) The suffix that should be appended to the completion. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. additionalProperties: false required: - model @@ -4912,6 +4990,16 @@ components: type: string const: text_completion default: text_completion + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - id @@ -5744,6 +5832,19 @@ components: description: >- (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that can be attached to the request. + This metadata will be included in the response object. 
additionalProperties: false required: - model @@ -5817,6 +5918,19 @@ components: usage: $ref: '#/components/schemas/OpenAIEmbeddingUsage' description: Usage information + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Set of key-value pairs that were attached to the request. This + metadata is copied from the request. additionalProperties: false required: - object diff --git a/src/llama_stack/apis/inference/inference.py b/src/llama_stack/apis/inference/inference.py index 9f04917c9..8d418d53d 100644 --- a/src/llama_stack/apis/inference/inference.py +++ b/src/llama_stack/apis/inference/inference.py @@ -694,6 +694,7 @@ class OpenAIChatCompletion(BaseModel): :param created: The Unix timestamp in seconds when the chat completion was created :param model: The model that was used to generate the chat completion :param usage: Token usage information for the completion + :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request. """ id: str @@ -702,6 +703,7 @@ class OpenAIChatCompletion(BaseModel): created: int model: str usage: OpenAIChatCompletionUsage | None = None + metadata: dict[str, Any] | None = None @json_schema_type @@ -714,6 +716,7 @@ class OpenAIChatCompletionChunk(BaseModel): :param created: The Unix timestamp in seconds when the chat completion was created :param model: The model that was used to generate the chat completion :param usage: Token usage information (typically included in final chunk with stream_options) + :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request. """ id: str @@ -722,6 +725,7 @@ class OpenAIChatCompletionChunk(BaseModel): created: int model: str usage: OpenAIChatCompletionUsage | None = None + metadata: dict[str, Any] | None = None @json_schema_type @@ -765,6 +769,7 @@ class OpenAICompletion(BaseModel): :created: The Unix timestamp in seconds when the completion was created :model: The model that was used to generate the completion :object: The object type, which will be "text_completion" + :metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request. """ id: str @@ -772,6 +777,7 @@ class OpenAICompletion(BaseModel): created: int model: str object: Literal["text_completion"] = "text_completion" + metadata: dict[str, Any] | None = None @json_schema_type @@ -809,12 +815,14 @@ class OpenAIEmbeddingsResponse(BaseModel): :param data: List of embedding data objects :param model: The model that was used to generate the embeddings :param usage: Usage information + :param metadata: (Optional) Set of key-value pairs that were attached to the request. This metadata is copied from the request. """ object: Literal["list"] = "list" data: list[OpenAIEmbeddingData] model: str usage: OpenAIEmbeddingUsage + metadata: dict[str, Any] | None = None class ModelStore(Protocol): @@ -890,6 +898,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"): :param top_p: (Optional) The top p to use. :param user: (Optional) The user to use. :param suffix: (Optional) The suffix that should be appended to the completion. + :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object. 
""" # Standard OpenAI completion parameters @@ -911,6 +920,7 @@ class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"): top_p: float | None = None user: str | None = None suffix: str | None = None + metadata: dict[str, Any] | None = None # extra_body can be accessed via .model_extra @@ -941,6 +951,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"): :param top_logprobs: (Optional) The top log probabilities to use. :param top_p: (Optional) The top p to use. :param user: (Optional) The user to use. + :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object. """ # Standard OpenAI chat completion parameters @@ -967,6 +978,7 @@ class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"): top_logprobs: int | None = None top_p: float | None = None user: str | None = None + metadata: dict[str, Any] | None = None # extra_body can be accessed via .model_extra @@ -979,6 +991,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): :param encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or "base64". Defaults to "float". :param dimensions: (Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. :param user: (Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. + :param metadata: (Optional) Set of key-value pairs that can be attached to the request. This metadata will be included in the response object. """ model: str @@ -986,6 +999,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): encoding_format: str | None = "float" dimensions: int | None = None user: str | None = None + metadata: dict[str, Any] | None = None @runtime_checkable diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index d6270d428..c31dd6765 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -190,6 +190,8 @@ class InferenceRouter(Inference): response = await provider.openai_completion(params) response.model = request_model_id + # Copy metadata from request to response + response.metadata = params.metadata if self.telemetry_enabled and response.usage is not None: metrics = self._construct_metrics( prompt_tokens=response.usage.prompt_tokens, @@ -244,10 +246,13 @@ class InferenceRouter(Inference): fully_qualified_model_id=request_model_id, provider_id=provider.__provider_id__, messages=params.messages, + metadata=params.metadata, ) response = await self._nonstream_openai_chat_completion(provider, params) response.model = request_model_id + # Copy metadata from request to response + response.metadata = params.metadata # Store the response with the ID that will be returned to the client if self.store: @@ -282,6 +287,8 @@ class InferenceRouter(Inference): response = await provider.openai_embeddings(params) response.model = request_model_id + # Copy metadata from request to response + response.metadata = params.metadata return response async def list_chat_completions( @@ -340,6 +347,7 @@ class InferenceRouter(Inference): fully_qualified_model_id: str, provider_id: str, messages: list[OpenAIMessageParam] | None = None, + metadata: dict[str, Any] | None = None, ) -> AsyncIterator[OpenAIChatCompletionChunk]: """Stream OpenAI chat completion chunks, compute metrics, and store the final 
completion.""" id = None @@ -359,6 +367,8 @@ class InferenceRouter(Inference): created = chunk.created chunk.model = fully_qualified_model_id + # Copy metadata from request to each chunk + chunk.metadata = metadata # Accumulate choice data for final assembly if chunk.choices: @@ -467,6 +477,7 @@ class InferenceRouter(Inference): created=created or int(time.time()), model=fully_qualified_model_id, object="chat.completion", + metadata=metadata, ) logger.debug(f"InferenceRouter.completion_response: {final_response}") asyncio.create_task(self.store.store_chat_completion(final_response, messages))