From 44096512b5244b5157b0adacb62febc0379e1126 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 3 Nov 2025 15:56:07 -0800 Subject: [PATCH] feat: add custom_metadata to OpenAIModel to unify /v1/models with /v1/openai/v1/models (#4051) We need to remove `/v1/openai/v1` paths shortly. There is one trouble -- our current `/v1/openai/v1/models` endpoint provides different data than `/v1/models`. Unfortunately our tests target the latter (llama-stack customized) behavior. We need to get to true OpenAI compatibility. This is step 1: adding `custom_metadata` field to `OpenAIModel` that includes all the extra stuff we add in the native `/v1/models` response. This can be extracted on the consumer end by looking at `__pydantic_extra__` or other similar fields. This PR: - Adds `custom_metadata` field to `OpenAIModel` class in `src/llama_stack/apis/models/models.py` - Modifies `openai_list_models()` in `src/llama_stack/core/routing_tables/models.py` to populate custom_metadata Next Steps 1. Update stainless client to use `/v1/openai/v1/models` instead of `/v1/models` 2. Migrate tests to read from `custom_metadata` 3. 
Remove `/v1/openai/v1/` prefix entirely and consolidate to single `/v1/models` endpoint --- client-sdks/stainless/openapi.yml | 67 +++++++++++++++++++ docs/static/deprecated-llama-stack-spec.yaml | 60 ----------------- docs/static/llama-stack-spec.yaml | 67 +++++++++++++++++++ docs/static/stainless-llama-stack-spec.yaml | 67 +++++++++++++++++++ src/llama_stack/apis/models/models.py | 4 +- src/llama_stack/core/routing_tables/models.py | 6 ++ .../routers/test_routing_tables.py | 8 +++ 7 files changed, 218 insertions(+), 61 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index a1085c9eb..985e344d4 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -1129,6 +1129,31 @@ paths: $ref: '#/components/schemas/RunModerationRequest' required: true deprecated: false + /v1/openai/v1/models: + get: + responses: + '200': + description: A OpenAIListModelsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIListModelsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: List models using the OpenAI API. + description: List models using the OpenAI API. + parameters: [] + deprecated: false /v1/prompts: get: responses: @@ -7029,6 +7054,48 @@ components: - metadata title: ModerationObjectResults description: A moderation object. 
+ OpenAIModel: + type: object + properties: + id: + type: string + object: + type: string + const: model + default: model + created: + type: integer + owned_by: + type: string + custom_metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - id + - object + - created + - owned_by + title: OpenAIModel + description: A model from OpenAI. + OpenAIListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIModel' + additionalProperties: false + required: + - data + title: OpenAIListModelsResponse Prompt: type: object properties: diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index ec3880a6b..15a3166de 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1561,31 +1561,6 @@ paths: schema: type: string deprecated: true - /v1/openai/v1/models: - get: - responses: - '200': - description: A OpenAIListModelsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/OpenAIListModelsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Models - summary: List models using the OpenAI API. - description: List models using the OpenAI API. 
- parameters: [] - deprecated: true /v1/openai/v1/moderations: post: responses: @@ -6516,38 +6491,6 @@ components: Response: type: object title: Response - OpenAIModel: - type: object - properties: - id: - type: string - object: - type: string - const: model - default: model - created: - type: integer - owned_by: - type: string - additionalProperties: false - required: - - id - - object - - created - - owned_by - title: OpenAIModel - description: A model from OpenAI. - OpenAIListModelsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/OpenAIModel' - additionalProperties: false - required: - - data - title: OpenAIListModelsResponse RunModerationRequest: type: object properties: @@ -10768,8 +10711,6 @@ tags: - Rerank models: these models reorder the documents based on their relevance to a query. x-displayName: Inference - - name: Models - description: '' - name: PostTraining (Coming Soon) description: '' - name: Safety @@ -10788,7 +10729,6 @@ x-tagGroups: - Eval - Files - Inference - - Models - PostTraining (Coming Soon) - Safety - VectorIO diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index e35287952..e94725c82 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -1126,6 +1126,31 @@ paths: $ref: '#/components/schemas/RunModerationRequest' required: true deprecated: false + /v1/openai/v1/models: + get: + responses: + '200': + description: A OpenAIListModelsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIListModelsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: List models using the OpenAI API. + description: List models using the OpenAI API. 
+ parameters: [] + deprecated: false /v1/prompts: get: responses: @@ -5816,6 +5841,48 @@ components: - metadata title: ModerationObjectResults description: A moderation object. + OpenAIModel: + type: object + properties: + id: + type: string + object: + type: string + const: model + default: model + created: + type: integer + owned_by: + type: string + custom_metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - id + - object + - created + - owned_by + title: OpenAIModel + description: A model from OpenAI. + OpenAIListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIModel' + additionalProperties: false + required: + - data + title: OpenAIListModelsResponse Prompt: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index a1085c9eb..985e344d4 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -1129,6 +1129,31 @@ paths: $ref: '#/components/schemas/RunModerationRequest' required: true deprecated: false + /v1/openai/v1/models: + get: + responses: + '200': + description: A OpenAIListModelsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIListModelsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Models + summary: List models using the OpenAI API. + description: List models using the OpenAI API. + parameters: [] + deprecated: false /v1/prompts: get: responses: @@ -7029,6 +7054,48 @@ components: - metadata title: ModerationObjectResults description: A moderation object. 
+ OpenAIModel: + type: object + properties: + id: + type: string + object: + type: string + const: model + default: model + created: + type: integer + owned_by: + type: string + custom_metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - id + - object + - created + - owned_by + title: OpenAIModel + description: A model from OpenAI. + OpenAIListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIModel' + additionalProperties: false + required: + - data + title: OpenAIListModelsResponse Prompt: type: object properties: diff --git a/src/llama_stack/apis/models/models.py b/src/llama_stack/apis/models/models.py index 903bd6510..a963c8dcc 100644 --- a/src/llama_stack/apis/models/models.py +++ b/src/llama_stack/apis/models/models.py @@ -90,12 +90,14 @@ class OpenAIModel(BaseModel): :object: The object type, which will be "model" :created: The Unix timestamp in seconds when the model was created :owned_by: The owner of the model + :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata """ id: str object: Literal["model"] = "model" created: int owned_by: str + custom_metadata: dict[str, Any] | None = None class OpenAIListModelsResponse(BaseModel): @@ -113,7 +115,7 @@ class Models(Protocol): """ ... - @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_models(self) -> OpenAIListModelsResponse: """List models using the OpenAI API. 
diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py index be17be3d4..1fb1186cd 100644 --- a/src/llama_stack/core/routing_tables/models.py +++ b/src/llama_stack/core/routing_tables/models.py @@ -134,6 +134,12 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): object="model", created=int(time.time()), owned_by="llama_stack", + custom_metadata={ + "model_type": model.model_type, + "provider_id": model.provider_id, + "provider_resource_id": model.provider_resource_id, + **model.metadata, + }, ) for model in all_models ] diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 87ebcef00..8c1838ba3 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -166,6 +166,14 @@ async def test_models_routing_table(cached_disk_dist_registry): assert "test_provider/test-model" in openai_model_ids assert "test_provider/test-model-2" in openai_model_ids + # Verify custom_metadata is populated with Llama Stack-specific data + for openai_model in openai_models.data: + assert openai_model.custom_metadata is not None + assert "model_type" in openai_model.custom_metadata + assert "provider_id" in openai_model.custom_metadata + assert "provider_resource_id" in openai_model.custom_metadata + assert openai_model.custom_metadata["provider_id"] == "test_provider" + # Test get_object_by_identifier model = await table.get_object_by_identifier("model", "test_provider/test-model") assert model is not None