Use our own pydantic models for OpenAI Server APIs

Importing the models from the OpenAI client library required a top-level dependency on the openai Python package, and it was also incompatible with our API generation code because of some quirks in how the OpenAI pydantic models are defined. This change therefore creates our own stubs of those pydantic models, so that we are in more direct control of the API surface of this OpenAI-compatible API, so that it works with our code generation, and so that the openai Python client isn't a hard requirement of Llama Stack's API.
2025-08-03 09:21:45 +00:00 · 2025-04-08 09:01:35 -04:00 · 2025-04-08 09:01:35 -04:00 · 92fdf6d0c9
commit 92fdf6d0c9
parent a193c9fc3f
8 changed files with 1826 additions and 15 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -3092,6 +3092,125 @@
                }
            }
        },
+        "/v1/openai/v1/chat/completions": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/OpenAIChatCompletion"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/OpenaiChatCompletionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
+        "/v1/openai/v1/completions": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/OpenAICompletion"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/OpenaiCompletionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
+        "/v1/openai/v1/models": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/OpenAIListModelsResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Models"
+                ],
+                "description": "List models in an OpenAI-compatible format.",
+                "parameters": []
+            }
+        },
        "/v1/post-training/preference-optimize": {
            "post": {
                "responses": {
@ -8713,6 +8832,785 @@
                ],
                "title": "LogEventRequest"
            },
+            "OpenAIAssistantMessageParam": {
+                "type": "object",
+                "properties": {
+                    "role": {
+                        "type": "string",
+                        "const": "assistant",
+                        "default": "assistant",
+                        "description": "Must be \"assistant\" to identify this as the model's response"
+                    },
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The content of the model's response"
+                    },
+                    "name": {
+                        "type": "string",
+                        "description": "(Optional) The name of the assistant message participant."
+                    },
+                    "tool_calls": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/ToolCall"
+                        },
+                        "description": "List of tool calls. Each tool call is a ToolCall object."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "role",
+                    "content"
+                ],
+                "title": "OpenAIAssistantMessageParam",
+                "description": "A message containing the model's (assistant) response in an OpenAI-compatible chat completion request."
+            },
+            "OpenAIDeveloperMessageParam": {
+                "type": "object",
+                "properties": {
+                    "role": {
+                        "type": "string",
+                        "const": "developer",
+                        "default": "developer",
+                        "description": "Must be \"developer\" to identify this as a developer message"
+                    },
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The content of the developer message"
+                    },
+                    "name": {
+                        "type": "string",
+                        "description": "(Optional) The name of the developer message participant."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "role",
+                    "content"
+                ],
+                "title": "OpenAIDeveloperMessageParam",
+                "description": "A message from the developer in an OpenAI-compatible chat completion request."
+            },
+            "OpenAIMessageParam": {
+                "oneOf": [
+                    {
+                        "$ref": "#/components/schemas/OpenAIUserMessageParam"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAISystemMessageParam"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIAssistantMessageParam"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIToolMessageParam"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIDeveloperMessageParam"
+                    }
+                ],
+                "discriminator": {
+                    "propertyName": "role",
+                    "mapping": {
+                        "user": "#/components/schemas/OpenAIUserMessageParam",
+                        "system": "#/components/schemas/OpenAISystemMessageParam",
+                        "assistant": "#/components/schemas/OpenAIAssistantMessageParam",
+                        "tool": "#/components/schemas/OpenAIToolMessageParam",
+                        "developer": "#/components/schemas/OpenAIDeveloperMessageParam"
+                    }
+                }
+            },
+            "OpenAISystemMessageParam": {
+                "type": "object",
+                "properties": {
+                    "role": {
+                        "type": "string",
+                        "const": "system",
+                        "default": "system",
+                        "description": "Must be \"system\" to identify this as a system message"
+                    },
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The content of the \"system prompt\". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions)."
+                    },
+                    "name": {
+                        "type": "string",
+                        "description": "(Optional) The name of the system message participant."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "role",
+                    "content"
+                ],
+                "title": "OpenAISystemMessageParam",
+                "description": "A system message providing instructions or context to the model."
+            },
+            "OpenAIToolMessageParam": {
+                "type": "object",
+                "properties": {
+                    "role": {
+                        "type": "string",
+                        "const": "tool",
+                        "default": "tool",
+                        "description": "Must be \"tool\" to identify this as a tool response"
+                    },
+                    "tool_call_id": {
+                        "type": "string",
+                        "description": "Unique identifier for the tool call this response is for"
+                    },
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The response content from the tool"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "role",
+                    "tool_call_id",
+                    "content"
+                ],
+                "title": "OpenAIToolMessageParam",
+                "description": "A message representing the result of a tool invocation in an OpenAI-compatible chat completion request."
+            },
+            "OpenAIUserMessageParam": {
+                "type": "object",
+                "properties": {
+                    "role": {
+                        "type": "string",
+                        "const": "user",
+                        "default": "user",
+                        "description": "Must be \"user\" to identify this as a user message"
+                    },
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The content of the message, which can include text and other media"
+                    },
+                    "name": {
+                        "type": "string",
+                        "description": "(Optional) The name of the user message participant."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "role",
+                    "content"
+                ],
+                "title": "OpenAIUserMessageParam",
+                "description": "A message from the user in an OpenAI-compatible chat completion request."
+            },
+            "OpenaiChatCompletionRequest": {
+                "type": "object",
+                "properties": {
+                    "model": {
+                        "type": "string",
+                        "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
+                    },
+                    "messages": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAIMessageParam"
+                        },
+                        "description": "List of messages in the conversation"
+                    },
+                    "frequency_penalty": {
+                        "type": "number",
+                        "description": "(Optional) The penalty for repeated tokens"
+                    },
+                    "function_call": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "object",
+                                "additionalProperties": {
+                                    "oneOf": [
+                                        {
+                                            "type": "null"
+                                        },
+                                        {
+                                            "type": "boolean"
+                                        },
+                                        {
+                                            "type": "number"
+                                        },
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "type": "array"
+                                        },
+                                        {
+                                            "type": "object"
+                                        }
+                                    ]
+                                }
+                            }
+                        ],
+                        "description": "(Optional) The function call to use"
+                    },
+                    "functions": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
+                        },
+                        "description": "(Optional) List of functions to use"
+                    },
+                    "logit_bias": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        },
+                        "description": "(Optional) The logit bias to use"
+                    },
+                    "logprobs": {
+                        "type": "boolean",
+                        "description": "(Optional) Whether to return log probabilities of the output tokens"
+                    },
+                    "max_completion_tokens": {
+                        "type": "integer",
+                        "description": "(Optional) The maximum number of tokens to generate"
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "description": "(Optional) The maximum number of tokens to generate"
+                    },
+                    "n": {
+                        "type": "integer",
+                        "description": "(Optional) The number of completions to generate"
+                    },
+                    "parallel_tool_calls": {
+                        "type": "boolean",
+                        "description": "(Optional) Whether to parallelize tool calls"
+                    },
+                    "presence_penalty": {
+                        "type": "number",
+                        "description": "(Optional) The penalty for tokens that have already appeared in the text, regardless of how often"
+                    },
+                    "response_format": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        },
+                        "description": "(Optional) The response format to use"
+                    },
+                    "seed": {
+                        "type": "integer",
+                        "description": "(Optional) The seed to use"
+                    },
+                    "stop": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            }
+                        ],
+                        "description": "(Optional) The stop tokens to use"
+                    },
+                    "stream": {
+                        "type": "boolean",
+                        "description": "(Optional) Whether to stream the response"
+                    },
+                    "stream_options": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        },
+                        "description": "(Optional) The stream options to use"
+                    },
+                    "temperature": {
+                        "type": "number",
+                        "description": "(Optional) The temperature to use"
+                    },
+                    "tool_choice": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "object",
+                                "additionalProperties": {
+                                    "oneOf": [
+                                        {
+                                            "type": "null"
+                                        },
+                                        {
+                                            "type": "boolean"
+                                        },
+                                        {
+                                            "type": "number"
+                                        },
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "type": "array"
+                                        },
+                                        {
+                                            "type": "object"
+                                        }
+                                    ]
+                                }
+                            }
+                        ],
+                        "description": "(Optional) The tool choice to use"
+                    },
+                    "tools": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
+                        },
+                        "description": "(Optional) The tools to use"
+                    },
+                    "top_logprobs": {
+                        "type": "integer",
+                        "description": "(Optional) The number of most likely tokens to return with log probabilities at each token position"
+                    },
+                    "top_p": {
+                        "type": "number",
+                        "description": "(Optional) The top p to use"
+                    },
+                    "user": {
+                        "type": "string",
+                        "description": "(Optional) The user to use"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model",
+                    "messages"
+                ],
+                "title": "OpenaiChatCompletionRequest"
+            },
+            "OpenAIChatCompletion": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string",
+                        "description": "The ID of the chat completion"
+                    },
+                    "choices": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAIChoice"
+                        },
+                        "description": "List of choices"
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "chat.completion",
+                        "default": "chat.completion",
+                        "description": "The object type, which will be \"chat.completion\""
+                    },
+                    "created": {
+                        "type": "integer",
+                        "description": "The Unix timestamp in seconds when the chat completion was created"
+                    },
+                    "model": {
+                        "type": "string",
+                        "description": "The model that was used to generate the chat completion"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "choices",
+                    "object",
+                    "created",
+                    "model"
+                ],
+                "title": "OpenAIChatCompletion",
+                "description": "Response from an OpenAI-compatible chat completion request."
+            },
+            "OpenAIChoice": {
+                "type": "object",
+                "properties": {
+                    "message": {
+                        "$ref": "#/components/schemas/OpenAIMessageParam",
+                        "description": "The message from the model"
+                    },
+                    "finish_reason": {
+                        "type": "string",
+                        "description": "The reason the model stopped generating"
+                    },
+                    "index": {
+                        "type": "integer"
+                    },
+                    "logprobs": {
+                        "$ref": "#/components/schemas/OpenAIChoiceLogprobs"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "message",
+                    "finish_reason",
+                    "index"
+                ],
+                "title": "OpenAIChoice",
+                "description": "A choice from an OpenAI-compatible chat completion response."
+            },
+            "OpenAIChoiceLogprobs": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAITokenLogProb"
+                        }
+                    },
+                    "refusal": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAITokenLogProb"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "title": "OpenAIChoiceLogprobs",
+                "description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
+            },
+            "OpenAITokenLogProb": {
+                "type": "object",
+                "properties": {
+                    "token": {
+                        "type": "string"
+                    },
+                    "bytes": {
+                        "type": "array",
+                        "items": {
+                            "type": "integer"
+                        }
+                    },
+                    "logprob": {
+                        "type": "number"
+                    },
+                    "top_logprobs": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAITopLogProb"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "token",
+                    "logprob",
+                    "top_logprobs"
+                ],
+                "title": "OpenAITokenLogProb",
+                "description": "The log probability for a token from an OpenAI-compatible chat completion response."
+            },
+            "OpenAITopLogProb": {
+                "type": "object",
+                "properties": {
+                    "token": {
+                        "type": "string"
+                    },
+                    "bytes": {
+                        "type": "array",
+                        "items": {
+                            "type": "integer"
+                        }
+                    },
+                    "logprob": {
+                        "type": "number"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "token",
+                    "logprob"
+                ],
+                "title": "OpenAITopLogProb",
+                "description": "The top log probability for a token from an OpenAI-compatible chat completion response."
+            },
+            "OpenaiCompletionRequest": {
+                "type": "object",
+                "properties": {
+                    "model": {
+                        "type": "string",
+                        "description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
+                    },
+                    "prompt": {
+                        "type": "string",
+                        "description": "The prompt to generate a completion for"
+                    },
+                    "best_of": {
+                        "type": "integer",
+                        "description": "(Optional) The number of candidate completions to generate server-side, from which the best is returned"
+                    },
+                    "echo": {
+                        "type": "boolean",
+                        "description": "(Optional) Whether to echo the prompt"
+                    },
+                    "frequency_penalty": {
+                        "type": "number",
+                        "description": "(Optional) The penalty for repeated tokens"
+                    },
+                    "logit_bias": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        },
+                        "description": "(Optional) The logit bias to use"
+                    },
+                    "logprobs": {
+                        "type": "boolean",
+                        "description": "(Optional) The log probabilities to use"
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "description": "(Optional) The maximum number of tokens to generate"
+                    },
+                    "n": {
+                        "type": "integer",
+                        "description": "(Optional) The number of completions to generate"
+                    },
+                    "presence_penalty": {
+                        "type": "number",
+                        "description": "(Optional) The penalty for tokens already present in the text so far"
+                    },
+                    "seed": {
+                        "type": "integer",
+                        "description": "(Optional) The seed to use"
+                    },
+                    "stop": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            }
+                        ],
+                        "description": "(Optional) The stop tokens to use"
+                    },
+                    "stream": {
+                        "type": "boolean",
+                        "description": "(Optional) Whether to stream the response"
+                    },
+                    "stream_options": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        },
+                        "description": "(Optional) The stream options to use"
+                    },
+                    "temperature": {
+                        "type": "number",
+                        "description": "(Optional) The temperature to use"
+                    },
+                    "top_p": {
+                        "type": "number",
+                        "description": "(Optional) The top p to use"
+                    },
+                    "user": {
+                        "type": "string",
+                        "description": "(Optional) The user to use"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model",
+                    "prompt"
+                ],
+                "title": "OpenaiCompletionRequest"
+            },
+            "OpenAICompletion": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string"
+                    },
+                    "choices": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAICompletionChoice"
+                        }
+                    },
+                    "created": {
+                        "type": "integer"
+                    },
+                    "model": {
+                        "type": "string"
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "text_completion",
+                        "default": "text_completion"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "choices",
+                    "created",
+                    "model",
+                    "object"
+                ],
+                "title": "OpenAICompletion",
+                "description": "Response from an OpenAI-compatible completion request."
+            },
+            "OpenAICompletionChoice": {
+                "type": "object",
+                "properties": {
+                    "finish_reason": {
+                        "type": "string"
+                    },
+                    "text": {
+                        "type": "string"
+                    },
+                    "index": {
+                        "type": "integer"
+                    },
+                    "logprobs": {
+                        "$ref": "#/components/schemas/OpenAIChoiceLogprobs"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "finish_reason",
+                    "text",
+                    "index"
+                ],
+                "title": "OpenAICompletionChoice",
+                "description": "A choice from an OpenAI-compatible completion response."
+            },
+            "OpenAIModel": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string"
+                    },
+                    "object": {
+                        "type": "string",
+                        "const": "model",
+                        "default": "model"
+                    },
+                    "created": {
+                        "type": "integer"
+                    },
+                    "owned_by": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "object",
+                    "created",
+                    "owned_by"
+                ],
+                "title": "OpenAIModel",
+                "description": "A model from OpenAI."
+            },
+            "OpenAIListModelsResponse": {
+                "type": "object",
+                "properties": {
+                    "data": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/OpenAIModel"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "data"
+                ],
+                "title": "OpenAIListModelsResponse"
+            },
            "DPOAlignmentConfig": {
                "type": "object",
                "properties": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -2131,6 +2131,91 @@ paths:
            schema:
              $ref: '#/components/schemas/LogEventRequest'
        required: true
+  /v1/openai/v1/chat/completions:
+    post:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIChatCompletion'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      description: >-
+        Generate an OpenAI-compatible chat completion for the given messages using
+        the specified model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/OpenaiChatCompletionRequest'
+        required: true
+  /v1/openai/v1/completions:
+    post:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAICompletion'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      description: >-
+        Generate an OpenAI-compatible completion for the given prompt using the specified
+        model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/OpenaiCompletionRequest'
+        required: true
+  /v1/openai/v1/models:
+    get:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIListModelsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Models
+      description: ''
+      parameters: []
  /v1/post-training/preference-optimize:
    post:
      responses:
@ -5980,6 +6065,568 @@ components:
        - event
        - ttl_seconds
      title: LogEventRequest
+    OpenAIAssistantMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: assistant
+          default: assistant
+          description: >-
+            Must be "assistant" to identify this as the model's response
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: The content of the model's response
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the assistant message participant.
+        tool_calls:
+          type: array
+          items:
+            $ref: '#/components/schemas/ToolCall'
+          description: >-
+            List of tool calls. Each tool call is a ToolCall object.
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: OpenAIAssistantMessageParam
+      description: >-
+        A message containing the model's (assistant) response in an OpenAI-compatible
+        chat completion request.
+    OpenAIDeveloperMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: developer
+          default: developer
+          description: >-
+            Must be "developer" to identify this as a developer message
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: The content of the developer message
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the developer message participant.
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: OpenAIDeveloperMessageParam
+      description: >-
+        A message from the developer in an OpenAI-compatible chat completion request.
+    OpenAIMessageParam:
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIUserMessageParam'
+        - $ref: '#/components/schemas/OpenAISystemMessageParam'
+        - $ref: '#/components/schemas/OpenAIAssistantMessageParam'
+        - $ref: '#/components/schemas/OpenAIToolMessageParam'
+        - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+      discriminator:
+        propertyName: role
+        mapping:
+          user: '#/components/schemas/OpenAIUserMessageParam'
+          system: '#/components/schemas/OpenAISystemMessageParam'
+          assistant: '#/components/schemas/OpenAIAssistantMessageParam'
+          tool: '#/components/schemas/OpenAIToolMessageParam'
+          developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+    OpenAISystemMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: system
+          default: system
+          description: >-
+            Must be "system" to identify this as a system message
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            The content of the "system prompt". If multiple system messages are provided,
+            they are concatenated. The underlying Llama Stack code may also add other
+            system messages (for example, for formatting tool definitions).
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the system message participant.
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: OpenAISystemMessageParam
+      description: >-
+        A system message providing instructions or context to the model.
+    OpenAIToolMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: tool
+          default: tool
+          description: >-
+            Must be "tool" to identify this as a tool response
+        tool_call_id:
+          type: string
+          description: >-
+            Unique identifier for the tool call this response is for
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: The response content from the tool
+      additionalProperties: false
+      required:
+        - role
+        - tool_call_id
+        - content
+      title: OpenAIToolMessageParam
+      description: >-
+        A message representing the result of a tool invocation in an OpenAI-compatible
+        chat completion request.
+    OpenAIUserMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: user
+          default: user
+          description: >-
+            Must be "user" to identify this as a user message
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            The content of the message, which can include text and other media
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the user message participant.
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: OpenAIUserMessageParam
+      description: >-
+        A message from the user in an OpenAI-compatible chat completion request.
+    OpenaiChatCompletionRequest:
+      type: object
+      properties:
+        model:
+          type: string
+          description: >-
+            The identifier of the model to use. The model must be registered with
+            Llama Stack and available via the /models endpoint.
+        messages:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIMessageParam'
+          description: List of messages in the conversation
+        frequency_penalty:
+          type: number
+          description: >-
+            (Optional) The penalty for repeated tokens
+        function_call:
+          oneOf:
+            - type: string
+            - type: object
+              additionalProperties:
+                oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+          description: (Optional) The function call to use
+        functions:
+          type: array
+          items:
+            type: object
+            additionalProperties:
+              oneOf:
+                - type: 'null'
+                - type: boolean
+                - type: number
+                - type: string
+                - type: array
+                - type: object
+          description: (Optional) List of functions to use
+        logit_bias:
+          type: object
+          additionalProperties:
+            type: number
+          description: (Optional) The logit bias to use
+        logprobs:
+          type: boolean
+          description: (Optional) The log probabilities to use
+        max_completion_tokens:
+          type: integer
+          description: >-
+            (Optional) The maximum number of tokens to generate
+        max_tokens:
+          type: integer
+          description: >-
+            (Optional) The maximum number of tokens to generate
+        n:
+          type: integer
+          description: >-
+            (Optional) The number of completions to generate
+        parallel_tool_calls:
+          type: boolean
+          description: >-
+            (Optional) Whether to parallelize tool calls
+        presence_penalty:
+          type: number
+          description: >-
+            (Optional) The penalty for tokens already present in the text so far
+        response_format:
+          type: object
+          additionalProperties:
+            type: string
+          description: (Optional) The response format to use
+        seed:
+          type: integer
+          description: (Optional) The seed to use
+        stop:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                type: string
+          description: (Optional) The stop tokens to use
+        stream:
+          type: boolean
+          description: >-
+            (Optional) Whether to stream the response
+        stream_options:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: (Optional) The stream options to use
+        temperature:
+          type: number
+          description: (Optional) The temperature to use
+        tool_choice:
+          oneOf:
+            - type: string
+            - type: object
+              additionalProperties:
+                oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+          description: (Optional) The tool choice to use
+        tools:
+          type: array
+          items:
+            type: object
+            additionalProperties:
+              oneOf:
+                - type: 'null'
+                - type: boolean
+                - type: number
+                - type: string
+                - type: array
+                - type: object
+          description: (Optional) The tools to use
+        top_logprobs:
+          type: integer
+          description: >-
+            (Optional) The top log probabilities to use
+        top_p:
+          type: number
+          description: (Optional) The top p to use
+        user:
+          type: string
+          description: (Optional) The user to use
+      additionalProperties: false
+      required:
+        - model
+        - messages
+      title: OpenaiChatCompletionRequest
+    OpenAIChatCompletion:
+      type: object
+      properties:
+        id:
+          type: string
+          description: The ID of the chat completion
+        choices:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIChoice'
+          description: List of choices
+        object:
+          type: string
+          const: chat.completion
+          default: chat.completion
+          description: >-
+            The object type, which will be "chat.completion"
+        created:
+          type: integer
+          description: >-
+            The Unix timestamp in seconds when the chat completion was created
+        model:
+          type: string
+          description: >-
+            The model that was used to generate the chat completion
+      additionalProperties: false
+      required:
+        - id
+        - choices
+        - object
+        - created
+        - model
+      title: OpenAIChatCompletion
+      description: >-
+        Response from an OpenAI-compatible chat completion request.
+    OpenAIChoice:
+      type: object
+      properties:
+        message:
+          $ref: '#/components/schemas/OpenAIMessageParam'
+          description: The message from the model
+        finish_reason:
+          type: string
+          description: The reason the model stopped generating
+        index:
+          type: integer
+        logprobs:
+          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+      additionalProperties: false
+      required:
+        - message
+        - finish_reason
+        - index
+      title: OpenAIChoice
+      description: >-
+        A choice from an OpenAI-compatible chat completion response.
+    OpenAIChoiceLogprobs:
+      type: object
+      properties:
+        content:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAITokenLogProb'
+        refusal:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAITokenLogProb'
+      additionalProperties: false
+      title: OpenAIChoiceLogprobs
+      description: >-
+        The log probabilities for the tokens in the message from an OpenAI-compatible
+        chat completion response.
+    OpenAITokenLogProb:
+      type: object
+      properties:
+        token:
+          type: string
+        bytes:
+          type: array
+          items:
+            type: integer
+        logprob:
+          type: number
+        top_logprobs:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAITopLogProb'
+      additionalProperties: false
+      required:
+        - token
+        - logprob
+        - top_logprobs
+      title: OpenAITokenLogProb
+      description: >-
+        The log probability for a token from an OpenAI-compatible chat completion
+        response.
+    OpenAITopLogProb:
+      type: object
+      properties:
+        token:
+          type: string
+        bytes:
+          type: array
+          items:
+            type: integer
+        logprob:
+          type: number
+      additionalProperties: false
+      required:
+        - token
+        - logprob
+      title: OpenAITopLogProb
+      description: >-
+        The top log probability for a token from an OpenAI-compatible chat completion
+        response.
+    OpenaiCompletionRequest:
+      type: object
+      properties:
+        model:
+          type: string
+          description: >-
+            The identifier of the model to use. The model must be registered with
+            Llama Stack and available via the /models endpoint.
+        prompt:
+          type: string
+          description: The prompt to generate a completion for
+        best_of:
+          type: integer
+          description: >-
+            (Optional) The number of candidate completions to generate, from which
+            the best are returned
+        echo:
+          type: boolean
+          description: (Optional) Whether to echo the prompt
+        frequency_penalty:
+          type: number
+          description: >-
+            (Optional) The penalty for repeated tokens
+        logit_bias:
+          type: object
+          additionalProperties:
+            type: number
+          description: (Optional) The logit bias to use
+        logprobs:
+          type: boolean
+          description: (Optional) The log probabilities to use
+        max_tokens:
+          type: integer
+          description: >-
+            (Optional) The maximum number of tokens to generate
+        n:
+          type: integer
+          description: >-
+            (Optional) The number of completions to generate
+        presence_penalty:
+          type: number
+          description: >-
+            (Optional) The penalty for tokens already present in the text so far
+        seed:
+          type: integer
+          description: (Optional) The seed to use
+        stop:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                type: string
+          description: (Optional) The stop tokens to use
+        stream:
+          type: boolean
+          description: >-
+            (Optional) Whether to stream the response
+        stream_options:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: (Optional) The stream options to use
+        temperature:
+          type: number
+          description: (Optional) The temperature to use
+        top_p:
+          type: number
+          description: (Optional) The top p to use
+        user:
+          type: string
+          description: (Optional) The user to use
+      additionalProperties: false
+      required:
+        - model
+        - prompt
+      title: OpenaiCompletionRequest
+    OpenAICompletion:
+      type: object
+      properties:
+        id:
+          type: string
+        choices:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAICompletionChoice'
+        created:
+          type: integer
+        model:
+          type: string
+        object:
+          type: string
+          const: text_completion
+          default: text_completion
+      additionalProperties: false
+      required:
+        - id
+        - choices
+        - created
+        - model
+        - object
+      title: OpenAICompletion
+      description: >-
+        Response from an OpenAI-compatible completion request.
+    OpenAICompletionChoice:
+      type: object
+      properties:
+        finish_reason:
+          type: string
+        text:
+          type: string
+        index:
+          type: integer
+        logprobs:
+          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+      additionalProperties: false
+      required:
+        - finish_reason
+        - text
+        - index
+      title: OpenAICompletionChoice
+      description: >-
+        A choice from an OpenAI-compatible completion response.
+    OpenAIModel:
+      type: object
+      properties:
+        id:
+          type: string
+        object:
+          type: string
+          const: model
+          default: model
+        created:
+          type: integer
+        owned_by:
+          type: string
+      additionalProperties: false
+      required:
+        - id
+        - object
+        - created
+        - owned_by
+      title: OpenAIModel
+      description: A model from OpenAI.
+    OpenAIListModelsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIModel'
+      additionalProperties: false
+      required:
+        - data
+      title: OpenAIListModelsResponse
    DPOAlignmentConfig:
      type: object
      properties:
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@ -17,9 +17,6 @@ from typing import (
    runtime_checkable,
 )

-from openai.types.chat import ChatCompletion as OpenAIChatCompletion
-from openai.types.chat import ChatCompletionMessageParam as OpenAIChatCompletionMessageParam
-from openai.types.completion import Completion as OpenAICompletion
 from pydantic import BaseModel, Field, field_validator
 from typing_extensions import Annotated

@ -445,6 +442,217 @@ class EmbeddingsResponse(BaseModel):
    embeddings: List[List[float]]


+@json_schema_type
+class OpenAIUserMessageParam(BaseModel):
+    """A message from the user in an OpenAI-compatible chat completion request.
+
+    :param role: Must be "user" to identify this as a user message
+    :param content: The content of the message, which can include text and other media
+    :param name: (Optional) The name of the user message participant.
+    """
+
+    role: Literal["user"] = "user"
+    content: InterleavedContent
+    name: Optional[str] = None
+
+
+@json_schema_type
+class OpenAISystemMessageParam(BaseModel):
+    """A system message providing instructions or context to the model.
+
+    :param role: Must be "system" to identify this as a system message
+    :param content: The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions).
+    :param name: (Optional) The name of the system message participant.
+    """
+
+    role: Literal["system"] = "system"
+    content: InterleavedContent
+    name: Optional[str] = None
+
+
+@json_schema_type
+class OpenAIAssistantMessageParam(BaseModel):
+    """A message containing the model's (assistant) response in an OpenAI-compatible chat completion request.
+
+    :param role: Must be "assistant" to identify this as the model's response
+    :param content: The content of the model's response
+    :param name: (Optional) The name of the assistant message participant.
+    :param tool_calls: (Optional) List of tool calls. Each tool call is a ToolCall object. Defaults to an empty list.
+    """
+
+    role: Literal["assistant"] = "assistant"
+    content: InterleavedContent
+    name: Optional[str] = None
+    tool_calls: Optional[List[ToolCall]] = Field(default_factory=list)
+
+
+@json_schema_type
+class OpenAIToolMessageParam(BaseModel):
+    """A message representing the result of a tool invocation in an OpenAI-compatible chat completion request.
+
+    :param role: Must be "tool" to identify this as a tool response
+    :param tool_call_id: Unique identifier for the tool call this response is for
+    :param content: The response content from the tool
+    """
+
+    role: Literal["tool"] = "tool"
+    tool_call_id: str
+    content: InterleavedContent
+
+
+@json_schema_type
+class OpenAIDeveloperMessageParam(BaseModel):
+    """A message from the developer in an OpenAI-compatible chat completion request.
+
+    :param role: Must be "developer" to identify this as a developer message
+    :param content: The content of the developer message
+    :param name: (Optional) The name of the developer message participant.
+    """
+
+    role: Literal["developer"] = "developer"
+    content: InterleavedContent
+    name: Optional[str] = None
+
+
+OpenAIMessageParam = Annotated[
+    Union[
+        OpenAIUserMessageParam,
+        OpenAISystemMessageParam,
+        OpenAIAssistantMessageParam,
+        OpenAIToolMessageParam,
+        OpenAIDeveloperMessageParam,
+    ],
+    Field(discriminator="role"),
+]
+register_schema(OpenAIMessageParam, name="OpenAIMessageParam")
+
+
+@json_schema_type
+class OpenAITopLogProb(BaseModel):
+    """The top log probability for a token from an OpenAI-compatible chat completion response.
+
+    :param token: The token
+    :param bytes: (Optional) The bytes for the token
+    :param logprob: The log probability of the token
+    """
+
+    token: str
+    bytes: Optional[List[int]] = None
+    logprob: float
+
+
+@json_schema_type
+class OpenAITokenLogProb(BaseModel):
+    """The log probability for a token from an OpenAI-compatible chat completion response.
+
+    :param token: The token
+    :param bytes: (Optional) The bytes for the token
+    :param logprob: The log probability of the token
+    :param top_logprobs: The top log probabilities for the token
+    """
+
+    token: str
+    bytes: Optional[List[int]] = None
+    logprob: float
+    top_logprobs: List[OpenAITopLogProb]
+
+
+@json_schema_type
+class OpenAIChoiceLogprobs(BaseModel):
+    """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.
+
+    :param content: (Optional) The log probabilities for the tokens in the message
+    :param refusal: (Optional) The log probabilities for the tokens in the refusal
+    """
+
+    content: Optional[List[OpenAITokenLogProb]] = None
+    refusal: Optional[List[OpenAITokenLogProb]] = None
+
+
+@json_schema_type
+class OpenAIChoice(BaseModel):
+    """A choice from an OpenAI-compatible chat completion response.
+
+    :param message: The message from the model
+    :param finish_reason: The reason the model stopped generating
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the message
+    """
+
+    message: OpenAIMessageParam
+    finish_reason: str
+    index: int
+    logprobs: Optional[OpenAIChoiceLogprobs] = None
+
+
+@json_schema_type
+class OpenAIChatCompletion(BaseModel):
+    """Response from an OpenAI-compatible chat completion request.
+
+    :param id: The ID of the chat completion
+    :param choices: List of choices
+    :param object: The object type, which will be "chat.completion"
+    :param created: The Unix timestamp in seconds when the chat completion was created
+    :param model: The model that was used to generate the chat completion
+    """
+
+    id: str
+    choices: List[OpenAIChoice]
+    object: Literal["chat.completion"] = "chat.completion"
+    created: int
+    model: str
+
+
+@json_schema_type
+class OpenAICompletionLogprobs(BaseModel):
+    """The log probabilities for the tokens in the message from an OpenAI-compatible completion response.
+
+    :param text_offset: (Optional) The offset of the token in the text
+    :param token_logprobs: (Optional) The log probabilities for the tokens
+    :param tokens: (Optional) The tokens
+    :param top_logprobs: (Optional) The top log probabilities for the tokens
+    """
+
+    text_offset: Optional[List[int]] = None
+    token_logprobs: Optional[List[float]] = None
+    tokens: Optional[List[str]] = None
+    top_logprobs: Optional[List[Dict[str, float]]] = None
+
+
+@json_schema_type
+class OpenAICompletionChoice(BaseModel):
+    """A choice from an OpenAI-compatible completion response.
+
+    :param finish_reason: The reason the model stopped generating
+    :param text: The text of the choice
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the choice
+    """
+
+    finish_reason: str
+    text: str
+    index: int
+    logprobs: Optional[OpenAIChoiceLogprobs] = None
+
+
+@json_schema_type
+class OpenAICompletion(BaseModel):
+    """Response from an OpenAI-compatible completion request.
+
+    :param id: The ID of the completion
+    :param choices: List of choices
+    :param created: The Unix timestamp in seconds when the completion was created
+    :param model: The model that was used to generate the completion
+    :param object: The object type, which will be "text_completion"
+    """
+
+    id: str
+    choices: List[OpenAICompletionChoice]
+    created: int
+    model: str
+    object: Literal["text_completion"] = "text_completion"
+
+
 class ModelStore(Protocol):
    async def get_model(self, identifier: str) -> Model: ...

@ -589,14 +797,33 @@ class Inference(Protocol):
        top_p: Optional[float] = None,
        user: Optional[str] = None,
    ) -> OpenAICompletion:
-        """Generate an OpenAI-compatible completion for the given prompt using the specified model."""
+        """Generate an OpenAI-compatible completion for the given prompt using the specified model.
+
+        :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
+        :param prompt: The prompt to generate a completion for
+        :param best_of: (Optional) The number of completions to generate
+        :param echo: (Optional) Whether to echo the prompt
+        :param frequency_penalty: (Optional) The penalty for repeated tokens
+        :param logit_bias: (Optional) The logit bias to use
+        :param logprobs: (Optional) The log probabilities to use
+        :param max_tokens: (Optional) The maximum number of tokens to generate
+        :param n: (Optional) The number of completions to generate
+        :param presence_penalty: (Optional) The penalty for tokens that have already appeared in the text so far, regardless of how often
+        :param seed: (Optional) The seed to use
+        :param stop: (Optional) The stop tokens to use
+        :param stream: (Optional) Whether to stream the response
+        :param stream_options: (Optional) The stream options to use
+        :param temperature: (Optional) The temperature to use
+        :param top_p: (Optional) The top p to use
+        :param user: (Optional) The user to use
+        """
        ...

    @webmethod(route="/openai/v1/chat/completions", method="POST")
    async def openai_chat_completion(
        self,
        model: str,
-        messages: List[OpenAIChatCompletionMessageParam],
+        messages: List[OpenAIMessageParam],
        frequency_penalty: Optional[float] = None,
        function_call: Optional[Union[str, Dict[str, Any]]] = None,
        functions: Optional[List[Dict[str, Any]]] = None,
@ -619,5 +846,30 @@ class Inference(Protocol):
        top_p: Optional[float] = None,
        user: Optional[str] = None,
    ) -> OpenAIChatCompletion:
-        """Generate an OpenAI-compatible chat completion for the given messages using the specified model."""
+        """Generate an OpenAI-compatible chat completion for the given messages using the specified model.
+
+        :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
+        :param messages: List of messages in the conversation
+        :param frequency_penalty: (Optional) The penalty for repeated tokens
+        :param function_call: (Optional) The function call to use
+        :param functions: (Optional) List of functions to use
+        :param logit_bias: (Optional) The logit bias to use
+        :param logprobs: (Optional) The log probabilities to use
+        :param max_completion_tokens: (Optional) The maximum number of tokens to generate
+        :param max_tokens: (Optional) The maximum number of tokens to generate
+        :param n: (Optional) The number of completions to generate
+        :param parallel_tool_calls: (Optional) Whether to parallelize tool calls
+        :param presence_penalty: (Optional) The penalty for tokens that have already appeared in the text so far, regardless of how often
+        :param response_format: (Optional) The response format to use
+        :param seed: (Optional) The seed to use
+        :param stop: (Optional) The stop tokens to use
+        :param stream: (Optional) Whether to stream the response
+        :param stream_options: (Optional) The stream options to use
+        :param temperature: (Optional) The temperature to use
+        :param tool_choice: (Optional) The tool choice to use
+        :param tools: (Optional) The tools to use
+        :param top_logprobs: (Optional) The top log probabilities to use
+        :param top_p: (Optional) The top p to use
+        :param user: (Optional) The user to use
+        """
        ...
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@ -7,7 +7,6 @@
 from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable

-from openai.types.model import Model as OpenAIModel
 from pydantic import BaseModel, ConfigDict, Field

 from llama_stack.apis.resource import Resource, ResourceType
@ -57,6 +56,22 @@ class ListModelsResponse(BaseModel):
    data: List[Model]


+@json_schema_type
+class OpenAIModel(BaseModel):
+    """A model from OpenAI.
+
+    :param id: The ID of the model
+    :param object: The object type, which will be "model"
+    :param created: The Unix timestamp in seconds when the model was created
+    :param owned_by: The owner of the model
+    """
+
+    id: str
+    object: Literal["model"] = "model"
+    created: int
+    owned_by: str
+
+
 class OpenAIListModelsResponse(BaseModel):
    data: List[OpenAIModel]

--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@ -8,7 +8,6 @@ import time
 from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union

 from openai.types.chat import ChatCompletion as OpenAIChatCompletion
-from openai.types.chat import ChatCompletionMessageParam as OpenAIChatCompletionMessageParam
 from openai.types.completion import Completion as OpenAICompletion

 from llama_stack.apis.common.content_types import (
@ -39,6 +38,7 @@ from llama_stack.apis.inference import (
    ToolDefinition,
    ToolPromptFormat,
 )
+from llama_stack.apis.inference.inference import OpenAIMessageParam
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.safety import RunShieldResponse, Safety
 from llama_stack.apis.scoring import (
@ -478,7 +478,7 @@ class InferenceRouter(Inference):
    async def openai_chat_completion(
        self,
        model: str,
-        messages: List[OpenAIChatCompletionMessageParam],
+        messages: List[OpenAIMessageParam],
        frequency_penalty: Optional[float] = None,
        function_call: Optional[Union[str, Dict[str, Any]]] = None,
        functions: Optional[List[Dict[str, Any]]] = None,
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@ -9,7 +9,6 @@ import time
 import uuid
 from typing import Any, Dict, List, Optional

-from openai.types.model import Model as OpenAIModel
 from pydantic import TypeAdapter

 from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
@ -25,7 +24,7 @@ from llama_stack.apis.datasets import (
    RowsDataSource,
    URIDataSource,
 )
-from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse
+from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
 from llama_stack.apis.resource import ResourceType
 from llama_stack.apis.scoring_functions import (
    ListScoringFunctionsResponse,
--- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@ -8,7 +8,6 @@ import logging
 from typing import Any, AsyncGenerator, Dict, List, Optional, Union

 from openai.types.chat import ChatCompletion as OpenAIChatCompletion
-from openai.types.chat import ChatCompletionMessageParam as OpenAIChatCompletionMessageParam
 from openai.types.completion import Completion as OpenAICompletion

 from llama_stack.apis.inference import (
@ -23,6 +22,7 @@ from llama_stack.apis.inference import (
    ToolDefinition,
    ToolPromptFormat,
 )
+from llama_stack.apis.inference.inference import OpenAIMessageParam
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.embedding_mixin import (
    SentenceTransformerEmbeddingMixin,
@ -104,7 +104,7 @@ class SentenceTransformersInferenceImpl(
    async def openai_chat_completion(
        self,
        model: str,
-        messages: List[OpenAIChatCompletionMessageParam],
+        messages: List[OpenAIMessageParam],
        frequency_penalty: Optional[float] = None,
        function_call: Optional[Union[str, Dict[str, Any]]] = None,
        functions: Optional[List[Dict[str, Any]]] = None,
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@ -10,7 +10,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
 import httpx
 from openai import AsyncOpenAI
 from openai.types.chat import ChatCompletion as OpenAIChatCompletion
-from openai.types.chat import ChatCompletionMessageParam as OpenAIChatCompletionMessageParam
 from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@ -48,6 +47,7 @@ from llama_stack.apis.inference import (
    ToolDefinition,
    ToolPromptFormat,
 )
+from llama_stack.apis.inference.inference import OpenAIMessageParam
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
 from llama_stack.models.llama.sku_list import all_registered_models
@ -471,7 +471,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
    async def openai_chat_completion(
        self,
        model: str,
-        messages: List[OpenAIChatCompletionMessageParam],
+        messages: List[OpenAIMessageParam],
        frequency_penalty: Optional[float] = None,
        function_call: Optional[Union[str, Dict[str, Any]]] = None,
        functions: Optional[List[Dict[str, Any]]] = None,