Use our own pydantic models for OpenAI Server APIs

Importing the models from the OpenAI client library required a
top-level dependency on the openai python package, and was also
incompatible with our API generation code due to some quirks in how
the OpenAI pydantic models are defined.

So, this commit creates our own stubs of those pydantic models so that
we're in more direct control of our API surface for this
OpenAI-compatible API, so that it works with our code generation, and
so that the openai python client isn't a hard requirement of Llama
Stack's API.
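
For context, the replacement models are plain pydantic stubs that mirror the OpenAI chat message shapes. Below is a minimal sketch of the idea, not the exact code from this commit; the field sets and the OpenAIUserMessageParam / OpenAIAssistantMessageParam names here are illustrative assumptions, and the real definitions live in llama_stack/apis/inference/inference.py:

```python
# Minimal sketch of the stub-model approach, NOT the exact models from
# this commit. Field shapes assume the OpenAI chat message format.
from typing import Literal, Optional, Union

from pydantic import BaseModel, Field
from typing_extensions import Annotated


class OpenAIUserMessageParam(BaseModel):
    """A user message, mirroring OpenAI's chat API shape."""

    role: Literal["user"] = "user"
    content: str


class OpenAIAssistantMessageParam(BaseModel):
    """An assistant message; content may be absent for tool calls."""

    role: Literal["assistant"] = "assistant"
    content: Optional[str] = None


# A discriminated union lets pydantic pick the right model by "role",
# which keeps validation and code generation straightforward.
OpenAIMessageParam = Annotated[
    Union[OpenAIUserMessageParam, OpenAIAssistantMessageParam],
    Field(discriminator="role"),
]
```

Because these are ordinary pydantic models owned by Llama Stack, they work with the API generation tooling and keep the openai package off the import path.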
Ben Browning 2025-04-08 09:01:35 -04:00
parent a193c9fc3f
commit 92fdf6d0c9
8 changed files with 1826 additions and 15 deletions


@@ -8,7 +8,6 @@ import logging
 from typing import Any, AsyncGenerator, Dict, List, Optional, Union
 from openai.types.chat import ChatCompletion as OpenAIChatCompletion
-from openai.types.chat import ChatCompletionMessageParam as OpenAIChatCompletionMessageParam
 from openai.types.completion import Completion as OpenAICompletion
 from llama_stack.apis.inference import (
@@ -23,6 +22,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.apis.inference.inference import OpenAIMessageParam
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.embedding_mixin import (
     SentenceTransformerEmbeddingMixin,
@@ -104,7 +104,7 @@ class SentenceTransformersInferenceImpl(
     async def openai_chat_completion(
         self,
         model: str,
-        messages: List[OpenAIChatCompletionMessageParam],
+        messages: List[OpenAIMessageParam],
         frequency_penalty: Optional[float] = None,
         function_call: Optional[Union[str, Dict[str, Any]]] = None,
         functions: Optional[List[Dict[str, Any]]] = None,
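
To illustrate the signature change above, a call site now passes Llama Stack's own message models instead of the openai client's types. This is a hedged sketch: `impl` stands in for a hypothetical provider instance, the model name is illustrative, and the message construction assumes the stub models sketched earlier (the real OpenAIMessageParam comes from llama_stack.apis.inference.inference):

```python
# Hypothetical call site; "impl" is an assumed provider instance and the
# model name is illustrative, not taken from this commit.
messages = [
    OpenAIUserMessageParam(content="Say hello in one word."),
]

# The method now validates against Llama Stack's own pydantic models,
# so no openai client types are needed anywhere on this path.
response = await impl.openai_chat_completion(
    model="meta-llama/Llama-3.2-1B-Instruct",
    messages=messages,
)
```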