diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py
index 32d41ffde..3ce7d361d 100644
--- a/src/llama_stack/providers/utils/inference/openai_compat.py
+++ b/src/llama_stack/providers/utils/inference/openai_compat.py
@@ -3,23 +3,10 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from collections.abc import Iterable
 from typing import (
     Any,
 )
 
-from openai.types.chat import (
-    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam,
-)
-
-try:
-    from openai.types.chat import (
-        ChatCompletionMessageFunctionToolCall as OpenAIChatCompletionMessageFunctionToolCall,
-    )
-except ImportError:
-    from openai.types.chat.chat_completion_message_tool_call import (
-        ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall,
-    )
 from openai.types.chat import (
     ChatCompletionMessageToolCall,
 )
@@ -32,18 +19,6 @@ from llama_stack.models.llama.datatypes import (
     ToolCall,
     ToolDefinition,
 )
-from llama_stack_api import (
-    URL,
-    GreedySamplingStrategy,
-    ImageContentItem,
-    JsonSchemaResponseFormat,
-    OpenAIResponseFormatParam,
-    SamplingParams,
-    TextContentItem,
-    TopKSamplingStrategy,
-    TopPSamplingStrategy,
-    _URLOrData,
-)
 
 logger = get_logger(name=__name__, category="providers::utils")
 
@@ -73,42 +48,6 @@ class OpenAICompatCompletionResponse(BaseModel):
     choices: list[OpenAICompatCompletionChoice]
 
 
-def get_sampling_strategy_options(params: SamplingParams) -> dict:
-    options = {}
-    if isinstance(params.strategy, GreedySamplingStrategy):
-        options["temperature"] = 0.0
-    elif isinstance(params.strategy, TopPSamplingStrategy):
-        if params.strategy.temperature is not None:
-            options["temperature"] = params.strategy.temperature
-        if params.strategy.top_p is not None:
-            options["top_p"] = params.strategy.top_p
-    elif isinstance(params.strategy, TopKSamplingStrategy):
-        options["top_k"] = params.strategy.top_k
-    else:
-        raise ValueError(f"Unsupported sampling strategy: {params.strategy}")
-
-    return options
-
-
-def get_sampling_options(params: SamplingParams | None) -> dict:
-    if not params:
-        return {}
-
-    options = {}
-    if params:
-        options.update(get_sampling_strategy_options(params))
-        if params.max_tokens:
-            options["max_tokens"] = params.max_tokens
-
-    if params.repetition_penalty is not None and params.repetition_penalty != 1.0:
-        options["repeat_penalty"] = params.repetition_penalty
-
-    if params.stop is not None:
-        options["stop"] = params.stop
-
-    return options
-
-
 def text_from_choice(choice) -> str:
     if hasattr(choice, "delta") and choice.delta:
         return choice.delta.content  # type: ignore[no-any-return]  # external OpenAI types lack precise annotations
@@ -253,154 +192,6 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
     return out
 
 
-def _convert_stop_reason_to_openai_finish_reason(stop_reason: StopReason) -> str:
-    """
-    Convert a StopReason to an OpenAI chat completion finish_reason.
-    """
-    return {
-        StopReason.end_of_turn: "stop",
-        StopReason.end_of_message: "tool_calls",
-        StopReason.out_of_tokens: "length",
-    }.get(stop_reason, "stop")
-
-
-def _convert_openai_finish_reason(finish_reason: str) -> StopReason:
-    """
-    Convert an OpenAI chat completion finish_reason to a StopReason.
-
-    finish_reason: Literal["stop", "length", "tool_calls", ...]
-        - stop: model hit a natural stop point or a provided stop sequence
-        - length: maximum number of tokens specified in the request was reached
-        - tool_calls: model called a tool
-
-    ->
-
-    class StopReason(Enum):
-        end_of_turn = "end_of_turn"
-        end_of_message = "end_of_message"
-        out_of_tokens = "out_of_tokens"
-    """
-
-    # TODO(mf): are end_of_turn and end_of_message semantics correct?
-    return {
-        "stop": StopReason.end_of_turn,
-        "length": StopReason.out_of_tokens,
-        "tool_calls": StopReason.end_of_message,
-    }.get(finish_reason, StopReason.end_of_turn)
-
-
-def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]:
-    lls_tools: list[ToolDefinition] = []
-    if not tools:
-        return lls_tools
-
-    for tool in tools:
-        tool_fn = tool.get("function", {})
-        tool_name = tool_fn.get("name", None)
-        tool_desc = tool_fn.get("description", None)
-        tool_params = tool_fn.get("parameters", None)
-
-        lls_tool = ToolDefinition(
-            tool_name=tool_name,
-            description=tool_desc,
-            input_schema=tool_params,  # Pass through entire JSON Schema
-        )
-        lls_tools.append(lls_tool)
-    return lls_tools
-
-
-def _convert_openai_request_response_format(
-    response_format: OpenAIResponseFormatParam | None = None,
-):
-    if not response_format:
-        return None
-    # response_format can be a dict or a pydantic model
-    response_format_dict = dict(response_format)  # type: ignore[arg-type]  # OpenAIResponseFormatParam union needs dict conversion
-    if response_format_dict.get("type", "") == "json_schema":
-        return JsonSchemaResponseFormat(
-            type="json_schema",  # type: ignore[arg-type]  # Literal["json_schema"] incompatible with expected type
-            json_schema=response_format_dict.get("json_schema", {}).get("schema", ""),
-        )
-    return None
-
-
-def _convert_openai_tool_calls(
-    tool_calls: list[OpenAIChatCompletionMessageFunctionToolCall],
-) -> list[ToolCall]:
-    """
-    Convert an OpenAI ChatCompletionMessageToolCall list into a list of ToolCall.
-
-    OpenAI ChatCompletionMessageToolCall:
-        id: str
-        function: Function
-        type: Literal["function"]
-
-    OpenAI Function:
-        arguments: str
-        name: str
-
-    ->
-
-    ToolCall:
-        call_id: str
-        tool_name: str
-        arguments: Dict[str, ...]
-    """
-    if not tool_calls:
-        return []  # CompletionMessage tool_calls is not optional
-
-    return [
-        ToolCall(
-            call_id=call.id,
-            tool_name=call.function.name,
-            arguments=call.function.arguments,
-        )
-        for call in tool_calls
-    ]
-
-
-def _convert_openai_sampling_params(
-    max_tokens: int | None = None,
-    temperature: float | None = None,
-    top_p: float | None = None,
-) -> SamplingParams:
-    sampling_params = SamplingParams()
-
-    if max_tokens:
-        sampling_params.max_tokens = max_tokens
-
-    # Map an explicit temperature of 0 to greedy sampling
-    if temperature == 0:
-        sampling_params.strategy = GreedySamplingStrategy()
-    else:
-        # OpenAI defaults to 1.0 for temperature and top_p if unset
-        if temperature is None:
-            temperature = 1.0
-        if top_p is None:
-            top_p = 1.0
-        sampling_params.strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p)  # type: ignore[assignment]  # SamplingParams.strategy union accepts this type
-
-    return sampling_params
-
-
-def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None):
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    elif isinstance(content, list):
-        return [openai_content_to_content(c) for c in content]
-    elif hasattr(content, "type"):
-        if content.type == "text":
-            return TextContentItem(type="text", text=content.text)  # type: ignore[attr-defined]  # Iterable narrowed by hasattr check but mypy doesn't track
-        elif content.type == "image_url":
-            return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url)))  # type: ignore[attr-defined]  # Iterable narrowed by hasattr check but mypy doesn't track
-        else:
-            raise ValueError(f"Unknown content type: {content.type}")
-    else:
-        raise ValueError(f"Unknown content type: {content}")
-
-
 async def prepare_openai_completion_params(**params):
     async def _prepare_value(value: Any) -> Any:
         new_value = value