Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-12 13:57:57 +00:00)
chore: refactor (chat)completions endpoints to use shared params struct (#3761)
# What does this PR do?

Converts openai(_chat)_completions params to a pydantic BaseModel to reduce code duplication across all providers.

## Test Plan

CI

---

Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/llamastack/llama-stack/pull/3761).

* #3777
* __->__ #3761
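The shape of the shared struct is visible in the diff below. A minimal sketch of the pattern, assuming only the fields this diff exercises (the real `OpenAIChatCompletionRequest` in `llama_stack.apis.inference` carries the full OpenAI chat-completion parameter set):

```python
from typing import Any

from pydantic import BaseModel


class OpenAIChatCompletionRequest(BaseModel):
    """Sketch of the shared params struct; only the fields visible in this PR's diff."""

    model: str
    messages: list[Any]  # OpenAI-style message params, e.g. OpenAIUserMessageParam
    stream: bool = False


# Call sites validate once and pass a single object through the inference API:
# params = OpenAIChatCompletionRequest(model=model, messages=[message], stream=False)
# response = await inference_api.openai_chat_completion(params)
```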
Parent: 6954fe2274
Commit: 80d58ab519

33 changed files with 599 additions and 890 deletions
```diff
@@ -8,7 +8,7 @@
 from jinja2 import Template
 
 from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.inference import OpenAIUserMessageParam
+from llama_stack.apis.inference import OpenAIChatCompletionRequest, OpenAIUserMessageParam
 from llama_stack.apis.tools.rag_tool import (
     DefaultRAGQueryGeneratorConfig,
     LLMRAGQueryGeneratorConfig,
@@ -65,11 +65,12 @@ async def llm_rag_query_generator(
 
     model = config.model
     message = OpenAIUserMessageParam(content=rendered_content)
-    response = await inference_api.openai_chat_completion(
+    params = OpenAIChatCompletionRequest(
         model=model,
         messages=[message],
         stream=False,
     )
+    response = await inference_api.openai_chat_completion(params)
 
     query = response.choices[0].message.content
 
```
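Downstream, a provider accepts the single struct instead of re-declaring every keyword argument. A hypothetical adapter sketch (class name, client wiring, and forwarding body are illustrative, reusing the `OpenAIChatCompletionRequest` sketched above; this is not an actual llama-stack provider):

```python
from openai import AsyncOpenAI


class ExampleProviderAdapter:
    def __init__(self, base_url: str, api_key: str) -> None:
        self._client = AsyncOpenAI(base_url=base_url, api_key=api_key)

    async def openai_chat_completion(self, params: OpenAIChatCompletionRequest):
        # One validated object in; no per-provider kwarg list to keep in sync.
        return await self._client.chat.completions.create(
            model=params.model,
            messages=params.messages,
            stream=params.stream,
        )
```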