[1/n] migrate inference/chat_completion

2025-10-04 20:14:13 +00:00 · 2024-09-11 12:21:19 -07:00 · 2024-09-11 12:21:19 -07:00 · 0c7c6b7e02
commit 0c7c6b7e02
parent 1433aaf9f7
3 changed files with 35 additions and 7 deletions
--- a/llama_toolchain/inference/client.py
+++ b/llama_toolchain/inference/client.py
@@ -10,10 +10,10 @@ from typing import Any, AsyncGenerator

 import fire
 import httpx
-from pydantic import BaseModel
-from termcolor import cprint

 from llama_toolchain.core.datatypes import RemoteProviderConfig
+from pydantic import BaseModel
+from termcolor import cprint

 from .api import (
    ChatCompletionRequest,
@@ -52,9 +52,7 @@ class InferenceClient(Inference):
            async with client.stream(
                "POST",
                f"{self.base_url}/inference/chat_completion",
-                json={
-                    "request": encodable_dict(request),
-                },
+                json=encodable_dict(request),
                headers={"Content-Type": "application/json"},
                timeout=20,
            ) as response: