re-work tool definitions, fix FastAPI issues, fix tool regressions

2025-10-04 04:04:14 +00:00 · 2024-08-24 22:07:06 -07:00 · 2024-08-24 22:07:06 -07:00 · 8efe614719
commit 8efe614719
parent 8d14d4228b
11 changed files with 144 additions and 104 deletions
--- a/llama_toolchain/inference/client.py
+++ b/llama_toolchain/inference/client.py
@@ -10,6 +10,7 @@ from typing import AsyncGenerator

 import fire
 import httpx
+from pydantic import BaseModel
 from termcolor import cprint

 from .api import (
@@ -27,6 +28,10 @@ async def get_client_impl(base_url: str):
    return InferenceClient(base_url)


+def encodable_dict(d: BaseModel):
+    return json.loads(d.json())
+
+
 class InferenceClient(Inference):
    def __init__(self, base_url: str):
        print(f"Initializing client for {base_url}")
@@ -46,7 +51,9 @@ class InferenceClient(Inference):
            async with client.stream(
                "POST",
                f"{self.base_url}/inference/chat_completion",
-                data=request.json(),
+                json={
+                    "request": encodable_dict(request),
+                },
                headers={"Content-Type": "application/json"},
                timeout=20,
            ) as response: