mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
re-work tool definitions, fix FastAPI issues, fix tool regressions
This commit is contained in:
parent
8d14d4228b
commit
8efe614719
11 changed files with 144 additions and 104 deletions
|
@ -10,6 +10,7 @@ from typing import AsyncGenerator
|
|||
|
||||
import fire
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
from termcolor import cprint
|
||||
|
||||
from .api import (
|
||||
|
@ -27,6 +28,10 @@ async def get_client_impl(base_url: str):
|
|||
return InferenceClient(base_url)
|
||||
|
||||
|
||||
def encodable_dict(d: BaseModel):
|
||||
return json.loads(d.json())
|
||||
|
||||
|
||||
class InferenceClient(Inference):
|
||||
def __init__(self, base_url: str):
|
||||
print(f"Initializing client for {base_url}")
|
||||
|
@ -46,7 +51,9 @@ class InferenceClient(Inference):
|
|||
async with client.stream(
|
||||
"POST",
|
||||
f"{self.base_url}/inference/chat_completion",
|
||||
data=request.json(),
|
||||
json={
|
||||
"request": encodable_dict(request),
|
||||
},
|
||||
headers={"Content-Type": "application/json"},
|
||||
timeout=20,
|
||||
) as response:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue