Further bug fixes

Ashwin Bharambe 2024-09-20 15:15:57 -07:00 committed by Xi Yan
parent 9252e81a7b
commit a57411b4b3
3 changed files with 30 additions and 18 deletions


@@ -10,21 +10,14 @@ from typing import Any, AsyncGenerator

 import fire
 import httpx
-from llama_stack.distribution.datatypes import RemoteProviderConfig
 from pydantic import BaseModel
 from termcolor import cprint

+from llama_stack.distribution.datatypes import RemoteProviderConfig
+
 from .event_logger import EventLogger
-from .inference import (
-    ChatCompletionRequest,
-    ChatCompletionResponse,
-    ChatCompletionResponseStreamChunk,
-    CompletionRequest,
-    Inference,
-    UserMessage,
-)
+from llama_stack.apis.inference import *  # noqa: F403


 async def get_client_impl(config: RemoteProviderConfig, _deps: Any) -> Inference:
@@ -48,7 +41,27 @@ class InferenceClient(Inference):
     async def completion(self, request: CompletionRequest) -> AsyncGenerator:
         raise NotImplementedError()

-    async def chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
+    async def chat_completion(
+        self,
+        model: str,
+        messages: List[Message],
+        sampling_params: Optional[SamplingParams] = SamplingParams(),
+        tools: Optional[List[ToolDefinition]] = None,
+        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
+        tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
+        stream: Optional[bool] = False,
+        logprobs: Optional[LogProbConfig] = None,
+    ) -> AsyncGenerator:
+        request = ChatCompletionRequest(
+            model=model,
+            messages=messages,
+            sampling_params=sampling_params,
+            tools=tools or [],
+            tool_choice=tool_choice,
+            tool_prompt_format=tool_prompt_format,
+            stream=stream,
+            logprobs=logprobs,
+        )
         async with httpx.AsyncClient() as client:
             async with client.stream(
                 "POST",