together adapter inference

2025-12-03 09:53:45 +00:00 · 2024-09-11 18:41:00 -07:00 · 2024-09-11 18:41:00 -07:00 · 29d1ef3fdc
commit 29d1ef3fdc
parent f55ffa8b53
1 changed file with 23 additions and 1 deletion
--- a/llama_toolchain/inference/adapters/together/together.py
+++ b/llama_toolchain/inference/adapters/together/together.py
@@ -76,7 +76,29 @@ class TogetherInferenceAdapter(Inference):
        return options
-    async def chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
+    async def chat_completion(
        self,
        model: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        tools: Optional[List[ToolDefinition]] = list(),
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        # wrapper request to make it easier to pass around (internal only, not exposed to API)
        request = ChatCompletionRequest(
            model=model,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools,
            tool_choice=tool_choice,
            tool_prompt_format=tool_prompt_format,
            stream=stream,
            logprobs=logprobs,
        )
        # accumulate sampling params and other options to pass to together
        options = self.get_together_chat_options(request)
        together_model = self.resolve_together_model(request.model)