Several smaller fixes to make adapters work

Also reorganized the pattern of __init__ inside providers so that
configuration can stay lightweight.
Ashwin Bharambe 2024-08-28 09:42:08 -07:00
parent 2a1552a5eb
commit 45987996c4
23 changed files with 164 additions and 160 deletions
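
The core of the reorganization is visible in the ollama adapter diff below: the provider's __init__ now takes only the primitive values it needs (here, a URL), while a module-level async get_provider_impl factory unpacks the RemoteProviderConfig and drives initialization. A minimal sketch of the pattern, with an illustrative adapter name (only RemoteProviderConfig and the factory shape come from this commit):

    from typing import Any

    from llama_toolchain.distribution.datatypes import RemoteProviderConfig


    class ExampleInferenceAdapter:
        def __init__(self, url: str) -> None:
            # Lightweight __init__: store primitives only; no config
            # object, no I/O, no client construction.
            self.url = url

        async def initialize(self) -> None:
            # Heavier, possibly async setup (clients, connectivity
            # checks) belongs here rather than in __init__.
            ...


    async def get_provider_impl(config: RemoteProviderConfig, _deps: Any):
        # The factory, not the adapter, knows about the config type.
        impl = ExampleInferenceAdapter(config.url)
        await impl.initialize()
        return impl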


@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from typing import Any, AsyncGenerator
+from typing import AsyncGenerator
import httpx
@@ -14,34 +14,18 @@ from llama_models.llama3.api.tokenizer import Tokenizer
from llama_models.sku_list import resolve_model
from ollama import AsyncClient
+from llama_toolchain.distribution.datatypes import RemoteProviderConfig
-from llama_toolchain.inference.api import (
-    ChatCompletionRequest,
-    ChatCompletionResponse,
-    ChatCompletionResponseEvent,
-    ChatCompletionResponseEventType,
-    ChatCompletionResponseStreamChunk,
-    CompletionRequest,
-    Inference,
-    ToolCallDelta,
-    ToolCallParseStatus,
-)
+from llama_toolchain.inference.api import * # noqa: F403
from llama_toolchain.inference.prepare_messages import prepare_messages
# TODO: Eventually this will move to the llama cli model list command
# mapping of Model SKUs to ollama models
OLLAMA_SUPPORTED_SKUS = {
    # "Meta-Llama3.1-8B-Instruct": "llama3.1",
    "Meta-Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16",
    "Meta-Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16",
}
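# (Illustrative note, not part of the file: the adapter can resolve a
# requested model via resolve_model(), imported above, and use this
# mapping to pick the tag passed to the Ollama client, e.g.
# "llama3.1:8b-instruct-fp16".)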

async def get_provider_impl(config: RemoteProviderConfig, _deps: Any) -> Inference:
    impl = OllamaInferenceAdapter(config.url)
    await impl.initialize()
    return impl


class OllamaInferenceAdapter(Inference):
    def __init__(self, url: str) -> None:
        self.url = url
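
For illustration, a minimal sketch of how the new factory might be driven, assuming RemoteProviderConfig accepts a url keyword (the endpoint value is hypothetical; 11434 is ollama's default port):

    import asyncio

    from llama_toolchain.distribution.datatypes import RemoteProviderConfig


    async def main() -> None:
        # Hypothetical wiring: point the adapter at a local ollama server.
        config = RemoteProviderConfig(url="http://localhost:11434")
        impl = await get_provider_impl(config, None)
        # impl is an initialized OllamaInferenceAdapter, ready to serve
        # inference requests.


    asyncio.run(main())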