removed assertion in ollama.py and fixed typo in the readme

2025-08-02 08:44:44 +00:00 · 2024-12-03 16:15:47 -08:00 · 2024-12-03 16:15:47 -08:00 · 98c97d3104
commit 98c97d3104
parent 435f34b05e
2 changed files with 2 additions and 3 deletions
--- a/docs/source/distributions/importing_as_library.md
+++ b/docs/source/distributions/importing_as_library.md
@ -21,7 +21,7 @@ print(response)
 ```python
 response = await client.inference.chat_completion(
    messages=[UserMessage(content="What is the capital of France?", role="user")],
-    model="Llama3.1-8B-Instruct",
+    model_id="Llama3.1-8B-Instruct",
    stream=False,
 )
 print("\nChat completion response:")
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@ -13,13 +13,13 @@ from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
 from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
-from ollama import AsyncClient

 from llama_stack.providers.utils.inference.model_registry import (
    build_model_alias,
    build_model_alias_with_just_provider_model_id,
    ModelRegistryHelper,
 )
+from ollama import AsyncClient

 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
@ -180,7 +180,6 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
    async def _nonstream_completion(self, request: CompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)
        r = await self.client.generate(**params)
-        assert isinstance(r, dict)

        choice = OpenAICompatCompletionChoice(
            finish_reason=r["done_reason"] if r["done"] else None,