mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-08-02 08:44:44 +00:00
removed assertion in ollama.py and fixed typo in the readme
This commit is contained in:
parent 435f34b05e
commit 98c97d3104
2 changed files with 2 additions and 3 deletions
@@ -21,7 +21,7 @@ print(response)
 ```python
 response = await client.inference.chat_completion(
     messages=[UserMessage(content="What is the capital of France?", role="user")],
-    model="Llama3.1-8B-Instruct",
+    model_id="Llama3.1-8B-Instruct",
     stream=False,
 )
 print("\nChat completion response:")
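For reference, the hunk above is the readme's chat-completion example with the keyword renamed from `model` to `model_id`. Below is a minimal, self-contained sketch of that corrected example; the client construction (`llama_stack_client`, `AsyncLlamaStackClient`, and the `base_url`) is an assumption added here to make the snippet runnable and is not part of this diff.

```python
# Hedged sketch of the corrected readme example.
# Only the chat_completion call (model_id instead of model) comes from this
# commit; the imports and client setup below are assumptions.
import asyncio

from llama_stack_client import AsyncLlamaStackClient  # assumed client package
from llama_stack_client.types import UserMessage      # assumed message type


async def main() -> None:
    # Assumed local server URL; adjust to wherever the Llama Stack server runs.
    client = AsyncLlamaStackClient(base_url="http://localhost:5000")
    response = await client.inference.chat_completion(
        messages=[UserMessage(content="What is the capital of France?", role="user")],
        model_id="Llama3.1-8B-Instruct",  # corrected keyword per this commit
        stream=False,
    )
    print("\nChat completion response:")
    print(response)


if __name__ == "__main__":
    asyncio.run(main())
```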
@@ -13,13 +13,13 @@ from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
 from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
-from ollama import AsyncClient
 
 from llama_stack.providers.utils.inference.model_registry import (
     build_model_alias,
     build_model_alias_with_just_provider_model_id,
     ModelRegistryHelper,
 )
+from ollama import AsyncClient
 
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
@@ -180,7 +180,6 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
     async def _nonstream_completion(self, request: CompletionRequest) -> AsyncGenerator:
         params = await self._get_params(request)
         r = await self.client.generate(**params)
-        assert isinstance(r, dict)
 
         choice = OpenAICompatCompletionChoice(
             finish_reason=r["done_reason"] if r["done"] else None,
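The dropped `assert isinstance(r, dict)` only guarded the response shape before `done` and `done_reason` are read in `_nonstream_completion`. If a softer guard were still wanted, a sketch along the following lines could normalize the response instead of asserting; `as_mapping` is a hypothetical helper written for illustration and is not part of this commit or of the ollama client.

```python
# Hypothetical alternative to the removed assert (illustration only).
from collections.abc import Mapping
from typing import Any


def as_mapping(response: Any) -> Mapping[str, Any]:
    """Best-effort normalization of a generate() response to a dict-like view."""
    if isinstance(response, Mapping):
        return response
    # Some client versions may return a pydantic-style object; hedge via model_dump().
    dump = getattr(response, "model_dump", None)
    if callable(dump):
        return dump()
    raise TypeError(f"unexpected response type: {type(response)!r}")
```

With such a helper, the adapter could hypothetically use `r = as_mapping(await self.client.generate(**params))` before reading `r["done"]` and `r["done_reason"]`.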