mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
fix: pull ollama embedding model if necessary (#1209)
Embedding models are tiny and can be pulled on-demand. Let's do that so the user doesn't have to do "yet another thing" to get themselves set up. Thanks @hardikjshah for the suggestion. Also fixed a build dependency miss (TODO: distro_codegen needs to actually check that the build template contains all providers mentioned for the run.yaml file) ## Test Plan First run `ollama rm all-minilm:latest`. Run `llama stack build --template ollama && llama stack run ollama --env INFERENCE_MODEL=llama3.2:3b-instruct-fp16`. See that it outputs a "Pulling embedding model `all-minilm:latest`" output and the stack starts up correctly. Verify that `ollama list` shows the model is correctly downloaded.
This commit is contained in:
parent
840fae2259
commit
11697f85c5
5 changed files with 6 additions and 2 deletions
|
@ -356,6 +356,7 @@
|
||||||
"scikit-learn",
|
"scikit-learn",
|
||||||
"scipy",
|
"scipy",
|
||||||
"sentencepiece",
|
"sentencepiece",
|
||||||
|
"sqlite-vec",
|
||||||
"tqdm",
|
"tqdm",
|
||||||
"transformers",
|
"transformers",
|
||||||
"uvicorn",
|
"uvicorn",
|
||||||
|
|
|
@ -23,7 +23,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
|
||||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
| vector_io | `inline::faiss`, `inline::sqlite_vec`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
|
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
|
||||||
|
|
|
@ -281,6 +281,8 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
|
||||||
|
|
||||||
async def register_model(self, model: Model) -> Model:
|
async def register_model(self, model: Model) -> Model:
|
||||||
if model.model_type == ModelType.embedding:
|
if model.model_type == ModelType.embedding:
|
||||||
|
log.info(f"Pulling embedding model `{model.provider_resource_id}` if necessary...")
|
||||||
|
await self.client.pull(model.provider_resource_id)
|
||||||
response = await self.client.list()
|
response = await self.client.list()
|
||||||
else:
|
else:
|
||||||
response = await self.client.ps()
|
response = await self.client.ps()
|
||||||
|
|
|
@ -6,6 +6,7 @@ distribution_spec:
|
||||||
- remote::ollama
|
- remote::ollama
|
||||||
vector_io:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
|
- inline::sqlite_vec
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
safety:
|
safety:
|
||||||
|
|
|
@ -25,7 +25,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::ollama"],
|
"inference": ["remote::ollama"],
|
||||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "inline::sqlite_vec", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue