From 214b1fe1ae984f1a41216597114e3c18f1dbc348 Mon Sep 17 00:00:00 2001 From: Wen Zhou Date: Fri, 4 Jul 2025 15:53:53 +0200 Subject: [PATCH] refactor: set proper name for embedding all-minilm:l6-v2 model - we are using all-minilm:l6-v2, but the model we download from ollama is all-minilm:latest - they are currently exactly the same model, but if l12-v2 is updated, "latest" might no longer be valid. - the only change in this PR is to pin the model pulled from ollama Signed-off-by: Wen Zhou --- docs/source/getting_started/detailed_tutorial.md | 4 ++-- llama_stack/providers/remote/inference/ollama/models.py | 2 +- tests/Containerfile | 2 +- tests/external-provider/llama-stack-provider-ollama/run.yaml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/getting_started/detailed_tutorial.md b/docs/source/getting_started/detailed_tutorial.md index d80ec3554..85da081e8 100644 --- a/docs/source/getting_started/detailed_tutorial.md +++ b/docs/source/getting_started/detailed_tutorial.md @@ -180,9 +180,9 @@ Available Models ┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ ┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ -│ embedding │ all-MiniLM-L6-v2 │ all-minilm:latest │ {'embedding_dimension': 384.0} │ ollama │ +│ embedding │ all-MiniLM-L6-v2 │ all-minilm:l6-v2 │ {'embedding_dimension': 384.0} │ ollama │ ├─────────────────┼─────────────────────────────────────┼─────────────────────────────────────┼───────────────────────────────────────────┼─────────────────┤ -│ llm │ llama3.2:3b │ llama3.2:3b │ │ ollama │ +│ llm │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │ │ ollama │
└─────────────────┴─────────────────────────────────────┴─────────────────────────────────────┴───────────────────────────────────────────┴─────────────────┘ Total models: 2 diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index cacf88861..64ddb23d9 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -84,7 +84,7 @@ MODEL_ENTRIES = [ CoreModelId.llama_guard_3_1b.value, ), ProviderModelEntry( - provider_model_id="all-minilm:latest", + provider_model_id="all-minilm:l6-v2", aliases=["all-minilm"], model_type=ModelType.embedding, metadata={ diff --git a/tests/Containerfile b/tests/Containerfile index 3080d053a..441d276c2 100644 --- a/tests/Containerfile +++ b/tests/Containerfile @@ -7,7 +7,7 @@ FROM --platform=linux/amd64 ollama/ollama:latest RUN ollama serve & \ sleep 5 && \ ollama pull llama3.2:3b-instruct-fp16 && \ - ollama pull all-minilm:latest + ollama pull all-minilm:l6-v2 # Set the entrypoint to start ollama serve ENTRYPOINT ["ollama", "serve"] diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml index 60cff7503..65fd7571c 100644 --- a/tests/external-provider/llama-stack-provider-ollama/run.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -105,7 +105,7 @@ models: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: custom_ollama - provider_model_id: all-minilm:latest + provider_model_id: all-minilm:l6-v2 model_type: embedding shields: [] vector_dbs: []