From 21fb92d7cfb22260846653025814b4cc03cd0aee Mon Sep 17 00:00:00 2001
From: Aidan Do
Date: Thu, 26 Dec 2024 17:15:58 +1100
Subject: [PATCH] Add 3.3 70B to Ollama inference provider (#681)

# What does this PR do?

Adds Llama 3.3 70B support to the Ollama inference provider.

## Test Plan
Manual

```bash
# 42GB to download
ollama pull llama3.3:70b

ollama run llama3.3:70b --keepalive 60m

export LLAMA_STACK_PORT=5000
pip install -e . \
  && llama stack build --template ollama --image-type conda \
  && llama stack run ./distributions/ollama/run.yaml \
  --port $LLAMA_STACK_PORT \
  --env INFERENCE_MODEL=Llama3.3-70B-Instruct \
  --env OLLAMA_URL=http://localhost:11434

export LLAMA_STACK_PORT=5000
llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT \
  inference chat-completion \
  --model-id Llama3.3-70B-Instruct \
  --message "hello, what model are you?"
```

image
## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Ran pre-commit to handle lint / formatting issues. - [x] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [ ] Updated relevant documentation. - [ ] Wrote necessary unit or integration tests. --- llama_stack/providers/remote/inference/ollama/ollama.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index bf55c5ad2..920f3dd7e 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -100,6 +100,10 @@ model_aliases = [ "llama3.2-vision:90b", CoreModelId.llama3_2_90b_vision_instruct.value, ), + build_model_alias( + "llama3.3:70b", + CoreModelId.llama3_3_70b_instruct.value, + ), # The Llama Guard models don't have their full fp16 versions # so we are going to alias their default version to the canonical SKU build_model_alias(