llama-stack/kvant_start_local.sh
Angel Nunez Mencias 51816af52e
All checks were successful
Build and Push playground container / build-playground (push) Successful in 1m6s
Build and Push container / build (push) Successful in 4m22s
use env file
2025-06-02 01:39:17 +02:00

25 lines
891 B
Bash
Executable file

#!/usr/bin/env bash
# Configuration for starting the kvant distribution container locally.
# The values are assigned first and exported together at the end of this
# section, so the command that follows can expand them.
#
# Deliberately NOT defined in this script (expected to come from the env
# file handed to the container):
#   OPENAI_API_KEY
#   KEYCLOAK_CLIENT_SECRET

# Model identifiers and the embedding dimension setting.
INFERENCE_MODEL="inference-llama4-maverick"
EMBEDDING_MODEL="inference-bge-m3"
EMBEDDING_DIMENSION="1024"

# Port used for the server, and the base URL of the model endpoint.
LLAMA_STACK_PORT=8321
OPENAI_BASE_URL=https://maas.ai-2.kvant.cloud/v1

# Token limit setting.
VLLM_MAX_TOKENS=125000

export INFERENCE_MODEL EMBEDDING_MODEL EMBEDDING_DIMENSION \
  LLAMA_STACK_PORT OPENAI_BASE_URL VLLM_MAX_TOKENS
docker run -it \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $(pwd)/data:/root/.llama \
--mount type=bind,source="$(pwd)"/llama_stack/templates/kvant/run.yaml,target=/root/.llama/config.yaml,readonly \
--entrypoint python \
--env-file ./.env \
distribution-kvant:dev \
-m llama_stack.distribution.server.server --config /root/.llama/config.yaml \
--port $LLAMA_STACK_PORT \
--env VLLM_URL=$OPENAI_BASE_URL \
--env INFERENCE_MODEL=$INFERENCE_MODEL \
--env EMBEDDING_MODEL=$EMBEDDING_MODEL \
--env EMBEDDING_DIMENSION=$EMBEDDING_DIMENSION \