#!/usr/bin/env bash
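# Launch the kvant Llama Stack distribution in Docker, pointing its
# inference providers at the OpenAI-compatible kvant MaaS endpoint.
set -euo pipefail  # fail fast on errors, unset variables, and failed pipelines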
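# Inference and embedding models served by the stack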
export INFERENCE_MODEL="inference-llama4-maverick"
export EMBEDDING_MODEL="inference-bge-m3"
export EMBEDDING_DIMENSION="1024"
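# Port the server listens on; published to the same port on the host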
export LLAMA_STACK_PORT=8321
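# OpenAI-compatible upstream used for both the vLLM and passthrough providers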
export OPENAI_BASE_URL=https://maas.ai-2.kvant.cloud/v1
# OPENAI_API_KEY is supplied by the .env file loaded below
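# Max token budget for the vLLM provider (exported here but not forwarded
# to the container via --env below)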
export VLLM_MAX_TOKENS=125000
# KEYCLOAK_CLIENT_SECRET is supplied by the .env file loaded below
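# Load KEY=VALUE pairs from .env into this shell so they can be expanded
# below; note this simple expansion breaks on values containing spaces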
export $(cat .env | xargs)
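# Run the stack server inside the distribution-kvant image: publish the API
# port, persist state under ./data, and mount the kvant run.yaml read-only.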
docker run -it \
  -p "$LLAMA_STACK_PORT:$LLAMA_STACK_PORT" \
  -v "$(pwd)/data:/root/.llama" \
  --mount type=bind,source="$(pwd)"/llama_stack/templates/kvant/run.yaml,target=/root/.llama/config.yaml,readonly \
  --entrypoint python \
  --env-file ./.env \
  distribution-kvant:dev \
  -m llama_stack.distribution.server.server --config /root/.llama/config.yaml \
  --port "$LLAMA_STACK_PORT" \
  --env VLLM_URL="$OPENAI_BASE_URL" \
  --env VLLM_API_TOKEN="$OPENAI_API_KEY" \
  --env PASSTHROUGH_URL="$OPENAI_BASE_URL" \
  --env PASSTHROUGH_API_KEY="$OPENAI_API_KEY" \
  --env INFERENCE_MODEL="$INFERENCE_MODEL" \
  --env EMBEDDING_MODEL="$EMBEDDING_MODEL" \
  --env EMBEDDING_DIMENSION="$EMBEDDING_DIMENSION" \
  --env KEYCLOAK_CLIENT_SECRET="$KEYCLOAK_CLIENT_SECRET"