diff --git a/.gitignore b/.gitignore
index 2cc885604..747acdc7b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,4 @@ venv/
 pytest-report.xml
 .coverage
 .python-version
+data
diff --git a/kvant_build_local.sh b/kvant_build_local.sh
new file mode 100755
index 000000000..9701c57dc
--- /dev/null
+++ b/kvant_build_local.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+export USE_COPY_NOT_MOUNT=true
+export LLAMA_STACK_DIR=.
+
+uvx --from . llama stack build --template kvant --image-type container --image-name kvant
diff --git a/kvant_start_local.sh b/kvant_start_local.sh
new file mode 100755
--- /dev/null
+++ b/kvant_start_local.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+# Start the locally built kvant distribution container.
+#
+# Run from the repository root: ./data and the kvant run.yaml are
+# bind-mounted relative to the current working directory.
+#
+# Required environment:
+#   OPENAI_API_KEY  API key for the endpoint at OPENAI_BASE_URL. It is read
+#                   from the calling environment and is intentionally not
+#                   stored in this file.
+
+set -euo pipefail
+
+: "${OPENAI_API_KEY:?OPENAI_API_KEY must be set in the environment}"
+export OPENAI_API_KEY
+
+export INFERENCE_MODEL="inference-llama4-maverick"
+export EMBEDDING_MODEL="inference-bge-m3"
+export EMBEDDING_DIMENSION="1024"
+export LLAMA_STACK_PORT=8321
+export OPENAI_BASE_URL=https://maas.ai-2.kvant.cloud/v1
+# NOTE: exported here but not forwarded to the server below - confirm.
+export VLLM_MAX_TOKENS=125000
+
+# Everything after the image name is passed to the python entrypoint,
+# not to docker.
+docker run -it \
+  -p "${LLAMA_STACK_PORT}:${LLAMA_STACK_PORT}" \
+  -v "$(pwd)/data:/root/.llama" \
+  --mount type=bind,source="$(pwd)"/llama_stack/templates/kvant/run.yaml,target=/root/.llama/config.yaml,readonly \
+  --entrypoint python \
+  distribution-kvant:dev \
+  -m llama_stack.distribution.server.server --config /root/.llama/config.yaml \
+  --port "${LLAMA_STACK_PORT}" \
+  --env "VLLM_URL=${OPENAI_BASE_URL}" \
+  --env "VLLM_API_TOKEN=${OPENAI_API_KEY}" \
+  --env "PASSTHROUGH_URL=${OPENAI_BASE_URL}" \
+  --env "PASSTHROUGH_API_KEY=${OPENAI_API_KEY}" \
+  --env "INFERENCE_MODEL=${INFERENCE_MODEL}" \
+  --env "EMBEDDING_MODEL=${EMBEDDING_MODEL}" \
+  --env "EMBEDDING_DIMENSION=${EMBEDDING_DIMENSION}"
diff --git a/llama_stack/templates/kvant/run.yaml b/llama_stack/templates/kvant/run.yaml
index 9caa78567..e5942140e 100644
--- a/llama_stack/templates/kvant/run.yaml
+++ b/llama_stack/templates/kvant/run.yaml
@@ -30,9 +30,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/faiss_store.db
-      responses_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -46,6 +43,9 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/responses_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
diff --git a/playground_start_local.sh b/playground_start_local.sh
new file mode 100755
index 000000000..1dbe129fc
--- /dev/null
+++ b/playground_start_local.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py