Compare commits

No commits in common. "kvant" and "main" have entirely different histories.
kvant...main

44 changed files with 2349 additions and 3859 deletions

@@ -1 +0,0 @@
FROM localhost:5000/distribution-kvant:dev

@@ -1,73 +0,0 @@
name: Build and Push playground container
run-name: Build and Push playground container
on:
workflow_dispatch:
#schedule:
# - cron: "0 10 * * *"
push:
branches:
- main
- kvant
tags:
- 'v*'
pull_request:
branches:
- main
- kvant
env:
IMAGE: git.kvant.cloud/${{github.repository}}-playground
jobs:
build-playground:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set current time
uses: https://github.com/gerred/actions/current-time@master
id: current_time
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to git.kvant.cloud registry
uses: docker/login-action@v3
with:
registry: git.kvant.cloud
username: ${{ vars.ORG_PACKAGE_WRITER_USERNAME }}
password: ${{ secrets.ORG_PACKAGE_WRITER_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
${{env.IMAGE}}
# generate Docker tags based on the following events/attributes
tags: |
type=schedule
type=ref,event=branch
type=ref,event=pr
type=ref,event=tag
type=semver,pattern={{version}}
- name: Build and push to gitea registry
uses: docker/build-push-action@v6
with:
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
context: .
file: llama_stack/distribution/ui/Containerfile
provenance: mode=max
sbom: true
build-args: |
BUILD_DATE=${{ steps.current_time.outputs.time }}
cache-from: |
type=registry,ref=${{ env.IMAGE }}:buildcache
type=registry,ref=${{ env.IMAGE }}:${{ github.ref_name }}
type=registry,ref=${{ env.IMAGE }}:main
cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache,mode=max,image-manifest=true

@@ -1,98 +0,0 @@
name: Build and Push container
run-name: Build and Push container
on:
workflow_dispatch:
#schedule:
# - cron: "0 10 * * *"
push:
branches:
- main
- kvant
tags:
- 'v*'
pull_request:
branches:
- main
- kvant
env:
IMAGE: git.kvant.cloud/${{github.repository}}
jobs:
build:
runs-on: ubuntu-latest
services:
registry:
image: registry:2
ports:
- 5000:5000
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set current time
uses: https://github.com/gerred/actions/current-time@master
id: current_time
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: network=host
- name: Login to git.kvant.cloud registry
uses: docker/login-action@v3
with:
registry: git.kvant.cloud
username: ${{ vars.ORG_PACKAGE_WRITER_USERNAME }}
password: ${{ secrets.ORG_PACKAGE_WRITER_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
${{env.IMAGE}}
# generate Docker tags based on the following events/attributes
tags: |
type=schedule
type=ref,event=branch
type=ref,event=pr
type=ref,event=tag
type=semver,pattern={{version}}
- name: Install uv
uses: https://github.com/astral-sh/setup-uv@v5
with:
# Install a specific version of uv.
version: "0.7.8"
- name: Build
env:
USE_COPY_NOT_MOUNT: true
LLAMA_STACK_DIR: .
run: |
uvx --from . llama stack build --template kvant --image-type container
# docker tag distribution-kvant:dev ${{env.IMAGE}}:kvant
# docker push ${{env.IMAGE}}:kvant
docker tag distribution-kvant:dev localhost:5000/distribution-kvant:dev
docker push localhost:5000/distribution-kvant:dev
- name: Build and push to gitea registry
uses: docker/build-push-action@v6
with:
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
context: .github/workflows
provenance: mode=max
sbom: true
build-args: |
BUILD_DATE=${{ steps.current_time.outputs.time }}
cache-from: |
type=registry,ref=${{ env.IMAGE }}:buildcache
type=registry,ref=${{ env.IMAGE }}:${{ github.ref_name }}
type=registry,ref=${{ env.IMAGE }}:main
cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache,mode=max,image-manifest=true

.gitignore

@@ -24,4 +24,3 @@ venv/
pytest-report.xml
.coverage
.python-version
data

@@ -1,6 +0,0 @@
#!/usr/bin/env bash
export USE_COPY_NOT_MOUNT=true
export LLAMA_STACK_DIR=.
uvx --from . llama stack build --template kvant --image-type container --image-name kvant

@@ -1,17 +0,0 @@
#!/usr/bin/env bash
export LLAMA_STACK_PORT=8321
# VLLM_API_TOKEN is read from the .env file
# KEYCLOAK_CLIENT_SECRET is read from the .env file
docker run -it \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v "$(pwd)"/data:/root/.llama \
--mount type=bind,source="$(pwd)"/llama_stack/templates/kvant/run.yaml,target=/root/.llama/config.yaml,readonly \
--entrypoint python \
--env-file ./.env \
distribution-kvant:dev \
-m llama_stack.distribution.server.server --config /root/.llama/config.yaml \
--port $LLAMA_STACK_PORT

@@ -5,8 +5,7 @@ FROM python:3.12-slim
WORKDIR /app
COPY . /app/
RUN /usr/local/bin/python -m pip install --upgrade pip && \
/usr/local/bin/pip3 install -r requirements.txt && \
/usr/local/bin/pip3 install -r llama_stack/distribution/ui/requirements.txt
/usr/local/bin/pip3 install -r requirements.txt
EXPOSE 8501
ENTRYPOINT ["streamlit", "run", "llama_stack/distribution/ui/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]

@@ -48,6 +48,3 @@ uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
| TOGETHER_API_KEY | API key for Together provider | (empty string) |
| SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) |
| OPENAI_API_KEY | API key for OpenAI provider | (empty string) |
| KEYCLOAK_URL | URL for keycloak authentication | (empty string) |
| KEYCLOAK_REALM | Keycloak realm | default |
| KEYCLOAK_CLIENT_ID | Client ID for keycloak auth | (empty string) |

@@ -50,42 +50,6 @@ def main():
)
pg.run()
def main2():
from dataclasses import asdict
st.subheader(f"Welcome {keycloak.user_info['preferred_username']}!")
st.write(f"Here is your user information:")
st.write(asdict(keycloak))
def get_access_token() -> str|None:
return st.session_state.get('access_token')
if __name__ == "__main__":
from streamlit_keycloak import login
import os
keycloak_url = os.environ.get("KEYCLOAK_URL")
keycloak_realm = os.environ.get("KEYCLOAK_REALM", "default")
keycloak_client_id = os.environ.get("KEYCLOAK_CLIENT_ID")
if keycloak_url and keycloak_client_id:
keycloak = login(
url=keycloak_url,
realm=keycloak_realm,
client_id=keycloak_client_id,
custom_labels={
"labelButton": "Sign in to kvant",
"labelLogin": "Please sign in to your kvant account.",
"errorNoPopup": "Unable to open the authentication popup. Allow popups and refresh the page to proceed.",
"errorPopupClosed": "Authentication popup was closed manually.",
"errorFatal": "Unable to connect to Keycloak using the current configuration."
},
auto_refresh=True,
)
if keycloak.authenticated:
st.session_state['access_token'] = keycloak.access_token
main()
# TBD - add other authentications
else:
main()
main()

@@ -7,13 +7,11 @@
import os
from llama_stack_client import LlamaStackClient
from llama_stack.distribution.ui.app import get_access_token
class LlamaStackApi:
def __init__(self):
self.client = LlamaStackClient(
api_key=get_access_token(),
base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
provider_data={
"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
@@ -30,3 +28,5 @@ class LlamaStackApi:
scoring_params = {fn_id: None for fn_id in scoring_function_ids}
return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)
llama_stack_api = LlamaStackApi()
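
The net effect of this change: the kvant side constructed a fresh client per call so each request carried the caller's Keycloak access token, while main shares one module-level client with no per-user credentials. A minimal side-by-side sketch (hypothetical caller code; the constructor arguments are the ones shown above):

import os
from llama_stack_client import LlamaStackClient

def kvant_style(get_access_token):
    # kvant: a new client per call, so the current Keycloak token rides along
    client = LlamaStackClient(
        api_key=get_access_token(),
        base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
    )
    return client.models.list()

# main: one module-level client shared by every page
shared_client = LlamaStackClient(
    base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
)

def main_style():
    return shared_client.models.list()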

@@ -6,13 +6,13 @@
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
def datasets():
st.header("Datasets")
datasets_info = {d.identifier: d.to_dict() for d in LlamaStackApi().client.datasets.list()}
datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()}
if len(datasets_info) > 0:
selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
st.json(datasets_info[selected_dataset], expanded=True)

@@ -6,14 +6,14 @@
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
def benchmarks():
# Benchmarks Section
st.header("Benchmarks")
benchmarks_info = {d.identifier: d.to_dict() for d in LlamaStackApi().client.benchmarks.list()}
benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()}
if len(benchmarks_info) > 0:
selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect")

@@ -6,13 +6,13 @@
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
def models():
# Models Section
st.header("Models")
models_info = {m.identifier: m.to_dict() for m in LlamaStackApi().client.models.list()}
models_info = {m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()}
selected_model = st.selectbox("Select a model", list(models_info.keys()))
st.json(models_info[selected_model])

@@ -6,12 +6,12 @@
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
def providers():
st.header("🔍 API Providers")
apis_providers_lst = LlamaStackApi().client.providers.list()
apis_providers_lst = llama_stack_api.client.providers.list()
api_to_providers = {}
for api_provider in apis_providers_lst:
if api_provider.api in api_to_providers:

@@ -6,13 +6,13 @@
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
def scoring_functions():
st.header("Scoring Functions")
scoring_functions_info = {s.identifier: s.to_dict() for s in LlamaStackApi().client.scoring_functions.list()}
scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()}
selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys()))
st.json(scoring_functions_info[selected_scoring_function], expanded=True)

@@ -6,14 +6,14 @@
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
def shields():
# Shields Section
st.header("Shields")
shields_info = {s.identifier: s.to_dict() for s in LlamaStackApi().client.shields.list()}
shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()}
selected_shield = st.selectbox("Select a shield", list(shields_info.keys()))
st.json(shields_info[selected_shield])

@@ -6,12 +6,12 @@
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
def vector_dbs():
st.header("Vector Databases")
vector_dbs_info = {v.identifier: v.to_dict() for v in LlamaStackApi().client.vector_dbs.list()}
vector_dbs_info = {v.identifier: v.to_dict() for v in llama_stack_api.client.vector_dbs.list()}
if len(vector_dbs_info) > 0:
selected_vector_db = st.selectbox("Select a vector database", list(vector_dbs_info.keys()))

@@ -9,7 +9,7 @@ import json
import pandas as pd
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
from llama_stack.distribution.ui.modules.utils import process_dataset
@@ -39,7 +39,7 @@ def application_evaluation_page():
# Select Scoring Functions to Run Evaluation On
st.subheader("Select Scoring Functions")
scoring_functions = LlamaStackApi().client.scoring_functions.list()
scoring_functions = llama_stack_api.client.scoring_functions.list()
scoring_functions = {sf.identifier: sf for sf in scoring_functions}
scoring_functions_names = list(scoring_functions.keys())
selected_scoring_functions = st.multiselect(
@@ -48,7 +48,7 @@ def application_evaluation_page():
help="Choose one or more scoring functions.",
)
available_models = LlamaStackApi().client.models.list()
available_models = llama_stack_api.client.models.list()
available_models = [m.identifier for m in available_models]
scoring_params = {}
@@ -108,7 +108,7 @@ def application_evaluation_page():
progress_bar.progress(progress, text=progress_text)
# Run evaluation for current row
score_res = LlamaStackApi().run_scoring(
score_res = llama_stack_api.run_scoring(
r,
scoring_function_ids=selected_scoring_functions,
scoring_params=scoring_params,

@@ -9,13 +9,13 @@ import json
import pandas as pd
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
def select_benchmark_1():
# Select Benchmarks
st.subheader("1. Choose An Eval Task")
benchmarks = LlamaStackApi().client.benchmarks.list()
benchmarks = llama_stack_api.client.benchmarks.list()
benchmarks = {et.identifier: et for et in benchmarks}
benchmarks_names = list(benchmarks.keys())
selected_benchmark = st.selectbox(
@@ -47,7 +47,7 @@ def define_eval_candidate_2():
# Define Eval Candidate
candidate_type = st.radio("Candidate Type", ["model", "agent"])
available_models = LlamaStackApi().client.models.list()
available_models = llama_stack_api.client.models.list()
available_models = [model.identifier for model in available_models]
selected_model = st.selectbox(
"Choose a model",
@@ -167,7 +167,7 @@ def run_evaluation_3():
eval_candidate = st.session_state["eval_candidate"]
dataset_id = benchmarks[selected_benchmark].dataset_id
rows = LlamaStackApi().client.datasets.iterrows(
rows = llama_stack_api.client.datasets.iterrows(
dataset_id=dataset_id,
)
total_rows = len(rows.data)
@@ -208,7 +208,7 @@ def run_evaluation_3():
progress = i / len(rows)
progress_bar.progress(progress, text=progress_text)
# Run evaluation for current row
eval_res = LlamaStackApi().client.eval.evaluate_rows(
eval_res = llama_stack_api.client.eval.evaluate_rows(
benchmark_id=selected_benchmark,
input_rows=[r],
scoring_functions=benchmarks[selected_benchmark].scoring_functions,

@@ -6,12 +6,12 @@
import streamlit as st
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
# Sidebar configurations
with st.sidebar:
st.header("Configuration")
available_models = LlamaStackApi().client.models.list()
available_models = llama_stack_api.client.models.list()
available_models = [model.identifier for model in available_models if model.model_type == "llm"]
selected_model = st.selectbox(
"Choose a model",
@@ -103,7 +103,7 @@ if prompt := st.chat_input("Example: What is Llama Stack?"):
else:
strategy = {"type": "greedy"}
response = LlamaStackApi().client.inference.chat_completion(
response = llama_stack_api.client.inference.chat_completion(
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt},

@@ -10,7 +10,7 @@ import streamlit as st
from llama_stack_client import Agent, AgentEventLogger, RAGDocument
from llama_stack.apis.common.content_types import ToolCallDelta
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
from llama_stack.distribution.ui.modules.utils import data_url_from_file
@@ -57,14 +57,14 @@ def rag_chat_page():
for i, uploaded_file in enumerate(uploaded_files)
]
providers = LlamaStackApi().client.providers.list()
providers = llama_stack_api.client.providers.list()
vector_io_provider = None
for x in providers:
if x.api == "vector_io":
vector_io_provider = x.provider_id
LlamaStackApi().client.vector_dbs.register(
llama_stack_api.client.vector_dbs.register(
vector_db_id=vector_db_name, # Use the user-provided name
embedding_dimension=384,
embedding_model="all-MiniLM-L6-v2",
@@ -72,7 +72,7 @@ def rag_chat_page():
)
# insert documents using the custom vector db name
LlamaStackApi().client.tool_runtime.rag_tool.insert(
llama_stack_api.client.tool_runtime.rag_tool.insert(
vector_db_id=vector_db_name, # Use the user-provided name
documents=documents,
chunk_size_in_tokens=512,
@@ -93,7 +93,7 @@ def rag_chat_page():
)
# select memory banks
vector_dbs = LlamaStackApi().client.vector_dbs.list()
vector_dbs = llama_stack_api.client.vector_dbs.list()
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
selected_vector_dbs = st.multiselect(
label="Select Document Collections to use in RAG queries",
@@ -103,7 +103,7 @@ def rag_chat_page():
)
st.subheader("Inference Parameters", divider=True)
available_models = LlamaStackApi().client.models.list()
available_models = llama_stack_api.client.models.list()
available_models = [model.identifier for model in available_models if model.model_type == "llm"]
selected_model = st.selectbox(
label="Choose a model",
@@ -167,7 +167,7 @@ def rag_chat_page():
@st.cache_resource
def create_agent():
return Agent(
LlamaStackApi().client,
llama_stack_api.client,
model=selected_model,
instructions=system_prompt,
sampling_params={
@@ -232,7 +232,7 @@ def rag_chat_page():
st.session_state.messages.append({"role": "system", "content": system_prompt})
# Query the vector DB
rag_response = LlamaStackApi().client.tool_runtime.rag_tool.query(
rag_response = llama_stack_api.client.tool_runtime.rag_tool.query(
content=prompt, vector_db_ids=list(selected_vector_dbs)
)
prompt_context = rag_response.content
@@ -251,7 +251,7 @@ def rag_chat_page():
# Run inference directly
st.session_state.messages.append({"role": "user", "content": extended_prompt})
response = LlamaStackApi().client.inference.chat_completion(
response = llama_stack_api.client.inference.chat_completion(
messages=st.session_state.messages,
model_id=selected_model,
sampling_params={

@@ -13,7 +13,7 @@ from llama_stack_client import Agent
from llama_stack_client.lib.agents.react.agent import ReActAgent
from llama_stack_client.lib.agents.react.tool_parser import ReActOutput
from llama_stack.distribution.ui.modules.api import LlamaStackApi
from llama_stack.distribution.ui.modules.api import llama_stack_api
class AgentType(enum.Enum):
@@ -24,7 +24,7 @@ class AgentType(enum.Enum):
def tool_chat_page():
st.title("🛠 Tools")
client = LlamaStackApi().client
client = llama_stack_api.client
models = client.models.list()
model_list = [model.identifier for model in models if model.api_model_type == "llm"]
@@ -55,7 +55,7 @@ def tool_chat_page():
)
if "builtin::rag" in toolgroup_selection:
vector_dbs = LlamaStackApi().client.vector_dbs.list() or []
vector_dbs = llama_stack_api.client.vector_dbs.list() or []
if not vector_dbs:
st.info("No vector databases available for selection.")
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]

@@ -1,5 +1,5 @@
llama-stack-client>=0.2.9
llama-stack>=0.2.1
llama-stack-client>=0.2.1
pandas
streamlit
streamlit-option-menu
streamlit-keycloak

@@ -1,904 +0,0 @@
{
"bedrock": [
"aiosqlite",
"autoevals",
"blobfile",
"boto3",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn"
],
"cerebras": [
"aiosqlite",
"autoevals",
"blobfile",
"cerebras_cloud_sdk",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"ci-tests": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"dell": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"fireworks": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"groq": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn"
],
"hf-endpoint": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn"
],
"hf-serverless": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"kvant": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"llama_api": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"meta-reference-gpu": [
"accelerate",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fairscale",
"faiss-cpu",
"fastapi",
"fbgemm-gpu-genai==1.1.2",
"fire",
"httpx",
"langdetect",
"lm-format-enforcer",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchao==0.8.0",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"zmq"
],
"nvidia": [
"aiohttp",
"aiosqlite",
"blobfile",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"uvicorn"
],
"ollama": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"ollama",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"peft",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"tqdm",
"transformers",
"tree_sitter",
"trl",
"uvicorn"
],
"open-benchmark": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"together",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn"
],
"passthrough": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"remote-vllm": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"sambanova": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"starter": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"tgi": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"together": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"together",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"verification": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"vllm-gpu": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"vllm",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"watsonx": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"ibm_watson_machine_learning",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
]
}
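
Each key in this mapping is a distribution template, and each value lists the pip requirement strings its container needs; entries that embed flags (such as "torch torchvision --index-url ...") are full pip argument strings rather than bare package names. A small sketch of consuming one entry (the file name dependencies.json is an assumption):

import json

with open("dependencies.json") as f:
    deps = json.load(f)

# Print one install command per requirement string for the kvant template;
# strings carrying extra flags are passed through to pip verbatim.
for spec in deps["kvant"]:
    print(f"pip install {spec}")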

@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .kvant import get_distribution_template # noqa: F401

@@ -1,35 +0,0 @@
version: '2'
distribution_spec:
description: distribution for kvant cloud
providers:
inference:
- remote::vllm
- inline::sentence-transformers
vector_io:
- inline::faiss
- remote::chromadb
- remote::pgvector
safety:
- inline::llama-guard
agents:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- remote::wolfram-alpha
- inline::rag-runtime
- remote::model-context-protocol
image_type: conda
additional_pip_packages:
- sqlalchemy[asyncio]

@@ -1,136 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.passthrough.config import (
PassthroughImplConfig,
)
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
def get_distribution_template() -> DistributionTemplate:
providers = {
"inference": ["remote::openai", "inline::sentence-transformers"],
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"remote::wolfram-alpha",
"inline::rag-runtime",
"remote::model-context-protocol",
],
}
name = "kvant"
inference_provider = Provider(
provider_id="openai",
provider_type="remote::openai",
config=PassthroughImplConfig.sample_run_config(),
)
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
vector_io_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
default_models = [
ModelInput(
metadata={},
model_id="inference-llama4-maverick",
provider_id="openai",
provider_model_id="inference-llama4-maverick",
model_type=ModelType.llm,
),
]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::wolfram_alpha",
provider_id="wolfram-alpha",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
]
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Use Passthrough hosted llama-stack endpoint for LLM inference",
container_image=None,
providers=providers,
available_models_by_provider={
"openai": [
ProviderModelEntry(
provider_model_id="inference-llama4-maverick",
model_type=ModelType.llm,
),
],
},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider, embedding_provider],
"vector_io": [vector_io_provider],
},
default_models=default_models + [embedding_model],
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"8321",
"Port for the Llama Stack distribution server",
),
"OPENAI_API_KEY": (
"",
"kvant maas API Key",
),
"OPENAI_BASE_URL": (
"https://maas.kvant.cloud",
"kvant maas URL",
),
},
)
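
A minimal usage sketch for the template above, valid only on the kvant branch where this module exists (assuming llama-stack's template machinery exposes the constructor arguments as attributes):

from llama_stack.templates.kvant import get_distribution_template

template = get_distribution_template()
print(template.name)               # "kvant"
print(sorted(template.providers))  # the APIs this distribution wires up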

@@ -1,170 +0,0 @@
version: '2'
image_name: kvant
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: kvant
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:https://maas.ai-2.kvant.cloud/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:400000}
api_token: ${env.VLLM_API_TOKEN:fake}
tls_verify: ${env.VLLM_TLS_VERIFY:true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/trace_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: wolfram-alpha
provider_type: remote::wolfram-alpha
config:
api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/inference_store.db
models:
- metadata: {}
model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
provider_id: kvant
provider_model_id: inference-llama4-maverick
model_type: llm
- metadata:
embedding_dimension: 1024
context_length: 8192
model_id: inference-bge-m3
provider_id: kvant
model_type: embedding
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
# - vector_db_id: test-bge
# embedding_model: inference-bge-m3
# embedding_dimension: 1024
# provider_id: faiss
# - vector_db_id: test-MiniLM-L6-v2
# embedding_model: all-MiniLM-L6-v2
# embedding_dimension: 384
# provider_id: faiss
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
auth:
provider_type: "oauth2_token"
config:
jwks:
introspection:
url: ${env.KEYCLOAK_INSTROSPECT:https://iam.phoenix-systems.ch/realms/kvant/protocol/openid-connect/token/introspect}
client_id: ${env.KEYCLOAK_CLIENT_ID:llama-stack}
client_secret: ${env.KEYCLOAK_CLIENT_SECRET}
claims_mapping:
sub: projects
scope: roles
#groups: teams
customer/id: teams
aud: namespaces
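
Throughout this run.yaml, ${env.NAME:default} placeholders resolve to the environment variable NAME, falling back to the text after the colon; a placeholder without a default, like ${env.KEYCLOAK_CLIENT_SECRET}, must be set at startup. An illustrative resolver for this convention (a sketch, not llama-stack's actual implementation):

import os
import re

_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def resolve_env_placeholders(text: str) -> str:
    def substitute(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        value = os.environ.get(name, default)
        if value is None:
            raise KeyError(f"required environment variable {name} is not set")
        return value
    return _PLACEHOLDER.sub(substitute, text)

# Falls back to the default URL when VLLM_URL is unset:
print(resolve_env_placeholders("url: ${env.VLLM_URL:https://maas.ai-2.kvant.cloud/v1}"))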

@@ -1,7 +0,0 @@
#!/usr/bin/env bash
export KEYCLOAK_URL="https://iam.phoenix-systems.ch"
export KEYCLOAK_REALM="kvant"
export KEYCLOAK_CLIENT_ID="llama-stack-playground"
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py

@@ -49,7 +49,6 @@ ui = [
"pandas",
"llama-stack-client>=0.2.9",
"streamlit-option-menu",
"streamlit-keycloak",
]
[dependency-groups]

uv.lock (generated): diff suppressed because it is too large (4,622 lines changed).