Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-05 02:17:31 +00:00

Merge bfe06ae00f into sapling-pr-archive-ehhuang
Commit: 0b9736b6c1
32 changed files with 793 additions and 12 deletions
.github/workflows/integration-auth-tests.yml (vendored, 3 changes)
@@ -91,6 +91,9 @@ jobs:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 server:
   port: 8321
 EOF
@@ -107,13 +107,21 @@ data:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-  references:
+  stores:
     metadata:
       backend: kv_default
      namespace: registry
     inference:
       backend: sql_default
       table_name: inference_store
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      backend: sql_default
+      table_name: openai_conversations
+    prompts:
+      backend: kv_default
+      namespace: prompts
 models:
 - metadata:
     embedding_dimension: 768
@@ -100,6 +100,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata:
@@ -58,13 +58,21 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
-  references:
+  stores:
     metadata:
       backend: kv_default
       namespace: registry
     inference:
       backend: sql_default
       table_name: inference_store
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      backend: sql_default
+      table_name: openai_conversations
+    prompts:
+      backend: kv_default
+      namespace: prompts
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
@@ -113,13 +113,21 @@ data:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-  references:
+  stores:
     metadata:
       backend: kv_default
       namespace: registry
     inference:
       backend: sql_default
       table_name: inference_store
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      backend: sql_default
+      table_name: openai_conversations
+    prompts:
+      backend: kv_default
+      namespace: prompts
 models:
 - metadata:
     embedding_dimension: 768
@@ -106,6 +106,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata:
@@ -589,6 +589,7 @@ can be instantiated multiple times (with different configs) if necessary.
         _ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
         _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
         _ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
+        _ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts")
         return self
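For orientation, a hedged illustration of what the added check enforces. The body of _ensure_backend is not shown in this hunk, so the failure mode below is an assumption rather than confirmed behavior:

    # Illustrative only: the prompts store is validated against the KV backends,
    # so it is assumed to have to reference a backend declared as a KV store.
    from llama_stack.core.storage.datatypes import KVStoreReference

    prompts_ok = KVStoreReference(backend="kv_default", namespace="prompts")    # kv_default is a KV backend, passes
    prompts_bad = KVStoreReference(backend="sql_default", namespace="prompts")  # assumed to be rejected: sql_default is a SQL backend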
@@ -11,7 +11,6 @@ from pydantic import BaseModel

 from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
 from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
@@ -40,11 +39,10 @@ class PromptServiceImpl(Prompts):
         self.kvstore: KVStore

     async def initialize(self) -> None:
-        # Use metadata store backend with prompts-specific namespace
-        metadata_ref = self.config.run_config.storage.stores.metadata
-        if not metadata_ref:
-            raise ValueError("storage.stores.metadata must be configured in run config")
-        prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
+        # Use prompts store reference from run config
+        prompts_ref = self.config.run_config.storage.stores.prompts
+        if not prompts_ref:
+            raise ValueError("storage.stores.prompts must be configured in run config")
         self.kvstore = await kvstore_impl(prompts_ref)

     def _get_default_key(self, prompt_id: str) -> str:
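A minimal usage sketch of the new contract, mirroring the updated test fixture near the end of this diff; config, storage, and the "kv_test" backend are assumed to come from that fixture and are not defined here:

    # The prompts KV backend must be registered and storage.stores.prompts must be set
    # before initialize() is called; otherwise it raises the ValueError shown above.
    register_kvstore_backends({"kv_test": storage.backends["kv_test"]})
    store = PromptServiceImpl(config, deps={})
    await store.initialize()   # resolves storage.stores.prompts through kvstore_impl()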
@@ -563,6 +563,7 @@ def run_config_from_adhoc_config_spec(
             metadata=KVStoreReference(backend="kv_default", namespace="registry"),
             inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
             conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
+            prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
         ),
     ),
 )
@@ -271,6 +271,10 @@ class ServerStoresConfig(BaseModel):
         default=None,
         description="Responses store configuration (uses SQL backend)",
     )
+    prompts: KVStoreReference | None = Field(
+        default=None,
+        description="Prompts store configuration (uses KV backend)",
+    )


 class StorageConfig(BaseModel):
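A compact example of populating the new field programmatically; it mirrors the _default_storage() and adhoc-config hunks elsewhere in this diff, and the dict is assumed to feed the stores model shown above:

    from llama_stack.core.storage.datatypes import (
        InferenceStoreReference,
        KVStoreReference,
        SqlStoreReference,
    )

    stores = dict(
        metadata=KVStoreReference(backend="kv_default", namespace="registry"),
        inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
        conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
        prompts=KVStoreReference(backend="kv_default", namespace="prompts"),  # new in this commit
    )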
@@ -247,6 +247,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -109,6 +109,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -105,6 +105,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -122,6 +122,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -112,6 +112,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -111,6 +111,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -100,6 +100,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
@@ -142,6 +142,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -87,6 +87,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -250,6 +250,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -247,6 +247,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -259,6 +259,10 @@ class RunConfigSettings(BaseModel):
                 backend="sql_default",
                 table_name="openai_conversations",
             ).model_dump(exclude_none=True),
+            "prompts": KVStoreReference(
+                backend="kv_default",
+                namespace="prompts",
+            ).model_dump(exclude_none=True),
         }

         storage_config = dict(
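For reference, the new entry serializes to the same two keys that now appear under prompts: in the regenerated run.yaml files; the exact dump shape is inferred from those YAML hunks rather than from the model definition:

    KVStoreReference(backend="kv_default", namespace="prompts").model_dump(exclude_none=True)
    # -> {"backend": "kv_default", "namespace": "prompts"}  (assumed; matches the generated "prompts:" blocks)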
@@ -115,6 +115,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
@@ -30,8 +30,10 @@ materialize_telemetry_configs() {
   local otel_cfg="${dest}/otel-collector-config.yaml"
   local prom_cfg="${dest}/prometheus.yml"
   local graf_cfg="${dest}/grafana-datasources.yaml"
+  local graf_dash_cfg="${dest}/grafana-dashboards.yaml"
+  local dash_json="${dest}/llama-stack-dashboard.json"

-  for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
+  for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg" "$graf_dash_cfg" "$dash_json"; do
     if [ -e "$asset" ]; then
       die "Telemetry asset ${asset} already exists; refusing to overwrite"
     fi
@@ -103,6 +105,7 @@ datasources:
     type: prometheus
     access: proxy
     url: http://prometheus:9090
+    uid: prometheus
     isDefault: true
     editable: true
@@ -112,6 +115,224 @@ datasources:
     url: http://jaeger:16686
     editable: true
 EOF
+
+  cat <<'EOF' > "$graf_dash_cfg"
+apiVersion: 1
+
+providers:
+  - name: 'Llama Stack'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
+EOF
+
+  # Copy the dashboard JSON inline to avoid line-length issues
+  cat > "$dash_json" <<'DASHBOARD_JSON'
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "showPoints": "auto",
+            "fillOpacity": 10
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [{"color": "green", "value": null}]
+          }
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
+      "id": 1,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {
+          "datasource": {"type": "prometheus", "uid": "prometheus"},
+          "expr": "llama_stack_completion_tokens_total",
+          "legendFormat": "{{model_id}} ({{provider_id}})",
+          "refId": "A"
+        }
+      ],
+      "title": "Completion Tokens",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
+      "id": 2,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"},
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"}
+      ],
+      "title": "Prompt & Total Tokens",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
+          "unit": "ms"
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
+      "id": 3,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"},
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
+      ],
+      "title": "HTTP Request Duration (p95, p99)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
+        }
+      },
+      "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
+      "id": 4,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
+        "textMode": "auto"
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"}
+      ],
+      "title": "Total Requests",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
+        }
+      },
+      "gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
+      "id": 5,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
+        "textMode": "auto"
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"}
+      ],
+      "title": "Active Requests",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
+          "unit": "reqps"
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
+      "id": 6,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"}
+      ],
+      "title": "Request Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
+          "unit": "Bps"
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
+      "id": 7,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"},
+        {"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"}
+      ],
+      "title": "Request/Response Sizes",
+      "type": "timeseries"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 38,
+  "tags": ["llama-stack"],
+  "templating": {"list": []},
+  "time": {"from": "now-15m", "to": "now"},
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Llama Stack Metrics",
+  "uid": "llama-stack-metrics",
+  "version": 0,
+  "weekStart": ""
+}
+DASHBOARD_JSON
 }

 # Cleanup function to remove temporary files
@@ -372,6 +593,8 @@ if [ "$WITH_TELEMETRY" = true ]; then
     -e GF_SECURITY_ADMIN_PASSWORD=admin \
     -e GF_USERS_ALLOW_SIGN_UP=false \
     -v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
+    -v "${TELEMETRY_ASSETS_DIR}/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
+    -v "${TELEMETRY_ASSETS_DIR}/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
     docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
     die "Grafana startup failed"
   fi
scripts/telemetry/grafana-dashboards.yaml (new file, 13 additions)
@@ -0,0 +1,13 @@
+apiVersion: 1
+
+providers:
+  - name: 'Llama Stack'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
@@ -5,6 +5,7 @@ datasources:
     type: prometheus
     access: proxy
     url: http://prometheus:9090
+    uid: prometheus
     isDefault: true
     editable: true
scripts/telemetry/llama-stack-dashboard.json (new file, 457 additions)
{
  "annotations": {"list": []},
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {"defaults": {"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
      "id": 1,
      "options": {"legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}},
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "llama_stack_completion_tokens_total", "legendFormat": "{{model_id}} ({{provider_id}})", "refId": "A"}
      ],
      "title": "Completion Tokens",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {"defaults": {"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
      "id": 2,
      "options": {"legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}},
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"}
      ],
      "title": "Prompt & Total Tokens",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {"defaults": {"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "ms"}},
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
      "id": 3,
      "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}},
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
      ],
      "title": "HTTP Request Duration (p95, p99)",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {"defaults": {"mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
      "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
      "id": 4,
      "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"}
      ],
      "title": "Total Requests",
      "type": "stat"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {"defaults": {"mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
      "gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
      "id": 5,
      "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"}
      ],
      "title": "Active Requests",
      "type": "stat"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {"defaults": {"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "reqps"}},
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
      "id": 6,
      "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}},
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"}
      ],
      "title": "Request Rate",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "prometheus"},
      "fieldConfig": {"defaults": {"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10}, "mappings": [], "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "Bps"}},
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
      "id": 7,
      "options": {"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "none"}},
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "prometheus"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"}
      ],
      "title": "Request/Response Sizes",
      "type": "timeseries"
    }
  ],
  "refresh": "5s",
  "schemaVersion": 38,
  "tags": ["llama-stack"],
  "templating": {"list": []},
  "time": {"from": "now-15m", "to": "now"},
  "timepicker": {},
  "timezone": "browser",
  "title": "Llama Stack Metrics",
  "uid": "llama-stack-metrics",
  "version": 0,
  "weekStart": ""
}
@@ -135,6 +135,8 @@ $CONTAINER_RUNTIME run -d --name grafana \
   -e GF_SECURITY_ADMIN_PASSWORD=admin \
   -e GF_USERS_ALLOW_SIGN_UP=false \
   -v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
+  -v "$SCRIPT_DIR/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
+  -v "$SCRIPT_DIR/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
   docker.io/grafana/grafana:11.0.0

# Wait for services to start
tests/external/run-byoa.yaml (vendored, 3 additions)
@@ -25,6 +25,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 external_apis_dir: ~/.llama/apis.d
 external_providers_dir: ~/.llama/providers.d
 server:
@@ -44,6 +44,9 @@ def config_with_image_name_int():
     responses:
       backend: sql_default
       table_name: responses
+    prompts:
+      backend: kv_default
+      namespace: prompts
 providers:
   inference:
   - provider_id: provider1
@@ -48,6 +48,7 @@ def _default_storage() -> StorageConfig:
         metadata=KVStoreReference(backend="kv_default", namespace="registry"),
         inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
         conversations=SqlStoreReference(backend="sql_default", table_name="conversations"),
+        prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
     ),
 )
@@ -18,7 +18,7 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageConfig,
 )
-from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends
+from llama_stack.providers.utils.kvstore import register_kvstore_backends


 @pytest.fixture
@@ -38,6 +38,7 @@ async def temp_prompt_store(tmp_path_factory):
             metadata=KVStoreReference(backend="kv_test", namespace="registry"),
             inference=InferenceStoreReference(backend="sql_test", table_name="inference"),
             conversations=SqlStoreReference(backend="sql_test", table_name="conversations"),
+            prompts=KVStoreReference(backend="kv_test", namespace="prompts"),
         ),
     )
     mock_run_config = StackRunConfig(
@@ -50,6 +51,6 @@ async def temp_prompt_store(tmp_path_factory):
     store = PromptServiceImpl(config, deps={})

     register_kvstore_backends({"kv_test": storage.backends["kv_test"]})
-    store.kvstore = await kvstore_impl(KVStoreReference(backend="kv_test", namespace="prompts"))
+    await store.initialize()

     yield store