Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-02 08:44:44 +00:00)

Merge branch 'playground-ui' into ui-compose

Commit 1dc337db8b: 7 changed files with 88 additions and 12 deletions
@@ -27,8 +27,6 @@ $ llama-stack-client configure
Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:5000
```

## Provider Commands

### `llama-stack-client providers list`

```bash
$ llama-stack-client providers list

@@ -119,8 +117,25 @@ $ llama-stack-client memory_banks list
+--------------+----------------+--------+-------------------+------------------------+--------------------------+
```

### `llama-stack-client memory_banks register`

```bash
$ llama-stack-client memory_banks register <memory-bank-id> --type <type> [--provider-id <provider-id>] [--provider-memory-bank-id <provider-memory-bank-id>] [--chunk-size <chunk-size>] [--embedding-model <embedding-model>] [--overlap-size <overlap-size>]
```

Options:
- `--type`: Required. Type of memory bank. Choices: "vector", "keyvalue", "keyword", "graph"
- `--provider-id`: Optional. Provider ID for the memory bank
- `--provider-memory-bank-id`: Optional. Provider's memory bank ID
- `--chunk-size`: Optional. Chunk size in tokens (for vector type). Default: 512
- `--embedding-model`: Optional. Embedding model (for vector type). Default: "all-MiniLM-L6-v2"
- `--overlap-size`: Optional. Overlap size in tokens (for vector type). Default: 64
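
For example, to register a vector bank for document retrieval (the bank ID `my_docs` below is only illustrative, not a built-in name):

```bash
# "my_docs" is a placeholder bank ID
$ llama-stack-client memory_banks register my_docs --type vector --chunk-size 512 --overlap-size 64 --embedding-model all-MiniLM-L6-v2
```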

### `llama-stack-client memory_banks unregister`

```bash
$ llama-stack-client memory_banks unregister <memory-bank-id>
```
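
For example, to remove the illustrative bank registered above:

```bash
# "my_docs" is the placeholder bank ID from the earlier example
$ llama-stack-client memory_banks unregister my_docs
```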

## Shield Management

### `llama-stack-client shields list`

```bash
$ llama-stack-client shields list

@@ -134,16 +149,51 @@ $ llama-stack-client shields list
+--------------+----------+----------------+-------------+
```

### `llama-stack-client shields register`

```bash
$ llama-stack-client shields register --shield-id <shield-id> [--provider-id <provider-id>] [--provider-shield-id <provider-shield-id>] [--params <params>]
```

Options:
- `--shield-id`: Required. ID of the shield
- `--provider-id`: Optional. Provider ID for the shield
- `--provider-shield-id`: Optional. Provider's shield ID
- `--params`: Optional. JSON configuration parameters for the shield
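
As an illustration (both the shield ID and the `threshold` parameter below are placeholders, not guaranteed built-ins):

```bash
# "my-safety-shield" and the params key are hypothetical values
$ llama-stack-client shields register --shield-id my-safety-shield --params '{"threshold": 0.9}'
```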

## Eval Task Management

### `llama-stack-client eval_tasks list`

```bash
$ llama-stack-client eval_tasks list
```

### `llama-stack-client eval_tasks register`

```bash
$ llama-stack-client eval_tasks register --eval-task-id <eval-task-id> --dataset-id <dataset-id> --scoring-functions <function1> [<function2> ...] [--provider-id <provider-id>] [--provider-eval-task-id <provider-eval-task-id>] [--metadata <metadata>]
```

Options:
- `--eval-task-id`: Required. ID of the eval task
- `--dataset-id`: Required. ID of the dataset to evaluate
- `--scoring-functions`: Required. One or more scoring functions to use for evaluation
- `--provider-id`: Optional. Provider ID for the eval task
- `--provider-eval-task-id`: Optional. Provider's eval task ID
- `--metadata`: Optional. Metadata for the eval task in JSON format
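
A sketch of a registration call (the task ID, dataset ID, and scoring-function names here are hypothetical):

```bash
# task ID, dataset ID, and scoring-function names are placeholders
$ llama-stack-client eval_tasks register --eval-task-id my-eval-task --dataset-id my-dataset --scoring-functions accuracy exact_match --metadata '{"description": "example task"}'
```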

## Eval Execution

### `llama-stack-client eval run-benchmark`

```bash
$ llama-stack-client eval run-benchmark <eval-task-id1> [<eval-task-id2> ...] --eval-task-config <config-file> --output-dir <output-dir> [--num-examples <num>] [--visualize]
```

Options:
- `--eval-task-config`: Required. Path to the eval task config file in JSON format
- `--output-dir`: Required. Path to the directory where evaluation results will be saved
- `--num-examples`: Optional. Number of examples to evaluate (useful for debugging)
- `--visualize`: Optional flag. If set, visualizes evaluation results after completion
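
For example, to run two benchmark tasks on ten examples each and write results to the current directory (the task IDs are placeholders):

```bash
# "my-task-1" and "my-task-2" are placeholder eval task IDs
$ llama-stack-client eval run-benchmark my-task-1 my-task-2 --eval-task-config ~/eval_task_config.json --output-dir ./ --num-examples 10
```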

Example eval_task_config.json:

```json
{
    "type": "benchmark",
    "eval_candidate": {
@@ -160,3 +210,14 @@ $ cat ~/eval_task_config.json
    }
}
```

### `llama-stack-client eval run-scoring`

```bash
$ llama-stack-client eval run-scoring <eval-task-id> --eval-task-config <config-file> --output-dir <output-dir> [--num-examples <num>] [--visualize]
```

Options:
- `--eval-task-config`: Required. Path to the eval task config file in JSON format
- `--output-dir`: Required. Path to the directory where scoring results will be saved
- `--num-examples`: Optional. Number of examples to evaluate (useful for debugging)
- `--visualize`: Optional flag. If set, visualizes scoring results after completion
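
A minimal invocation might look like this (the task ID and output path are placeholders):

```bash
# "my-task-1" and "./scoring-results" are placeholder values
$ llama-stack-client eval run-scoring my-task-1 --eval-task-config ~/eval_task_config.json --output-dir ./scoring-results
```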

llama_stack/providers/inline/scoring/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
@@ -5,9 +5,10 @@
# the root directory of this source tree.
from typing import Dict

from pydantic import BaseModel

from llama_stack.distribution.datatypes import Api, ProviderSpec

from .config import BraintrustScoringConfig
@@ -16,6 +16,7 @@ import os

from autoevals.llm import Factuality
from autoevals.ragas import AnswerCorrectness

from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
@@ -211,7 +211,15 @@ class TestInference:
        response = await inference_impl.chat_completion(
            model_id=inference_model,
            messages=[
                # we include context about Michael Jordan in the prompt so that the test is
                # focused on the functionality of the model and not on the information embedded
                # in the model. Llama 3.2 3B Instruct tends to think MJ played for 14 seasons.
                SystemMessage(
                    content=(
                        "You are a helpful assistant.\n\n"
                        "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
                    )
                ),
                UserMessage(content="Please give me information about Michael Jordan."),
            ],
            stream=False,
@@ -2,8 +2,8 @@ blobfile
fire
httpx
huggingface-hub
llama-models>=0.0.57
llama-stack-client>=0.0.57
prompt-toolkit
python-dotenv
pydantic>=2

setup.py (2 changes)
@@ -16,7 +16,7 @@ def read_requirements():

setup(
    name="llama_stack",
    version="0.0.57",
    author="Meta Llama",
    author_email="llama-oss@meta.com",
    description="Llama Stack",