temp commit

Botao Chen 2025-03-11 17:11:26 -07:00
parent 96039831c4
commit 27d5892dfa
7 changed files with 149 additions and 109 deletions

View file

@@ -453,6 +453,40 @@
     "transformers",
     "uvicorn"
   ],
+  "open-benchmark": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "fastapi",
+    "fire",
+    "httpx",
+    "litellm",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "sqlite-vec",
+    "together",
+    "tqdm",
+    "transformers",
+    "uvicorn"
+  ],
   "remote-vllm": [
     "aiosqlite",
     "autoevals",

View file

@@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
         return {
             "url": "https://api.together.xyz/v1",
-            "api_key": "${env.TOGETHER_API_KEY}",
+            "api_key": "${env.TOGETHER_API_KEY:}",
         }
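
The change above makes TOGETHER_API_KEY optional in the sample config: `${env.TOGETHER_API_KEY}` requires the variable to be set, while `${env.TOGETHER_API_KEY:}` falls back to an empty default when it is unset. A rough illustration of that placeholder semantics (a hand-rolled sketch, not the actual llama-stack resolver):

```python
# Illustration of "${env.NAME}" vs "${env.NAME:default}" semantics; this is a
# hand-rolled sketch, not llama-stack's own substitution code.
import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+)(?::(?P<default>[^}]*))?\}")


def resolve_env_placeholders(value: str) -> str:
    def _substitute(match: re.Match) -> str:
        name, default = match.group("name"), match.group("default")
        if name in os.environ:
            return os.environ[name]
        if default is not None:  # "${env.X:}" resolves to "" when X is unset
            return default
        raise ValueError(f"environment variable {name} is required but not set")

    return _ENV_PATTERN.sub(_substitute, value)


print(resolve_env_placeholders("${env.TOGETHER_API_KEY:}"))  # "" if unset
```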

View file

@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .open_benchmark import get_distribution_template  # noqa: F401

View file

@@ -36,7 +36,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
             "openai",
             [
                 ProviderModelEntry(
-                    provider_model_id="penai/gpt-4o",
+                    provider_model_id="openai/gpt-4o",
                     model_type=ModelType.llm,
                 )
             ],
@@ -62,26 +62,6 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
             ],
             GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"),
         ),
-        (
-            "groq",
-            [
-                ProviderModelEntry(
-                    provider_model_id="groq/llama-3.3-70b-versatile",
-                    model_type=ModelType.llm,
-                )
-            ],
-            GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
-        ),
-        (
-            "together",
-            [
-                ProviderModelEntry(
-                    provider_model_id="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-                    model_type=ModelType.llm,
-                )
-            ],
-            TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
-        ),
     ]
     inference_providers = []
     available_models = {}
@@ -243,7 +223,7 @@ def get_distribution_template() -> DistributionTemplate:
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
-        description="Distribution for running e2e tests in CI",
+        description="Distribution for running open benchmarks",
         container_image=None,
         template_path=None,
         providers=providers,
@@ -266,13 +246,25 @@ def get_distribution_template() -> DistributionTemplate:
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
-            "FIREWORKS_API_KEY": (
-                "",
-                "Fireworks API Key",
-            ),
+            "TOGETHER_API_KEY": (
+                "",
+                "Together API Key",
+            ),
             "OPENAI_API_KEY": (
                 "",
                 "OpenAI API Key",
             ),
+            "GEMINI_API_KEY": (
+                "",
+                "Gemini API Key",
+            ),
+            "ANTHROPIC_API_KEY": (
+                "",
+                "Anthropic API Key",
+            ),
+            "GROQ_API_KEY": (
+                "",
+                "Groq API Key",
+            ),
         },
     )
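
The (default, description) pairs registered above are what surface as `${env.NAME:default}` placeholders in the generated run.yaml further down. A hypothetical helper, for illustration only, showing that correspondence:

```python
# Hypothetical helper (not a llama-stack API): renders env-var entries of the
# form {"NAME": (default, description)} as "${env.NAME:default}" placeholders.
from typing import Dict, Tuple


def to_placeholders(env_vars: Dict[str, Tuple[str, str]]) -> Dict[str, str]:
    return {name: f"${{env.{name}:{default}}}" for name, (default, _desc) in env_vars.items()}


print(to_placeholders({
    "LLAMA_STACK_PORT": ("5001", "Port for the Llama Stack distribution server"),
    "TOGETHER_API_KEY": ("", "Together API Key"),
}))
# {'LLAMA_STACK_PORT': '${env.LLAMA_STACK_PORT:5001}',
#  'TOGETHER_API_KEY': '${env.TOGETHER_API_KEY:}'}
```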

View file

@@ -38,7 +38,7 @@ providers:
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/sqlite_vec.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/sqlite_vec.db
   - provider_id: ${env.ENABLE_CHROMADB+chromadb}
     provider_type: remote::chromadb
     config:
@@ -62,14 +62,14 @@ providers:
       persistence_store:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dev/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/open-benchmark/trace_store.db}
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -114,18 +114,13 @@ providers:
     config: {}
 metadata_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db
 models:
 - metadata: {}
   model_id: openai/gpt-4o
   provider_id: openai
   provider_model_id: openai/gpt-4o
   model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: together
-  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
-  model_type: llm
 - metadata: {}
   model_id: anthropic/claude-3-5-sonnet-latest
   provider_id: anthropic
@@ -137,88 +132,100 @@ models:
   provider_model_id: gemini/gemini-1.5-flash
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.3-70B-Instruct
+  model_id: groq/llama-3.3-70b-versatile
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
 vector_dbs: []
 datasets:
-- dataset_id: simpleqa
-  provider_id: huggingface
-  url:
-    uri: https://huggingface.co/datasets/llamastack/simpleqa
-  metadata:
-    path: llamastack/simpleqa
-    name:
-    split: train
-  dataset_schema:
-    input_query:
-      type: string
-    expected_answer:
-      type: string
-    chat_completion_input:
-      type: string
-- dataset_id: mmlu_cot
-  provider_id: huggingface
-  url:
-    uri: https://huggingface.co/datasets/llamastack/mmlu_cot
-  metadata:
-    path: llamastack/mmlu_cot
-    name: all
-    split: test
-  dataset_schema:
-    input_query:
-      type: string
-    expected_answer:
-      type: string
-    chat_completion_input:
-      type: string
-- dataset_id: gpqa_cot
-  provider_id: huggingface
-  url:
-    uri: https://huggingface.co/datasets/llamastack/gpqa_0shot_cot
-  metadata:
-    path: llamastack/gpqa_0shot_cot
-    name: gpqa_main
-    split: train
-  dataset_schema:
-    input_query:
-      type: string
-    expected_answer:
-      type: string
-    chat_completion_input:
-      type: string
-- dataset_id: math_500
-  provider_id: huggingface
-  url:
-    uri: https://huggingface.co/datasets/llamastack/math_500
-  metadata:
-    path: llamastack/math_500
-    name:
-    split: test
-  dataset_schema:
-    input_query:
-      type: string
-    expected_answer:
-      type: string
-    chat_completion_input:
-      type: string
+- dataset_schema:
+    input_query:
+      type: string
+    expected_answer:
+      type: string
+    chat_completion_input:
+      type: string
+  url:
+    uri: https://huggingface.co/datasets/llamastack/simpleqa
+  metadata:
+    path: llamastack/simpleqa
+    name: null
+    split: train
+  dataset_id: simpleqa
+  provider_id: huggingface
+- dataset_schema:
+    input_query:
+      type: string
+    expected_answer:
+      type: string
+    chat_completion_input:
+      type: string
+  url:
+    uri: https://huggingface.co/datasets/llamastack/mmlu_cot
+  metadata:
+    path: llamastack/mmlu_cot
+    name: all
+    split: test
+  dataset_id: mmlu_cot
+  provider_id: huggingface
+- dataset_schema:
+    input_query:
+      type: string
+    expected_answer:
+      type: string
+    chat_completion_input:
+      type: string
+  url:
+    uri: https://huggingface.co/datasets/llamastack/gpqa_0shot_cot
+  metadata:
+    path: llamastack/gpqa_0shot_cot
+    name: main
+    split: train
+  dataset_id: gpqa_cot
+  provider_id: huggingface
+- dataset_schema:
+    input_query:
+      type: string
+    expected_answer:
+      type: string
+    chat_completion_input:
+      type: string
+  url:
+    uri: https://huggingface.co/datasets/llamastack/math_500
+  metadata:
+    path: llamastack/math_500
+    split: test
+  dataset_id: math_500
+  provider_id: huggingface
 scoring_fns: []
 benchmarks:
-- benchmark_id: meta-reference-simpleqa
-  dataset_id: simpleqa
-  scoring_functions: ["llm-as-judge::405b-simpleqa"]
-- benchmark_id: meta-reference-mmlu-cot
-  dataset_id: mmlu_cot
-  scoring_functions: ["basic::regex_parser_multiple_choice_answer"]
-- benchmark_id: meta-reference-gpqa-cot
-  dataset_id: gpqa_cot
-  scoring_functions: ["basic::regex_parser_multiple_choice_answer"]
-- benchmark_id: meta-reference-math-500
-  dataset_id: math_500
-  scoring_functions: ["basic::regex_parser_math_response"]
+- dataset_id: simpleqa
+  scoring_functions:
+  - llm-as-judge::405b-simpleqa
+  metadata: {}
+  benchmark_id: meta-reference-simpleqa
+- dataset_id: mmlu_cot
+  scoring_functions:
+  - basic::regex_parser_multiple_choice_answer
+  metadata: {}
+  benchmark_id: meta-reference-mmlu-cot
+- dataset_id: gpqa_cot
+  scoring_functions:
+  - basic::regex_parser_multiple_choice_answer
+  metadata: {}
+  benchmark_id: meta-reference-gpqa-cot
+- dataset_id: math_500
+  scoring_functions:
+  - basic::regex_parser_math_response
+  metadata: {}
+  benchmark_id: meta-reference-math-500
 tool_groups:
 - toolgroup_id: builtin::websearch
   provider_id: tavily-search
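
The reshuffled datasets and benchmarks blocks are plain YAML, so their structure is easy to sanity-check. A minimal sketch (not llama-stack code; the file path is a placeholder) that lists which scoring functions each registered benchmark uses:

```python
# Minimal sketch: parse a run.yaml like the one above and summarize its
# benchmark registrations. The path is a placeholder for the generated config.
import yaml  # pip install pyyaml

with open("run.yaml") as f:
    run_config = yaml.safe_load(f)

dataset_sources = {d["dataset_id"]: d["url"]["uri"] for d in run_config.get("datasets", [])}

for bench in run_config.get("benchmarks", []):
    fns = ", ".join(bench["scoring_functions"])
    source = dataset_sources.get(bench["dataset_id"], "<unknown dataset>")
    print(f"{bench['benchmark_id']}: dataset={bench['dataset_id']} ({source}) scoring=[{fns}]")
```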

View file

@@ -16,7 +16,7 @@ providers:
     provider_type: remote::together
     config:
       url: https://api.together.xyz/v1
-      api_key: ${env.TOGETHER_API_KEY}
+      api_key: ${env.TOGETHER_API_KEY:}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}

View file

@@ -16,7 +16,7 @@ providers:
     provider_type: remote::together
     config:
       url: https://api.together.xyz/v1
-      api_key: ${env.TOGETHER_API_KEY}
+      api_key: ${env.TOGETHER_API_KEY:}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}