temp commit

Botao Chen 2025-03-11 17:11:26 -07:00
parent 96039831c4
commit 27d5892dfa
7 changed files with 149 additions and 109 deletions


@@ -453,6 +453,40 @@
     "transformers",
     "uvicorn"
   ],
+  "open-benchmark": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "fastapi",
+    "fire",
+    "httpx",
+    "litellm",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "sqlite-vec",
+    "together",
+    "tqdm",
+    "transformers",
+    "uvicorn"
+  ],
   "remote-vllm": [
     "aiosqlite",
     "autoevals",


@@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
         return {
             "url": "https://api.together.xyz/v1",
-            "api_key": "${env.TOGETHER_API_KEY}",
+            "api_key": "${env.TOGETHER_API_KEY:}",
         }
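
A note on the placeholder syntax in the change above: in these templates the trailing colon appears to introduce a default value, so "${env.TOGETHER_API_KEY:}" resolves to an empty string when the variable is unset (making the key optional), whereas "${env.TOGETHER_API_KEY}" has no fallback. A minimal sketch of that substitution convention, written from the syntax alone and not taken from the actual llama-stack resolver:

import os
import re

# Resolve "${env.NAME}" and "${env.NAME:default}" placeholders against
# os.environ. Illustrative only; the real resolver lives inside llama-stack.
_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")


def resolve_env_placeholders(value: str) -> str:
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        if name in os.environ:
            return os.environ[name]
        if default is not None:
            return default  # "" for "${env.TOGETHER_API_KEY:}"
        raise KeyError(f"environment variable {name} is not set and has no default")

    return _ENV_PATTERN.sub(_sub, value)


print(resolve_env_placeholders("${env.TOGETHER_API_KEY:}"))  # -> "" when unset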


@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .open_benchmark import get_distribution_template # noqa: F401


@@ -36,7 +36,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
             "openai",
             [
                 ProviderModelEntry(
-                    provider_model_id="penai/gpt-4o",
+                    provider_model_id="openai/gpt-4o",
                     model_type=ModelType.llm,
                 )
             ],
@@ -62,26 +62,6 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
             ],
             GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"),
         ),
-        (
-            "groq",
-            [
-                ProviderModelEntry(
-                    provider_model_id="groq/llama-3.3-70b-versatile",
-                    model_type=ModelType.llm,
-                )
-            ],
-            GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
-        ),
-        (
-            "together",
-            [
-                ProviderModelEntry(
-                    provider_model_id="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-                    model_type=ModelType.llm,
-                )
-            ],
-            TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
-        ),
     ]
     inference_providers = []
     available_models = {}
@@ -243,7 +223,7 @@ def get_distribution_template() -> DistributionTemplate:
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
-        description="Distribution for running e2e tests in CI",
+        description="Distribution for running open benchmarks",
         container_image=None,
         template_path=None,
         providers=providers,
@@ -266,13 +246,25 @@ def get_distribution_template() -> DistributionTemplate:
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
-            "FIREWORKS_API_KEY": (
+            "TOGETHER_API_KEY": (
                 "",
-                "Fireworks API Key",
+                "Together API Key",
             ),
             "OPENAI_API_KEY": (
                 "",
                 "OpenAI API Key",
             ),
+            "GEMINI_API_KEY": (
+                "",
+                "Gemini API Key",
+            ),
+            "ANTHROPIC_API_KEY": (
+                "",
+                "Anthropic API Key",
+            ),
+            "GROQ_API_KEY": (
+                "",
+                "Groq API Key",
+            ),
         },
     )
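
The run_config_env_vars block above only records a default and a human-readable description for each variable; with this change the open-benchmark template expects TOGETHER_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, ANTHROPIC_API_KEY and GROQ_API_KEY (plus LLAMA_STACK_PORT). A purely illustrative preflight check, using only the standard library and the variable names copied from this diff:

import os

API_KEY_VARS = [
    "TOGETHER_API_KEY",
    "OPENAI_API_KEY",
    "GEMINI_API_KEY",
    "ANTHROPIC_API_KEY",
    "GROQ_API_KEY",
]

# The template defaults these to "", so unset keys are not fatal at startup;
# requests routed to the corresponding providers would fail later instead.
missing = [name for name in API_KEY_VARS if not os.environ.get(name)]
if missing:
    print("warning: no value set for:", ", ".join(missing))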


@@ -38,7 +38,7 @@ providers:
   - provider_id: sqlite-vec
     provider_type: inline::sqlite-vec
     config:
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/sqlite_vec.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/sqlite_vec.db
   - provider_id: ${env.ENABLE_CHROMADB+chromadb}
     provider_type: remote::chromadb
     config:
@@ -62,14 +62,14 @@ providers:
       persistence_store:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dev/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/open-benchmark/trace_store.db}
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -114,18 +114,13 @@ providers:
     config: {}
 metadata_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db
 models:
 - metadata: {}
   model_id: openai/gpt-4o
   provider_id: openai
   provider_model_id: openai/gpt-4o
   model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: together
-  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
-  model_type: llm
 - metadata: {}
   model_id: anthropic/claude-3-5-sonnet-latest
   provider_id: anthropic
@@ -137,88 +132,100 @@ models:
   provider_model_id: gemini/gemini-1.5-flash
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.3-70B-Instruct
+  model_id: groq/llama-3.3-70b-versatile
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
 vector_dbs: []
 datasets:
-- dataset_id: simpleqa
-  provider_id: huggingface
-  url:
-    uri: https://huggingface.co/datasets/llamastack/simpleqa
-  metadata:
-    path: llamastack/simpleqa
-    name:
-    split: train
-  dataset_schema:
+- dataset_schema:
     input_query:
       type: string
     expected_answer:
       type: string
     chat_completion_input:
       type: string
-- dataset_id: mmlu_cot
+  url:
+    uri: https://huggingface.co/datasets/llamastack/simpleqa
+  metadata:
+    path: llamastack/simpleqa
+    name: null
+    split: train
+  dataset_id: simpleqa
   provider_id: huggingface
+- dataset_schema:
+    input_query:
+      type: string
+    expected_answer:
+      type: string
+    chat_completion_input:
+      type: string
   url:
     uri: https://huggingface.co/datasets/llamastack/mmlu_cot
   metadata:
     path: llamastack/mmlu_cot
     name: all
     split: test
-  dataset_schema:
+  dataset_id: mmlu_cot
+  provider_id: huggingface
+- dataset_schema:
     input_query:
       type: string
     expected_answer:
       type: string
     chat_completion_input:
       type: string
-- dataset_id: gpqa_cot
-  provider_id: huggingface
   url:
     uri: https://huggingface.co/datasets/llamastack/gpqa_0shot_cot
   metadata:
     path: llamastack/gpqa_0shot_cot
-    name: gpqa_main
+    name: main
     split: train
-  dataset_schema:
+  dataset_id: gpqa_cot
+  provider_id: huggingface
+- dataset_schema:
     input_query:
       type: string
     expected_answer:
       type: string
     chat_completion_input:
       type: string
-- dataset_id: math_500
-  provider_id: huggingface
   url:
     uri: https://huggingface.co/datasets/llamastack/math_500
   metadata:
     path: llamastack/math_500
-    name:
    split: test
-  dataset_schema:
-    input_query:
-      type: string
-    expected_answer:
-      type: string
-    chat_completion_input:
-      type: string
+  dataset_id: math_500
+  provider_id: huggingface
 scoring_fns: []
 benchmarks:
-- benchmark_id: meta-reference-simpleqa
-  dataset_id: simpleqa
-  scoring_functions: ["llm-as-judge::405b-simpleqa"]
-- benchmark_id: meta-reference-mmlu-cot
-  dataset_id: mmlu_cot
-  scoring_functions: ["basic::regex_parser_multiple_choice_answer"]
-- benchmark_id: meta-reference-gpqa-cot
-  dataset_id: gpqa_cot
-  scoring_functions: ["basic::regex_parser_multiple_choice_answer"]
-- benchmark_id: meta-reference-math-500
-  dataset_id: math_500
-  scoring_functions: ["basic::regex_parser_math_response"]
+- dataset_id: simpleqa
+  scoring_functions:
+  - llm-as-judge::405b-simpleqa
+  metadata: {}
+  benchmark_id: meta-reference-simpleqa
+- dataset_id: mmlu_cot
+  scoring_functions:
+  - basic::regex_parser_multiple_choice_answer
+  metadata: {}
+  benchmark_id: meta-reference-mmlu-cot
+- dataset_id: gpqa_cot
+  scoring_functions:
+  - basic::regex_parser_multiple_choice_answer
+  metadata: {}
+  benchmark_id: meta-reference-gpqa-cot
+- dataset_id: math_500
+  scoring_functions:
+  - basic::regex_parser_math_response
+  metadata: {}
+  benchmark_id: meta-reference-math-500
 tool_groups:
 - toolgroup_id: builtin::websearch
   provider_id: tavily-search
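
Since the datasets and benchmarks sections above are plain YAML, the benchmarks registered by the new run.yaml can be summarized with a few lines of Python. Illustrative sketch only; the file path is assumed from the template name and may differ in a given checkout, and it requires PyYAML:

import yaml

with open("llama_stack/templates/open-benchmark/run.yaml") as f:
    run_config = yaml.safe_load(f)

for bench in run_config.get("benchmarks", []):
    print(bench["benchmark_id"], "->", bench["dataset_id"], bench["scoring_functions"])

# Expected, given the entries in this diff:
#   meta-reference-simpleqa -> simpleqa ['llm-as-judge::405b-simpleqa']
#   meta-reference-mmlu-cot -> mmlu_cot ['basic::regex_parser_multiple_choice_answer']
#   meta-reference-gpqa-cot -> gpqa_cot ['basic::regex_parser_multiple_choice_answer']
#   meta-reference-math-500 -> math_500 ['basic::regex_parser_math_response']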


@@ -16,7 +16,7 @@ providers:
     provider_type: remote::together
     config:
       url: https://api.together.xyz/v1
-      api_key: ${env.TOGETHER_API_KEY}
+      api_key: ${env.TOGETHER_API_KEY:}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}


@@ -16,7 +16,7 @@ providers:
     provider_type: remote::together
     config:
       url: https://api.together.xyz/v1
-      api_key: ${env.TOGETHER_API_KEY}
+      api_key: ${env.TOGETHER_API_KEY:}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}