diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
index 1fc1d34e2..ea04331c9 100644
--- a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
+++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
@@ -12,6 +12,7 @@ from llama_stack.apis.scoring_functions import (
 )
 
 MULTILINGUAL_ANSWER_REGEXES = [
+    r"The best answer is ",
     r"Answer\s*:",
     r"Answer\s*:​​​​​​",  # Korean invisible character
     r"উত্তর\s*:",
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
index 074f1ff46..3139cb671 100644
--- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
+++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
@@ -88,7 +88,7 @@ llm_as_judge_405b_simpleqa = ScoringFn(
     provider_id="llm-as-judge",
     provider_resource_id="llm-as-judge-405b-simpleqa",
     params=LLMAsJudgeScoringFnParams(
-        judge_model="meta-llama/Llama-3.1-405B-Instruct",
+        judge_model="meta-llama/Llama-3.2-3B-Instruct",  # NOTE(review): id says 405b; confirm 3B judge is intended
         prompt_template=GRADER_TEMPLATE,
         judge_score_regexes=[r"(A|B|C)"],
         aggregation_functions=[AggregationFunctionType.categorical_count.value],
diff --git a/llama_stack/templates/openai/build.yaml b/llama_stack/templates/openai/build.yaml
new file mode 100644
index 000000000..9369bf3b4
--- /dev/null
+++ b/llama_stack/templates/openai/build.yaml
@@ -0,0 +1,33 @@
+version: '2'
+distribution_spec:
+  description: OpenAI model inference
+  providers:
+    inference:
+    - remote::openai
+    - remote::together  # run.yaml configures a `together` inference provider
+    vector_io:
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+    eval:
+    - inline::meta-reference
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
+    - remote::model-context-protocol
+image_type: conda
diff --git a/llama_stack/templates/openai/run.yaml b/llama_stack/templates/openai/run.yaml
new file mode 100644
index 000000000..0ecb15c2e
--- /dev/null
+++ b/llama_stack/templates/openai/run.yaml
@@ -0,0 +1,148 @@
+version: '2'
+image_name: openai
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/registry.db
+models:
+- metadata: {}
+  model_id: openai/gpt-4o
+  provider_id: openai
+  provider_model_id: openai/gpt-4o
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-405B-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
+shields: []
+vector_dbs: []
+datasets:
+  - dataset_id: simpleqa
+    provider_id: huggingface
+    url:
+      uri: https://huggingface.co/datasets/llamastack/simpleqa
+    metadata:
+      path: llamastack/simpleqa
+      name:
+      split: train
+    dataset_schema:
+      input_query:
+        type: string
+      expected_answer:
+        type: string
+      chat_completion_input:
+        type: string
+  - dataset_id: mmlu_cot
+    provider_id: huggingface
+    url:
+      uri: https://huggingface.co/datasets/llamastack/mmlu_cot
+    metadata:
+      path: llamastack/mmlu_cot
+      name: all
+      split: test
+    dataset_schema:
+      input_query:
+        type: string
+      expected_answer:
+        type: string
+      chat_completion_input:
+        type: string
+scoring_fns: []
+benchmarks:
+  - benchmark_id: meta-reference-simpleqa
+    dataset_id: simpleqa
+    scoring_functions: ["llm-as-judge::405b-simpleqa"]
+  - benchmark_id: meta-reference-mmlu-cot
+    dataset_id: mmlu_cot
+    scoring_functions: ["basic::regex_parser_multiple_choice_answer"]
+tool_groups: []
+server:
+  port: 8321