From 5418e63919e11b63fdb833a11910ab1b54858aa7 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Wed, 19 Mar 2025 10:59:17 -0600 Subject: [PATCH 1/5] chore: Add triagers list #1561 (#1701) # What does this PR do? Adds triagers list ## Closes #1561 ## Documentation Was provided here: https://github.com/meta-llama/llama-stack/pull/1621 Signed-off-by: Francisco Javier Arceo --- .github/TRIAGERS.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .github/TRIAGERS.md diff --git a/.github/TRIAGERS.md b/.github/TRIAGERS.md new file mode 100644 index 000000000..d4ef6d1ac --- /dev/null +++ b/.github/TRIAGERS.md @@ -0,0 +1,2 @@ +# This file documents Triage members in the Llama Stack community +@franciscojavierarceo @leseb From 113f3a259c91bd74881be7434a55e36f860f7e33 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Wed, 19 Mar 2025 10:16:00 -0700 Subject: [PATCH 2/5] docs: add documentation for RAGDocument (#1693) # What does this PR do? ## Test Plan --- docs/_static/llama-stack-spec.html | 15 ++++++++++----- docs/_static/llama-stack-spec.yaml | 6 ++++++ llama_stack/apis/tools/rag_tool.py | 9 +++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 2362dfa53..b32b7cfdf 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -7787,7 +7787,8 @@ "type": "object", "properties": { "document_id": { - "type": "string" + "type": "string", + "description": "The unique identifier for the document." }, "content": { "oneOf": [ @@ -7806,10 +7807,12 @@ { "$ref": "#/components/schemas/URL" } - ] + ], + "description": "The content of the document." }, "mime_type": { - "type": "string" + "type": "string", + "description": "The MIME type of the document." }, "metadata": { "type": "object", @@ -7834,7 +7837,8 @@ "type": "object" } ] - } + }, + "description": "Additional metadata for the document." } }, "additionalProperties": false, @@ -7843,7 +7847,8 @@ "content", "metadata" ], - "title": "RAGDocument" + "title": "RAGDocument", + "description": "A document to be used for document ingestion in the RAG Tool." }, "InsertRequest": { "type": "object", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 38e08e41c..eb5d9722e 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5375,6 +5375,7 @@ components: properties: document_id: type: string + description: The unique identifier for the document. content: oneOf: - type: string @@ -5383,8 +5384,10 @@ components: items: $ref: '#/components/schemas/InterleavedContentItem' - $ref: '#/components/schemas/URL' + description: The content of the document. mime_type: type: string + description: The MIME type of the document. metadata: type: object additionalProperties: @@ -5395,12 +5398,15 @@ components: - type: string - type: array - type: object + description: Additional metadata for the document. additionalProperties: false required: - document_id - content - metadata title: RAGDocument + description: >- + A document to be used for document ingestion in the RAG Tool. 
InsertRequest: type: object properties: diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index 2b9ef10d8..671e19619 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -17,6 +17,15 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho @json_schema_type class RAGDocument(BaseModel): + """ + A document to be used for document ingestion in the RAG Tool. + + :param document_id: The unique identifier for the document. + :param content: The content of the document. + :param mime_type: The MIME type of the document. + :param metadata: Additional metadata for the document. + """ + document_id: str content: InterleavedContent | URL mime_type: str | None = None From 65ca85ba6b938bf14a848200ebbf0ad111c837f4 Mon Sep 17 00:00:00 2001 From: Hardik Shah Date: Wed, 19 Mar 2025 10:36:19 -0700 Subject: [PATCH 3/5] fix: Updating `ToolCall.arguments` to allow for json strings that can be decoded on client side (#1685) ### What does this PR do? Currently, `ToolCall.arguments` is a `Dict[str, RecursiveType]`. However, on the client SDK side -- the `RecursiveType` gets deserialized into a number ( both int and float get collapsed ) and hence when params are `int` they get converted to float which might break client side tools that might be doing type checking. Closes: https://github.com/meta-llama/llama-stack/issues/1683 ### Test Plan Stainless changes -- https://github.com/meta-llama/llama-stack-client-python/pull/204 ``` pytest -s -v --stack-config=fireworks tests/integration/agents/test_agents.py --text-model meta-llama/Llama-3.1-8B-Instruct ``` --- docs/_static/llama-stack-spec.html | 132 ++++++++++-------- docs/_static/llama-stack-spec.yaml | 52 +++---- llama_stack/models/llama/datatypes.py | 9 +- .../models/llama/llama3/chat_format.py | 9 +- .../models/llama/llama3/template_data.py | 7 +- .../providers/inline/inference/vllm/vllm.py | 1 + .../remote/inference/sambanova/sambanova.py | 10 +- .../providers/remote/inference/vllm/vllm.py | 8 +- .../utils/inference/openai_compat.py | 14 +- tests/unit/models/test_prompt_adapter.py | 5 +- 10 files changed, 137 insertions(+), 110 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index b32b7cfdf..eb626fc44 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -4159,70 +4159,80 @@ ] }, "arguments": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - }, - { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] } - ] - } - }, - { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" + }, + { + "type": "object", + "additionalProperties": { + 
"oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] } - ] - } + } + ] } - ] - } + } + ] + }, + "arguments_json": { + "type": "string" } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index eb5d9722e..fa6920381 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -2864,30 +2864,34 @@ components: title: BuiltinTool - type: string arguments: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - - type: array - items: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' + oneOf: + - type: string + - type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + - type: array + items: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + - type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + arguments_json: + type: string additionalProperties: false required: - call_id diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index b25bf0ea9..9842d7980 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -47,7 +47,14 @@ RecursiveType = Union[Primitive, List[Primitive], Dict[str, Primitive]] class ToolCall(BaseModel): call_id: str tool_name: Union[BuiltinTool, str] - arguments: Dict[str, RecursiveType] + # Plan is to deprecate the Dict in favor of a JSON string + # that is parsed on the client side instead of trying to manage + # the recursive type here. + # Making this a union so that client side can start prepping for this change. + # Eventually, we will remove both the Dict and arguments_json field, + # and arguments will just be a str + arguments: Union[str, Dict[str, RecursiveType]] + arguments_json: Optional[str] = None @field_validator("tool_name", mode="before") @classmethod diff --git a/llama_stack/models/llama/llama3/chat_format.py b/llama_stack/models/llama/llama3/chat_format.py index 011ccb02a..2862f8558 100644 --- a/llama_stack/models/llama/llama3/chat_format.py +++ b/llama_stack/models/llama/llama3/chat_format.py @@ -12,6 +12,7 @@ # the top-level of this source tree. 
import io +import json import uuid from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -203,9 +204,10 @@ class ChatFormat: # This code tries to handle that case if tool_name in BuiltinTool.__members__: tool_name = BuiltinTool[tool_name] - tool_arguments = { - "query": list(tool_arguments.values())[0], - } + if isinstance(tool_arguments, dict): + tool_arguments = { + "query": list(tool_arguments.values())[0], + } else: builtin_tool_info = ToolUtils.maybe_extract_builtin_tool_call(content) if builtin_tool_info is not None: @@ -229,6 +231,7 @@ class ChatFormat: call_id=call_id, tool_name=tool_name, arguments=tool_arguments, + arguments_json=json.dumps(tool_arguments), ) ) content = "" diff --git a/llama_stack/models/llama/llama3/template_data.py b/llama_stack/models/llama/llama3/template_data.py index aa16aa009..076b4adb4 100644 --- a/llama_stack/models/llama/llama3/template_data.py +++ b/llama_stack/models/llama/llama3/template_data.py @@ -11,11 +11,8 @@ # top-level folder for each specific model found within the models/ directory at # the top-level of this source tree. -from llama_stack.models.llama.datatypes import ( - BuiltinTool, - StopReason, - ToolCall, -) + +from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from .prompt_templates import ( BuiltinToolGenerator, diff --git a/llama_stack/providers/inline/inference/vllm/vllm.py b/llama_stack/providers/inline/inference/vllm/vllm.py index b59df13d0..256e0f821 100644 --- a/llama_stack/providers/inline/inference/vllm/vllm.py +++ b/llama_stack/providers/inline/inference/vllm/vllm.py @@ -582,6 +582,7 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate): tool_name=t.function.name, # vLLM function args come back as a string. Llama Stack expects JSON. 
arguments=json.loads(t.function.arguments), + arguments_json=t.function.arguments, ) for t in vllm_message.tool_calls ], diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index a5e17c2a3..635a42d38 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -42,9 +42,7 @@ from llama_stack.models.llama.datatypes import ( TopKSamplingStrategy, TopPSamplingStrategy, ) -from llama_stack.providers.utils.inference.model_registry import ( - ModelRegistryHelper, -) +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import ( process_chat_completion_stream_response, ) @@ -293,14 +291,12 @@ class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference): if not tool_calls: return [] - for call in tool_calls: - call_function_arguments = json.loads(call.function.arguments) - compitable_tool_calls = [ ToolCall( call_id=call.id, tool_name=call.function.name, - arguments=call_function_arguments, + arguments=json.loads(call.function.arguments), + arguments_json=call.function.arguments, ) for call in tool_calls ] diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index f940de7ba..eda1a179c 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -90,15 +90,12 @@ def _convert_to_vllm_tool_calls_in_response( if not tool_calls: return [] - call_function_arguments = None - for call in tool_calls: - call_function_arguments = json.loads(call.function.arguments) - return [ ToolCall( call_id=call.id, tool_name=call.function.name, - arguments=call_function_arguments, + arguments=json.loads(call.function.arguments), + arguments_json=call.function.arguments, ) for call in tool_calls ] @@ -183,6 +180,7 @@ async def _process_vllm_chat_completion_stream_response( call_id=tool_call_buf.call_id, tool_name=tool_call_buf.tool_name, arguments=args, + arguments_json=args_str, ), parse_status=ToolCallParseStatus.succeeded, ), diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 2a362f8cb..b264c7312 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -529,7 +529,11 @@ async def convert_message_to_openai_dict_new( ) -> Union[str, Iterable[OpenAIChatCompletionContentPartParam]]: async def impl( content_: InterleavedContent, - ) -> Union[str, OpenAIChatCompletionContentPartParam, List[OpenAIChatCompletionContentPartParam]]: + ) -> Union[ + str, + OpenAIChatCompletionContentPartParam, + List[OpenAIChatCompletionContentPartParam], + ]: # Llama Stack and OpenAI spec match for str and text input if isinstance(content_, str): return content_ @@ -570,7 +574,7 @@ async def convert_message_to_openai_dict_new( OpenAIChatCompletionMessageToolCall( id=tool.call_id, function=OpenAIFunction( - name=tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value, + name=(tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value), arguments=json.dumps(tool.arguments), ), type="function", @@ -609,6 +613,7 @@ def convert_tool_call( call_id=tool_call.id, tool_name=tool_call.function.name, arguments=json.loads(tool_call.function.arguments), + 
            arguments_json=tool_call.function.arguments,
         )
     except Exception:
         return UnparseableToolCall(
@@ -759,6 +764,7 @@ def _convert_openai_tool_calls(
             call_id=call.id,
             tool_name=call.function.name,
             arguments=json.loads(call.function.arguments),
+            arguments_json=call.function.arguments,
         )
         for call in tool_calls
     ]
@@ -890,7 +896,8 @@ async def convert_openai_chat_completion_stream(
                     # ChatCompletionResponseEvent only supports one per stream
                     if len(choice.delta.tool_calls) > 1:
                         warnings.warn(
-                            "multiple tool calls found in a single delta, using the first, ignoring the rest", stacklevel=2
+                            "multiple tool calls found in a single delta, using the first, ignoring the rest",
+                            stacklevel=2,
                         )

                     if not enable_incremental_tool_calls:
@@ -971,6 +978,7 @@ async def convert_openai_chat_completion_stream(
                                 call_id=buffer["call_id"],
                                 tool_name=buffer["name"],
                                 arguments=arguments,
+                                arguments_json=buffer["arguments"],
                             )
                             yield ChatCompletionResponseStreamChunk(
                                 event=ChatCompletionResponseEvent(
diff --git a/tests/unit/models/test_prompt_adapter.py b/tests/unit/models/test_prompt_adapter.py
index c3755e2cb..0e2780e50 100644
--- a/tests/unit/models/test_prompt_adapter.py
+++ b/tests/unit/models/test_prompt_adapter.py
@@ -165,7 +165,10 @@ class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase):
         request.model = MODEL
         request.tool_config.tool_prompt_format = ToolPromptFormat.json
         prompt = await chat_completion_request_to_prompt(request, request.model)
-        self.assertIn('{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}', prompt)
+        self.assertIn(
+            '{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}',
+            prompt,
+        )

     async def test_user_provided_system_message(self):
         content = "Hello !"
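A note on consuming the dual representation introduced above: during the migration, `arguments` may be either the legacy dict or a raw JSON string, and `arguments_json` carries the lossless encoding whenever the server populates it. A minimal client-side sketch, assuming a `ToolCall` shaped like the model in this patch (`coerce_arguments` is an illustrative helper, not an SDK function):

```python
import json
from typing import Any, Dict


def coerce_arguments(tool_call) -> Dict[str, Any]:
    # Prefer the raw JSON string: parsing it client-side preserves
    # int vs. float, which the pre-deserialized dict form can collapse.
    if getattr(tool_call, "arguments_json", None):
        return json.loads(tool_call.arguments_json)
    # During the transition, `arguments` itself may already be a string.
    if isinstance(tool_call.arguments, str):
        return json.loads(tool_call.arguments)
    return tool_call.arguments  # legacy Dict[str, RecursiveType]
```

Parsing on the client keeps `1` an int and `1.0` a float, which is exactly the guarantee the collapsed dict representation could not make.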
From 6949bd19998d761003958486e38a2bd53c231d58 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Wed, 19 Mar 2025 17:46:37 +0000
Subject: [PATCH 4/5] fix: Call pandas.read_* in a separate thread (#1698)

These calls block on I/O reads, which in turn blocks the server. Move them
to their own thread.

Closes: #1697

# What does this PR do?
To avoid blocking the main event loop, this updates datasetio/localfs to load
data in a separate thread.

Signed-off-by: Derek Higgins
---
 .../providers/inline/datasetio/localfs/datasetio.py |  8 ++++----
 llama_stack/providers/utils/datasetio/url_utils.py  | 10 +++++++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index cf4bf7fec..f489739bf 100644
--- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -35,12 +35,12 @@ class PandasDataframeDataset:
         else:
             return self.df.iloc[idx].to_dict()

-    def load(self) -> None:
+    async def load(self) -> None:
         if self.df is not None:
             return

         if self.dataset_def.source.type == "uri":
-            self.df = get_dataframe_from_uri(self.dataset_def.source.uri)
+            self.df = await get_dataframe_from_uri(self.dataset_def.source.uri)
         elif self.dataset_def.source.type == "rows":
             self.df = pandas.DataFrame(self.dataset_def.source.rows)
         else:
@@ -95,7 +95,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
     ) -> IterrowsResponse:
         dataset_def = self.dataset_infos[dataset_id]
         dataset_impl = PandasDataframeDataset(dataset_def)
-        dataset_impl.load()
+        await dataset_impl.load()

         start_index = start_index or 0

@@ -114,7 +114,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
     async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
         dataset_def = self.dataset_infos[dataset_id]
         dataset_impl = PandasDataframeDataset(dataset_def)
-        dataset_impl.load()
+        await dataset_impl.load()

         new_rows_df = pandas.DataFrame(rows)
         dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True)
diff --git a/llama_stack/providers/utils/datasetio/url_utils.py b/llama_stack/providers/utils/datasetio/url_utils.py
index 6a544ea49..386ee736d 100644
--- a/llama_stack/providers/utils/datasetio/url_utils.py
+++ b/llama_stack/providers/utils/datasetio/url_utils.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
 import base64
 import io
 from urllib.parse import unquote
@@ -13,12 +14,15 @@ import pandas
 from llama_stack.providers.utils.memory.vector_store import parse_data_url


-def get_dataframe_from_uri(uri: str):
+async def get_dataframe_from_uri(uri: str):
     df = None
     if uri.endswith(".csv"):
-        df = pandas.read_csv(uri)
+        # Moving to its own thread to avoid I/O blocking the event loop.
+        # This isn't ideal, as it moves more than just the I/O to a new thread,
+        # but it is as close as we can easily get.
+        df = await asyncio.to_thread(pandas.read_csv, uri)
     elif uri.endswith(".xlsx"):
-        df = pandas.read_excel(uri)
+        df = await asyncio.to_thread(pandas.read_excel, uri)
     elif uri.startswith("data:"):
         parts = parse_data_url(uri)
         data = parts["data"]
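Before the final patch, a quick illustration of the event-loop fix above: any synchronous, I/O-bound call made directly inside a coroutine stalls every other task sharing the loop, and `asyncio.to_thread` is the stock remedy. A self-contained sketch of the before/after (the function names here are illustrative):

```python
import asyncio

import pandas


async def load_csv_blocking(uri: str) -> pandas.DataFrame:
    # Runs pandas.read_csv on the event-loop thread: every other
    # coroutine is stalled until the read completes.
    return pandas.read_csv(uri)


async def load_csv_offloaded(uri: str) -> pandas.DataFrame:
    # asyncio.to_thread hands the blocking read to a worker thread and
    # suspends this coroutine, so the loop keeps serving other requests.
    return await asyncio.to_thread(pandas.read_csv, uri)
```

The caveat from the commit comment still applies: `to_thread` offloads the whole call, parsing included, not just the I/O.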
From ab777ef5cd919c73f77d9a7af8d3c5f03ab57098 Mon Sep 17 00:00:00 2001
From: Botao Chen
Date: Wed, 19 Mar 2025 11:27:11 -0700
Subject: [PATCH 5/5] fix: fix open-benchmark template (#1695)

## What does this PR do?
The open-benchmark template is broken after the datasets API refactor, for two
reasons:
- provider_id and provider_resource_id are no longer needed
- the `source` field in run.yaml gets resolved as a plain dict rather than a
  DataSource model

This PR fixes both issues.

## Test
Spin up a llama stack server successfully with
`llama stack run llama_stack/templates/open-benchmark/run.yaml`.
---
 llama_stack/apis/datasets/datasets.py                  | 2 --
 llama_stack/distribution/routers/routing_tables.py     | 8 ++++++++
 llama_stack/templates/open-benchmark/open_benchmark.py | 5 -----
 llama_stack/templates/open-benchmark/run.yaml          | 5 -----
 4 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index 616371c7d..e2c940f64 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -121,8 +121,6 @@ class Dataset(CommonDatasetFields, Resource):

 class DatasetInput(CommonDatasetFields, BaseModel):
     dataset_id: str
-    provider_id: Optional[str] = None
-    provider_dataset_id: Optional[str] = None


 class ListDatasetsResponse(BaseModel):
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index 5dea942f7..7aef2f8d5 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -20,6 +20,8 @@ from llama_stack.apis.datasets import (
     DatasetType,
     DataSource,
     ListDatasetsResponse,
+    RowsDataSource,
+    URIDataSource,
 )
 from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType
 from llama_stack.apis.resource import ResourceType
@@ -377,6 +379,12 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
         metadata: Optional[Dict[str, Any]] = None,
         dataset_id: Optional[str] = None,
     ) -> Dataset:
+        if isinstance(source, dict):
+            if source["type"] == "uri":
+                source = URIDataSource.parse_obj(source)
+            elif source["type"] == "rows":
+                source = RowsDataSource.parse_obj(source)
+
         if not dataset_id:
             dataset_id = f"dataset-{str(uuid.uuid4())}"

diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py
index b339e8c80..acfbd78d6 100644
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@@ -170,7 +170,6 @@ def get_distribution_template() -> DistributionTemplate:
     default_datasets = [
         DatasetInput(
             dataset_id="simpleqa",
-            provider_id="huggingface",
             purpose=DatasetPurpose.eval_messages_answer,
             source=URIDataSource(
                 uri="huggingface://datasets/llamastack/simpleqa?split=train",
             ),
         ),
         DatasetInput(
             dataset_id="mmlu_cot",
-            provider_id="huggingface",
             purpose=DatasetPurpose.eval_messages_answer,
             source=URIDataSource(
                 uri="huggingface://datasets/llamastack/mmlu_cot?split=test&name=all",
             ),
         ),
         DatasetInput(
             dataset_id="gpqa_cot",
-            provider_id="huggingface",
             purpose=DatasetPurpose.eval_messages_answer,
             source=URIDataSource(
                 uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
             ),
         ),
         DatasetInput(
             dataset_id="math_500",
-            provider_id="huggingface",
             purpose=DatasetPurpose.eval_messages_answer,
             source=URIDataSource(
                 uri="huggingface://datasets/llamastack/math_500?split=test",
             ),
@@ -202,7 +198,6 @@ def get_distribution_template() -> 
DistributionTemplate: ), DatasetInput( dataset_id="bfcl", - provider_id="huggingface", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource( uri="huggingface://datasets/llamastack/bfcl_v3?split=train", diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 93f437273..8dbf51472 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -164,35 +164,30 @@ datasets: uri: huggingface://datasets/llamastack/simpleqa?split=train metadata: {} dataset_id: simpleqa - provider_id: huggingface - purpose: eval/messages-answer source: type: uri uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all metadata: {} dataset_id: mmlu_cot - provider_id: huggingface - purpose: eval/messages-answer source: type: uri uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main metadata: {} dataset_id: gpqa_cot - provider_id: huggingface - purpose: eval/messages-answer source: type: uri uri: huggingface://datasets/llamastack/math_500?split=test metadata: {} dataset_id: math_500 - provider_id: huggingface - purpose: eval/messages-answer source: type: uri uri: huggingface://datasets/llamastack/bfcl_v3?split=train metadata: {} dataset_id: bfcl - provider_id: huggingface scoring_fns: [] benchmarks: - dataset_id: simpleqa
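Finally, to make the routing-table fix in this last patch concrete: run.yaml hands `register_dataset` a plain dict for `source`, so the router resolves it into a concrete model via the `type` discriminator before registration. A standalone sketch of that coercion using simplified stand-in models (the field definitions are assumptions, not the real llama_stack classes):

```python
from typing import Any, Dict, List, Union

from pydantic import BaseModel


class URIDataSource(BaseModel):  # stand-in for the real model
    type: str = "uri"
    uri: str


class RowsDataSource(BaseModel):  # stand-in for the real model
    type: str = "rows"
    rows: List[Dict[str, Any]]


DataSource = Union[URIDataSource, RowsDataSource]


def resolve_source(source: Union[DataSource, Dict[str, Any]]) -> DataSource:
    # Mirrors the patch: a dict parsed out of run.yaml is resolved into
    # the concrete model by its "type" discriminator before registration.
    if isinstance(source, dict):
        if source["type"] == "uri":
            return URIDataSource.parse_obj(source)
        elif source["type"] == "rows":
            return RowsDataSource.parse_obj(source)
    return source


print(resolve_source({"type": "uri", "uri": "huggingface://datasets/llamastack/simpleqa?split=train"}))
```

`parse_obj` mirrors the patch's own pydantic usage; on pydantic v2 the equivalent call is `model_validate`.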