From 3d891fc9ba764baf50fbb7d4ecc194a3a7b680ba Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 20 Feb 2025 11:21:13 -0800 Subject: [PATCH 1/7] ModelAlias cleanup --- .../providers/utils/inference/model_registry.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index c5f6cd6b5..5cb785843 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -4,9 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from collections import namedtuple from typing import List, Optional +from pydantic import BaseModel, Field + from llama_stack.apis.models.models import ModelType from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate @@ -14,7 +15,14 @@ from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) -ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"]) + +# TODO: this class is more confusing than useful right now. We need to make it +# closer to the Model class.
+class ModelAlias(BaseModel): + provider_model_id: str + aliases: List[str] = Field(default_factory=list) + llama_model: Optional[str] = None + model_type: ModelType = ModelType.llm def get_huggingface_repo(model_descriptor: str) -> Optional[str]: From 2eda050aef2e33272be08e41f9f9adea76777d28 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 20 Feb 2025 11:46:02 -0800 Subject: [PATCH 2/7] Fix ollama fixture --- llama_stack/providers/tests/inference/fixtures.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/llama_stack/providers/tests/inference/fixtures.py b/llama_stack/providers/tests/inference/fixtures.py index 2a782befc..ec4e094c9 100644 --- a/llama_stack/providers/tests/inference/fixtures.py +++ b/llama_stack/providers/tests/inference/fixtures.py @@ -83,17 +83,13 @@ def inference_cerebras() -> ProviderFixture: @pytest.fixture(scope="session") -def inference_ollama(inference_model) -> ProviderFixture: - inference_model = [inference_model] if isinstance(inference_model, str) else inference_model - if inference_model and "Llama3.1-8B-Instruct" in inference_model: - pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing") - +def inference_ollama() -> ProviderFixture: return ProviderFixture( providers=[ Provider( provider_id="ollama", provider_type="remote::ollama", - config=OllamaImplConfig(host="localhost", port=os.getenv("OLLAMA_PORT", 11434)).model_dump(), + config=OllamaImplConfig(url=get_env_or_fail("OLLAMA_URL")).model_dump(), ) ], ) From eddef0b2aea8bd38e18ea11175c42214cc702928 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 20 Feb 2025 11:48:46 -0800 Subject: [PATCH 3/7] chore: slight renaming of model alias stuff (#1181) Quick test by running: ``` LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk ``` --- .../inference/meta_reference/inference.py | 4 +-- .../remote/inference/bedrock/models.py | 8 ++--- .../remote/inference/cerebras/models.py | 6 ++-- 
.../remote/inference/databricks/databricks.py | 6 ++-- .../remote/inference/fireworks/models.py | 22 ++++++------ .../providers/remote/inference/groq/groq.py | 12 +++---- .../remote/inference/nvidia/models.py | 20 +++++------ .../remote/inference/ollama/ollama.py | 36 +++++++++---------- .../remote/inference/sambanova/models.py | 20 +++++------ .../providers/remote/inference/tgi/tgi.py | 8 ++--- .../remote/inference/together/models.py | 20 +++++------ .../providers/remote/inference/vllm/vllm.py | 8 ++--- .../utils/inference/model_registry.py | 4 +-- 13 files changed, 87 insertions(+), 87 deletions(-) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index c79f97def..dfd27d408 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import ( ) from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.prompt_adapter import ( augment_content_with_response_format_prompt, @@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl( self.model_registry_helper = ModelRegistryHelper( [ - build_model_alias( + build_hf_repo_model_alias( llama_model.descriptor(), llama_model.core_model_id.value, ) diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py index b629e05d5..4c5248619 100644 --- a/llama_stack/providers/remote/inference/bedrock/models.py +++ b/llama_stack/providers/remote/inference/bedrock/models.py @@ -6,19 +6,19 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - 
build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-8b-instruct-v1:0", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-70b-instruct-v1:0", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-405b-instruct-v1:0", CoreModelId.llama3_1_405b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py index 03ffeb492..53b0d5b55 100644 --- a/llama_stack/providers/remote/inference/cerebras/models.py +++ b/llama_stack/providers/remote/inference/cerebras/models.py @@ -6,15 +6,15 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( "llama3.1-8b", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-3.3-70b", CoreModelId.llama3_3_70b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py index 05e61361c..03da4d129 100644 --- a/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/llama_stack/providers/remote/inference/databricks/databricks.py @@ -25,7 +25,7 @@ from llama_stack.apis.inference import ( from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, @@ -39,11 +39,11 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import DatabricksImplConfig model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( 
"databricks-meta-llama-3-1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "databricks-meta-llama-3-1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 14de585d4..8ba67c9ff 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -6,47 +6,47 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-8b-instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-1b-instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-3b-instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-11b-vision-instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p3-70b-instruct", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( 
"accounts/fireworks/models/llama-guard-3-8b", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-guard-3-11b-vision", CoreModelId.llama_guard_3_11b_vision.value, ), diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 441b6af5c..12ee613fe 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, + build_hf_repo_model_alias, build_model_alias, - build_model_alias_with_just_provider_model_id, ) from .groq_utils import ( @@ -42,19 +42,19 @@ from .groq_utils import ( ) _MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "llama3-8b-8192", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama-3.1-8b-instant", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3-70b-8192", CoreModelId.llama3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-3.3-70b-versatile", CoreModelId.llama3_3_70b_instruct.value, ), @@ -62,7 +62,7 @@ _MODEL_ALIASES = [ # Preview models aren't recommended for production use, but we include this one # to pass the test fixture # TODO(aidand): Replace this with a stable model once Groq supports it - build_model_alias( + build_hf_repo_model_alias( "llama-3.2-3b-preview", CoreModelId.llama3_2_3b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py index 1d9b575d4..6a359e009 100644 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ 
b/llama_stack/providers/remote/inference/nvidia/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) _MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "meta/llama3-8b-instruct", CoreModelId.llama3_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama3-70b-instruct", CoreModelId.llama3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-8b-instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-1b-instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-3b-instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-11b-vision-instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 2488d9071..287f025e0 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, + build_hf_repo_model_alias, build_model_alias, - 
build_model_alias_with_just_provider_model_id, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, @@ -59,73 +59,73 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( log = logging.getLogger(__name__) model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( "llama3.1:8b-instruct-fp16", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.1:8b", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.1:70b-instruct-fp16", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.1:70b", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.1:405b-instruct-fp16", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.1:405b", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2:1b-instruct-fp16", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2:1b", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2:3b-instruct-fp16", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2:3b", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2-vision:11b-instruct-fp16", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2-vision:latest", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2-vision:90b-instruct-fp16", CoreModelId.llama3_2_90b_vision_instruct.value, ), - 
build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2-vision:90b", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.3:70b", CoreModelId.llama3_3_70b_instruct.value, ), # The Llama Guard models don't have their full fp16 versions # so we are going to alias their default version to the canonical SKU - build_model_alias( + build_hf_repo_model_alias( "llama-guard3:8b", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-guard3:1b", CoreModelId.llama_guard_3_1b.value, ), diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py index 27a4a149e..1e002c81d 100644 --- a/llama_stack/providers/remote/inference/sambanova/models.py +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-8B-Instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-70B-Instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-405B-Instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.2-1B-Instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.2-3B-Instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.3-70B-Instruct", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Llama-3.2-11B-Vision-Instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - 
build_model_alias( + build_hf_repo_model_alias( "Llama-3.2-90B-Vision-Instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-Guard-3-8B", CoreModelId.llama_guard_3_8b.value, ), diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 7ffeced95..cd2311a48 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -32,7 +32,7 @@ from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, @@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl log = logging.getLogger(__name__) -def build_model_aliases(): +def build_hf_repo_model_aliases(): return [ - build_model_alias( + build_hf_repo_model_alias( model.huggingface_repo, model.descriptor(), ) @@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): model_id: str def __init__(self) -> None: - self.register_helper = ModelRegistryHelper(build_model_aliases()) + self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases()) self.huggingface_repo_to_llama_model_id = { model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo } diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index 87d282ea5..87904c47b 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from 
llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-3B-Instruct-Turbo", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.3-70B-Instruct-Turbo", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-Guard-3-8B", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-Guard-3-11B-Vision-Turbo", CoreModelId.llama_guard_3_11b_vision.value, ), diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 220bf4bde..75dc432e4 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from 
llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionResponse, @@ -62,9 +62,9 @@ from .config import VLLMInferenceAdapterConfig log = logging.getLogger(__name__) -def build_model_aliases(): +def build_hf_repo_model_aliases(): return [ - build_model_alias( + build_hf_repo_model_alias( model.huggingface_repo, model.descriptor(), ) @@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response( class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): def __init__(self, config: VLLMInferenceAdapterConfig) -> None: - self.register_helper = ModelRegistryHelper(build_model_aliases()) + self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases()) self.config = config self.client = None diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index 5cb785843..e14a733d1 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -32,7 +32,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]: return None -def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: +def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: return ModelAlias( provider_model_id=provider_model_id, aliases=[ @@ -42,7 +42,7 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli ) -def build_model_alias_with_just_provider_model_id(provider_model_id: str, model_descriptor: str) -> ModelAlias: +def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: return ModelAlias( provider_model_id=provider_model_id, aliases=[], From f7161611c66913f4c0e1ac9f67dfae28f413af5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladimir=20Ivi=C4=87?= Date: Thu, 20 Feb 2025 13:09:00 -0800 Subject: [PATCH 4/7] feat: adding endpoints for files and uploads (#1070) MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Adds spec definitions for file upload operations. This API centers on two high-level operations: * Initiating and managing upload sessions * Accessing uploaded file information Usage examples: To start a file upload session: ``` curl -X POST https://localhost:8321/v1/files \ -d '{ "key": "image123.jpg", "bucket": "images", "mime_type": "image/jpg", "size": 12345 }' # Returns { "id": "{upload_id}", "url": "https://localhost:8321/v1/files/session:{upload_id}", "offset": 0, "size": 12345 } ``` To upload file content to an existing session ``` curl -i -X POST "https://localhost:8321/v1/files/session:{upload_id}" \ --data-binary @{path_to_file} # Returns { "key": "image123.jpg", "bucket": "images", "mime_type": "image/jpg", "bytes": 12345, "created_at": 1737492240 } # Implementing on server side (Flask example for simplicity): @app.route('/uploads/{upload_id}', methods=['POST']) def upload_content_to_session(upload_id): try: # Get the binary file data from the request body file_data = request.data # Save the file to disk save_path = f"./uploads/{upload_id}" with open(save_path, 'wb') as f: f.write(file_data) return {__uploaded_file_json__}, 200 except Exception as e: return 500 ``` To read information about an existing upload session ``` curl -i -X GET "https://localhost:8321/v1/files/session:{upload_id}" # Returns { "id": "{upload_id}", "url": "https://localhost:8321/v1/files/session:{upload_id}", "offset": 1024, "size": 12345 } ``` To list buckets ``` GET /files # Returns { "data": [ {"name": "bucket1"}, {"name": "bucket2"}, ] } ``` To list all files in a bucket ``` GET /files/{bucket} # Returns { "data": [ { "key": "shiba.jpg", "bucket": "dogs", "mime_type": "image/jpg", "bytes": 82334, "created_at": 1737492240, }, { "key": "persian_cat.jpg", "mime_type": "image/jpg", "bucket": "cats", "bytes": 39924, "created_at": 1727493440, }, ] } ``` To get specific file info ``` GET /files/{bucket}/{key} { "key": "shiba.jpg", "bucket": "dogs", "mime_type": "image/jpg", "bytes": 82334,
"created_at": 1737492240, } ``` To delete specific file ``` DELETE /files/{bucket}/{key} { "key": "shiba.jpg", "bucket": "dogs", "mime_type": "image/jpg", "bytes": 82334, "created_at": 1737492240, } ``` --- docs/_static/llama-stack-spec.html | 405 ++++++++++++++++++ docs/_static/llama-stack-spec.yaml | 280 ++++++++++++ docs/openapi_generator/pyopenapi/generator.py | 27 +- .../pyopenapi/specification.py | 2 +- llama_stack/apis/files/__init__.py | 7 + llama_stack/apis/files/files.py | 174 ++++++++ llama_stack/distribution/stack.py | 2 + llama_stack/schema_utils.py | 3 + 8 files changed, 897 insertions(+), 3 deletions(-) create mode 100644 llama_stack/apis/files/__init__.py create mode 100644 llama_stack/apis/files/files.py diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 2b6e1d11c..02d05776d 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -678,6 +678,65 @@ } } }, + "/v1/files": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListBucketResponse" + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "List all buckets.", + "parameters": [ + { + "name": "bucket", + "in": "query", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FileUploadResponse" + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "Create a new upload session for a file identified by a bucket and key.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateUploadSessionRequest" + } + } + }, + "required": true + } + } + }, "/v1/agents/{agent_id}": { "delete": { "responses": { @@ -779,6 +838,84 @@ ] } }, + 
"/v1/files/{bucket}/{key}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FileResponse" + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "Get a file info identified by a bucket and key.", + "parameters": [ + { + "name": "bucket", + "in": "path", + "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "key", + "in": "path", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FileResponse" + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "Delete a file identified by a bucket and key.", + "parameters": [ + { + "name": "bucket", + "in": "path", + "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "key", + "in": "path", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/inference/embeddings": { "post": { "responses": { @@ -1470,6 +1607,91 @@ "parameters": [] } }, + "/v1/files/session:{upload_id}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/FileUploadResponse" + }, + { + "type": "null" + } + ] + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "Returns information about an existsing upload session", + "parameters": [ + { + "name": "upload_id", + "in": "path", + "description": "ID of the upload session", + "required": true, + "schema": { + "type": 
"string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/FileResponse" + }, + { + "type": "null" + } + ] + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "Upload file content to an existing upload session. On the server, request body will have the raw bytes that are uploaded.", + "parameters": [ + { + "name": "upload_id", + "in": "path", + "description": "ID of the upload session", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary" + } + } + }, + "required": true + } + } + }, "/v1/vector-dbs/{vector_db_id}": { "get": { "responses": { @@ -1826,6 +2048,37 @@ } } }, + "/v1/files/{bucket}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListFileResponse" + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "List all files in a bucket.", + "parameters": [ + { + "name": "bucket", + "in": "path", + "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/models": { "get": { "responses": { @@ -5441,6 +5694,105 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "CreateUploadSessionRequest": { + "type": "object", + "properties": { + "bucket": { + "type": "string", + "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)" + }, + "key": { + "type": "string", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)" + }, + "mime_type": { + "type": "string", + "description": "MIME type of the file" + }, + "size": { + "type": "integer", + "description": "File size in bytes" + } + }, + "additionalProperties": false, + 
"required": [ + "bucket", + "key", + "mime_type", + "size" + ], + "title": "CreateUploadSessionRequest" + }, + "FileUploadResponse": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "ID of the upload session" + }, + "url": { + "type": "string", + "description": "Upload URL for the file or file parts" + }, + "offset": { + "type": "integer", + "description": "Upload content offset" + }, + "size": { + "type": "integer", + "description": "Upload content size" + } + }, + "additionalProperties": false, + "required": [ + "id", + "url", + "offset", + "size" + ], + "title": "FileUploadResponse", + "description": "Response after initiating a file upload session." + }, + "FileResponse": { + "type": "object", + "properties": { + "bucket": { + "type": "string", + "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)" + }, + "key": { + "type": "string", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)" + }, + "mime_type": { + "type": "string", + "description": "MIME type of the file" + }, + "url": { + "type": "string", + "description": "Upload URL for the file contents" + }, + "bytes": { + "type": "integer", + "description": "Size of the file in bytes" + }, + "created_at": { + "type": "integer", + "description": "Timestamp of when the file was created" + } + }, + "additionalProperties": false, + "required": [ + "bucket", + "key", + "mime_type", + "url", + "bytes", + "created_at" + ], + "title": "FileResponse", + "description": "Response representing a file entry." 
+ }, "EmbeddingsRequest": { "type": "object", "properties": { @@ -6756,6 +7108,37 @@ ], "title": "ToolInvocationResult" }, + "BucketResponse": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "name" + ], + "title": "BucketResponse" + }, + "ListBucketResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BucketResponse" + }, + "description": "List of FileResponse entries" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListBucketResponse", + "description": "Response representing a list of file entries." + }, "ListDatasetsResponse": { "type": "object", "properties": { @@ -6772,6 +7155,24 @@ ], "title": "ListDatasetsResponse" }, + "ListFileResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FileResponse" + }, + "description": "List of FileResponse entries" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListFileResponse", + "description": "Response representing a list of file entries." + }, "ListModelsResponse": { "type": "object", "properties": { @@ -8543,6 +8944,9 @@ { "name": "Eval" }, + { + "name": "Files (Coming Soon)" + }, { "name": "Inference", "description": "This API provides the raw interface to the underlying models. 
Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", @@ -8598,6 +9002,7 @@ "DatasetIO", "Datasets", "Eval", + "Files (Coming Soon)", "Inference", "Inspect", "Models", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 99300fedf..f79120f1d 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -406,6 +406,43 @@ paths: schema: $ref: '#/components/schemas/CreateAgentTurnRequest' required: true + /v1/files: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListBucketResponse' + tags: + - Files (Coming Soon) + description: List all buckets. + parameters: + - name: bucket + in: query + required: true + schema: + type: string + post: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FileUploadResponse' + tags: + - Files (Coming Soon) + description: >- + Create a new upload session for a file identified by a bucket and key. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateUploadSessionRequest' + required: true /v1/agents/{agent_id}: delete: responses: @@ -468,6 +505,59 @@ paths: required: true schema: type: string + /v1/files/{bucket}/{key}: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FileResponse' + tags: + - Files (Coming Soon) + description: >- + Get a file info identified by a bucket and key. + parameters: + - name: bucket + in: path + description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + required: true + schema: + type: string + - name: key + in: path + description: >- + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) 
+ required: true + schema: + type: string + delete: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FileResponse' + tags: + - Files (Coming Soon) + description: >- + Delete a file identified by a bucket and key. + parameters: + - name: bucket + in: path + description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + required: true + schema: + type: string + - name: key + in: path + description: >- + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + required: true + schema: + type: string /v1/inference/embeddings: post: responses: @@ -875,6 +965,57 @@ paths: - PostTraining (Coming Soon) description: '' parameters: [] + /v1/files/session:{upload_id}: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/FileUploadResponse' + - type: 'null' + tags: + - Files (Coming Soon) + description: >- + Returns information about an existing upload session + parameters: + - name: upload_id + in: path + description: ID of the upload session + required: true + schema: + type: string + post: + responses: + '200': + description: OK + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/FileResponse' + - type: 'null' + tags: + - Files (Coming Soon) + description: >- + Upload file content to an existing upload session. On the server, request + body will have the raw bytes that are uploaded. 
+ parameters: + - name: upload_id + in: path + description: ID of the upload session + required: true + schema: + type: string + requestBody: + content: + application/octet-stream: + schema: + type: string + format: binary + required: true /v1/vector-dbs/{vector_db_id}: get: responses: @@ -1091,6 +1232,25 @@ paths: schema: $ref: '#/components/schemas/RegisterDatasetRequest' required: true + /v1/files/{bucket}: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListFileResponse' + tags: + - Files (Coming Soon) + description: List all files in a bucket. + parameters: + - name: bucket + in: path + description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + required: true + schema: + type: string /v1/models: get: responses: @@ -3508,6 +3668,87 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + CreateUploadSessionRequest: + type: object + properties: + bucket: + type: string + description: >- + Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) + key: + type: string + description: >- + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + mime_type: + type: string + description: MIME type of the file + size: + type: integer + description: File size in bytes + additionalProperties: false + required: + - bucket + - key + - mime_type + - size + title: CreateUploadSessionRequest + FileUploadResponse: + type: object + properties: + id: + type: string + description: ID of the upload session + url: + type: string + description: Upload URL for the file or file parts + offset: + type: integer + description: Upload content offset + size: + type: integer + description: Upload content size + additionalProperties: false + required: + - id + - url + - offset + - size + title: FileUploadResponse + description: >- + Response after initiating a file upload session. 
+ FileResponse: + type: object + properties: + bucket: + type: string + description: >- + Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) + key: + type: string + description: >- + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + mime_type: + type: string + description: MIME type of the file + url: + type: string + description: Upload URL for the file contents + bytes: + type: integer + description: Size of the file in bytes + created_at: + type: integer + description: Timestamp of when the file was created + additionalProperties: false + required: + - bucket + - key + - mime_type + - url + - bytes + - created_at + title: FileResponse + description: Response representing a file entry. EmbeddingsRequest: type: object properties: @@ -4339,6 +4580,29 @@ components: required: - content title: ToolInvocationResult + BucketResponse: + type: object + properties: + name: + type: string + additionalProperties: false + required: + - name + title: BucketResponse + ListBucketResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/BucketResponse' + description: List of FileResponse entries + additionalProperties: false + required: + - data + title: ListBucketResponse + description: >- + Response representing a list of file entries. ListDatasetsResponse: type: object properties: @@ -4350,6 +4614,20 @@ components: required: - data title: ListDatasetsResponse + ListFileResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/FileResponse' + description: List of FileResponse entries + additionalProperties: false + required: + - data + title: ListFileResponse + description: >- + Response representing a list of file entries. 
ListModelsResponse: type: object properties: @@ -5467,6 +5745,7 @@ tags: - name: DatasetIO - name: Datasets - name: Eval + - name: Files (Coming Soon) - name: Inference description: >- This API provides the raw interface to the underlying models. Two kinds of models @@ -5501,6 +5780,7 @@ x-tagGroups: - DatasetIO - Datasets - Eval + - Files (Coming Soon) - Inference - Inspect - Models diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 60cd7a242..4220cfc05 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -477,6 +477,7 @@ class Generator: "SyntheticDataGeneration", "PostTraining", "BatchInference", + "Files", ]: op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)" print(op.defining_class.__name__) @@ -520,8 +521,30 @@ class Generator: # parameters passed anywhere parameters = path_parameters + query_parameters - # data passed in payload - if op.request_params: + webmethod = getattr(op.func_ref, "__webmethod__", None) + raw_bytes_request_body = False + if webmethod: + raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False) + + # data passed in request body as raw bytes cannot have request parameters + if raw_bytes_request_body and op.request_params: + raise ValueError("Cannot have both raw bytes request body and request parameters") + + # data passed in request body as raw bytes + if raw_bytes_request_body: + requestBody = RequestBody( + content={ + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary", + } + } + }, + required=True, + ) + # data passed in payload as JSON and mapped to request parameters + elif op.request_params: builder = ContentBuilder(self.schema_builder) first = next(iter(op.request_params)) request_name, request_type = first diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py index 
9e5363b4a..d3e5a1f19 100644 --- a/docs/openapi_generator/pyopenapi/specification.py +++ b/docs/openapi_generator/pyopenapi/specification.py @@ -78,7 +78,7 @@ class MediaType: @dataclass class RequestBody: - content: Dict[str, MediaType] + content: Dict[str, MediaType | Dict[str, Any]] description: Optional[str] = None required: Optional[bool] = None diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py new file mode 100644 index 000000000..269baf177 --- /dev/null +++ b/llama_stack/apis/files/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .files import * # noqa: F401 F403 diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py new file mode 100644 index 000000000..f17fadc8c --- /dev/null +++ b/llama_stack/apis/files/files.py @@ -0,0 +1,174 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import List, Optional, Protocol, runtime_checkable + +from pydantic import BaseModel + +from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol +from llama_stack.schema_utils import json_schema_type, webmethod + + +@json_schema_type +class FileUploadResponse(BaseModel): + """ + Response after initiating a file upload session. + + :param id: ID of the upload session + :param url: Upload URL for the file or file parts + :param offset: Upload content offset + :param size: Upload content size + """ + + id: str + url: str + offset: int + size: int + + +@json_schema_type +class BucketResponse(BaseModel): + name: str + + +@json_schema_type +class ListBucketResponse(BaseModel): + """ + Response representing a list of file entries. 
+ + :param data: List of BucketResponse entries + """ + + data: List[BucketResponse] + + +@json_schema_type +class FileResponse(BaseModel): + """ + Response representing a file entry. + + :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) + :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + :param mime_type: MIME type of the file + :param url: Upload URL for the file contents + :param bytes: Size of the file in bytes + :param created_at: Timestamp of when the file was created + """ + + bucket: str + key: str + mime_type: str + url: str + bytes: int + created_at: int + + +@json_schema_type +class ListFileResponse(BaseModel): + """ + Response representing a list of file entries. + + :param data: List of FileResponse entries + """ + + data: List[FileResponse] + + +@runtime_checkable +@trace_protocol +class Files(Protocol): + @webmethod(route="/files", method="POST") + async def create_upload_session( + self, + bucket: str, + key: str, + mime_type: str, + size: int, + ) -> FileUploadResponse: + """ + Create a new upload session for a file identified by a bucket and key. + + :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) + :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + :param mime_type: MIME type of the file + :param size: File size in bytes + """ + ... + + @webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True) + async def upload_content_to_session( + self, + upload_id: str, + ) -> Optional[FileResponse]: + """ + Upload file content to an existing upload session. + On the server, request body will have the raw bytes that are uploaded. + + :param upload_id: ID of the upload session + """ + ... 
+ + @webmethod(route="/files/session:{upload_id}", method="GET") + async def get_upload_session_info( + self, + upload_id: str, + ) -> Optional[FileUploadResponse]: + """ + Returns information about an existing upload session + + :param upload_id: ID of the upload session + """ + ... + + @webmethod(route="/files", method="GET") + async def list_all_buckets( + self, + bucket: str, + ) -> ListBucketResponse: + """ + List all buckets. + """ + ... + + @webmethod(route="/files/{bucket}", method="GET") + async def list_files_in_bucket( + self, + bucket: str, + ) -> ListFileResponse: + """ + List all files in a bucket. + + :param bucket: Bucket name (valid chars: a-zA-Z0-9_-) + """ + ... + + @webmethod(route="/files/{bucket}/{key:path}", method="GET") + async def get_file( + self, + bucket: str, + key: str, + ) -> FileResponse: + """ + Get a file info identified by a bucket and key. + + :param bucket: Bucket name (valid chars: a-zA-Z0-9_-) + :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + """ + ... + + @webmethod(route="/files/{bucket}/{key:path}", method="DELETE") + async def delete_file( + self, + bucket: str, + key: str, + ) -> FileResponse: + """ + Delete a file identified by a bucket and key. + + :param bucket: Bucket name (valid chars: a-zA-Z0-9_-) + :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + """ + ... 
diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 9335dc3a9..1328c88ef 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -19,6 +19,7 @@ from llama_stack.apis.benchmarks import Benchmarks from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets from llama_stack.apis.eval import Eval +from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference from llama_stack.apis.inspect import Inspect from llama_stack.apis.models import Models @@ -63,6 +64,7 @@ class LlamaStack( ToolGroups, ToolRuntime, RAGToolRuntime, + Files, ): pass diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py index 56b9e5e4c..581404844 100644 --- a/llama_stack/schema_utils.py +++ b/llama_stack/schema_utils.py @@ -19,6 +19,7 @@ class WebMethod: request_examples: Optional[List[Any]] = None response_examples: Optional[List[Any]] = None method: Optional[str] = None + raw_bytes_request_body: Optional[bool] = False def webmethod( @@ -27,6 +28,7 @@ def webmethod( public: Optional[bool] = False, request_examples: Optional[List[Any]] = None, response_examples: Optional[List[Any]] = None, + raw_bytes_request_body: Optional[bool] = False, ) -> Callable[[T], T]: """ Decorator that supplies additional metadata to an endpoint operation function. 
@@ -44,6 +46,7 @@ def webmethod( public=public or False, request_examples=request_examples, response_examples=response_examples, + raw_bytes_request_body=raw_bytes_request_body, ) return cls From 82109749ea8e9728ec9cc28ae45f7bd43b48e652 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 20 Feb 2025 13:56:47 -0800 Subject: [PATCH 5/7] rename --- llama_stack/apis/agents/agents.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index 2f374b638..b88fb6146 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -194,7 +194,7 @@ class AgentTurnResponseEventType(Enum): turn_start = "turn_start" turn_complete = "turn_complete" - turn_pending = "turn_pending" + turn_awaiting_input = "turn_awaiting_input" @json_schema_type @@ -344,15 +344,20 @@ class Agents(Protocol): ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ... @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages", + route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue", method="POST", ) - async def submit_tool_response_messages( + async def continue_agent_turn( self, agent_id: str, session_id: str, turn_id: str, - tool_response_messages: List[ToolResponseMessage], + new_messages: List[ + Union[ + UserMessage, + ToolResponseMessage, + ] + ], ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ... 
@webmethod( From ff876771023e6d379f6eb95485155028fee27d43 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 20 Feb 2025 14:00:08 -0800 Subject: [PATCH 6/7] rename --- llama_stack/apis/agents/agents.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index b88fb6146..c3301d5ed 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -237,8 +237,10 @@ class AgentTurnResponseTurnCompletePayload(BaseModel): @json_schema_type -class AgentTurnResponseTurnPendingPayload(BaseModel): - event_type: Literal[AgentTurnResponseEventType.turn_pending.value] = AgentTurnResponseEventType.turn_pending.value +class AgentTurnResponseTurnAwaitingInputPayload(BaseModel): + event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input.value] = ( + AgentTurnResponseEventType.turn_awaiting_input.value + ) turn: Turn From a44d23067698e6c92934155947ea11b1c24d084e Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 20 Feb 2025 14:02:17 -0800 Subject: [PATCH 7/7] rename --- docs/_static/llama-stack-spec.html | 411 +++++++++--------- docs/_static/llama-stack-spec.yaml | 271 ++++++------ llama_stack/apis/agents/agents.py | 2 +- .../inline/agents/meta_reference/agents.py | 9 +- 4 files changed, 354 insertions(+), 339 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 9a38ed692..72db82f28 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -554,6 +554,67 @@ } } }, + "/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue": { + "post": { + "responses": { + "200": { + "description": "A single turn in an interaction with an Agentic System. 
**OR** streamed agent turn completion response.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Turn" + } + }, + "text/event-stream": { + "schema": { + "$ref": "#/components/schemas/AgentTurnResponseStreamChunk" + } + } + } + } + }, + "tags": [ + "Agents" + ], + "description": "", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "session_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "turn_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ContinueAgentTurnRequest" + } + } + }, + "required": true + } + } + }, "/v1/agents": { "post": { "responses": { @@ -2811,67 +2872,6 @@ } } }, - "/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages": { - "post": { - "responses": { - "200": { - "description": "A single turn in an interaction with an Agentic System. 
**OR** streamed agent turn completion response.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Turn" - } - }, - "text/event-stream": { - "schema": { - "$ref": "#/components/schemas/AgentTurnResponseStreamChunk" - } - } - } - } - }, - "tags": [ - "Agents" - ], - "description": "", - "parameters": [ - { - "name": "agent_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "session_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "turn_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SubmitToolResponseMessagesRequest" - } - } - }, - "required": true - } - } - }, "/v1/post-training/supervised-fine-tune": { "post": { "responses": { @@ -5044,62 +5044,10 @@ "title": "CompletionResponseStreamChunk", "description": "A chunk of a streamed completion response." 
}, - "CreateAgentRequest": { + "ContinueAgentTurnRequest": { "type": "object", "properties": { - "agent_config": { - "$ref": "#/components/schemas/AgentConfig" - } - }, - "additionalProperties": false, - "required": [ - "agent_config" - ], - "title": "CreateAgentRequest" - }, - "AgentCreateResponse": { - "type": "object", - "properties": { - "agent_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "agent_id" - ], - "title": "AgentCreateResponse" - }, - "CreateAgentSessionRequest": { - "type": "object", - "properties": { - "session_name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "session_name" - ], - "title": "CreateAgentSessionRequest" - }, - "AgentSessionCreateResponse": { - "type": "object", - "properties": { - "session_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "session_id" - ], - "title": "AgentSessionCreateResponse" - }, - "CreateAgentTurnRequest": { - "type": "object", - "properties": { - "messages": { + "new_messages": { "type": "array", "items": { "oneOf": [ @@ -5111,61 +5059,13 @@ } ] } - }, - "stream": { - "type": "boolean" - }, - "documents": { - "type": "array", - "items": { - "type": "object", - "properties": { - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ] - }, - "mime_type": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "content", - "mime_type" - ], - "title": "Document" - } - }, - "toolgroups": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AgentTool" - } - }, - "tool_config": { - "$ref": "#/components/schemas/ToolConfig" } }, "additionalProperties": false, "required": [ - "messages" + "new_messages" ], - "title": "CreateAgentTurnRequest" + "title": 
"ContinueAgentTurnRequest" }, "InferenceStep": { "type": "object", @@ -5547,7 +5447,7 @@ "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload" }, { - "$ref": "#/components/schemas/AgentTurnResponseTurnPendingPayload" + "$ref": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload" } ], "discriminator": { @@ -5558,7 +5458,7 @@ "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload", "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload", "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload", - "turn_pending": "#/components/schemas/AgentTurnResponseTurnPendingPayload" + "turn_awaiting_input": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload" } } }, @@ -5721,6 +5621,25 @@ "title": "AgentTurnResponseStreamChunk", "description": "streamed agent turn completion response." }, + "AgentTurnResponseTurnAwaitingInputPayload": { + "type": "object", + "properties": { + "event_type": { + "type": "string", + "const": "turn_awaiting_input", + "default": "turn_awaiting_input" + }, + "turn": { + "$ref": "#/components/schemas/Turn" + } + }, + "additionalProperties": false, + "required": [ + "event_type", + "turn" + ], + "title": "AgentTurnResponseTurnAwaitingInputPayload" + }, "AgentTurnResponseTurnCompletePayload": { "type": "object", "properties": { @@ -5740,25 +5659,6 @@ ], "title": "AgentTurnResponseTurnCompletePayload" }, - "AgentTurnResponseTurnPendingPayload": { - "type": "object", - "properties": { - "event_type": { - "type": "string", - "const": "turn_pending", - "default": "turn_pending" - }, - "turn": { - "$ref": "#/components/schemas/Turn" - } - }, - "additionalProperties": false, - "required": [ - "event_type", - "turn" - ], - "title": "AgentTurnResponseTurnPendingPayload" - }, "AgentTurnResponseTurnStartPayload": { "type": "object", "properties": { @@ -5778,6 +5678,129 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "CreateAgentRequest": { + "type": "object", + 
"properties": { + "agent_config": { + "$ref": "#/components/schemas/AgentConfig" + } + }, + "additionalProperties": false, + "required": [ + "agent_config" + ], + "title": "CreateAgentRequest" + }, + "AgentCreateResponse": { + "type": "object", + "properties": { + "agent_id": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "agent_id" + ], + "title": "AgentCreateResponse" + }, + "CreateAgentSessionRequest": { + "type": "object", + "properties": { + "session_name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "session_name" + ], + "title": "CreateAgentSessionRequest" + }, + "AgentSessionCreateResponse": { + "type": "object", + "properties": { + "session_id": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "session_id" + ], + "title": "AgentSessionCreateResponse" + }, + "CreateAgentTurnRequest": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/ToolResponseMessage" + } + ] + } + }, + "stream": { + "type": "boolean" + }, + "documents": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/InterleavedContentItem" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/InterleavedContentItem" + } + }, + { + "$ref": "#/components/schemas/URL" + } + ] + }, + "mime_type": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "content", + "mime_type" + ], + "title": "Document" + } + }, + "toolgroups": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentTool" + } + }, + "tool_config": { + "$ref": "#/components/schemas/ToolConfig" + } + }, + "additionalProperties": false, + "required": [ + "messages" + ], + "title": "CreateAgentTurnRequest" + }, 
"CreateUploadSessionRequest": { "type": "object", "properties": { @@ -8724,22 +8747,6 @@ ], "title": "ScoreBatchResponse" }, - "SubmitToolResponseMessagesRequest": { - "type": "object", - "properties": { - "tool_response_messages": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolResponseMessage" - } - } - }, - "additionalProperties": false, - "required": [ - "tool_response_messages" - ], - "title": "SubmitToolResponseMessagesRequest" - }, "AlgorithmConfig": { "oneOf": [ { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index fe94e34ff..efffb3f80 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -329,6 +329,45 @@ paths: schema: $ref: '#/components/schemas/CompletionRequest' required: true + /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue: + post: + responses: + '200': + description: >- + A single turn in an interaction with an Agentic System. **OR** streamed + agent turn completion response. + content: + application/json: + schema: + $ref: '#/components/schemas/Turn' + text/event-stream: + schema: + $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + tags: + - Agents + description: '' + parameters: + - name: agent_id + in: path + required: true + schema: + type: string + - name: session_id + in: path + required: true + schema: + type: string + - name: turn_id + in: path + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ContinueAgentTurnRequest' + required: true /v1/agents: post: responses: @@ -1703,45 +1742,6 @@ paths: schema: $ref: '#/components/schemas/ScoreBatchRequest' required: true - /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages: - post: - responses: - '200': - description: >- - A single turn in an interaction with an Agentic System. **OR** streamed - agent turn completion response. 
- content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - text/event-stream: - schema: - $ref: '#/components/schemas/AgentTurnResponseStreamChunk' - tags: - - Agents - description: '' - parameters: - - name: agent_id - in: path - required: true - schema: - type: string - - name: session_id - in: path - required: true - schema: - type: string - - name: turn_id - in: path - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SubmitToolResponseMessagesRequest' - required: true /v1/post-training/supervised-fine-tune: post: responses: @@ -3240,83 +3240,19 @@ components: title: CompletionResponseStreamChunk description: >- A chunk of a streamed completion response. - CreateAgentRequest: + ContinueAgentTurnRequest: type: object properties: - agent_config: - $ref: '#/components/schemas/AgentConfig' - additionalProperties: false - required: - - agent_config - title: CreateAgentRequest - AgentCreateResponse: - type: object - properties: - agent_id: - type: string - additionalProperties: false - required: - - agent_id - title: AgentCreateResponse - CreateAgentSessionRequest: - type: object - properties: - session_name: - type: string - additionalProperties: false - required: - - session_name - title: CreateAgentSessionRequest - AgentSessionCreateResponse: - type: object - properties: - session_id: - type: string - additionalProperties: false - required: - - session_id - title: AgentSessionCreateResponse - CreateAgentTurnRequest: - type: object - properties: - messages: + new_messages: type: array items: oneOf: - $ref: '#/components/schemas/UserMessage' - $ref: '#/components/schemas/ToolResponseMessage' - stream: - type: boolean - documents: - type: array - items: - type: object - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: 
'#/components/schemas/URL' - mime_type: - type: string - additionalProperties: false - required: - - content - - mime_type - title: Document - toolgroups: - type: array - items: - $ref: '#/components/schemas/AgentTool' - tool_config: - $ref: '#/components/schemas/ToolConfig' additionalProperties: false required: - - messages - title: CreateAgentTurnRequest + - new_messages + title: ContinueAgentTurnRequest InferenceStep: type: object properties: @@ -3565,7 +3501,7 @@ components: - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - - $ref: '#/components/schemas/AgentTurnResponseTurnPendingPayload' + - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' discriminator: propertyName: event_type mapping: @@ -3574,7 +3510,7 @@ components: step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - turn_pending: '#/components/schemas/AgentTurnResponseTurnPendingPayload' + turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload' AgentTurnResponseStepCompletePayload: type: object properties: @@ -3681,6 +3617,21 @@ components: - event title: AgentTurnResponseStreamChunk description: streamed agent turn completion response. 
+ "AgentTurnResponseTurnAwaitingInputPayload": + type: object + properties: + event_type: + type: string + const: turn_awaiting_input + default: turn_awaiting_input + turn: + $ref: '#/components/schemas/Turn' + additionalProperties: false + required: + - event_type + - turn + title: >- + AgentTurnResponseTurnAwaitingInputPayload AgentTurnResponseTurnCompletePayload: type: object properties: @@ -3695,20 +3646,6 @@ components: - event_type - turn title: AgentTurnResponseTurnCompletePayload - AgentTurnResponseTurnPendingPayload: - type: object - properties: - event_type: - type: string - const: turn_pending - default: turn_pending - turn: - $ref: '#/components/schemas/Turn' - additionalProperties: false - required: - - event_type - - turn - title: AgentTurnResponseTurnPendingPayload AgentTurnResponseTurnStartPayload: type: object properties: @@ -3723,6 +3660,83 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + CreateAgentRequest: + type: object + properties: + agent_config: + $ref: '#/components/schemas/AgentConfig' + additionalProperties: false + required: + - agent_config + title: CreateAgentRequest + AgentCreateResponse: + type: object + properties: + agent_id: + type: string + additionalProperties: false + required: + - agent_id + title: AgentCreateResponse + CreateAgentSessionRequest: + type: object + properties: + session_name: + type: string + additionalProperties: false + required: + - session_name + title: CreateAgentSessionRequest + AgentSessionCreateResponse: + type: object + properties: + session_id: + type: string + additionalProperties: false + required: + - session_id + title: AgentSessionCreateResponse + CreateAgentTurnRequest: + type: object + properties: + messages: + type: array + items: + oneOf: + - $ref: '#/components/schemas/UserMessage' + - $ref: '#/components/schemas/ToolResponseMessage' + stream: + type: boolean + documents: + type: array + items: + type: object + properties: + content: + oneOf: + - type: string 
+ - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + mime_type: + type: string + additionalProperties: false + required: + - content + - mime_type + title: Document + toolgroups: + type: array + items: + $ref: '#/components/schemas/AgentTool' + tool_config: + $ref: '#/components/schemas/ToolConfig' + additionalProperties: false + required: + - messages + title: CreateAgentTurnRequest CreateUploadSessionRequest: type: object properties: @@ -5606,17 +5620,6 @@ components: required: - results title: ScoreBatchResponse - SubmitToolResponseMessagesRequest: - type: object - properties: - tool_response_messages: - type: array - items: - $ref: '#/components/schemas/ToolResponseMessage' - additionalProperties: false - required: - - tool_response_messages - title: SubmitToolResponseMessagesRequest AlgorithmConfig: oneOf: - $ref: '#/components/schemas/LoraFinetuningConfig' diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index c3301d5ed..05ddf587a 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -252,7 +252,7 @@ AgentTurnResponseEventPayload = register_schema( AgentTurnResponseStepCompletePayload, AgentTurnResponseTurnStartPayload, AgentTurnResponseTurnCompletePayload, - AgentTurnResponseTurnPendingPayload, + AgentTurnResponseTurnAwaitingInputPayload, ], Field(discriminator="event_type"), ], diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index c208388be..cb8192968 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -169,12 +169,17 @@ class MetaReferenceAgentsImpl(Agents): async for event in agent.create_and_execute_turn(request): yield event - async def submit_tool_response_messages( + async def 
continue_agent_turn( self, agent_id: str, session_id: str, turn_id: str, - tool_response_messages: List[ToolResponseMessage], + new_messages: List[ + Union[ + UserMessage, + ToolResponseMessage, + ] + ], ) -> AsyncGenerator: pass