From 3d891fc9ba764baf50fbb7d4ecc194a3a7b680ba Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 20 Feb 2025 11:21:13 -0800
Subject: [PATCH 1/4] ModelAlias cleanup

---
 .../providers/utils/inference/model_registry.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py
index c5f6cd6b5..5cb785843 100644
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -4,9 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from collections import namedtuple
 from typing import List, Optional
 
+from pydantic import BaseModel, Field
+
 from llama_stack.apis.models.models import ModelType
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
@@ -14,7 +15,14 @@ from llama_stack.providers.utils.inference import (
     ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
 )
 
-ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
+
+# TODO: this class is more confusing than useful right now. We need to make it
+# closer to the Model class.
+class ModelAlias(BaseModel):
+    provider_model_id: str
+    aliases: List[str] = Field(default_factory=list)
+    llama_model: Optional[str] = None
+    model_type: ModelType = ModelType.llm
 
 
 def get_huggingface_repo(model_descriptor: str) -> Optional[str]:

From 2eda050aef2e33272be08e41f9f9adea76777d28 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 20 Feb 2025 11:46:02 -0800
Subject: [PATCH 2/4] Fix ollama fixture

---
 llama_stack/providers/tests/inference/fixtures.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llama_stack/providers/tests/inference/fixtures.py b/llama_stack/providers/tests/inference/fixtures.py
index 2a782befc..ec4e094c9 100644
--- a/llama_stack/providers/tests/inference/fixtures.py
+++ b/llama_stack/providers/tests/inference/fixtures.py
@@ -83,17 +83,13 @@ def inference_cerebras() -> ProviderFixture:
 
 
 @pytest.fixture(scope="session")
-def inference_ollama(inference_model) -> ProviderFixture:
-    inference_model = [inference_model] if isinstance(inference_model, str) else inference_model
-    if inference_model and "Llama3.1-8B-Instruct" in inference_model:
-        pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing")
-
+def inference_ollama() -> ProviderFixture:
     return ProviderFixture(
         providers=[
             Provider(
                 provider_id="ollama",
                 provider_type="remote::ollama",
-                config=OllamaImplConfig(host="localhost", port=os.getenv("OLLAMA_PORT", 11434)).model_dump(),
+                config=OllamaImplConfig(url=get_env_or_fail("OLLAMA_URL")).model_dump(),
             )
         ],
     )

From eddef0b2aea8bd38e18ea11175c42214cc702928 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 20 Feb 2025 11:48:46 -0800
Subject: [PATCH 3/4] chore: slight renaming of model alias stuff (#1181)

Quick test by running:

```
LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk
```
---
 .../inference/meta_reference/inference.py     |  4 +--
 .../remote/inference/bedrock/models.py        |  8 ++---
 .../remote/inference/cerebras/models.py       |  6 ++--
 .../remote/inference/databricks/databricks.py |  6 ++--
 .../remote/inference/fireworks/models.py      | 22 ++++++------
 .../providers/remote/inference/groq/groq.py   | 12 +++----
 .../remote/inference/nvidia/models.py         | 20 +++++------
.../remote/inference/ollama/ollama.py | 36 +++++++++---------- .../remote/inference/sambanova/models.py | 20 +++++------ .../providers/remote/inference/tgi/tgi.py | 8 ++--- .../remote/inference/together/models.py | 20 +++++------ .../providers/remote/inference/vllm/vllm.py | 8 ++--- .../utils/inference/model_registry.py | 4 +-- 13 files changed, 87 insertions(+), 87 deletions(-) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index c79f97def..dfd27d408 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import ( ) from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.prompt_adapter import ( augment_content_with_response_format_prompt, @@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl( self.model_registry_helper = ModelRegistryHelper( [ - build_model_alias( + build_hf_repo_model_alias( llama_model.descriptor(), llama_model.core_model_id.value, ) diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py index b629e05d5..4c5248619 100644 --- a/llama_stack/providers/remote/inference/bedrock/models.py +++ b/llama_stack/providers/remote/inference/bedrock/models.py @@ -6,19 +6,19 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-8b-instruct-v1:0", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-70b-instruct-v1:0", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-405b-instruct-v1:0", CoreModelId.llama3_1_405b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py index 03ffeb492..53b0d5b55 100644 --- a/llama_stack/providers/remote/inference/cerebras/models.py +++ b/llama_stack/providers/remote/inference/cerebras/models.py @@ -6,15 +6,15 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( "llama3.1-8b", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-3.3-70b", CoreModelId.llama3_3_70b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py index 05e61361c..03da4d129 100644 --- a/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/llama_stack/providers/remote/inference/databricks/databricks.py @@ -25,7 +25,7 @@ from llama_stack.apis.inference import ( from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, @@ 
-39,11 +39,11 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import DatabricksImplConfig model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( "databricks-meta-llama-3-1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "databricks-meta-llama-3-1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 14de585d4..8ba67c9ff 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -6,47 +6,47 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-8b-instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-1b-instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-3b-instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-11b-vision-instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p3-70b-instruct", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-guard-3-8b", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-guard-3-11b-vision", CoreModelId.llama_guard_3_11b_vision.value, ), diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 441b6af5c..12ee613fe 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, + build_hf_repo_model_alias, build_model_alias, - build_model_alias_with_just_provider_model_id, ) from .groq_utils import ( @@ -42,19 +42,19 @@ from .groq_utils import ( ) _MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "llama3-8b-8192", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama-3.1-8b-instant", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3-70b-8192", CoreModelId.llama3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-3.3-70b-versatile", 
CoreModelId.llama3_3_70b_instruct.value, ), @@ -62,7 +62,7 @@ _MODEL_ALIASES = [ # Preview models aren't recommended for production use, but we include this one # to pass the test fixture # TODO(aidand): Replace this with a stable model once Groq supports it - build_model_alias( + build_hf_repo_model_alias( "llama-3.2-3b-preview", CoreModelId.llama3_2_3b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py index 1d9b575d4..6a359e009 100644 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) _MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "meta/llama3-8b-instruct", CoreModelId.llama3_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama3-70b-instruct", CoreModelId.llama3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-8b-instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-1b-instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-3b-instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-11b-vision-instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 2488d9071..287f025e0 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, + build_hf_repo_model_alias, build_model_alias, - build_model_alias_with_just_provider_model_id, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, @@ -59,73 +59,73 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( log = logging.getLogger(__name__) model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( "llama3.1:8b-instruct-fp16", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.1:8b", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.1:70b-instruct-fp16", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.1:70b", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.1:405b-instruct-fp16", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + 
build_model_alias( "llama3.1:405b", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2:1b-instruct-fp16", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2:1b", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2:3b-instruct-fp16", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2:3b", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2-vision:11b-instruct-fp16", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2-vision:latest", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2-vision:90b-instruct-fp16", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2-vision:90b", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.3:70b", CoreModelId.llama3_3_70b_instruct.value, ), # The Llama Guard models don't have their full fp16 versions # so we are going to alias their default version to the canonical SKU - build_model_alias( + build_hf_repo_model_alias( "llama-guard3:8b", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-guard3:1b", CoreModelId.llama_guard_3_1b.value, ), diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py index 27a4a149e..1e002c81d 100644 --- a/llama_stack/providers/remote/inference/sambanova/models.py +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-8B-Instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-70B-Instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-405B-Instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.2-1B-Instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.2-3B-Instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.3-70B-Instruct", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Llama-3.2-11B-Vision-Instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Llama-3.2-90B-Vision-Instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-Guard-3-8B", CoreModelId.llama_guard_3_8b.value, ), diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 7ffeced95..cd2311a48 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -32,7 +32,7 @@ from 
llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, @@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl log = logging.getLogger(__name__) -def build_model_aliases(): +def build_hf_repo_model_aliases(): return [ - build_model_alias( + build_hf_repo_model_alias( model.huggingface_repo, model.descriptor(), ) @@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): model_id: str def __init__(self) -> None: - self.register_helper = ModelRegistryHelper(build_model_aliases()) + self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases()) self.huggingface_repo_to_llama_model_id = { model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo } diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index 87d282ea5..87904c47b 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-3B-Instruct-Turbo", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.3-70B-Instruct-Turbo", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-Guard-3-8B", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-Guard-3-11B-Vision-Turbo", CoreModelId.llama_guard_3_11b_vision.value, ), diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 220bf4bde..75dc432e4 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionResponse, @@ -62,9 +62,9 @@ from .config import 
VLLMInferenceAdapterConfig log = logging.getLogger(__name__) -def build_model_aliases(): +def build_hf_repo_model_aliases(): return [ - build_model_alias( + build_hf_repo_model_alias( model.huggingface_repo, model.descriptor(), ) @@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response( class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): def __init__(self, config: VLLMInferenceAdapterConfig) -> None: - self.register_helper = ModelRegistryHelper(build_model_aliases()) + self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases()) self.config = config self.client = None diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index 5cb785843..e14a733d1 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -32,7 +32,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]: return None -def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: +def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: return ModelAlias( provider_model_id=provider_model_id, aliases=[ @@ -42,7 +42,7 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli ) -def build_model_alias_with_just_provider_model_id(provider_model_id: str, model_descriptor: str) -> ModelAlias: +def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: return ModelAlias( provider_model_id=provider_model_id, aliases=[], From f7161611c66913f4c0e1ac9f67dfae28f413af5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladimir=20Ivi=C4=87?= Date: Thu, 20 Feb 2025 13:09:00 -0800 Subject: [PATCH 4/4] feat: adding endpoints for files and uploads (#1070) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Adds spec definitions for file uploads operations. 
This API focuses on two high-level operations:
* Initiating and managing upload sessions
* Accessing uploaded file information

Usage examples:

To start a file upload session:
```
curl -X POST https://localhost:8321/v1/files \
  -d '{
    "key": "image123.jpg",
    "bucket": "images",
    "mime_type": "image/jpg",
    "size": 12345
  }'

# Returns
{
  "id": "<upload_id>",
  "url": "https://localhost:8321/v1/files/session:<upload_id>",
  "offset": 0,
  "size": 12345
}
```

To upload file content to an existing session:
```
curl -i -X POST "https://localhost:8321/v1/files/session:<upload_id>" \
  --data-binary @<path_to_file>

# Returns
{
  "key": "image123.jpg",
  "bucket": "images",
  "mime_type": "image/jpg",
  "bytes": 12345,
  "created_at": 1737492240
}

# Implementing on server side (Flask example for simplicity):
from flask import Flask, request

app = Flask(__name__)

@app.route('/uploads/<upload_id>', methods=['POST'])
def upload_content_to_session(upload_id):
    try:
        # Get the binary file data from the request body
        file_data = request.data

        # Save the file to disk
        save_path = f"./uploads/{upload_id}"
        with open(save_path, 'wb') as f:
            f.write(file_data)

        return {__uploaded_file_json__}, 200
    except Exception as e:
        return str(e), 500
```

To read information about an existing upload session:
```
curl -i -X GET "https://localhost:8321/v1/files/session:<upload_id>"

# Returns
{
  "id": "<upload_id>",
  "url": "https://localhost:8321/v1/files/session:<upload_id>",
  "offset": 1024,
  "size": 12345
}
```

To list buckets:
```
GET /files

# Returns
{
  "data": [
    {"name": "bucket1"},
    {"name": "bucket2"}
  ]
}
```

To list all files in a bucket:
```
GET /files/{bucket}

# Returns
{
  "data": [
    {
      "key": "shiba.jpg",
      "bucket": "dogs",
      "mime_type": "image/jpg",
      "bytes": 82334,
      "created_at": 1737492240
    },
    {
      "key": "persian_cat.jpg",
      "mime_type": "image/jpg",
      "bucket": "cats",
      "bytes": 39924,
      "created_at": 1727493440
    }
  ]
}
```

To get specific file info:
```
GET /files/{bucket}/{key}

{
  "key": "shiba.jpg",
  "bucket": "dogs",
  "mime_type": "image/jpg",
  "bytes": 82334,
  "created_at": 1737492240
}
```

To delete a specific file:
```
DELETE /files/{bucket}/{key}

{
  "key": "shiba.jpg",
  "bucket": "dogs",
  "mime_type": "image/jpg",
  "bytes": 82334,
  "created_at": 1737492240
}
```
---
 docs/_static/llama-stack-spec.html            | 405 ++++++++++++++++++
 docs/_static/llama-stack-spec.yaml            | 280 ++++++++++++
 docs/openapi_generator/pyopenapi/generator.py |  27 +-
 .../pyopenapi/specification.py                |   2 +-
 llama_stack/apis/files/__init__.py            |   7 +
 llama_stack/apis/files/files.py               | 174 ++++++++
 llama_stack/distribution/stack.py             |   2 +
 llama_stack/schema_utils.py                   |   3 +
 8 files changed, 897 insertions(+), 3 deletions(-)
 create mode 100644 llama_stack/apis/files/__init__.py
 create mode 100644 llama_stack/apis/files/files.py

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 2b6e1d11c..02d05776d 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -678,6 +678,65 @@
                 }
             }
         },
+        "/v1/files": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListBucketResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "List all buckets.",
+                "parameters": [
+                    {
+                        "name": "bucket",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/FileUploadResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Create a new upload session for a file identified by a bucket and key.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CreateUploadSessionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/agents/{agent_id}": {
             "delete": {
                 "responses": {
@@ -779,6 +838,84 @@
                 ]
             }
         },
+        "/v1/files/{bucket}/{key}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/FileResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Get a file info identified by a bucket and key.",
+                "parameters": [
+                    {
+                        "name": "bucket",
+                        "in": "path",
+                        "description": "Bucket name (valid chars: a-zA-Z0-9_-)",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "key",
+                        "in": "path",
+                        "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "delete": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/FileResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Delete a file identified by a bucket and key.",
+                "parameters": [
+                    {
+                        "name": "bucket",
+                        "in": "path",
+                        "description": "Bucket name (valid chars: a-zA-Z0-9_-)",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "key",
+                        "in": "path",
+                        "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/inference/embeddings": {
             "post": {
                 "responses": {
@@ -1470,6 +1607,91 @@
                 "parameters": []
             }
         },
+        "/v1/files/session:{upload_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/FileUploadResponse"
+                                        },
+                                        {
+                                            "type": "null"
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Returns information about an existing upload session.",
+                "parameters": [
+                    {
+                        "name": "upload_id",
+                        "in": "path",
+                        "description": "ID of the upload session",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/FileResponse"
+                                        },
+                                        {
+                                            "type": "null"
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Upload file content to an existing upload session.
On the server, request body will have the raw bytes that are uploaded.", + "parameters": [ + { + "name": "upload_id", + "in": "path", + "description": "ID of the upload session", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary" + } + } + }, + "required": true + } + } + }, "/v1/vector-dbs/{vector_db_id}": { "get": { "responses": { @@ -1826,6 +2048,37 @@ } } }, + "/v1/files/{bucket}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListFileResponse" + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "List all files in a bucket.", + "parameters": [ + { + "name": "bucket", + "in": "path", + "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/models": { "get": { "responses": { @@ -5441,6 +5694,105 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "CreateUploadSessionRequest": { + "type": "object", + "properties": { + "bucket": { + "type": "string", + "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)" + }, + "key": { + "type": "string", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)" + }, + "mime_type": { + "type": "string", + "description": "MIME type of the file" + }, + "size": { + "type": "integer", + "description": "File size in bytes" + } + }, + "additionalProperties": false, + "required": [ + "bucket", + "key", + "mime_type", + "size" + ], + "title": "CreateUploadSessionRequest" + }, + "FileUploadResponse": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "ID of the upload session" + }, + "url": { + "type": "string", + "description": "Upload URL for the file or file parts" + }, + "offset": { + "type": "integer", + "description": "Upload content offset" + }, + "size": { + "type": "integer", + "description": "Upload content size" + } + }, + "additionalProperties": false, + "required": [ + "id", + "url", + "offset", + "size" + ], + "title": "FileUploadResponse", + "description": "Response after initiating a file upload session." + }, + "FileResponse": { + "type": "object", + "properties": { + "bucket": { + "type": "string", + "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)" + }, + "key": { + "type": "string", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)" + }, + "mime_type": { + "type": "string", + "description": "MIME type of the file" + }, + "url": { + "type": "string", + "description": "Upload URL for the file contents" + }, + "bytes": { + "type": "integer", + "description": "Size of the file in bytes" + }, + "created_at": { + "type": "integer", + "description": "Timestamp of when the file was created" + } + }, + "additionalProperties": false, + "required": [ + "bucket", + "key", + "mime_type", + "url", + "bytes", + "created_at" + ], + "title": "FileResponse", + "description": "Response representing a file entry." 
+ }, "EmbeddingsRequest": { "type": "object", "properties": { @@ -6756,6 +7108,37 @@ ], "title": "ToolInvocationResult" }, + "BucketResponse": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "name" + ], + "title": "BucketResponse" + }, + "ListBucketResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BucketResponse" + }, + "description": "List of FileResponse entries" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListBucketResponse", + "description": "Response representing a list of file entries." + }, "ListDatasetsResponse": { "type": "object", "properties": { @@ -6772,6 +7155,24 @@ ], "title": "ListDatasetsResponse" }, + "ListFileResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FileResponse" + }, + "description": "List of FileResponse entries" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListFileResponse", + "description": "Response representing a list of file entries." + }, "ListModelsResponse": { "type": "object", "properties": { @@ -8543,6 +8944,9 @@ { "name": "Eval" }, + { + "name": "Files (Coming Soon)" + }, { "name": "Inference", "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", @@ -8598,6 +9002,7 @@ "DatasetIO", "Datasets", "Eval", + "Files (Coming Soon)", "Inference", "Inspect", "Models", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 99300fedf..f79120f1d 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -406,6 +406,43 @@ paths: schema: $ref: '#/components/schemas/CreateAgentTurnRequest' required: true + /v1/files: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListBucketResponse' + tags: + - Files (Coming Soon) + description: List all buckets. + parameters: + - name: bucket + in: query + required: true + schema: + type: string + post: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FileUploadResponse' + tags: + - Files (Coming Soon) + description: >- + Create a new upload session for a file identified by a bucket and key. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateUploadSessionRequest' + required: true /v1/agents/{agent_id}: delete: responses: @@ -468,6 +505,59 @@ paths: required: true schema: type: string + /v1/files/{bucket}/{key}: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FileResponse' + tags: + - Files (Coming Soon) + description: >- + Get a file info identified by a bucket and key. + parameters: + - name: bucket + in: path + description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + required: true + schema: + type: string + - name: key + in: path + description: >- + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) 
+          required: true
+          schema:
+            type: string
+    delete:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/FileResponse'
+      tags:
+        - Files (Coming Soon)
+      description: >-
+        Delete a file identified by a bucket and key.
+      parameters:
+        - name: bucket
+          in: path
+          description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
+          required: true
+          schema:
+            type: string
+        - name: key
+          in: path
+          description: >-
+            Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+          required: true
+          schema:
+            type: string
   /v1/inference/embeddings:
     post:
       responses:
@@ -875,6 +965,57 @@ paths:
         - PostTraining (Coming Soon)
       description: ''
       parameters: []
+  /v1/files/session:{upload_id}:
+    get:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                oneOf:
+                  - $ref: '#/components/schemas/FileUploadResponse'
+                  - type: 'null'
+      tags:
+        - Files (Coming Soon)
+      description: >-
+        Returns information about an existing upload session.
+      parameters:
+        - name: upload_id
+          in: path
+          description: ID of the upload session
+          required: true
+          schema:
+            type: string
+    post:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                oneOf:
+                  - $ref: '#/components/schemas/FileResponse'
+                  - type: 'null'
+      tags:
+        - Files (Coming Soon)
+      description: >-
+        Upload file content to an existing upload session. On the server, request
+        body will have the raw bytes that are uploaded.
+      parameters:
+        - name: upload_id
+          in: path
+          description: ID of the upload session
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+        required: true
   /v1/vector-dbs/{vector_db_id}:
     get:
       responses:
@@ -1091,6 +1232,25 @@ paths:
             schema:
               $ref: '#/components/schemas/RegisterDatasetRequest'
         required: true
+  /v1/files/{bucket}:
+    get:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListFileResponse'
+      tags:
+        - Files (Coming Soon)
+      description: List all files in a bucket.
+      parameters:
+        - name: bucket
+          in: path
+          description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
+          required: true
+          schema:
+            type: string
   /v1/models:
     get:
       responses:
@@ -3508,6 +3668,87 @@ components:
        - event_type
        - turn_id
      title: AgentTurnResponseTurnStartPayload
+    CreateUploadSessionRequest:
+      type: object
+      properties:
+        bucket:
+          type: string
+          description: >-
+            Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+        key:
+          type: string
+          description: >-
+            Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+        mime_type:
+          type: string
+          description: MIME type of the file
+        size:
+          type: integer
+          description: File size in bytes
+      additionalProperties: false
+      required:
+        - bucket
+        - key
+        - mime_type
+        - size
+      title: CreateUploadSessionRequest
+    FileUploadResponse:
+      type: object
+      properties:
+        id:
+          type: string
+          description: ID of the upload session
+        url:
+          type: string
+          description: Upload URL for the file or file parts
+        offset:
+          type: integer
+          description: Upload content offset
+        size:
+          type: integer
+          description: Upload content size
+      additionalProperties: false
+      required:
+        - id
+        - url
+        - offset
+        - size
+      title: FileUploadResponse
+      description: >-
+        Response after initiating a file upload session.
+ FileResponse: + type: object + properties: + bucket: + type: string + description: >- + Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) + key: + type: string + description: >- + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + mime_type: + type: string + description: MIME type of the file + url: + type: string + description: Upload URL for the file contents + bytes: + type: integer + description: Size of the file in bytes + created_at: + type: integer + description: Timestamp of when the file was created + additionalProperties: false + required: + - bucket + - key + - mime_type + - url + - bytes + - created_at + title: FileResponse + description: Response representing a file entry. EmbeddingsRequest: type: object properties: @@ -4339,6 +4580,29 @@ components: required: - content title: ToolInvocationResult + BucketResponse: + type: object + properties: + name: + type: string + additionalProperties: false + required: + - name + title: BucketResponse + ListBucketResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/BucketResponse' + description: List of FileResponse entries + additionalProperties: false + required: + - data + title: ListBucketResponse + description: >- + Response representing a list of file entries. ListDatasetsResponse: type: object properties: @@ -4350,6 +4614,20 @@ components: required: - data title: ListDatasetsResponse + ListFileResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/FileResponse' + description: List of FileResponse entries + additionalProperties: false + required: + - data + title: ListFileResponse + description: >- + Response representing a list of file entries. ListModelsResponse: type: object properties: @@ -5467,6 +5745,7 @@ tags: - name: DatasetIO - name: Datasets - name: Eval + - name: Files (Coming Soon) - name: Inference description: >- This API provides the raw interface to the underlying models. 
Two kinds of models @@ -5501,6 +5780,7 @@ x-tagGroups: - DatasetIO - Datasets - Eval + - Files (Coming Soon) - Inference - Inspect - Models diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 60cd7a242..4220cfc05 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -477,6 +477,7 @@ class Generator: "SyntheticDataGeneration", "PostTraining", "BatchInference", + "Files", ]: op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)" print(op.defining_class.__name__) @@ -520,8 +521,30 @@ class Generator: # parameters passed anywhere parameters = path_parameters + query_parameters - # data passed in payload - if op.request_params: + webmethod = getattr(op.func_ref, "__webmethod__", None) + raw_bytes_request_body = False + if webmethod: + raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False) + + # data passed in request body as raw bytes cannot have request parameters + if raw_bytes_request_body and op.request_params: + raise ValueError("Cannot have both raw bytes request body and request parameters") + + # data passed in request body as raw bytes + if raw_bytes_request_body: + requestBody = RequestBody( + content={ + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary", + } + } + }, + required=True, + ) + # data passed in payload as JSON and mapped to request parameters + elif op.request_params: builder = ContentBuilder(self.schema_builder) first = next(iter(op.request_params)) request_name, request_type = first diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py index 9e5363b4a..d3e5a1f19 100644 --- a/docs/openapi_generator/pyopenapi/specification.py +++ b/docs/openapi_generator/pyopenapi/specification.py @@ -78,7 +78,7 @@ class MediaType: @dataclass class RequestBody: - content: Dict[str, MediaType] + content: Dict[str, MediaType | Dict[str, Any]] description: Optional[str] = None required: Optional[bool] = None diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py new file mode 100644 index 000000000..269baf177 --- /dev/null +++ b/llama_stack/apis/files/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .files import * # noqa: F401 F403 diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py new file mode 100644 index 000000000..f17fadc8c --- /dev/null +++ b/llama_stack/apis/files/files.py @@ -0,0 +1,174 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import List, Optional, Protocol, runtime_checkable + +from pydantic import BaseModel + +from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol +from llama_stack.schema_utils import json_schema_type, webmethod + + +@json_schema_type +class FileUploadResponse(BaseModel): + """ + Response after initiating a file upload session. 
+
+    :param id: ID of the upload session
+    :param url: Upload URL for the file or file parts
+    :param offset: Upload content offset
+    :param size: Upload content size
+    """
+
+    id: str
+    url: str
+    offset: int
+    size: int
+
+
+@json_schema_type
+class BucketResponse(BaseModel):
+    name: str
+
+
+@json_schema_type
+class ListBucketResponse(BaseModel):
+    """
+    Response representing a list of file entries.
+
+    :param data: List of FileResponse entries
+    """
+
+    data: List[BucketResponse]
+
+
+@json_schema_type
+class FileResponse(BaseModel):
+    """
+    Response representing a file entry.
+
+    :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+    :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+    :param mime_type: MIME type of the file
+    :param url: Upload URL for the file contents
+    :param bytes: Size of the file in bytes
+    :param created_at: Timestamp of when the file was created
+    """
+
+    bucket: str
+    key: str
+    mime_type: str
+    url: str
+    bytes: int
+    created_at: int
+
+
+@json_schema_type
+class ListFileResponse(BaseModel):
+    """
+    Response representing a list of file entries.
+
+    :param data: List of FileResponse entries
+    """
+
+    data: List[FileResponse]
+
+
+@runtime_checkable
+@trace_protocol
+class Files(Protocol):
+    @webmethod(route="/files", method="POST")
+    async def create_upload_session(
+        self,
+        bucket: str,
+        key: str,
+        mime_type: str,
+        size: int,
+    ) -> FileUploadResponse:
+        """
+        Create a new upload session for a file identified by a bucket and key.
+
+        :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+        :param mime_type: MIME type of the file
+        :param size: File size in bytes
+        """
+        ...
+
+    @webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
+    async def upload_content_to_session(
+        self,
+        upload_id: str,
+    ) -> Optional[FileResponse]:
+        """
+        Upload file content to an existing upload session.
+        On the server, request body will have the raw bytes that are uploaded.
+
+        :param upload_id: ID of the upload session
+        """
+        ...
+
+    @webmethod(route="/files/session:{upload_id}", method="GET")
+    async def get_upload_session_info(
+        self,
+        upload_id: str,
+    ) -> Optional[FileUploadResponse]:
+        """
+        Returns information about an existing upload session.
+
+        :param upload_id: ID of the upload session
+        """
+        ...
+
+    @webmethod(route="/files", method="GET")
+    async def list_all_buckets(
+        self,
+        bucket: str,
+    ) -> ListBucketResponse:
+        """
+        List all buckets.
+        """
+        ...
+
+    @webmethod(route="/files/{bucket}", method="GET")
+    async def list_files_in_bucket(
+        self,
+        bucket: str,
+    ) -> ListFileResponse:
+        """
+        List all files in a bucket.
+
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+        """
+        ...
+
+    @webmethod(route="/files/{bucket}/{key:path}", method="GET")
+    async def get_file(
+        self,
+        bucket: str,
+        key: str,
+    ) -> FileResponse:
+        """
+        Get a file info identified by a bucket and key.
+
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+        """
+        ...
+
+    @webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
+    async def delete_file(
+        self,
+        bucket: str,
+        key: str,
+    ) -> FileResponse:
+        """
+        Delete a file identified by a bucket and key.
+
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ """ + ... diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 9335dc3a9..1328c88ef 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -19,6 +19,7 @@ from llama_stack.apis.benchmarks import Benchmarks from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets from llama_stack.apis.eval import Eval +from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference from llama_stack.apis.inspect import Inspect from llama_stack.apis.models import Models @@ -63,6 +64,7 @@ class LlamaStack( ToolGroups, ToolRuntime, RAGToolRuntime, + Files, ): pass diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py index 56b9e5e4c..581404844 100644 --- a/llama_stack/schema_utils.py +++ b/llama_stack/schema_utils.py @@ -19,6 +19,7 @@ class WebMethod: request_examples: Optional[List[Any]] = None response_examples: Optional[List[Any]] = None method: Optional[str] = None + raw_bytes_request_body: Optional[bool] = False def webmethod( @@ -27,6 +28,7 @@ def webmethod( public: Optional[bool] = False, request_examples: Optional[List[Any]] = None, response_examples: Optional[List[Any]] = None, + raw_bytes_request_body: Optional[bool] = False, ) -> Callable[[T], T]: """ Decorator that supplies additional metadata to an endpoint operation function. @@ -44,6 +46,7 @@ def webmethod( public=public or False, request_examples=request_examples, response_examples=response_examples, + raw_bytes_request_body=raw_bytes_request_body, ) return cls