From 3d891fc9ba764baf50fbb7d4ecc194a3a7b680ba Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 20 Feb 2025 11:21:13 -0800
Subject: [PATCH 1/4] ModelAlias cleanup

---
 .../providers/utils/inference/model_registry.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py
index c5f6cd6b5..5cb785843 100644
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -4,9 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from collections import namedtuple
 from typing import List, Optional
 
+from pydantic import BaseModel, Field
+
 from llama_stack.apis.models.models import ModelType
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
@@ -14,7 +15,14 @@ from llama_stack.providers.utils.inference import (
     ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
 )
 
-ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
+
+# TODO: this class is more confusing than useful right now. We need to make it
+# closer to the Model class.
+class ModelAlias(BaseModel):
+    provider_model_id: str
+    aliases: List[str] = Field(default_factory=list)
+    llama_model: Optional[str] = None
+    model_type: ModelType = ModelType.llm
 
 
 def get_huggingface_repo(model_descriptor: str) -> Optional[str]:

From 2eda050aef2e33272be08e41f9f9adea76777d28 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 20 Feb 2025 11:46:02 -0800
Subject: [PATCH 2/4] Fix ollama fixture

---
 llama_stack/providers/tests/inference/fixtures.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llama_stack/providers/tests/inference/fixtures.py b/llama_stack/providers/tests/inference/fixtures.py
index 2a782befc..ec4e094c9 100644
--- a/llama_stack/providers/tests/inference/fixtures.py
+++ b/llama_stack/providers/tests/inference/fixtures.py
@@ -83,17 +83,13 @@ def inference_cerebras() -> ProviderFixture:
 
 
 @pytest.fixture(scope="session")
-def inference_ollama(inference_model) -> ProviderFixture:
-    inference_model = [inference_model] if isinstance(inference_model, str) else inference_model
-    if inference_model and "Llama3.1-8B-Instruct" in inference_model:
-        pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing")
-
+def inference_ollama() -> ProviderFixture:
     return ProviderFixture(
         providers=[
             Provider(
                 provider_id="ollama",
                 provider_type="remote::ollama",
-                config=OllamaImplConfig(host="localhost", port=os.getenv("OLLAMA_PORT", 11434)).model_dump(),
+                config=OllamaImplConfig(url=get_env_or_fail("OLLAMA_URL")).model_dump(),
             )
         ],
     )

From eddef0b2aea8bd38e18ea11175c42214cc702928 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 20 Feb 2025 11:48:46 -0800
Subject: [PATCH 3/4] chore: slight renaming of model alias stuff (#1181)

Quick test by running:

```
LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk
```
---
 .../inference/meta_reference/inference.py     |  4 +--
 .../remote/inference/bedrock/models.py        |  8 ++---
 .../remote/inference/cerebras/models.py       |  6 ++--
 .../remote/inference/databricks/databricks.py |  6 ++--
 .../remote/inference/fireworks/models.py      | 22 ++++++------
 .../providers/remote/inference/groq/groq.py   | 12 +++----
 .../remote/inference/nvidia/models.py         | 20 +++++------
.../remote/inference/ollama/ollama.py | 36 +++++++++---------- .../remote/inference/sambanova/models.py | 20 +++++------ .../providers/remote/inference/tgi/tgi.py | 8 ++--- .../remote/inference/together/models.py | 20 +++++------ .../providers/remote/inference/vllm/vllm.py | 8 ++--- .../utils/inference/model_registry.py | 4 +-- 13 files changed, 87 insertions(+), 87 deletions(-) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index c79f97def..dfd27d408 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import ( ) from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.prompt_adapter import ( augment_content_with_response_format_prompt, @@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl( self.model_registry_helper = ModelRegistryHelper( [ - build_model_alias( + build_hf_repo_model_alias( llama_model.descriptor(), llama_model.core_model_id.value, ) diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py index b629e05d5..4c5248619 100644 --- a/llama_stack/providers/remote/inference/bedrock/models.py +++ b/llama_stack/providers/remote/inference/bedrock/models.py @@ -6,19 +6,19 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-8b-instruct-v1:0", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-70b-instruct-v1:0", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta.llama3-1-405b-instruct-v1:0", CoreModelId.llama3_1_405b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py index 03ffeb492..53b0d5b55 100644 --- a/llama_stack/providers/remote/inference/cerebras/models.py +++ b/llama_stack/providers/remote/inference/cerebras/models.py @@ -6,15 +6,15 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( "llama3.1-8b", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-3.3-70b", CoreModelId.llama3_3_70b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py index 05e61361c..03da4d129 100644 --- a/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/llama_stack/providers/remote/inference/databricks/databricks.py @@ -25,7 +25,7 @@ from llama_stack.apis.inference import ( from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, @@ 
-39,11 +39,11 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( from .config import DatabricksImplConfig model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( "databricks-meta-llama-3-1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "databricks-meta-llama-3-1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 14de585d4..8ba67c9ff 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -6,47 +6,47 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-8b-instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-1b-instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-3b-instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-11b-vision-instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-v3p3-70b-instruct", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-guard-3-8b", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "accounts/fireworks/models/llama-guard-3-11b-vision", CoreModelId.llama_guard_3_11b_vision.value, ), diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 441b6af5c..12ee613fe 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, + build_hf_repo_model_alias, build_model_alias, - build_model_alias_with_just_provider_model_id, ) from .groq_utils import ( @@ -42,19 +42,19 @@ from .groq_utils import ( ) _MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "llama3-8b-8192", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama-3.1-8b-instant", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3-70b-8192", CoreModelId.llama3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-3.3-70b-versatile", 
CoreModelId.llama3_3_70b_instruct.value, ), @@ -62,7 +62,7 @@ _MODEL_ALIASES = [ # Preview models aren't recommended for production use, but we include this one # to pass the test fixture # TODO(aidand): Replace this with a stable model once Groq supports it - build_model_alias( + build_hf_repo_model_alias( "llama-3.2-3b-preview", CoreModelId.llama3_2_3b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py index 1d9b575d4..6a359e009 100644 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) _MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "meta/llama3-8b-instruct", CoreModelId.llama3_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama3-70b-instruct", CoreModelId.llama3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-8b-instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-70b-instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-1b-instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-3b-instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-11b-vision-instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta/llama-3.2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 2488d9071..287f025e0 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, + build_hf_repo_model_alias, build_model_alias, - build_model_alias_with_just_provider_model_id, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, @@ -59,73 +59,73 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( log = logging.getLogger(__name__) model_aliases = [ - build_model_alias( + build_hf_repo_model_alias( "llama3.1:8b-instruct-fp16", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.1:8b", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.1:70b-instruct-fp16", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.1:70b", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.1:405b-instruct-fp16", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + 
build_model_alias( "llama3.1:405b", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2:1b-instruct-fp16", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2:1b", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2:3b-instruct-fp16", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2:3b", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2-vision:11b-instruct-fp16", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2-vision:latest", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.2-vision:90b-instruct-fp16", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias_with_just_provider_model_id( + build_model_alias( "llama3.2-vision:90b", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama3.3:70b", CoreModelId.llama3_3_70b_instruct.value, ), # The Llama Guard models don't have their full fp16 versions # so we are going to alias their default version to the canonical SKU - build_model_alias( + build_hf_repo_model_alias( "llama-guard3:8b", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "llama-guard3:1b", CoreModelId.llama_guard_3_1b.value, ), diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py index 27a4a149e..1e002c81d 100644 --- a/llama_stack/providers/remote/inference/sambanova/models.py +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-8B-Instruct", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-70B-Instruct", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.1-405B-Instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.2-1B-Instruct", CoreModelId.llama3_2_1b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.2-3B-Instruct", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-3.3-70B-Instruct", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Llama-3.2-11B-Vision-Instruct", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Llama-3.2-90B-Vision-Instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "Meta-Llama-Guard-3-8B", CoreModelId.llama_guard_3_8b.value, ), diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 7ffeced95..cd2311a48 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -32,7 +32,7 @@ from 
llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, @@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl log = logging.getLogger(__name__) -def build_model_aliases(): +def build_hf_repo_model_aliases(): return [ - build_model_alias( + build_hf_repo_model_alias( model.huggingface_repo, model.descriptor(), ) @@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): model_id: str def __init__(self) -> None: - self.register_helper = ModelRegistryHelper(build_model_aliases()) + self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases()) self.huggingface_repo_to_llama_model_id = { model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo } diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index 87d282ea5..87904c47b 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -6,43 +6,43 @@ from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( - build_model_alias, + build_hf_repo_model_alias, ) MODEL_ALIASES = [ - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", CoreModelId.llama3_1_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", CoreModelId.llama3_1_405b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-3B-Instruct-Turbo", CoreModelId.llama3_2_3b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", CoreModelId.llama3_2_11b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", CoreModelId.llama3_2_90b_vision_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-3.3-70B-Instruct-Turbo", CoreModelId.llama3_3_70b_instruct.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Meta-Llama-Guard-3-8B", CoreModelId.llama_guard_3_8b.value, ), - build_model_alias( + build_hf_repo_model_alias( "meta-llama/Llama-Guard-3-11B-Vision-Turbo", CoreModelId.llama_guard_3_11b_vision.value, ), diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 220bf4bde..75dc432e4 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, + build_hf_repo_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionResponse, @@ -62,9 +62,9 @@ from .config import 
VLLMInferenceAdapterConfig log = logging.getLogger(__name__) -def build_model_aliases(): +def build_hf_repo_model_aliases(): return [ - build_model_alias( + build_hf_repo_model_alias( model.huggingface_repo, model.descriptor(), ) @@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response( class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): def __init__(self, config: VLLMInferenceAdapterConfig) -> None: - self.register_helper = ModelRegistryHelper(build_model_aliases()) + self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases()) self.config = config self.client = None diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index 5cb785843..e14a733d1 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -32,7 +32,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]: return None -def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: +def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: return ModelAlias( provider_model_id=provider_model_id, aliases=[ @@ -42,7 +42,7 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli ) -def build_model_alias_with_just_provider_model_id(provider_model_id: str, model_descriptor: str) -> ModelAlias: +def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: return ModelAlias( provider_model_id=provider_model_id, aliases=[], From f7161611c66913f4c0e1ac9f67dfae28f413af5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladimir=20Ivi=C4=87?= Date: Thu, 20 Feb 2025 13:09:00 -0800 Subject: [PATCH 4/4] feat: adding endpoints for files and uploads (#1070) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Adds spec definitions for file uploads operations. 
This API focuses on two high-level operations:
* Initiating and managing upload sessions
* Accessing uploaded file information

Usage examples:

To start a file upload session:
```
curl -X POST https://localhost:8321/v1/files \
  -d '{
    "key": "image123.jpg",
    "bucket": "images",
    "mime_type": "image/jpg",
    "size": 12345
  }'

# Returns
{
  "id": "<upload_id>",
  "url": "https://localhost:8321/v1/files/session:<upload_id>",
  "offset": 0,
  "size": 12345
}
```

To upload file content to an existing session:
```
curl -i -X POST "https://localhost:8321/v1/files/session:<upload_id>" \
  --data-binary @<path_to_file>

# Returns
{
  "key": "image123.jpg",
  "bucket": "images",
  "mime_type": "image/jpg",
  "bytes": 12345,
  "created_at": 1737492240
}

# Implementing on server side (Flask example for simplicity):
from flask import Flask, request

app = Flask(__name__)

@app.route('/uploads/<upload_id>', methods=['POST'])
def upload_content_to_session(upload_id):
    try:
        # Get the binary file data from the request body
        file_data = request.data

        # Save the file to disk
        save_path = f"./uploads/{upload_id}"
        with open(save_path, 'wb') as f:
            f.write(file_data)

        return {__uploaded_file_json__}, 200
    except Exception as e:
        return str(e), 500
```

To read information about an existing upload session:
```
curl -i -X GET "https://localhost:8321/v1/files/session:<upload_id>"

# Returns
{
  "id": "<upload_id>",
  "url": "https://localhost:8321/v1/files/session:<upload_id>",
  "offset": 1024,
  "size": 12345
}
```

To list buckets:
```
GET /files

# Returns
{
  "data": [
    {"name": "bucket1"},
    {"name": "bucket2"}
  ]
}
```

To list all files in a bucket:
```
GET /files/{bucket}

# Returns
{
  "data": [
    {
      "key": "shiba.jpg",
      "bucket": "dogs",
      "mime_type": "image/jpg",
      "bytes": 82334,
      "created_at": 1737492240
    },
    {
      "key": "persian_cat.jpg",
      "mime_type": "image/jpg",
      "bucket": "cats",
      "bytes": 39924,
      "created_at": 1727493440
    }
  ]
}
```

To get specific file info:
```
GET /files/{bucket}/{key}

{
  "key": "shiba.jpg",
  "bucket": "dogs",
  "mime_type": "image/jpg",
  "bytes": 82334,
  "created_at": 1737492240
}
```

To delete a specific file:
```
DELETE /files/{bucket}/{key}

{
  "key": "shiba.jpg",
  "bucket": "dogs",
  "mime_type": "image/jpg",
  "bytes": 82334,
  "created_at": 1737492240
}
```
---
 docs/_static/llama-stack-spec.html            | 405 ++++++++++++++++++
 docs/_static/llama-stack-spec.yaml            | 280 ++++++++++++
 docs/openapi_generator/pyopenapi/generator.py |  27 +-
 .../pyopenapi/specification.py                |   2 +-
 llama_stack/apis/files/__init__.py            |   7 +
 llama_stack/apis/files/files.py               | 174 ++++++++
 llama_stack/distribution/stack.py             |   2 +
 llama_stack/schema_utils.py                   |   3 +
 8 files changed, 897 insertions(+), 3 deletions(-)
 create mode 100644 llama_stack/apis/files/__init__.py
 create mode 100644 llama_stack/apis/files/files.py

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 2b6e1d11c..02d05776d 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -678,6 +678,65 @@
                 }
             }
         },
+        "/v1/files": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListBucketResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "List all buckets.",
+                "parameters": [
+                    {
+                        "name": "bucket",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/FileUploadResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Create a new upload session for a file identified by a bucket and key.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CreateUploadSessionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/agents/{agent_id}": {
             "delete": {
                 "responses": {
@@ -779,6 +838,84 @@
                 ]
             }
         },
+        "/v1/files/{bucket}/{key}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/FileResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Get a file info identified by a bucket and key.",
+                "parameters": [
+                    {
+                        "name": "bucket",
+                        "in": "path",
+                        "description": "Bucket name (valid chars: a-zA-Z0-9_-)",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "key",
+                        "in": "path",
+                        "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "delete": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/FileResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Delete a file identified by a bucket and key.",
+                "parameters": [
+                    {
+                        "name": "bucket",
+                        "in": "path",
+                        "description": "Bucket name (valid chars: a-zA-Z0-9_-)",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "key",
+                        "in": "path",
+                        "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/inference/embeddings": {
             "post": {
                 "responses": {
@@ -1470,6 +1607,91 @@
                 "parameters": []
             }
         },
+        "/v1/files/session:{upload_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/FileUploadResponse"
+                                        },
+                                        {
+                                            "type": "null"
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Returns information about an existing upload session.",
+                "parameters": [
+                    {
+                        "name": "upload_id",
+                        "in": "path",
+                        "description": "ID of the upload session",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/FileResponse"
+                                        },
+                                        {
+                                            "type": "null"
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Files (Coming Soon)"
+                ],
+                "description": "Upload file content to an existing upload session.
On the server, request body will have the raw bytes that are uploaded.", + "parameters": [ + { + "name": "upload_id", + "in": "path", + "description": "ID of the upload session", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary" + } + } + }, + "required": true + } + } + }, "/v1/vector-dbs/{vector_db_id}": { "get": { "responses": { @@ -1826,6 +2048,37 @@ } } }, + "/v1/files/{bucket}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListFileResponse" + } + } + } + } + }, + "tags": [ + "Files (Coming Soon)" + ], + "description": "List all files in a bucket.", + "parameters": [ + { + "name": "bucket", + "in": "path", + "description": "Bucket name (valid chars: a-zA-Z0-9_-)", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/models": { "get": { "responses": { @@ -5441,6 +5694,105 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "CreateUploadSessionRequest": { + "type": "object", + "properties": { + "bucket": { + "type": "string", + "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)" + }, + "key": { + "type": "string", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)" + }, + "mime_type": { + "type": "string", + "description": "MIME type of the file" + }, + "size": { + "type": "integer", + "description": "File size in bytes" + } + }, + "additionalProperties": false, + "required": [ + "bucket", + "key", + "mime_type", + "size" + ], + "title": "CreateUploadSessionRequest" + }, + "FileUploadResponse": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "ID of the upload session" + }, + "url": { + "type": "string", + "description": "Upload URL for the file or file parts" + }, + "offset": { + "type": "integer", + "description": "Upload content offset" + }, + "size": { + "type": "integer", + "description": "Upload content size" + } + }, + "additionalProperties": false, + "required": [ + "id", + "url", + "offset", + "size" + ], + "title": "FileUploadResponse", + "description": "Response after initiating a file upload session." + }, + "FileResponse": { + "type": "object", + "properties": { + "bucket": { + "type": "string", + "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)" + }, + "key": { + "type": "string", + "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)" + }, + "mime_type": { + "type": "string", + "description": "MIME type of the file" + }, + "url": { + "type": "string", + "description": "Upload URL for the file contents" + }, + "bytes": { + "type": "integer", + "description": "Size of the file in bytes" + }, + "created_at": { + "type": "integer", + "description": "Timestamp of when the file was created" + } + }, + "additionalProperties": false, + "required": [ + "bucket", + "key", + "mime_type", + "url", + "bytes", + "created_at" + ], + "title": "FileResponse", + "description": "Response representing a file entry." 
+ }, "EmbeddingsRequest": { "type": "object", "properties": { @@ -6756,6 +7108,37 @@ ], "title": "ToolInvocationResult" }, + "BucketResponse": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "name" + ], + "title": "BucketResponse" + }, + "ListBucketResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BucketResponse" + }, + "description": "List of FileResponse entries" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListBucketResponse", + "description": "Response representing a list of file entries." + }, "ListDatasetsResponse": { "type": "object", "properties": { @@ -6772,6 +7155,24 @@ ], "title": "ListDatasetsResponse" }, + "ListFileResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FileResponse" + }, + "description": "List of FileResponse entries" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListFileResponse", + "description": "Response representing a list of file entries." + }, "ListModelsResponse": { "type": "object", "properties": { @@ -8543,6 +8944,9 @@ { "name": "Eval" }, + { + "name": "Files (Coming Soon)" + }, { "name": "Inference", "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", @@ -8598,6 +9002,7 @@ "DatasetIO", "Datasets", "Eval", + "Files (Coming Soon)", "Inference", "Inspect", "Models", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 99300fedf..f79120f1d 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -406,6 +406,43 @@ paths: schema: $ref: '#/components/schemas/CreateAgentTurnRequest' required: true + /v1/files: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ListBucketResponse' + tags: + - Files (Coming Soon) + description: List all buckets. + parameters: + - name: bucket + in: query + required: true + schema: + type: string + post: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FileUploadResponse' + tags: + - Files (Coming Soon) + description: >- + Create a new upload session for a file identified by a bucket and key. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateUploadSessionRequest' + required: true /v1/agents/{agent_id}: delete: responses: @@ -468,6 +505,59 @@ paths: required: true schema: type: string + /v1/files/{bucket}/{key}: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FileResponse' + tags: + - Files (Coming Soon) + description: >- + Get a file info identified by a bucket and key. + parameters: + - name: bucket + in: path + description: 'Bucket name (valid chars: a-zA-Z0-9_-)' + required: true + schema: + type: string + - name: key + in: path + description: >- + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) 
+          required: true
+          schema:
+            type: string
+    delete:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/FileResponse'
+      tags:
+        - Files (Coming Soon)
+      description: >-
+        Delete a file identified by a bucket and key.
+      parameters:
+        - name: bucket
+          in: path
+          description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
+          required: true
+          schema:
+            type: string
+        - name: key
+          in: path
+          description: >-
+            Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+          required: true
+          schema:
+            type: string
   /v1/inference/embeddings:
     post:
       responses:
@@ -875,6 +965,57 @@ paths:
         - PostTraining (Coming Soon)
       description: ''
       parameters: []
+  /v1/files/session:{upload_id}:
+    get:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                oneOf:
+                  - $ref: '#/components/schemas/FileUploadResponse'
+                  - type: 'null'
+      tags:
+        - Files (Coming Soon)
+      description: >-
+        Returns information about an existing upload session.
+      parameters:
+        - name: upload_id
+          in: path
+          description: ID of the upload session
+          required: true
+          schema:
+            type: string
+    post:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                oneOf:
+                  - $ref: '#/components/schemas/FileResponse'
+                  - type: 'null'
+      tags:
+        - Files (Coming Soon)
+      description: >-
+        Upload file content to an existing upload session. On the server, request
+        body will have the raw bytes that are uploaded.
+      parameters:
+        - name: upload_id
+          in: path
+          description: ID of the upload session
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+        required: true
   /v1/vector-dbs/{vector_db_id}:
     get:
       responses:
@@ -1091,6 +1232,25 @@ paths:
             schema:
               $ref: '#/components/schemas/RegisterDatasetRequest'
         required: true
+  /v1/files/{bucket}:
+    get:
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListFileResponse'
+      tags:
+        - Files (Coming Soon)
+      description: List all files in a bucket.
+      parameters:
+        - name: bucket
+          in: path
+          description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
+          required: true
+          schema:
+            type: string
   /v1/models:
     get:
       responses:
@@ -3508,6 +3668,87 @@ components:
        - event_type
        - turn_id
      title: AgentTurnResponseTurnStartPayload
+    CreateUploadSessionRequest:
+      type: object
+      properties:
+        bucket:
+          type: string
+          description: >-
+            Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+        key:
+          type: string
+          description: >-
+            Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+        mime_type:
+          type: string
+          description: MIME type of the file
+        size:
+          type: integer
+          description: File size in bytes
+      additionalProperties: false
+      required:
+        - bucket
+        - key
+        - mime_type
+        - size
+      title: CreateUploadSessionRequest
+    FileUploadResponse:
+      type: object
+      properties:
+        id:
+          type: string
+          description: ID of the upload session
+        url:
+          type: string
+          description: Upload URL for the file or file parts
+        offset:
+          type: integer
+          description: Upload content offset
+        size:
+          type: integer
+          description: Upload content size
+      additionalProperties: false
+      required:
+        - id
+        - url
+        - offset
+        - size
+      title: FileUploadResponse
+      description: >-
+        Response after initiating a file upload session.
+ FileResponse: + type: object + properties: + bucket: + type: string + description: >- + Bucket under which the file is stored (valid chars: a-zA-Z0-9_-) + key: + type: string + description: >- + Key under which the file is stored (valid chars: a-zA-Z0-9_-/.) + mime_type: + type: string + description: MIME type of the file + url: + type: string + description: Upload URL for the file contents + bytes: + type: integer + description: Size of the file in bytes + created_at: + type: integer + description: Timestamp of when the file was created + additionalProperties: false + required: + - bucket + - key + - mime_type + - url + - bytes + - created_at + title: FileResponse + description: Response representing a file entry. EmbeddingsRequest: type: object properties: @@ -4339,6 +4580,29 @@ components: required: - content title: ToolInvocationResult + BucketResponse: + type: object + properties: + name: + type: string + additionalProperties: false + required: + - name + title: BucketResponse + ListBucketResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/BucketResponse' + description: List of FileResponse entries + additionalProperties: false + required: + - data + title: ListBucketResponse + description: >- + Response representing a list of file entries. ListDatasetsResponse: type: object properties: @@ -4350,6 +4614,20 @@ components: required: - data title: ListDatasetsResponse + ListFileResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/FileResponse' + description: List of FileResponse entries + additionalProperties: false + required: + - data + title: ListFileResponse + description: >- + Response representing a list of file entries. ListModelsResponse: type: object properties: @@ -5467,6 +5745,7 @@ tags: - name: DatasetIO - name: Datasets - name: Eval + - name: Files (Coming Soon) - name: Inference description: >- This API provides the raw interface to the underlying models. 
Two kinds of models @@ -5501,6 +5780,7 @@ x-tagGroups: - DatasetIO - Datasets - Eval + - Files (Coming Soon) - Inference - Inspect - Models diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 60cd7a242..4220cfc05 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -477,6 +477,7 @@ class Generator: "SyntheticDataGeneration", "PostTraining", "BatchInference", + "Files", ]: op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)" print(op.defining_class.__name__) @@ -520,8 +521,30 @@ class Generator: # parameters passed anywhere parameters = path_parameters + query_parameters - # data passed in payload - if op.request_params: + webmethod = getattr(op.func_ref, "__webmethod__", None) + raw_bytes_request_body = False + if webmethod: + raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False) + + # data passed in request body as raw bytes cannot have request parameters + if raw_bytes_request_body and op.request_params: + raise ValueError("Cannot have both raw bytes request body and request parameters") + + # data passed in request body as raw bytes + if raw_bytes_request_body: + requestBody = RequestBody( + content={ + "application/octet-stream": { + "schema": { + "type": "string", + "format": "binary", + } + } + }, + required=True, + ) + # data passed in payload as JSON and mapped to request parameters + elif op.request_params: builder = ContentBuilder(self.schema_builder) first = next(iter(op.request_params)) request_name, request_type = first diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py index 9e5363b4a..d3e5a1f19 100644 --- a/docs/openapi_generator/pyopenapi/specification.py +++ b/docs/openapi_generator/pyopenapi/specification.py @@ -78,7 +78,7 @@ class MediaType: @dataclass class RequestBody: - content: Dict[str, MediaType] + content: Dict[str, MediaType | Dict[str, Any]] description: Optional[str] = None required: Optional[bool] = None diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py new file mode 100644 index 000000000..269baf177 --- /dev/null +++ b/llama_stack/apis/files/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .files import * # noqa: F401 F403 diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py new file mode 100644 index 000000000..f17fadc8c --- /dev/null +++ b/llama_stack/apis/files/files.py @@ -0,0 +1,174 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import List, Optional, Protocol, runtime_checkable + +from pydantic import BaseModel + +from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol +from llama_stack.schema_utils import json_schema_type, webmethod + + +@json_schema_type +class FileUploadResponse(BaseModel): + """ + Response after initiating a file upload session. 
+
+    :param id: ID of the upload session
+    :param url: Upload URL for the file or file parts
+    :param offset: Upload content offset
+    :param size: Upload content size
+    """
+
+    id: str
+    url: str
+    offset: int
+    size: int
+
+
+@json_schema_type
+class BucketResponse(BaseModel):
+    name: str
+
+
+@json_schema_type
+class ListBucketResponse(BaseModel):
+    """
+    Response representing a list of file entries.
+
+    :param data: List of FileResponse entries
+    """
+
+    data: List[BucketResponse]
+
+
+@json_schema_type
+class FileResponse(BaseModel):
+    """
+    Response representing a file entry.
+
+    :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+    :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+    :param mime_type: MIME type of the file
+    :param url: Upload URL for the file contents
+    :param bytes: Size of the file in bytes
+    :param created_at: Timestamp of when the file was created
+    """
+
+    bucket: str
+    key: str
+    mime_type: str
+    url: str
+    bytes: int
+    created_at: int
+
+
+@json_schema_type
+class ListFileResponse(BaseModel):
+    """
+    Response representing a list of file entries.
+
+    :param data: List of FileResponse entries
+    """
+
+    data: List[FileResponse]
+
+
+@runtime_checkable
+@trace_protocol
+class Files(Protocol):
+    @webmethod(route="/files", method="POST")
+    async def create_upload_session(
+        self,
+        bucket: str,
+        key: str,
+        mime_type: str,
+        size: int,
+    ) -> FileUploadResponse:
+        """
+        Create a new upload session for a file identified by a bucket and key.
+
+        :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+        :param mime_type: MIME type of the file
+        :param size: File size in bytes
+        """
+        ...
+
+    @webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
+    async def upload_content_to_session(
+        self,
+        upload_id: str,
+    ) -> Optional[FileResponse]:
+        """
+        Upload file content to an existing upload session.
+        On the server, request body will have the raw bytes that are uploaded.
+
+        :param upload_id: ID of the upload session
+        """
+        ...
+
+    @webmethod(route="/files/session:{upload_id}", method="GET")
+    async def get_upload_session_info(
+        self,
+        upload_id: str,
+    ) -> Optional[FileUploadResponse]:
+        """
+        Returns information about an existing upload session.
+
+        :param upload_id: ID of the upload session
+        """
+        ...
+
+    @webmethod(route="/files", method="GET")
+    async def list_all_buckets(
+        self,
+        bucket: str,
+    ) -> ListBucketResponse:
+        """
+        List all buckets.
+        """
+        ...
+
+    @webmethod(route="/files/{bucket}", method="GET")
+    async def list_files_in_bucket(
+        self,
+        bucket: str,
+    ) -> ListFileResponse:
+        """
+        List all files in a bucket.
+
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+        """
+        ...
+
+    @webmethod(route="/files/{bucket}/{key:path}", method="GET")
+    async def get_file(
+        self,
+        bucket: str,
+        key: str,
+    ) -> FileResponse:
+        """
+        Get a file info identified by a bucket and key.
+
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+        """
+        ...
+
+    @webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
+    async def delete_file(
+        self,
+        bucket: str,
+        key: str,
+    ) -> FileResponse:
+        """
+        Delete a file identified by a bucket and key.
+
+        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ """ + ... diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 9335dc3a9..1328c88ef 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -19,6 +19,7 @@ from llama_stack.apis.benchmarks import Benchmarks from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets from llama_stack.apis.eval import Eval +from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference from llama_stack.apis.inspect import Inspect from llama_stack.apis.models import Models @@ -63,6 +64,7 @@ class LlamaStack( ToolGroups, ToolRuntime, RAGToolRuntime, + Files, ): pass diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py index 56b9e5e4c..581404844 100644 --- a/llama_stack/schema_utils.py +++ b/llama_stack/schema_utils.py @@ -19,6 +19,7 @@ class WebMethod: request_examples: Optional[List[Any]] = None response_examples: Optional[List[Any]] = None method: Optional[str] = None + raw_bytes_request_body: Optional[bool] = False def webmethod( @@ -27,6 +28,7 @@ def webmethod( public: Optional[bool] = False, request_examples: Optional[List[Any]] = None, response_examples: Optional[List[Any]] = None, + raw_bytes_request_body: Optional[bool] = False, ) -> Callable[[T], T]: """ Decorator that supplies additional metadata to an endpoint operation function. @@ -44,6 +46,7 @@ def webmethod( public=public or False, request_examples=request_examples, response_examples=response_examples, + raw_bytes_request_body=raw_bytes_request_body, ) return cls