diff --git a/llama_toolchain/distribution/registry.py b/llama_toolchain/distribution/registry.py
index ef27a2bbc..bb25bb3c0 100644
--- a/llama_toolchain/distribution/registry.py
+++ b/llama_toolchain/distribution/registry.py
@@ -9,7 +9,7 @@ from typing import List, Optional

 from llama_toolchain.inference.adapters import available_inference_adapters

-from .datatypes import ApiSurface, Distribution
+from .datatypes import ApiSurface, Distribution, PassthroughApiAdapter

 # This is currently duplicated from `requirements.txt` with a few minor changes
 # dev-dependencies like "ufmt" etc. are nuked. A few specialized dependencies
@@ -57,6 +57,28 @@ def available_distributions() -> List[Distribution]:
                 ApiSurface.inference: inference_adapters_by_id["meta-reference"],
             },
         ),
+        Distribution(
+            name="full-passthrough",
+            description="Point to remote services for all llama stack APIs",
+            additional_pip_packages=[
+                "python-dotenv",
+                "blobfile",
+                "codeshield",
+                "fairscale",
+                "fastapi",
+                "fire",
+                "flake8",
+                "httpx",
+                "huggingface-hub",
+            ],
+            adapters={
+                ApiSurface.inference: PassthroughApiAdapter(
+                    api_surface=ApiSurface.inference,
+                    adapter_id="inference-passthrough",
+                    base_url="http://localhost:5001",
+                ),
+            },
+        ),
         Distribution(
             name="local-ollama",
             description="Like local-source, but use ollama for running LLM inference",
diff --git a/llama_toolchain/distribution/server.py b/llama_toolchain/distribution/server.py
index 9b96d31fc..128b78112 100644
--- a/llama_toolchain/distribution/server.py
+++ b/llama_toolchain/distribution/server.py
@@ -157,7 +157,7 @@ def create_dynamic_typed_route(func: Any):
                     )

             return StreamingResponse(
-                sse_generator(func(request2)), media_type="text/event-stream"
+                sse_generator(func(request)), media_type="text/event-stream"
             )

     else:
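
Note: the registry change constructs PassthroughApiAdapter with api_surface, adapter_id, and base_url; its actual definition lives in llama_toolchain/distribution/datatypes.py, which this diff does not touch. A minimal, hypothetical sketch of the shape implied by that usage (field names are taken from the call above; the base class and any additional fields such as headers or auth config are assumptions):

    # Hypothetical sketch only -- the real PassthroughApiAdapter is defined in
    # llama_toolchain/distribution/datatypes.py and may differ.
    from dataclasses import dataclass
    from enum import Enum


    class ApiSurface(Enum):
        # the real enum covers every llama stack API surface; "inference" is
        # the only member exercised by the diff above
        inference = "inference"


    @dataclass
    class PassthroughApiAdapter:
        api_surface: ApiSurface  # which API surface this adapter serves
        adapter_id: str          # identifier for the adapter within a distribution
        base_url: str            # remote endpoint that requests are forwarded to

With that shape, the new full-passthrough distribution carries no local inference implementation; every inference call is forwarded to whatever service is listening at base_url (here http://localhost:5001).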