diff --git a/llama_toolchain/distribution/registry.py b/llama_toolchain/distribution/registry.py
index ef27a2bbc..bb25bb3c0 100644
--- a/llama_toolchain/distribution/registry.py
+++ b/llama_toolchain/distribution/registry.py
@@ -9,7 +9,7 @@ from typing import List, Optional

 from llama_toolchain.inference.adapters import available_inference_adapters

-from .datatypes import ApiSurface, Distribution
+from .datatypes import ApiSurface, Distribution, PassthroughApiAdapter

 # This is currently duplicated from `requirements.txt` with a few minor changes
 # dev-dependencies like "ufmt" etc. are nuked. A few specialized dependencies
@@ -57,6 +57,28 @@ def available_distributions() -> List[Distribution]:
                 ApiSurface.inference: inference_adapters_by_id["meta-reference"],
             },
         ),
+        Distribution(
+            name="full-passthrough",
+            description="Point to remote services for all llama stack APIs",
+            additional_pip_packages=[
+                "python-dotenv",
+                "blobfile",
+                "codeshield",
+                "fairscale",
+                "fastapi",
+                "fire",
+                "flake8",
+                "httpx",
+                "huggingface-hub",
+            ],
+            adapters={
+                ApiSurface.inference: PassthroughApiAdapter(
+                    api_surface=ApiSurface.inference,
+                    adapter_id="inference-passthrough",
+                    base_url="http://localhost:5001",
+                ),
+            },
+        ),
         Distribution(
             name="local-ollama",
             description="Like local-source, but use ollama for running LLM inference",
diff --git a/llama_toolchain/distribution/server.py b/llama_toolchain/distribution/server.py
index 9b96d31fc..128b78112 100644
--- a/llama_toolchain/distribution/server.py
+++ b/llama_toolchain/distribution/server.py
@@ -157,7 +157,7 @@ def create_dynamic_typed_route(func: Any):
                     )

             return StreamingResponse(
-                sse_generator(func(request2)), media_type="text/event-stream"
+                sse_generator(func(request)), media_type="text/event-stream"
             )

     else:
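
Note: the registry change constructs PassthroughApiAdapter with api_surface, adapter_id, and base_url; its actual definition lives in llama_toolchain/distribution/datatypes.py, which this diff does not touch. A minimal, hypothetical sketch of the shape implied by that usage (field names are taken from the call above; the base class and any additional fields such as headers or auth config are assumptions):

    # Hypothetical sketch only -- the real PassthroughApiAdapter is defined in
    # llama_toolchain/distribution/datatypes.py and may differ.
    from dataclasses import dataclass
    from enum import Enum


    class ApiSurface(Enum):
        # the real enum covers every llama stack API surface; "inference" is
        # the only member exercised by the diff above
        inference = "inference"


    @dataclass
    class PassthroughApiAdapter:
        api_surface: ApiSurface  # which API surface this adapter serves
        adapter_id: str          # identifier for the adapter within a distribution
        base_url: str            # remote endpoint that requests are forwarded to

With that shape, the new full-passthrough distribution carries no local inference implementation; every inference call is forwarded to whatever service is listening at base_url (here http://localhost:5001).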