diff --git a/distributions/tgi/gpu/compose.yaml b/distributions/tgi/compose.yaml
similarity index 100%
rename from distributions/tgi/gpu/compose.yaml
rename to distributions/tgi/compose.yaml
diff --git a/distributions/tgi/cpu/compose.yaml b/distributions/tgi/cpu/compose.yaml
deleted file mode 100644
index 3ff6345e2..000000000
--- a/distributions/tgi/cpu/compose.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-services:
-  text-generation-inference:
-    image: ghcr.io/huggingface/text-generation-inference:latest
-    network_mode: "host"
-    volumes:
-      - $HOME/.cache/huggingface:/data
-    ports:
-      - "5009:5009"
-    command: ["--dtype", "bfloat16", "--usage-stats", "on", "--sharded", "false", "--model-id", "meta-llama/Llama-3.1-8B-Instruct", "--port", "5009", "--cuda-memory-fraction", "0.3"]
-    runtime: nvidia
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://text-generation-inference:5009/health"]
-      interval: 5s
-      timeout: 5s
-      retries: 30
-  llamastack:
-    depends_on:
-      text-generation-inference:
-        condition: service_healthy
-    image: llamastack/llamastack-tgi
-    network_mode: "host"
-    volumes:
-      - ~/.llama:/root/.llama
-      # Link to run.yaml file
-      - ./run.yaml:/root/my-run.yaml
-    ports:
-      - "5000:5000"
-    entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml"
-    restart_policy:
-      condition: on-failure
-      delay: 3s
-      max_attempts: 5
-      window: 60s
diff --git a/distributions/tgi/cpu/run.yaml b/distributions/tgi/cpu/run.yaml
deleted file mode 100644
index bf46391b4..000000000
--- a/distributions/tgi/cpu/run.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-version: '2'
-built_at: '2024-10-08T17:40:45.325529'
-image_name: local
-docker_image: null
-conda_env: local
-apis:
-- shields
-- agents
-- models
-- memory
-- memory_banks
-- inference
-- safety
-providers:
-  inference:
-  - provider_id: tgi0
-    provider_type: remote::tgi
-    config:
-      url:
-  safety:
-  - provider_id: meta0
-    provider_type: meta-reference
-    config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
-  memory:
-  - provider_id: meta0
-    provider_type: meta-reference
-    config: {}
-  agents:
-  - provider_id: meta0
-    provider_type: meta-reference
-    config:
-      persistence_store:
-        namespace: null
-        type: sqlite
-        db_path: ~/.llama/runtime/kvstore.db
-  telemetry:
-  - provider_id: meta0
-    provider_type: meta-reference
-    config: {}
diff --git a/distributions/tgi/gpu/run.yaml b/distributions/tgi/run.yaml
similarity index 100%
rename from distributions/tgi/gpu/run.yaml
rename to distributions/tgi/run.yaml
diff --git a/docs/source/getting_started/distributions/self_hosted_distro/tgi.md b/docs/source/getting_started/distributions/self_hosted_distro/tgi.md
index 3ee079360..8ad9de181 100644
--- a/docs/source/getting_started/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/getting_started/distributions/self_hosted_distro/tgi.md
@@ -15,7 +15,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
 
 ```
-$ cd distributions/tgi/gpu && docker compose up
+$ cd distributions/tgi && docker compose up
 ```
 
 The script will first start up TGI server, then start up Llama Stack distribution server hooking up to the remote TGI provider for inference.
 You should be able to see the following outputs --
@@ -34,23 +34,6 @@ To kill the server
 docker compose down
 ```
 
-### Docker: Start the Distribution (Single Node CPU)
-
-> [!NOTE]
-> This assumes you have an hosted endpoint compatible with TGI server.
-
-```
-$ cd distributions/tgi/cpu && docker compose up
-```
-
-Replace in `run.yaml` file with your TGI endpoint.
-```
-inference:
-  - provider_id: tgi0
-    provider_type: remote::tgi
-    config:
-      url:
-```
 
 ### Conda: TGI server + llama stack run
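
Note: with the CPU variant removed, pointing the consolidated distribution at an already-hosted TGI endpoint would still be done by editing the `remote::tgi` provider in `distributions/tgi/run.yaml`. A minimal sketch of that provider block, assuming the retained `run.yaml` keeps the same shape as the deleted CPU file; the URL below is a hypothetical placeholder chosen to match the port TGI is started on in `compose.yaml`:

```yaml
# Sketch only: inference provider block in distributions/tgi/run.yaml.
# Assumes the retained run.yaml mirrors the structure of the deleted CPU variant.
# http://127.0.0.1:5009 is a placeholder; the compose file runs TGI on port 5009.
providers:
  inference:
  - provider_id: tgi0
    provider_type: remote::tgi
    config:
      url: http://127.0.0.1:5009
```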