diff --git a/docs/source/providers/inference/remote_llamacpp.md b/docs/source/providers/inference/remote_llamacpp.md
index d17b5adf2..07ac0769e 100644
--- a/docs/source/providers/inference/remote_llamacpp.md
+++ b/docs/source/providers/inference/remote_llamacpp.md
@@ -5,12 +5,12 @@
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `api_key` | `str \| None` | No |  | The llama.cpp server API key (optional for local servers) |
-| `openai_compat_api_base` | `<class 'str'>` | No | http://localhost:8080/v1 | The URL for the llama.cpp server with OpenAI-compatible API |
+| `openai_compat_api_base` | `<class 'str'>` | No | http://localhost:8080 | The URL for the llama.cpp server with OpenAI-compatible API |
 
 ## Sample Configuration
 
 ```yaml
-openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1
+openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080}
 api_key: ${env.LLAMACPP_API_KEY:=}
 
 ```
diff --git a/llama_stack/providers/remote/inference/llamacpp/config.py b/llama_stack/providers/remote/inference/llamacpp/config.py
index 4a16cb17d..3aff56df6 100644
--- a/llama_stack/providers/remote/inference/llamacpp/config.py
+++ b/llama_stack/providers/remote/inference/llamacpp/config.py
@@ -26,13 +26,13 @@ class LlamaCppImplConfig(BaseModel):
     )
 
     openai_compat_api_base: str = Field(
-        default="http://localhost:8080/v1",
+        default="http://localhost:8080",
         description="The URL for the llama.cpp server with OpenAI-compatible API",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.LLAMACPP_API_KEY:=}") -> dict[str, Any]:
         return {
-            "openai_compat_api_base": "${env.LLAMACPP_URL:http://localhost:8080}/v1",
+            "openai_compat_api_base": "${env.LLAMACPP_URL:=http://localhost:8080}",
             "api_key": api_key,
         }
diff --git a/llama_stack/templates/llamacpp/run-with-safety.yaml b/llama_stack/templates/llamacpp/run-with-safety.yaml
index a83db1332..b225fe4da 100644
--- a/llama_stack/templates/llamacpp/run-with-safety.yaml
+++ b/llama_stack/templates/llamacpp/run-with-safety.yaml
@@ -16,7 +16,7 @@ providers:
   - provider_id: llamacpp
     provider_type: remote::llamacpp
     config:
-      openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1
+      openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080}
       api_key: ${env.LLAMACPP_API_KEY:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
diff --git a/llama_stack/templates/llamacpp/run.yaml b/llama_stack/templates/llamacpp/run.yaml
index c9f7b1b8c..d10e2dd66 100644
--- a/llama_stack/templates/llamacpp/run.yaml
+++ b/llama_stack/templates/llamacpp/run.yaml
@@ -16,7 +16,7 @@ providers:
   - provider_id: llamacpp
     provider_type: remote::llamacpp
     config:
-      openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1
+      openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080}
       api_key: ${env.LLAMACPP_API_KEY:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml
index c21b1e5bf..ca9ce9566 100644
--- a/llama_stack/templates/starter/run.yaml
+++ b/llama_stack/templates/starter/run.yaml
@@ -133,7 +133,7 @@ providers:
   - provider_id: ${env.ENABLE_LLAMACPP:=__disabled__}
     provider_type: remote::llamacpp
     config:
-      openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1
+      openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080}
       api_key: ${env.LLAMACPP_API_KEY:=}
   - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}
     provider_type: inline::sentence-transformers