diff --git a/docs/docs/providers/inference/remote_azure.mdx b/docs/docs/providers/inference/remote_azure.mdx
index fd22b157e..0382b42d7 100644
--- a/docs/docs/providers/inference/remote_azure.mdx
+++ b/docs/docs/providers/inference/remote_azure.mdx
@@ -24,7 +24,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `api_base` | `HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
+| `base_url` | `HttpUrl \| None` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1) |
 | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
 | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |
 
@@ -32,7 +32,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 
 ```yaml
 api_key: ${env.AZURE_API_KEY:=}
-api_base: ${env.AZURE_API_BASE:=}
+base_url: ${env.AZURE_API_BASE:=}
 api_version: ${env.AZURE_API_VERSION:=}
 api_type: ${env.AZURE_API_TYPE:=}
 ```
diff --git a/docs/docs/providers/inference/remote_cerebras.mdx b/docs/docs/providers/inference/remote_cerebras.mdx
index 1fb9530bb..9fd390a29 100644
--- a/docs/docs/providers/inference/remote_cerebras.mdx
+++ b/docs/docs/providers/inference/remote_cerebras.mdx
@@ -17,11 +17,11 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
+| `base_url` | `HttpUrl \| None` | No | https://api.cerebras.ai/v1 | Base URL for the Cerebras API |
 
 ## Sample Configuration
 
 ```yaml
-base_url: https://api.cerebras.ai
+base_url: https://api.cerebras.ai/v1
 api_key: ${env.CEREBRAS_API_KEY:=}
 ```
diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx
index 7a926baf4..d50c52958 100644
--- a/docs/docs/providers/inference/remote_databricks.mdx
+++ b/docs/docs/providers/inference/remote_databricks.mdx
@@ -17,11 +17,11 @@ Databricks inference provider for running models on Databricks' unified analytic
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The Databricks API token |
-| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Databricks model serving endpoint (should include /serving-endpoints path) |
 
 ## Sample Configuration
 
 ```yaml
-url: ${env.DATABRICKS_HOST:=}
+base_url: ${env.DATABRICKS_HOST:=}
 api_token: ${env.DATABRICKS_TOKEN:=}
 ```
diff --git a/docs/docs/providers/inference/remote_fireworks.mdx b/docs/docs/providers/inference/remote_fireworks.mdx
index 7db74efc4..a67403a9b 100644
--- a/docs/docs/providers/inference/remote_fireworks.mdx
+++ b/docs/docs/providers/inference/remote_fireworks.mdx
@@ -17,11 +17,11 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
+| `base_url` | `HttpUrl \| None` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
 
 ## Sample Configuration
 
 ```yaml
-url: https://api.fireworks.ai/inference/v1
+base_url: https://api.fireworks.ai/inference/v1
 api_key: ${env.FIREWORKS_API_KEY:=}
 ```
diff --git a/docs/docs/providers/inference/remote_groq.mdx b/docs/docs/providers/inference/remote_groq.mdx
index 3ebd6f907..17acd3140 100644
--- a/docs/docs/providers/inference/remote_groq.mdx
+++ b/docs/docs/providers/inference/remote_groq.mdx
@@ -17,11 +17,11 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.groq.com/openai/v1 | The URL for the Groq AI server |
 
 ## Sample Configuration
 
 ```yaml
-url: https://api.groq.com
+base_url: https://api.groq.com/openai/v1
 api_key: ${env.GROQ_API_KEY:=}
 ```
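Note: the doc changes above and below all bake the API version path (`/v1`, `/openai/v1`, `/inference/v1`, ...) into `base_url` instead of having adapter code append it. As an illustrative sketch — not part of the patch, with a placeholder model id — this is how a fully versioned `base_url` is consumed by any OpenAI-compatible client, with no further path manipulation:

```python
# Illustrative only: the versioned base_url goes straight into the client.
import os

from openai import OpenAI

client = OpenAI(
    base_url="https://api.groq.com/openai/v1",  # version path included in base_url
    api_key=os.environ["GROQ_API_KEY"],
)
resp = client.chat.completions.create(
    model="llama-3.1-8b-instant",  # placeholder model id
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)
```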
diff --git a/docs/docs/providers/inference/remote_llama-openai-compat.mdx b/docs/docs/providers/inference/remote_llama-openai-compat.mdx
index f67f40909..69e90b2ac 100644
--- a/docs/docs/providers/inference/remote_llama-openai-compat.mdx
+++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx
@@ -17,11 +17,11 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
+| `base_url` | `HttpUrl \| None` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
 
 ## Sample Configuration
 
 ```yaml
-openai_compat_api_base: https://api.llama.com/compat/v1/
+base_url: https://api.llama.com/compat/v1/
 api_key: ${env.LLAMA_API_KEY}
 ```
diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx
index 6646d8b00..a890bc57f 100644
--- a/docs/docs/providers/inference/remote_nvidia.mdx
+++ b/docs/docs/providers/inference/remote_nvidia.mdx
@@ -17,15 +17,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
+| `base_url` | `HttpUrl \| None` | No | https://integrate.api.nvidia.com/v1 | A base url for accessing the NVIDIA NIM |
 | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
-| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
 | `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
 
 ## Sample Configuration
 
 ```yaml
-url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
 api_key: ${env.NVIDIA_API_KEY:=}
-append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
 ```
diff --git a/docs/docs/providers/inference/remote_ollama.mdx b/docs/docs/providers/inference/remote_ollama.mdx
index 497bfed52..f9be84add 100644
--- a/docs/docs/providers/inference/remote_ollama.mdx
+++ b/docs/docs/providers/inference/remote_ollama.mdx
@@ -16,10 +16,10 @@ Ollama inference provider for running local models through the Ollama runtime.
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | http://localhost:11434 | |
+| `base_url` | `HttpUrl \| None` | No | http://localhost:11434/v1 | |
 
 ## Sample Configuration
 
 ```yaml
-url: ${env.OLLAMA_URL:=http://localhost:11434}
+base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
 ```
diff --git a/docs/docs/providers/inference/remote_openai.mdx b/docs/docs/providers/inference/remote_openai.mdx
index 4931118fd..3ac3a21ad 100644
--- a/docs/docs/providers/inference/remote_openai.mdx
+++ b/docs/docs/providers/inference/remote_openai.mdx
@@ -17,7 +17,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
+| `base_url` | `HttpUrl \| None` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx
index 009961d49..325ecc352 100644
--- a/docs/docs/providers/inference/remote_passthrough.mdx
+++ b/docs/docs/providers/inference/remote_passthrough.mdx
@@ -17,11 +17,11 @@ Passthrough inference provider for connecting to any external inference service
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | | The URL for the passthrough endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the passthrough endpoint |
 
 ## Sample Configuration
 
 ```yaml
-url: ${env.PASSTHROUGH_URL}
+base_url: ${env.PASSTHROUGH_URL}
 api_key: ${env.PASSTHROUGH_API_KEY}
 ```
diff --git a/docs/docs/providers/inference/remote_runpod.mdx b/docs/docs/providers/inference/remote_runpod.mdx
index 3b67e157d..6cdcdd3b5 100644
--- a/docs/docs/providers/inference/remote_runpod.mdx
+++ b/docs/docs/providers/inference/remote_runpod.mdx
@@ -17,11 +17,11 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Runpod model serving endpoint |
 
 ## Sample Configuration
 
 ```yaml
-url: ${env.RUNPOD_URL:=}
+base_url: ${env.RUNPOD_URL:=}
 api_token: ${env.RUNPOD_API_TOKEN}
 ```
diff --git a/docs/docs/providers/inference/remote_sambanova.mdx b/docs/docs/providers/inference/remote_sambanova.mdx
index 6f4c5d7f6..bbefdb0f0 100644
--- a/docs/docs/providers/inference/remote_sambanova.mdx
+++ b/docs/docs/providers/inference/remote_sambanova.mdx
@@ -17,11 +17,11 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
 
 ## Sample Configuration
 
 ```yaml
-url: https://api.sambanova.ai/v1
+base_url: https://api.sambanova.ai/v1
 api_key: ${env.SAMBANOVA_API_KEY:=}
 ```
diff --git a/docs/docs/providers/inference/remote_tgi.mdx b/docs/docs/providers/inference/remote_tgi.mdx
index cd5ea7661..3790acdd4 100644
--- a/docs/docs/providers/inference/remote_tgi.mdx
+++ b/docs/docs/providers/inference/remote_tgi.mdx
@@ -16,10 +16,10 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | | The URL for the TGI serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the TGI serving endpoint (should include /v1 path) |
 
 ## Sample Configuration
 
 ```yaml
-url: ${env.TGI_URL:=}
+base_url: ${env.TGI_URL:=}
 ```
diff --git a/docs/docs/providers/inference/remote_together.mdx b/docs/docs/providers/inference/remote_together.mdx
index 43192cc9e..dc025b5ac 100644
--- a/docs/docs/providers/inference/remote_together.mdx
+++ b/docs/docs/providers/inference/remote_together.mdx
@@ -17,11 +17,11 @@ Together AI inference provider for open-source models and collaborative AI devel
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
 
 ## Sample Configuration
 
 ```yaml
-url: https://api.together.xyz/v1
+base_url: https://api.together.xyz/v1
 api_key: ${env.TOGETHER_API_KEY:=}
 ```
diff --git a/docs/docs/providers/inference/remote_vllm.mdx b/docs/docs/providers/inference/remote_vllm.mdx
index 81620dbca..a52c24adb 100644
--- a/docs/docs/providers/inference/remote_vllm.mdx
+++ b/docs/docs/providers/inference/remote_vllm.mdx
@@ -17,14 +17,14 @@ Remote vLLM inference provider for connecting to vLLM servers.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the vLLM model serving endpoint |
 | `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. |
 | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
 
 ## Sample Configuration
 
 ```yaml
-url: ${env.VLLM_URL:=}
+base_url: ${env.VLLM_URL:=}
 max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
 api_token: ${env.VLLM_API_TOKEN:=fake}
 tls_verify: ${env.VLLM_TLS_VERIFY:=true}
diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx
index 3a1dba3b4..47d543e3a 100644
--- a/docs/docs/providers/inference/remote_watsonx.mdx
+++ b/docs/docs/providers/inference/remote_watsonx.mdx
@@ -17,14 +17,14 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
+| `base_url` | `HttpUrl \| None` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
 | `project_id` | `str \| None` | No | | The watsonx.ai project ID |
 | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
 
 ## Sample Configuration
 
 ```yaml
-url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
 api_key: ${env.WATSONX_API_KEY:=}
 project_id: ${env.WATSONX_PROJECT_ID:=}
 ```
diff --git a/scripts/docker.sh b/scripts/docker.sh
index b56df8c03..3b2db5ca7 100755
--- a/scripts/docker.sh
+++ b/scripts/docker.sh
@@ -287,9 +287,9 @@ start_container() {
         # On macOS/Windows, use host.docker.internal to reach host from container
         # On Linux with --network host, use localhost
         if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
-            OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
+            OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434/v1}"
         else
-            OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
+            OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434/v1}"
         fi
 
         DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
diff --git a/scripts/install.sh b/scripts/install.sh
index 5e4939767..7fe1d3243 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -640,7 +640,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
   --network llama-net \
   -p "${PORT}:${PORT}" \
   "${server_env_opts[@]}" \
-  -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
+  -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}/v1" \
   "${SERVER_IMAGE}" --port "${PORT}")
 
 log "🦙 Starting Llama Stack..."
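The `${env.VAR:=default}` / `${env.VAR:+value}` placeholders that appear throughout the configs below follow shell-style expansion: `:=` substitutes a default when the variable is unset, while `:+` emits a value only when the variable is set — which is how entire providers are toggled on via `provider_id: ${env.X:+name}`. A minimal sketch of the assumed semantics, not the stack's actual resolver:

```python
# Illustrative resolver for ${env.VAR:=default} and ${env.VAR:+value}.
import os
import re

_PLACEHOLDER = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")

def resolve(template: str) -> str:
    def substitute(match: re.Match) -> str:
        name, op, arg = match.groups()
        value = os.environ.get(name, "")
        if op == "=":
            return value or arg  # ':=' falls back to the default when unset/empty
        return arg if value else ""  # ':+' emits the value only when the var is set

    return _PLACEHOLDER.sub(substitute, template)

os.environ.pop("OLLAMA_URL", None)
assert resolve("${env.OLLAMA_URL:=http://localhost:11434/v1}") == "http://localhost:11434/v1"
assert resolve("${env.OLLAMA_URL:+ollama}") == ""  # provider stays disabled

os.environ["OLLAMA_URL"] = "http://host.docker.internal:11434/v1"
assert resolve("${env.OLLAMA_URL:+ollama}") == "ollama"  # provider enabled
```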
diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
index 5384b58fe..d942c23a4 100644
--- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
    config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml
index 1118d2ad1..8b1cd2bb2 100644
--- a/src/llama_stack/distributions/ci-tests/run.yaml
+++ b/src/llama_stack/distributions/ci-tests/run.yaml
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
index 1d57ad17a..d2c7dd090 100644
--- a/src/llama_stack/distributions/nvidia/run-with-safety.yaml
+++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
@@ -16,9 +16,8 @@ providers:
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/run.yaml
index 8c50b8bfb..c267587c7 100644
--- a/src/llama_stack/distributions/nvidia/run.yaml
+++ b/src/llama_stack/distributions/nvidia/run.yaml
@@ -16,9 +16,8 @@ providers:
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
 vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
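The NVIDIA entries above drop `append_api_version` because the version segment now travels inside `base_url` itself. A before/after sketch of the equivalence (function names are illustrative only):

```python
# Illustrative equivalence check for the NVIDIA change.
def old_base_url(url: str, append_api_version: bool = True) -> str:
    # Pre-change behavior: the adapter appended /v1 unless told not to.
    return f"{url}/v1" if append_api_version else url

def new_base_url(base_url: str) -> str:
    # Post-change behavior: the configured URL is used verbatim.
    return base_url

assert old_base_url("https://integrate.api.nvidia.com") == new_base_url(
    "https://integrate.api.nvidia.com/v1"
)
# Self-hosted NIMs that previously set append_api_version: false now simply
# configure base_url without the /v1 suffix.
assert old_base_url("http://nim.local:8000", append_api_version=False) == new_base_url(
    "http://nim.local:8000"
)
```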
diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/run.yaml
index 912e48dd3..7ebc58841 100644
--- a/src/llama_stack/distributions/open-benchmark/run.yaml
+++ b/src/llama_stack/distributions/open-benchmark/run.yaml
@@ -27,12 +27,12 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
 vector_io:
   - provider_id: sqlite-vec
diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/run.yaml
index dd1c2bc7f..049f519cd 100644
--- a/src/llama_stack/distributions/postgres-demo/run.yaml
+++ b/src/llama_stack/distributions/postgres-demo/run.yaml
@@ -11,7 +11,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
index e29ada6f4..75cc9d188 100644
--- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml
index 7149b8659..09c7be5a1 100644
--- a/src/llama_stack/distributions/starter-gpu/run.yaml
+++ b/src/llama_stack/distributions/starter-gpu/run.yaml
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
index 437674bf9..f59c809d2 100644
--- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml
index 0ce392810..435bb22a7 100644
--- a/src/llama_stack/distributions/starter/run.yaml
+++ b/src/llama_stack/distributions/starter/run.yaml
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/run.yaml
index 8456115d2..f8c489fe3 100644
--- a/src/llama_stack/distributions/watsonx/run.yaml
+++ b/src/llama_stack/distributions/watsonx/run.yaml
@@ -15,7 +15,7 @@ providers:
   - provider_id: watsonx
     provider_type: remote::watsonx
     config:
-      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+      base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:=}
       project_id: ${env.WATSONX_PROJECT_ID:=}
 vector_io:
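The provider code below switches these fields from `str` to pydantic's `HttpUrl`, so values are validated when the config loads and adapters must stringify them before handing them to an HTTP client. A minimal sketch of that behavior (field and class names are illustrative):

```python
# Minimal sketch of the str -> HttpUrl switch made in the configs below.
from pydantic import BaseModel, HttpUrl, ValidationError

class Cfg(BaseModel):
    base_url: HttpUrl | None = None

cfg = Cfg(base_url="https://api.fireworks.ai/inference/v1")
assert not isinstance(cfg.base_url, str)  # a URL object, not a plain string
assert str(cfg.base_url) == "https://api.fireworks.ai/inference/v1"

try:
    Cfg(base_url="not a url")  # malformed URLs fail at load time, not request time
except ValidationError as exc:
    print(exc.error_count(), "validation error")
```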
diff --git a/src/llama_stack/providers/remote/inference/azure/azure.py b/src/llama_stack/providers/remote/inference/azure/azure.py
index 134d01b15..c977d75d5 100644
--- a/src/llama_stack/providers/remote/inference/azure/azure.py
+++ b/src/llama_stack/providers/remote/inference/azure/azure.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from urllib.parse import urljoin
-
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import AzureConfig
@@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin):
 
         Returns the Azure API base URL from the configuration.
         """
-        return urljoin(str(self.config.api_base), "/openai/v1")
+        return str(self.config.base_url)
diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
index b801b91b2..f6407a183 100644
--- a/src/llama_stack/providers/remote/inference/azure/config.py
+++ b/src/llama_stack/providers/remote/inference/azure/config.py
@@ -32,8 +32,9 @@ class AzureProviderDataValidator(BaseModel):
 
 @json_schema_type
 class AzureConfig(RemoteInferenceProviderConfig):
-    api_base: HttpUrl = Field(
-        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)",
     )
     api_version: str | None = Field(
         default_factory=lambda: os.getenv("AZURE_API_VERSION"),
@@ -48,14 +49,14 @@ class AzureConfig(RemoteInferenceProviderConfig):
     def sample_run_config(
         cls,
         api_key: str = "${env.AZURE_API_KEY:=}",
-        api_base: str = "${env.AZURE_API_BASE:=}",
+        base_url: str = "${env.AZURE_API_BASE:=}",
         api_version: str = "${env.AZURE_API_VERSION:=}",
         api_type: str = "${env.AZURE_API_TYPE:=}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
             "api_key": api_key,
-            "api_base": api_base,
+            "base_url": base_url,
             "api_version": api_version,
             "api_type": api_type,
         }
diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 680431e22..23c27df1e 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from urllib.parse import urljoin
-
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
@@ -21,7 +19,7 @@ class CerebrasInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "cerebras_api_key"
 
     def get_base_url(self) -> str:
-        return urljoin(self.config.base_url, "v1")
+        return str(self.config.base_url)
 
     async def openai_embeddings(
         self,
diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
index db357fd1c..ea88abbea 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/config.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/config.py
@@ -7,12 +7,12 @@
 import os
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
 
-DEFAULT_BASE_URL = "https://api.cerebras.ai"
+DEFAULT_BASE_URL = "https://api.cerebras.ai/v1"
 
 
 class CerebrasProviderDataValidator(BaseModel):
@@ -24,8 +24,8 @@ class CerebrasProviderDataValidator(BaseModel):
 
 @json_schema_type
 class CerebrasImplConfig(RemoteInferenceProviderConfig):
-    base_url: str = Field(
-        default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)),
         description="Base URL for the Cerebras API",
     )
diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
index bd409fa13..44cb862f9 100644
--- a/src/llama_stack/providers/remote/inference/databricks/config.py
+++ b/src/llama_stack/providers/remote/inference/databricks/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,9 +21,9 @@ class DatabricksProviderDataValidator(BaseModel):
 
 @json_schema_type
 class DatabricksImplConfig(RemoteInferenceProviderConfig):
-    url: str | None = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
-        description="The URL for the Databricks model serving endpoint",
+        description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)",
     )
     auth_credential: SecretStr | None = Field(
         default=None,
@@ -34,11 +34,11 @@ class DatabricksImplConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.DATABRICKS_HOST:=}",
+        base_url: str = "${env.DATABRICKS_HOST:=}",
         api_token: str = "${env.DATABRICKS_TOKEN:=}",
         **kwargs: Any,
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_token": api_token,
         }
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
index c07d97b67..f2f8832f6 100644
--- a/src/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -29,15 +29,17 @@
     }
 
     def get_base_url(self) -> str:
-        return f"{self.config.url}/serving-endpoints"
+        return str(self.config.base_url)
 
     async def list_provider_model_ids(self) -> Iterable[str]:
         # Filter out None values from endpoint names
        api_token = self._get_api_key_from_config_or_provider_data()
+        # WorkspaceClient expects the workspace host without the /serving-endpoints suffix
+        host = str(self.config.base_url).removesuffix("/serving-endpoints")
         return [
             endpoint.name  # type: ignore[misc]
             for endpoint in WorkspaceClient(
-                host=self.config.url, token=api_token
+                host=host, token=api_token
             ).serving_endpoints.list()  # TODO: this is not async
         ]
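The Fireworks and Groq adapters below previously hard-coded the upstream URL or appended `/openai/v1` in code, silently ignoring overrides; with a fully versioned `base_url` they now just stringify the config value. A small illustrative check (class names are assumed, not the stack's real ones):

```python
# Sketch of why the hard-coded URLs had to go: the adapter now returns
# whatever base_url the config carries.
from pydantic import BaseModel, HttpUrl

class FireworksCfg(BaseModel):
    base_url: HttpUrl | None = HttpUrl("https://api.fireworks.ai/inference/v1")

class Adapter:
    def __init__(self, config: FireworksCfg) -> None:
        self.config = config

    def get_base_url(self) -> str:
        return str(self.config.base_url)

# Pointing base_url at a local proxy is honored instead of being overridden.
adapter = Adapter(FireworksCfg(base_url="http://localhost:8080/v1"))
assert adapter.get_base_url() == "http://localhost:8080/v1"
```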
diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
index e36c76054..c59b5f270 100644
--- a/src/llama_stack/providers/remote/inference/fireworks/config.py
+++ b/src/llama_stack/providers/remote/inference/fireworks/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import Field
+from pydantic import Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type
 
 @json_schema_type
 class FireworksImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.fireworks.ai/inference/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.fireworks.ai/inference/v1"),
         description="The URL for the Fireworks server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.fireworks.ai/inference/v1",
+            "base_url": "https://api.fireworks.ai/inference/v1",
             "api_key": api_key,
         }
diff --git a/src/llama_stack/providers/remote/inference/fireworks/fireworks.py b/src/llama_stack/providers/remote/inference/fireworks/fireworks.py
index 7e2b73546..61ea0b1f6 100644
--- a/src/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/src/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -24,4 +24,4 @@ class FireworksInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "fireworks_api_key"
 
     def get_base_url(self) -> str:
-        return "https://api.fireworks.ai/inference/v1"
+        return str(self.config.base_url)
diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py
index cca53a4e8..e5c29c271 100644
--- a/src/llama_stack/providers/remote/inference/groq/config.py
+++ b/src/llama_stack/providers/remote/inference/groq/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,14 +21,14 @@ class GroqProviderDataValidator(BaseModel):
 
 @json_schema_type
 class GroqConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.groq.com",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.groq.com/openai/v1"),
         description="The URL for the Groq AI server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.groq.com",
+            "base_url": "https://api.groq.com/openai/v1",
             "api_key": api_key,
         }
diff --git a/src/llama_stack/providers/remote/inference/groq/groq.py b/src/llama_stack/providers/remote/inference/groq/groq.py
index 3a4f2626d..f99de91ca 100644
--- a/src/llama_stack/providers/remote/inference/groq/groq.py
+++ b/src/llama_stack/providers/remote/inference/groq/groq.py
@@ -15,4 +15,4 @@ class GroqInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "groq_api_key"
 
     def get_base_url(self) -> str:
-        return f"{self.config.url}/openai/v1"
+        return str(self.config.base_url)
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
index ded210d89..a0f80d969 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,14 +21,14 @@ class LlamaProviderDataValidator(BaseModel):
 
 @json_schema_type
 class LlamaCompatConfig(RemoteInferenceProviderConfig):
-    openai_compat_api_base: str = Field(
-        default="https://api.llama.com/compat/v1/",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.llama.com/compat/v1/"),
         description="The URL for the Llama API server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
         return {
-            "openai_compat_api_base": "https://api.llama.com/compat/v1/",
+            "base_url": "https://api.llama.com/compat/v1/",
             "api_key": api_key,
         }
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index a5f67ecd1..f29aebf36 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -31,7 +31,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
 
         :return: The Llama API base URL
         """
-        return self.config.openai_compat_api_base
+        return str(self.config.base_url)
 
     async def openai_completion(
         self,
diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py
index e5b0c6b73..e1e9a0ea9 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/config.py
@@ -7,7 +7,7 @@
 import os
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -44,18 +44,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     URL of your running NVIDIA NIM and do not need to set the api_key.
     """
 
-    url: str = Field(
-        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"),
+    base_url: HttpUrl | None = Field(
+        default_factory=lambda: HttpUrl(os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")),
         description="A base url for accessing the NVIDIA NIM",
     )
     timeout: int = Field(
         default=60,
         description="Timeout for the HTTP requests",
     )
-    append_api_version: bool = Field(
-        default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
-        description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
-    )
     rerank_model_to_url: dict[str, str] = Field(
         default_factory=lambda: {
             "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
@@ -68,13 +64,11 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+        base_url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}",
         api_key: str = "${env.NVIDIA_API_KEY:=}",
-        append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_key": api_key,
-            "append_api_version": append_api_version,
         }
diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 17f8775bf..5d0d52d6a 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -44,7 +44,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
     }
 
     async def initialize(self) -> None:
-        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
+        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...")
 
         if _is_nvidia_hosted(self.config):
             if not self.config.auth_credential:
@@ -72,7 +72,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
 
         :return: The NVIDIA API base URL
         """
-        return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
+        return str(self.config.base_url)
 
     async def list_provider_model_ids(self) -> Iterable[str]:
         """
diff --git a/src/llama_stack/providers/remote/inference/nvidia/utils.py b/src/llama_stack/providers/remote/inference/nvidia/utils.py
index 46ee939d9..c138d1fc5 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/utils.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/utils.py
@@ -8,4 +8,4 @@ from . import NVIDIAConfig
 
 
 def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
-    return "integrate.api.nvidia.com" in config.url
+    return "integrate.api.nvidia.com" in str(config.base_url)
diff --git a/src/llama_stack/providers/remote/inference/ollama/config.py b/src/llama_stack/providers/remote/inference/ollama/config.py
index 416b847a0..60dd34fa8 100644
--- a/src/llama_stack/providers/remote/inference/ollama/config.py
+++ b/src/llama_stack/providers/remote/inference/ollama/config.py
@@ -6,20 +6,22 @@
 
 from typing import Any
 
-from pydantic import Field, SecretStr
+from pydantic import Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 
-DEFAULT_OLLAMA_URL = "http://localhost:11434"
+DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"
 
 
 class OllamaImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)
-    url: str = DEFAULT_OLLAMA_URL
+    base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL))
 
     @classmethod
-    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
+    def sample_run_config(
+        cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs
+    ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
         }
diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
index d1bf85361..e8b872384 100644
--- a/src/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -55,17 +55,19 @@ class OllamaInferenceAdapter(OpenAIMixin):
         # ollama client attaches itself to the current event loop (sadly?)
         loop = asyncio.get_running_loop()
         if loop not in self._clients:
-            self._clients[loop] = AsyncOllamaClient(host=self.config.url)
+            # The native Ollama client expects the bare host, without the /v1 suffix
+            host = str(self.config.base_url).removesuffix("/v1")
+            self._clients[loop] = AsyncOllamaClient(host=host)
         return self._clients[loop]
 
     def get_api_key(self):
         return "NO KEY REQUIRED"
 
     def get_base_url(self):
-        return self.config.url.rstrip("/") + "/v1"
+        return str(self.config.base_url)
 
     async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
+        logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...")
         r = await self.health()
         if r["status"] == HealthStatus.ERROR:
             logger.warning(
diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py
index ab28e571f..2057cd0d6 100644
--- a/src/llama_stack/providers/remote/inference/openai/config.py
+++ b/src/llama_stack/providers/remote/inference/openai/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel):
 
 @json_schema_type
 class OpenAIConfig(RemoteInferenceProviderConfig):
-    base_url: str = Field(
-        default="https://api.openai.com/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.openai.com/v1"),
         description="Base URL for OpenAI API",
     )
diff --git a/src/llama_stack/providers/remote/inference/openai/openai.py b/src/llama_stack/providers/remote/inference/openai/openai.py
index 52bc48f1a..2d465546a 100644
--- a/src/llama_stack/providers/remote/inference/openai/openai.py
+++ b/src/llama_stack/providers/remote/inference/openai/openai.py
@@ -35,4 +35,4 @@ class OpenAIInferenceAdapter(OpenAIMixin):
 
         Returns the OpenAI API base URL from the configuration.
         """
-        return self.config.base_url
+        return str(self.config.base_url)
diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
index 54508b6fb..f45806e79 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/config.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import Field
+from pydantic import Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -14,16 +14,16 @@ from llama_stack_api import json_schema_type
 
 @json_schema_type
 class PassthroughImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the passthrough endpoint",
     )
 
     @classmethod
     def sample_run_config(
-        cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
+        cls, base_url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_key": api_key,
         }
diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 75eedf026..b0e2e74ad 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -82,8 +82,8 @@ class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
 
     def _get_passthrough_url(self) -> str:
         """Get the passthrough URL from config or provider data."""
-        if self.config.url is not None:
-            return self.config.url
+        if self.config.base_url is not None:
+            return str(self.config.base_url)
 
         provider_data = self.get_request_provider_data()
         if provider_data is None:
diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py
index 2ee56ca94..8d06f5263 100644
--- a/src/llama_stack/providers/remote/inference/runpod/config.py
+++ b/src/llama_stack/providers/remote/inference/runpod/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,7 +21,7 @@ class RunpodProviderDataValidator(BaseModel):
 
 @json_schema_type
 class RunpodImplConfig(RemoteInferenceProviderConfig):
-    url: str | None = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the Runpod model serving endpoint",
     )
@@ -34,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "url": "${env.RUNPOD_URL:=}",
+            "base_url": "${env.RUNPOD_URL:=}",
             "api_token": "${env.RUNPOD_API_TOKEN}",
         }
diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
index 9c770cc24..04ad12851 100644
--- a/src/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -28,7 +28,7 @@ class RunpodInferenceAdapter(OpenAIMixin):
 
     def get_base_url(self) -> str:
         """Get base URL for OpenAI client."""
-        return self.config.url
+        return str(self.config.base_url)
 
     async def openai_chat_completion(
         self,
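The passthrough adapter above keeps its fallback order: an explicit `base_url` in config wins, otherwise per-request provider data supplies the endpoint. A condensed sketch of that logic (the provider-data key name here is an assumption for illustration):

```python
# Illustrative fallback order for the passthrough endpoint.
from typing import Any

def passthrough_url(config_base_url: str | None, provider_data: dict[str, Any] | None) -> str:
    if config_base_url is not None:
        return config_base_url  # explicit config wins
    if provider_data and provider_data.get("passthrough_url"):
        return provider_data["passthrough_url"]  # per-request override
    raise ValueError("No passthrough endpoint configured")

assert passthrough_url("http://stack-a:8321", None) == "http://stack-a:8321"
assert passthrough_url(None, {"passthrough_url": "http://stack-b:8321"}) == "http://stack-b:8321"
```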
diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
index 9c770cc24..04ad12851 100644
--- a/src/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -28,7 +28,7 @@ class RunpodInferenceAdapter(OpenAIMixin):

     def get_base_url(self) -> str:
         """Get base URL for OpenAI client."""
-        return self.config.url
+        return str(self.config.base_url)

     async def openai_chat_completion(
         self,
diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py
index 93679ba99..79cda75a0 100644
--- a/src/llama_stack/providers/remote/inference/sambanova/config.py
+++ b/src/llama_stack/providers/remote/inference/sambanova/config.py
@@ -6,7 +6,7 @@
 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel):

 @json_schema_type
 class SambaNovaImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.sambanova.ai/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.sambanova.ai/v1"),
         description="The URL for the SambaNova AI server",
     )

     @classmethod
     def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.sambanova.ai/v1",
+            "base_url": "https://api.sambanova.ai/v1",
             "api_key": api_key,
         }
diff --git a/src/llama_stack/providers/remote/inference/sambanova/sambanova.py b/src/llama_stack/providers/remote/inference/sambanova/sambanova.py
index daa4b1670..cb01e3a90 100644
--- a/src/llama_stack/providers/remote/inference/sambanova/sambanova.py
+++ b/src/llama_stack/providers/remote/inference/sambanova/sambanova.py
@@ -25,4 +25,4 @@ class SambaNovaInferenceAdapter(OpenAIMixin):

         :return: The SambaNova base URL
         """
-        return self.config.url
+        return str(self.config.base_url)
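One practical effect of tightening `url: str` to `base_url: HttpUrl | None`, shown here with a throwaway pydantic model rather than a llama-stack class: malformed endpoints now fail at config-validation time instead of on the first request.

```python
from pydantic import BaseModel, HttpUrl, ValidationError


class Cfg(BaseModel):  # stand-in for a provider config
    base_url: HttpUrl | None = None


Cfg(base_url="https://api.sambanova.ai/v1")  # accepted
try:
    Cfg(base_url="api.sambanova.ai")  # missing scheme
except ValidationError as e:
    print(e.errors()[0]["type"])  # "url_parsing" in pydantic v2
```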
diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
index 74edc8523..44cb4b812 100644
--- a/src/llama_stack/providers/remote/inference/tgi/config.py
+++ b/src/llama_stack/providers/remote/inference/tgi/config.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.

-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -15,18 +15,19 @@ from llama_stack_api import json_schema_type

 class TGIImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)
-    url: str = Field(
-        description="The URL for the TGI serving endpoint",
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="The URL for the TGI serving endpoint (should include /v1 path)",
     )

     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.TGI_URL:=}",
+        base_url: str = "${env.TGI_URL:=}",
         **kwargs,
     ):
         return {
-            "url": url,
+            "base_url": base_url,
         }
diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
index dd47ccc62..5dc8c33f7 100644
--- a/src/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -8,7 +8,7 @@
 from collections.abc import Iterable

 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
+from pydantic import HttpUrl, SecretStr

 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -23,7 +23,7 @@ log = get_logger(name=__name__, category="inference::tgi")

 class _HfAdapter(OpenAIMixin):
-    url: str
+    base_url: HttpUrl
     api_key: SecretStr

     hf_client: AsyncInferenceClient
@@ -36,7 +36,7 @@ class _HfAdapter(OpenAIMixin):
         return "NO KEY REQUIRED"

     def get_base_url(self):
-        return self.url
+        return str(self.base_url)

     async def list_provider_model_ids(self) -> Iterable[str]:
         return [self.model_id]
@@ -50,14 +50,20 @@ class _HfAdapter(OpenAIMixin):

 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
-        if not config.url:
+        if not config.base_url:
             raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")

-        log.info(f"Initializing TGI client with url={config.url}")
-        self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference")
+        log.info(f"Initializing TGI client with url={config.base_url}")
+        # Extract base URL without /v1 for HF client initialization
+        base_url_str = str(config.base_url).rstrip("/")
+        if base_url_str.endswith("/v1"):
+            base_url_for_client = base_url_str[:-3]
+        else:
+            base_url_for_client = base_url_str
+        self.hf_client = AsyncInferenceClient(model=base_url_for_client, provider="hf-inference")
         endpoint_info = await self.hf_client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]
         self.model_id = endpoint_info["model_id"]
-        self.url = f"{config.url.rstrip('/')}/v1"
+        self.base_url = config.base_url
         self.api_key = SecretStr("NO_KEY")
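The TGI hunk strips `/v1` only after `rstrip("/")`, so a trailing slash does not defeat the suffix check. A few REPL-style checks of that normalization (illustrative endpoint values):

```python
for raw in ("http://tgi:8080/v1", "http://tgi:8080/v1/", "http://tgi:8080"):
    s = raw.rstrip("/")
    host = s[: -len("/v1")] if s.endswith("/v1") else s
    print(f"{raw} -> {host}")
# http://tgi:8080/v1  -> http://tgi:8080
# http://tgi:8080/v1/ -> http://tgi:8080
# http://tgi:8080     -> http://tgi:8080
```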
diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py
index c1b3c4a55..16f0686ba 100644
--- a/src/llama_stack/providers/remote/inference/together/config.py
+++ b/src/llama_stack/providers/remote/inference/together/config.py
@@ -6,7 +6,7 @@
 from typing import Any

-from pydantic import Field
+from pydantic import Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type

 @json_schema_type
 class TogetherImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.together.xyz/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.together.xyz/v1"),
         description="The URL for the Together AI server",
     )

     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.together.xyz/v1",
+            "base_url": "https://api.together.xyz/v1",
             "api_key": "${env.TOGETHER_API_KEY:=}",
         }
diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
index cd34aec5e..0826dbcd2 100644
--- a/src/llama_stack/providers/remote/inference/together/together.py
+++ b/src/llama_stack/providers/remote/inference/together/together.py
@@ -9,7 +9,6 @@ from collections.abc import Iterable
 from typing import Any, cast

 from together import AsyncTogether  # type: ignore[import-untyped]
-from together.constants import BASE_URL  # type: ignore[import-untyped]

 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
@@ -42,7 +41,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
     provider_data_api_key_field: str = "together_api_key"

     def get_base_url(self):
-        return BASE_URL
+        return str(self.config.base_url)

     def _get_client(self) -> AsyncTogether:
         together_api_key = None
diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py
index c43533ee4..db6c74431 100644
--- a/src/llama_stack/providers/remote/inference/vllm/config.py
+++ b/src/llama_stack/providers/remote/inference/vllm/config.py
@@ -6,7 +6,7 @@
 from pathlib import Path

-from pydantic import Field, SecretStr, field_validator
+from pydantic import Field, HttpUrl, SecretStr, field_validator

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -14,7 +14,7 @@ from llama_stack_api import json_schema_type

 @json_schema_type
 class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
-    url: str | None = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the vLLM model serving endpoint",
     )
@@ -48,11 +48,11 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.VLLM_URL:=}",
+        base_url: str = "${env.VLLM_URL:=}",
         **kwargs,
     ):
         return {
-            "url": url,
+            "base_url": base_url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
             "api_token": "${env.VLLM_API_TOKEN:=fake}",
             "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py
index 1510e9384..6664ca36b 100644
--- a/src/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -39,12 +39,12 @@ class VLLMInferenceAdapter(OpenAIMixin):

     def get_base_url(self) -> str:
         """Get the base URL from config."""
-        if not self.config.url:
+        if not self.config.base_url:
             raise ValueError("No base URL configured")
-        return self.config.url
+        return str(self.config.base_url)

     async def initialize(self) -> None:
-        if not self.config.url:
+        if not self.config.base_url:
             raise ValueError(
                 "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
             )
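The sample configs above keep `${env.VAR:=default}` placeholders as plain strings, which the stack resolves when the run config is loaded. A hedged sketch of those semantics (the real resolver lives elsewhere in llama-stack and is more featureful; this only illustrates the empty-default case):

```python
import os
import re


def resolve(value: str) -> str | None:
    """Resolve a '${env.VAR:=default}' placeholder; None when both are empty."""
    m = re.fullmatch(r"\$\{env\.(\w+):=(.*)\}", value)
    if not m:
        return value
    return os.getenv(m.group(1)) or m.group(2) or None


print(resolve("${env.VLLM_URL:=}"))  # None when VLLM_URL is unset -> base_url stays None
```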
diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py
index 914f80820..be2b2c0ab 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/config.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/config.py
@@ -7,7 +7,7 @@
 import os
 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -23,7 +23,7 @@ class WatsonXProviderDataValidator(BaseModel):

 @json_schema_type
 class WatsonXConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
+    base_url: HttpUrl | None = Field(
         default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
         description="A base url for accessing the watsonx.ai",
     )
@@ -39,7 +39,7 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
+            "base_url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
             "api_key": "${env.WATSONX_API_KEY:=}",
             "project_id": "${env.WATSONX_PROJECT_ID:=}",
         }
diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
index aab9e2dca..5684f6c17 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -255,7 +255,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )

     def get_base_url(self) -> str:
-        return self.config.url
+        return str(self.config.base_url)

     # Copied from OpenAIMixin
     async def check_model_availability(self, model: str) -> bool:
@@ -316,7 +316,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         """
         Retrieves foundation model specifications from the watsonx.ai API.
         """
-        url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25"
+        url = f"{str(self.config.base_url).rstrip('/')}/ml/v1/foundation_model_specs?version=2023-10-25"
         headers = {
             # Note that there is no authorization header. Listing models does not require authentication.
             "Content-Type": "application/json",
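The `rstrip('/')` in the watsonx hunk matters because pydantic v2 normalizes a bare host to carry a trailing slash; composing the path naively would yield a double slash:

```python
from pydantic import HttpUrl

base = HttpUrl("https://us-south.ml.cloud.ibm.com")
print(str(base))  # 'https://us-south.ml.cloud.ibm.com/' -- note the trailing slash
print(f"{str(base).rstrip('/')}/ml/v1/foundation_model_specs?version=2023-10-25")
```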
"Content-Type": "application/json", diff --git a/tests/integration/suites.py b/tests/integration/suites.py index 7689657b4..10c872705 100644 --- a/tests/integration/suites.py +++ b/tests/integration/suites.py @@ -50,7 +50,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { name="ollama", description="Local Ollama provider with text + safety models", env={ - "OLLAMA_URL": "http://0.0.0.0:11434", + "OLLAMA_URL": "http://0.0.0.0:11434/v1", "SAFETY_MODEL": "ollama/llama-guard3:1b", }, defaults={ @@ -64,7 +64,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { name="ollama", description="Local Ollama provider with a vision model", env={ - "OLLAMA_URL": "http://0.0.0.0:11434", + "OLLAMA_URL": "http://0.0.0.0:11434/v1", }, defaults={ "vision_model": "ollama/llama3.2-vision:11b", @@ -75,7 +75,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { name="ollama-postgres", description="Server-mode tests with Postgres-backed persistence", env={ - "OLLAMA_URL": "http://0.0.0.0:11434", + "OLLAMA_URL": "http://0.0.0.0:11434/v1", "SAFETY_MODEL": "ollama/llama-guard3:1b", "POSTGRES_HOST": "127.0.0.1", "POSTGRES_PORT": "5432", diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py index aa3a2c77a..6ddf790af 100644 --- a/tests/unit/providers/inference/test_inference_client_caching.py +++ b/tests/unit/providers/inference/test_inference_client_caching.py @@ -120,7 +120,7 @@ from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInfere VLLMInferenceAdapter, "llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator", { - "url": "http://fake", + "base_url": "http://fake", }, ), ], @@ -153,7 +153,7 @@ def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_valid """Validate data for LiteLLM-based providers. Similar to test_openai_provider_data_used, but without the assumption that there is an OpenAI-compatible client object.""" - inference_adapter = adapter_cls(config=config_cls()) + inference_adapter = adapter_cls(config=config_cls(base_url="http://fake")) inference_adapter.__provider_spec__ = MagicMock() inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index 958895cc4..0cf8ed306 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -40,7 +40,7 @@ from llama_stack_api import ( @pytest.fixture(scope="function") async def vllm_inference_adapter(): - config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345") + config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345") inference_adapter = VLLMInferenceAdapter(config=config) inference_adapter.model_store = AsyncMock() await inference_adapter.initialize() @@ -204,7 +204,7 @@ async def test_vllm_completion_extra_body(): via extra_body to the underlying OpenAI client through the InferenceRouter. """ # Set up the vLLM adapter - config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345") + config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345") vllm_adapter = VLLMInferenceAdapter(config=config) vllm_adapter.__provider_id__ = "vllm" await vllm_adapter.initialize() @@ -277,7 +277,7 @@ async def test_vllm_chat_completion_extra_body(): via extra_body to the underlying OpenAI client through the InferenceRouter for chat completion. 
""" # Set up the vLLM adapter - config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345") + config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345") vllm_adapter = VLLMInferenceAdapter(config=config) vllm_adapter.__provider_id__ = "vllm" await vllm_adapter.initialize() diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index ee62910b8..4ad9dc766 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -146,7 +146,7 @@ async def test_hosted_model_not_in_endpoint_mapping(): async def test_self_hosted_ignores_endpoint(): adapter = create_adapter( - config=NVIDIAConfig(url="http://localhost:8000", api_key=None), + config=NVIDIAConfig(base_url="http://localhost:8000", api_key=None), rerank_endpoints={"test-model": "https://model.endpoint/rerank"}, # This should be ignored for self-hosted. ) mock_session = MockSession(MockResponse()) diff --git a/tests/unit/providers/test_configs.py b/tests/unit/providers/test_configs.py index 867cfffbc..b4ba78394 100644 --- a/tests/unit/providers/test_configs.py +++ b/tests/unit/providers/test_configs.py @@ -4,8 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import get_args, get_origin + import pytest -from pydantic import BaseModel +from pydantic import BaseModel, HttpUrl from llama_stack.core.distribution import get_provider_registry, providable_apis from llama_stack.core.utils.dynamic import instantiate_class_type @@ -41,3 +43,55 @@ class TestProviderConfigurations: sample_config = config_type.sample_run_config(__distro_dir__="foobarbaz") assert isinstance(sample_config, dict), f"{config_class_name}.sample_run_config() did not return a dict" + + def test_remote_inference_url_standardization(self): + """Verify all remote inference providers use standardized base_url configuration.""" + provider_registry = get_provider_registry() + inference_providers = provider_registry.get("inference", {}) + + # Filter for remote providers only + remote_providers = {k: v for k, v in inference_providers.items() if k.startswith("remote::")} + + failures = [] + for provider_type, provider_spec in remote_providers.items(): + try: + config_class_name = provider_spec.config_class + config_type = instantiate_class_type(config_class_name) + + # Check that config has base_url field (not url) + if hasattr(config_type, "model_fields"): + fields = config_type.model_fields + + # Should NOT have 'url' field (old pattern) + if "url" in fields: + failures.append( + f"{provider_type}: Uses deprecated 'url' field instead of 'base_url'. " + f"Please rename to 'base_url' for consistency." + ) + + # Should have 'base_url' field with HttpUrl | None type + if "base_url" in fields: + field_info = fields["base_url"] + annotation = field_info.annotation + + # Check if it's HttpUrl or HttpUrl | None + # get_origin() returns Union for (X | Y), None for plain types + # get_args() returns the types inside Union, e.g. (HttpUrl, NoneType) + is_valid = False + if get_origin(annotation) is not None: # It's a Union/Optional + if HttpUrl in get_args(annotation): + is_valid = True + elif annotation == HttpUrl: # Plain HttpUrl without | None + is_valid = True + + if not is_valid: + failures.append( + f"{provider_type}: base_url field has incorrect type annotation. 
" + f"Expected 'HttpUrl | None', got '{annotation}'" + ) + + except Exception as e: + failures.append(f"{provider_type}: Error checking URL standardization: {str(e)}") + + if failures: + pytest.fail("URL standardization violations found:\n" + "\n".join(f" - {f}" for f in failures))