diff --git a/docs/source/providers/inference/remote_vllm.md b/docs/source/providers/inference/remote_vllm.md
index cba7aac52..6c725fb41 100644
--- a/docs/source/providers/inference/remote_vllm.md
+++ b/docs/source/providers/inference/remote_vllm.md
@@ -16,7 +16,7 @@ Remote vLLM inference provider for connecting to vLLM servers.
 ## Sample Configuration
 
 ```yaml
-url: ${env.VLLM_URL:=http://localhost:8000/v1}
+url: ${env.VLLM_URL}
 max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
 api_token: ${env.VLLM_API_TOKEN:=fake}
 tls_verify: ${env.VLLM_TLS_VERIFY:=true}
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index 130a012dd..1c82ff3a8 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -96,7 +96,6 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
 
     def _get_client(self) -> Fireworks:
         fireworks_api_key = self._get_api_key()
-        print(f">>>>>> fireworks_api_key: {fireworks_api_key} <<<<<")
         return Fireworks(api_key=fireworks_api_key)
 
     def _get_openai_client(self) -> AsyncOpenAI:
diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py
index 8be8d5b8b..e11efa7f0 100644
--- a/llama_stack/providers/remote/inference/vllm/config.py
+++ b/llama_stack/providers/remote/inference/vllm/config.py
@@ -46,7 +46,7 @@ class VLLMInferenceAdapterConfig(BaseModel):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.VLLM_URL:=http://localhost:8000/v1}",
+        url: str = "${env.VLLM_URL}",
         **kwargs,
     ):
         return {
diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml
index 37f4e4c0b..01264f1c4 100644
--- a/llama_stack/templates/starter/run.yaml
+++ b/llama_stack/templates/starter/run.yaml
@@ -26,7 +26,7 @@ providers:
   - provider_id: ${env.ENABLE_VLLM:=__disabled__}
     provider_type: remote::vllm
    config:
-      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      url: ${env.VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
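
For reference, a minimal sketch of the `${env.VAR:=default}` substitution semantics this patch relies on. This is an illustrative re-implementation, not llama_stack's actual resolver: the point is that dropping the `:=http://localhost:8000/v1` fallback turns `VLLM_URL` into a required environment variable, while the other keys keep their defaults.

```python
# Hypothetical sketch of ${env.NAME} / ${env.NAME:=default} resolution,
# shown only to illustrate why removing the ":=" default makes VLLM_URL mandatory.
import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+)(?::=(?P<default>[^}]*))?\}")


def resolve_env(value: str) -> str:
    """Replace ${env.NAME} / ${env.NAME:=default} placeholders with env values."""

    def _sub(match: re.Match) -> str:
        name, default = match.group("name"), match.group("default")
        env_value = os.environ.get(name)
        if env_value is not None:
            return env_value
        if default is not None:
            return default
        raise ValueError(f"environment variable {name} is required but not set")

    return _ENV_PATTERN.sub(_sub, value)


# With the default removed, resolution fails unless VLLM_URL is exported:
#   resolve_env("${env.VLLM_URL}")                            -> raises ValueError
#   resolve_env("${env.VLLM_URL:=http://localhost:8000/v1}")  -> "http://localhost:8000/v1"
```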