kind of working

This commit is contained in:
Kai Wu 2025-07-31 15:19:46 -07:00
parent b63982ef00
commit 3c24be8273
6 changed files with 42 additions and 19 deletions

View file

@@ -15,14 +15,14 @@ data:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=http://localhost:8000/v1}
url: ${env.VLLM_URL:=http://localhost:8001/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: nvidia
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=http://localhost:8001/v1}
url: ${env.NVIDIA_BASE_URL:=http://localhost:8000/v1}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: sentence-transformers