mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-09 13:14:39 +00:00
base: 35 RPS; safety, 75 RPS
# What does this PR do? ## Test Plan
This commit is contained in:
parent
faf891b40c
commit
c3fa3e6333
5 changed files with 41 additions and 4 deletions
|
@@ -0,0 +1,19 @@
version: '2'
image_name: perf-test-demo
apis:
- inference
providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:=http://localhost:8001/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=false}
models:
- model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  model_type: llm
server:
  port: 8322
Loading…
Add table
Add a link
Reference in a new issue