featu: support passing "extra body" throught to providers

# What does this PR do? Allows passing through extra_body parameters to inference providers. closes #2720 ## Test Plan CI and added new test
2025-12-06 10:37:22 +00:00 · 2025-10-10 15:05:22 -07:00 · 2025-10-10 15:05:22 -07:00 · dbaaeea255
commit dbaaeea255
parent cb7fb0705b
21 changed files with 1799 additions and 96 deletions
--- a/tests/integration/inference/recordings/d2ba309413e85d6166f7543a879b890b4e65a5f9917a2d75c5795782ab7cbfff.json
+++ b/tests/integration/inference/recordings/d2ba309413e85d6166f7543a879b890b4e65a5f9917a2d75c5795782ab7cbfff.json
@ -0,0 +1,48 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "prompt": "I am feeling really sad today.",
+      "stream": false
+    },
+    "endpoint": "/v1/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "rec-d2ba309413e8",
+        "choices": [
+          {
+            "finish_reason": "length",
+            "index": 0,
+            "logprobs": null,
+            "text": " I have been working on a project that I feel like I'm not doing well",
+            "stop_reason": null,
+            "prompt_logprobs": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "text_completion",
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 16,
+          "prompt_tokens": 7,
+          "total_tokens": 23,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "service_tier": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
--- a/tests/integration/inference/recordings/e3727f6c749ab8bdee2f581300092002485023b937d72b7aa8d4c15c9204fc5c.json
+++ b/tests/integration/inference/recordings/e3727f6c749ab8bdee2f581300092002485023b937d72b7aa8d4c15c9204fc5c.json
@ -0,0 +1,54 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "prompt": "I am feeling really sad today.",
+      "stream": false,
+      "extra_body": {
+        "guided_choices": [
+          "joy",
+          "sadness"
+        ]
+      }
+    },
+    "endpoint": "/v1/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "rec-e3727f6c749a",
+        "choices": [
+          {
+            "finish_reason": "length",
+            "index": 0,
+            "logprobs": null,
+            "text": " I feel that I am not good enough, and I feel like I have no",
+            "stop_reason": null,
+            "prompt_logprobs": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "text_completion",
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 16,
+          "prompt_tokens": 7,
+          "total_tokens": 23,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "service_tier": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
--- a/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json
+++ b/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json
@ -0,0 +1,54 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "prompt": "I am feeling really sad today.",
+      "stream": false,
+      "extra_body": {
+        "guided_choice": [
+          "joy",
+          "sadness"
+        ]
+      }
+    },
+    "endpoint": "/v1/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "rec-f02f1bfd75ad",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "text": "sadness",
+            "stop_reason": null,
+            "prompt_logprobs": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "text_completion",
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 3,
+          "prompt_tokens": 7,
+          "total_tokens": 10,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "service_tier": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -223,7 +223,7 @@ def test_openai_completion_guided_choice(llama_stack_client, client_with_models,
        model=text_model_id,
        prompt=prompt,
        stream=False,
-        guided_choice=["joy", "sadness"],
+        extra_body={"guided_choice": ["joy", "sadness"]},
    )
    assert len(response.choices) > 0
    choice = response.choices[0]