feat: support passing "extra body" through to providers

# What does this PR do?
Allows passing through extra_body parameters to inference providers.


closes #2720

## Test Plan
CI, plus a newly added test.
This commit is contained in:
Eric Huang 2025-10-10 14:41:09 -07:00
parent cb7fb0705b
commit 70d341c385
18 changed files with 1725 additions and 93 deletions

View file

@ -0,0 +1,54 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "I am feeling really sad today.",
"stream": false,
"extra_body": {
"guided_choices": [
"joy",
"sadness"
]
}
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "rec-e3727f6c749a",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " I feel that I am not good enough, and I feel like I have no",
"stop_reason": null,
"prompt_logprobs": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 7,
"total_tokens": 23,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,54 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "I am feeling really sad today.",
"stream": false,
"extra_body": {
"guided_choice": [
"joy",
"sadness"
]
}
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "rec-f02f1bfd75ad",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "sadness",
"stop_reason": null,
"prompt_logprobs": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 3,
"prompt_tokens": 7,
"total_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -223,7 +223,7 @@ def test_openai_completion_guided_choice(llama_stack_client, client_with_models,
model=text_model_id,
prompt=prompt,
stream=False,
guided_choice=["joy", "sadness"],
extra_body={"guided_choice": ["joy", "sadness"]},
)
assert len(response.choices) > 0
choice = response.choices[0]