featu: support passing "extra body" throught to providers

# What does this PR do?
Allows passing through extra_body parameters to inference providers.


closes #2720

## Test Plan
CI and added new test
This commit is contained in:
Eric Huang 2025-10-10 14:36:51 -07:00
parent cb7fb0705b
commit aa34b11b50
16 changed files with 1670 additions and 92 deletions

View file

@ -0,0 +1,881 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-0613",
"created": 1686588896,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4",
"created": 1687882411,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo",
"created": 1677610602,
"object": "model",
"owned_by": "openai"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "sora-2-pro",
"created": 1759708663,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-audio-mini-2025-10-06",
"created": 1759512137,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-realtime-mini",
"created": 1759517133,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-realtime-mini-2025-10-06",
"created": 1759517175,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "sora-2",
"created": 1759708615,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "davinci-002",
"created": 1692634301,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "babbage-002",
"created": 1692634615,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-instruct",
"created": 1692901427,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-instruct-0914",
"created": 1694122472,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "dall-e-3",
"created": 1698785189,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "dall-e-2",
"created": 1698798177,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-1106-preview",
"created": 1698957206,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-1106",
"created": 1698959748,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-hd",
"created": 1699046015,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-1106",
"created": 1699053241,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1-hd-1106",
"created": 1699053533,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-3-small",
"created": 1705948997,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-3-large",
"created": 1705953180,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-0125-preview",
"created": 1706037612,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo-preview",
"created": 1706037777,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-0125",
"created": 1706048358,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo",
"created": 1712361441,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4-turbo-2024-04-09",
"created": 1712601677,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o",
"created": 1715367049,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-05-13",
"created": 1715368132,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-2024-07-18",
"created": 1721172717,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini",
"created": 1721172741,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-08-06",
"created": 1722814719,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "chatgpt-4o-latest",
"created": 1723515131,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-mini-2024-09-12",
"created": 1725648979,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-mini",
"created": 1725649008,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2024-10-01",
"created": 1727131766,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2024-10-01",
"created": 1727389042,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview",
"created": 1727460443,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview",
"created": 1727659998,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "omni-moderation-latest",
"created": 1731689265,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "omni-moderation-2024-09-26",
"created": 1732734466,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2024-12-17",
"created": 1733945430,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2024-12-17",
"created": 1734034239,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-realtime-preview-2024-12-17",
"created": 1734112601,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-audio-preview-2024-12-17",
"created": 1734115920,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-2024-12-17",
"created": 1734326976,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1",
"created": 1734375816,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-realtime-preview",
"created": 1734387380,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-audio-preview",
"created": 1734387424,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-mini",
"created": 1737146383,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-mini-2025-01-31",
"created": 1738010200,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-2024-11-20",
"created": 1739331543,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-search-preview-2025-03-11",
"created": 1741388170,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-search-preview",
"created": 1741388720,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-search-preview-2025-03-11",
"created": 1741390858,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-search-preview",
"created": 1741391161,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-transcribe",
"created": 1742068463,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-transcribe",
"created": 1742068596,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-pro-2025-03-19",
"created": 1742251504,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o1-pro",
"created": 1742251791,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-mini-tts",
"created": 1742403959,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3-2025-04-16",
"created": 1744133301,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-2025-04-16",
"created": 1744133506,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o3",
"created": 1744225308,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini",
"created": 1744225351,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-2025-04-14",
"created": 1744315746,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1",
"created": 1744316542,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-mini-2025-04-14",
"created": 1744317547,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-mini",
"created": 1744318173,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-nano-2025-04-14",
"created": 1744321025,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4.1-nano",
"created": 1744321707,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-image-1",
"created": 1745517030,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "codex-mini-latest",
"created": 1746673257,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-realtime-preview-2025-06-03",
"created": 1748907838,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-4o-audio-preview-2025-06-03",
"created": 1748908498,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-deep-research",
"created": 1749685485,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "o4-mini-deep-research-2025-06-26",
"created": 1750866121,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-chat-latest",
"created": 1754073306,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-2025-08-07",
"created": 1754075360,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5",
"created": 1754425777,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-mini-2025-08-07",
"created": 1754425867,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-mini",
"created": 1754425928,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-nano-2025-08-07",
"created": 1754426303,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-nano",
"created": 1754426384,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-audio-2025-08-28",
"created": 1756256146,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-realtime",
"created": 1756271701,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-realtime-2025-08-28",
"created": 1756271773,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-audio",
"created": 1756339249,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-codex",
"created": 1757527818,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-image-1-mini",
"created": 1758845821,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-pro-2025-10-06",
"created": 1759469707,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-5-pro",
"created": 1759469822,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-audio-mini",
"created": 1759512027,
"object": "model",
"owned_by": "system"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-3.5-turbo-16k",
"created": 1683758102,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "tts-1",
"created": 1681940951,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "whisper-1",
"created": 1677532384,
"object": "model",
"owned_by": "openai-internal"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "text-embedding-ada-002",
"created": 1671217299,
"object": "model",
"owned_by": "openai-internal"
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1760131802,
"object": "model",
"owned_by": "vllm",
"root": "Qwen/Qwen3-0.6B",
"parent": null,
"max_model_len": 4096,
"permission": [
{
"id": "modelperm-b270195ed9424b1786a24780cb1c6c7c",
"object": "model_permission",
"created": 1760131802,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,543 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-dev-fp8",
"created": 1729532889,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-max",
"created": 1750714611,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-pro",
"created": 1750488264,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
"created": 1748467427,
"object": "model",
"owned_by": "sentientfoundation-serverless",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
"created": 1739563474,
"object": "model",
"owned_by": "sentientfoundation",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-120b",
"created": 1754345600,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
"created": 1753124424,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
"created": 1753455434,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3-0324",
"created": 1742827220,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct",
"created": 1752259096,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-20b",
"created": 1754345466,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
"created": 1757018994,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"created": 1733442103,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b",
"created": 1745885249,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5-air",
"created": 1754089426,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1",
"created": 1755758988,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
"created": 1729535376,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
"created": 1721428386,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
"created": 1743878279,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b",
"created": 1745878133,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
"created": 1721287357,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-0528",
"created": 1748456377,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
"created": 1713375508,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 65536
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"created": 1743878495,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
"created": 1743392739,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false,
"context_length": 128000
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
"created": 1758586241,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"created": 1721692808,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
"created": 1753211090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
"created": 1753916446,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-embedding-8b",
"created": 1755707090,
"object": "model",
"owned_by": "fireworks",
"kind": "EMBEDDING_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 40960
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-reranker-8b",
"created": 1759865045,
"object": "model",
"owned_by": "fireworks",
"kind": "EMBEDDING_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 40960
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5",
"created": 1753809636,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
"created": 1754063588,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1",
"created": 1737397673,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3",
"created": 1735576668,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-basic",
"created": 1742306746,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
"created": 1753808388,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
"created": 1743381121,
"object": "model",
"owned_by": "tvergho-87e44d",
"kind": "HF_PEFT_ADDON",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,54 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "I am feeling really sad today.",
"stream": false,
"extra_body": {
"guided_choice": [
"joy",
"sadness"
]
}
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "rec-f02f1bfd75ad",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "sadness",
"stop_reason": null,
"prompt_logprobs": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 3,
"prompt_tokens": 7,
"total_tokens": 10,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -15,9 +15,14 @@ from llama_stack.apis.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionRequest,
OpenAIChoice,
OpenAICompletion,
OpenAICompletionChoice,
OpenAICompletionRequest,
ToolChoice,
)
from llama_stack.apis.models import Model
from llama_stack.core.routers.inference import InferenceRouter
from llama_stack.core.routing_tables.models import ModelsRoutingTable
from llama_stack.providers.datatypes import HealthStatus
from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
@ -191,3 +196,121 @@ async def test_openai_chat_completion_is_async(vllm_inference_adapter):
assert mock_create_client.call_count == 4 # no cheating
assert total_time < (sleep_time * 2), f"Total time taken: {total_time}s exceeded expected max"
async def test_extra_body_forwarding(vllm_inference_adapter):
"""
Test that extra_body parameters (e.g., chat_template_kwargs) are correctly
forwarded to the underlying OpenAI client.
"""
mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm-inference")
vllm_inference_adapter.model_store.get_model.return_value = mock_model
with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_client_property:
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(
return_value=OpenAIChatCompletion(
id="chatcmpl-abc123",
created=1,
model="mock-model",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(
content="test response",
),
finish_reason="stop",
index=0,
)
],
)
)
mock_client_property.return_value = mock_client
# Test with chat_template_kwargs for Granite thinking mode
params = OpenAIChatCompletionRequest(
model="mock-model",
messages=[{"role": "user", "content": "test"}],
stream=False,
chat_template_kwargs={"thinking": True},
)
await vllm_inference_adapter.openai_chat_completion(params)
# Verify that the client was called with extra_body containing chat_template_kwargs
mock_client.chat.completions.create.assert_called_once()
call_kwargs = mock_client.chat.completions.create.call_args.kwargs
assert "extra_body" in call_kwargs
assert "chat_template_kwargs" in call_kwargs["extra_body"]
assert call_kwargs["extra_body"]["chat_template_kwargs"] == {"thinking": True}
async def test_vllm_completion_extra_body():
"""
Test that vLLM-specific guided_choice and prompt_logprobs parameters are correctly forwarded
via extra_body to the underlying OpenAI client through the InferenceRouter.
"""
# Set up the vLLM adapter
config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
vllm_adapter = VLLMInferenceAdapter(config=config)
vllm_adapter.__provider_id__ = "vllm"
await vllm_adapter.initialize()
# Create a mock model store
mock_model_store = AsyncMock()
mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm")
mock_model_store.get_model.return_value = mock_model
mock_model_store.has_model.return_value = True
# Create a mock dist_registry
mock_dist_registry = MagicMock()
mock_dist_registry.get = AsyncMock(return_value=mock_model)
mock_dist_registry.set = AsyncMock()
# Set up the routing table
routing_table = ModelsRoutingTable(
impls_by_provider_id={"vllm": vllm_adapter},
dist_registry=mock_dist_registry,
policy=[],
)
# Inject the model store into the adapter
vllm_adapter.model_store = routing_table
# Create the InferenceRouter
router = InferenceRouter(routing_table=routing_table)
# Patch the OpenAI client
with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_client_property:
mock_client = MagicMock()
mock_client.completions.create = AsyncMock(
return_value=OpenAICompletion(
id="cmpl-abc123",
created=1,
model="mock-model",
choices=[
OpenAICompletionChoice(
text="joy",
finish_reason="stop",
index=0,
)
],
)
)
mock_client_property.return_value = mock_client
# Test with guided_choice and prompt_logprobs as extra fields
params = OpenAICompletionRequest(
model="mock-model",
prompt="I am feeling happy",
stream=False,
guided_choice=["joy", "sadness"],
prompt_logprobs=5,
)
await router.openai_completion(params)
# Verify that the client was called with extra_body containing both parameters
mock_client.completions.create.assert_called_once()
call_kwargs = mock_client.completions.create.call_args.kwargs
assert "extra_body" in call_kwargs
assert "guided_choice" in call_kwargs["extra_body"]
assert call_kwargs["extra_body"]["guided_choice"] == ["joy", "sadness"]
assert "prompt_logprobs" in call_kwargs["extra_body"]
assert call_kwargs["extra_body"]["prompt_logprobs"] == 5