chore: update the vLLM inference impl to use OpenAIMixin for openai-compat functions

Inference recordings were generated with Qwen3-0.6B and vLLM 0.8.3, served via:

```
docker run --gpus all -v ~/.cache/huggingface:/root/.cache/huggingface -p 8000:8000 --ipc=host \
    vllm/vllm-openai:latest \
    --model Qwen/Qwen3-0.6B --enable-auto-tool-choice --tool-call-parser hermes
```

Test with:

```
./scripts/integration-tests.sh --stack-config server:ci-tests --setup vllm --subdirs inference
```
2025-10-16 14:57:20 +00:00 · 2025-09-10 10:10:10 -04:00 · 2025-09-10 10:10:10 -04:00 · c2a9c65fff
commit c2a9c65fff
parent c86e45496e
33 changed files with 51813 additions and 203 deletions
--- a/tests/integration/recordings/responses/a1f9695ef183.json
+++ b/tests/integration/recordings/responses/a1f9695ef183.json
@@ -0,0 +1,197 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "prompt": "<|begin_of_text|>Complete the sentence: Micheael Jordan is born in ",
+      "logprobs": 1,
+      "stream": true,
+      "temperature": 0.0,
+      "max_tokens": 5
+    },
+    "endpoint": "/v1/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.completion.Completion",
+        "__data__": {
+          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
+          "choices": [
+            {
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": {
+                "text_offset": [
+                  0
+                ],
+                "token_logprobs": [
+                  -0.031588248908519745
+                ],
+                "tokens": [
+                  "1"
+                ],
+                "top_logprobs": [
+                  {
+                    "1": -0.031588248908519745
+                  }
+                ]
+              },
+              "text": "1",
+              "stop_reason": null
+            }
+          ],
+          "created": 1757524134,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "text_completion",
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.completion.Completion",
+        "__data__": {
+          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
+          "choices": [
+            {
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": {
+                "text_offset": [
+                  1
+                ],
+                "token_logprobs": [
+                  -0.01468715537339449
+                ],
+                "tokens": [
+                  "9"
+                ],
+                "top_logprobs": [
+                  {
+                    "9": -0.01468715537339449
+                  }
+                ]
+              },
+              "text": "9",
+              "stop_reason": null
+            }
+          ],
+          "created": 1757524134,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "text_completion",
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.completion.Completion",
+        "__data__": {
+          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
+          "choices": [
+            {
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": {
+                "text_offset": [
+                  2
+                ],
+                "token_logprobs": [
+                  -0.30242931842803955
+                ],
+                "tokens": [
+                  "5"
+                ],
+                "top_logprobs": [
+                  {
+                    "5": -0.30242931842803955
+                  }
+                ]
+              },
+              "text": "5",
+              "stop_reason": null
+            }
+          ],
+          "created": 1757524134,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "text_completion",
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.completion.Completion",
+        "__data__": {
+          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
+          "choices": [
+            {
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": {
+                "text_offset": [
+                  3
+                ],
+                "token_logprobs": [
+                  -1.2276304960250854
+                ],
+                "tokens": [
+                  "8"
+                ],
+                "top_logprobs": [
+                  {
+                    "8": -1.2276304960250854
+                  }
+                ]
+              },
+              "text": "8",
+              "stop_reason": null
+            }
+          ],
+          "created": 1757524134,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "text_completion",
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.completion.Completion",
+        "__data__": {
+          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
+          "choices": [
+            {
+              "finish_reason": "length",
+              "index": 0,
+              "logprobs": {
+                "text_offset": [
+                  4
+                ],
+                "token_logprobs": [
+                  -0.7615041732788086
+                ],
+                "tokens": [
+                  ","
+                ],
+                "top_logprobs": [
+                  {
+                    ",": -0.7615041732788086
+                  }
+                ]
+              },
+              "text": ",",
+              "stop_reason": null
+            }
+          ],
+          "created": 1757524134,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "text_completion",
+          "system_fingerprint": null,
+          "usage": null
+        }
+      }
+    ],
+    "is_streaming": true
+  }
+}