chore: update the vLLM inference impl to use OpenAIMixin for openai-compat functions

inference recordings from Qwen3-0.6B and vLLM 0.8.3 -
```
docker run --gpus all -v ~/.cache/huggingface:/root/.cache/huggingface -p 8000:8000 --ipc=host \
    vllm/vllm-openai:latest \
    --model Qwen/Qwen3-0.6B --enable-auto-tool-choice --tool-call-parser hermes
```

test with -
```
./scripts/integration-tests.sh --stack-config server:ci-tests --setup vllm --subdirs inference
```
2025-10-16 06:53:47 +00:00 · 2025-09-10 10:10:10 -04:00 · 2025-09-10 10:10:10 -04:00 · c2a9c65fff
commit c2a9c65fff
parent c86e45496e
33 changed files with 51813 additions and 203 deletions
--- a/tests/integration/recordings/responses/62b82c42a270.json
+++ b/tests/integration/recordings/responses/62b82c42a270.json
@@ -0,0 +1,46 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "prompt": "<|begin_of_text|>Complete the sentence using one word: Roses are red, violets are ",
+      "stream": false,
+      "temperature": 0.0,
+      "max_tokens": 50
+    },
+    "endpoint": "/v1/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "cmpl-75aab3472ee24335962c490f4efdb884",
+        "choices": [
+          {
+            "finish_reason": "length",
+            "index": 0,
+            "logprobs": null,
+            "text": " _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _",
+            "stop_reason": null,
+            "prompt_logprobs": null
+          }
+        ],
+        "created": 1757524131,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "text_completion",
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 50,
+          "prompt_tokens": 23,
+          "total_tokens": 73,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}