Mirror of https://github.com/meta-llama/llama-stack.git
Synced 2025-10-04 12:07:34 +00:00
chore: update the vLLM inference impl to use OpenAIMixin for openai-compat functions

Inference recordings are from Qwen3-0.6B on vLLM 0.8.3, served with:

```
docker run --gpus all -v ~/.cache/huggingface:/root/.cache/huggingface -p 8000:8000 --ipc=host \
    vllm/vllm-openai:latest \
    --model Qwen/Qwen3-0.6B --enable-auto-tool-choice --tool-call-parser hermes
```

Tested with:

```
./scripts/integration-tests.sh --stack-config server:ci-tests --setup vllm --subdirs inference
```
This commit is contained in:
parent c86e45496e
commit c2a9c65fff

33 changed files with 51813 additions and 203 deletions
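At a glance, the refactor replaces the adapter's hand-rolled lazy client management with three small hooks consumed by OpenAIMixin. Here is a minimal sketch of the pattern (illustrative only; the hook names match the diff below, everything else is simplified):

```python
# Illustrative sketch of the OpenAIMixin hook pattern adopted by this commit.
# Hook names follow the diff below; the real classes carry many more methods.
import httpx
from openai import AsyncOpenAI


class OpenAIMixinSketch:
    """Stand-in for OpenAIMixin: it owns the AsyncOpenAI client."""

    def get_api_key(self) -> str:
        raise NotImplementedError

    def get_base_url(self) -> str:
        raise NotImplementedError

    def get_extra_client_params(self) -> dict:
        # Child classes may override this to pass extra kwargs to AsyncOpenAI.
        return {}

    @property
    def client(self) -> AsyncOpenAI:
        # Built on demand from the hooks above; the adapter no longer does
        # its own lazy-init bookkeeping.
        return AsyncOpenAI(
            api_key=self.get_api_key(),
            base_url=self.get_base_url(),
            **self.get_extra_client_params(),
        )


class VLLMAdapterSketch(OpenAIMixinSketch):
    """What VLLMInferenceAdapter now supplies instead of _create_client()."""

    def __init__(self, url: str, api_token: str, tls_verify: bool = True) -> None:
        self.url, self.api_token, self.tls_verify = url, api_token, tls_verify

    def get_api_key(self) -> str:
        return self.api_token

    def get_base_url(self) -> str:
        return self.url

    def get_extra_client_params(self) -> dict:
        # The vLLM adapter's one extra: an httpx client honoring tls_verify.
        return {"http_client": httpx.AsyncClient(verify=self.tls_verify)}
```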
llama_stack/providers/remote/inference/vllm/vllm.py

```diff
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import json
-from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import AsyncGenerator
 from typing import Any
 
 import httpx
@@ -38,13 +38,6 @@ from llama_stack.apis.inference import (
     LogProbConfig,
     Message,
     ModelStore,
-    OpenAIChatCompletion,
-    OpenAICompletion,
-    OpenAIEmbeddingData,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -71,11 +64,11 @@ from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
     convert_tool_call,
     get_sampling_options,
-    prepare_openai_completion_params,
     process_chat_completion_stream_response,
     process_completion_response,
     process_completion_stream_response,
 )
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack.providers.utils.inference.prompt_adapter import (
     completion_request_to_prompt,
     content_has_media,
@@ -288,7 +281,7 @@ async def _process_vllm_chat_completion_stream_response(
         yield c
 
 
-class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
+class VLLMInferenceAdapter(OpenAIMixin, Inference, ModelsProtocolPrivate):
     # automatically set by the resolver when instantiating the provider
     __provider_id__: str
     model_store: ModelStore | None = None
@@ -296,7 +289,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
     def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
         self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
         self.config = config
-        self.client = None
 
     async def initialize(self) -> None:
         if not self.config.url:
@@ -308,8 +300,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         return self.config.refresh_models
 
     async def list_models(self) -> list[Model] | None:
-        self._lazy_initialize_client()
-        assert self.client is not None  # mypy
         models = []
         async for m in self.client.models.list():
             model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
@@ -340,8 +330,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             HealthResponse: A dictionary containing the health status.
         """
         try:
-            client = self._create_client() if self.client is None else self.client
-            _ = [m async for m in client.models.list()]  # Ensure the client is initialized
+            _ = [m async for m in self.client.models.list()]  # Ensure the client is initialized
             return HealthResponse(status=HealthStatus.OK)
         except Exception as e:
             return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
@@ -351,19 +340,14 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             raise ValueError("Model store not set")
         return await self.model_store.get_model(model_id)
 
-    def _lazy_initialize_client(self):
-        if self.client is not None:
-            return
-
-        log.info(f"Initializing vLLM client with base_url={self.config.url}")
-        self.client = self._create_client()
-
-    def _create_client(self):
-        return AsyncOpenAI(
-            base_url=self.config.url,
-            api_key=self.config.api_token,
-            http_client=httpx.AsyncClient(verify=self.config.tls_verify),
-        )
+    def get_api_key(self):
+        return self.config.api_token
+
+    def get_base_url(self):
+        return self.config.url
+
+    def get_extra_client_params(self):
+        return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
 
     async def completion(
         self,
@@ -374,7 +358,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         stream: bool | None = False,
         logprobs: LogProbConfig | None = None,
     ) -> CompletionResponse | AsyncGenerator[CompletionResponseStreamChunk, None]:
-        self._lazy_initialize_client()
         if sampling_params is None:
             sampling_params = SamplingParams()
         model = await self._get_model(model_id)
@@ -406,7 +389,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         logprobs: LogProbConfig | None = None,
         tool_config: ToolConfig | None = None,
     ) -> ChatCompletionResponse | AsyncGenerator[ChatCompletionResponseStreamChunk, None]:
-        self._lazy_initialize_client()
         if sampling_params is None:
             sampling_params = SamplingParams()
         model = await self._get_model(model_id)
@@ -479,16 +461,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             yield chunk
 
     async def register_model(self, model: Model) -> Model:
-        # register_model is called during Llama Stack initialization, hence we cannot init self.client if not initialized yet.
-        # self.client should only be created after the initialization is complete to avoid asyncio cross-context errors.
-        # Changing this may lead to unpredictable behavior.
-        client = self._create_client() if self.client is None else self.client
         try:
             model = await self.register_helper.register_model(model)
         except ValueError:
             pass  # Ignore statically unknown model, will check live listing
         try:
-            res = await client.models.list()
+            res = await self.client.models.list()
         except APIConnectionError as e:
             raise ValueError(
                 f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
@@ -543,8 +521,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         output_dimension: int | None = None,
         task_type: EmbeddingTaskType | None = None,
     ) -> EmbeddingsResponse:
-        self._lazy_initialize_client()
-        assert self.client is not None
         model = await self._get_model(model_id)
 
         kwargs = {}
@@ -560,154 +536,3 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
 
         embeddings = [data.embedding for data in response.data]
         return EmbeddingsResponse(embeddings=embeddings)
-
-    async def openai_embeddings(
-        self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
-    ) -> OpenAIEmbeddingsResponse:
-        self._lazy_initialize_client()
-        assert self.client is not None
-        model_obj = await self._get_model(model)
-        assert model_obj.model_type == ModelType.embedding
-
-        # Convert input to list if it's a string
-        input_list = [input] if isinstance(input, str) else input
-
-        # Call vLLM embeddings endpoint with encoding_format
-        response = await self.client.embeddings.create(
-            model=model_obj.provider_resource_id,
-            input=input_list,
-            dimensions=dimensions,
-            encoding_format=encoding_format,
-        )
-
-        # Convert response to OpenAI format
-        data = [
-            OpenAIEmbeddingData(
-                embedding=embedding_data.embedding,
-                index=i,
-            )
-            for i, embedding_data in enumerate(response.data)
-        ]
-
-        # Not returning actual token usage since vLLM doesn't provide it
-        usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
-
-        return OpenAIEmbeddingsResponse(
-            data=data,
-            model=model_obj.provider_resource_id,
-            usage=usage,
-        )
-
-    async def openai_completion(
-        self,
-        model: str,
-        prompt: str | list[str] | list[int] | list[list[int]],
-        best_of: int | None = None,
-        echo: bool | None = None,
-        frequency_penalty: float | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        presence_penalty: float | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
-        guided_choice: list[str] | None = None,
-        prompt_logprobs: int | None = None,
-        suffix: str | None = None,
-    ) -> OpenAICompletion:
-        self._lazy_initialize_client()
-        model_obj = await self._get_model(model)
-
-        extra_body: dict[str, Any] = {}
-        if prompt_logprobs is not None and prompt_logprobs >= 0:
-            extra_body["prompt_logprobs"] = prompt_logprobs
-        if guided_choice:
-            extra_body["guided_choice"] = guided_choice
-
-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            prompt=prompt,
-            best_of=best_of,
-            echo=echo,
-            frequency_penalty=frequency_penalty,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_tokens=max_tokens,
-            n=n,
-            presence_penalty=presence_penalty,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            top_p=top_p,
-            user=user,
-            extra_body=extra_body,
-        )
-        return await self.client.completions.create(**params)  # type: ignore
-
-    async def openai_chat_completion(
-        self,
-        model: str,
-        messages: list[OpenAIMessageParam],
-        frequency_penalty: float | None = None,
-        function_call: str | dict[str, Any] | None = None,
-        functions: list[dict[str, Any]] | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_completion_tokens: int | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        parallel_tool_calls: bool | None = None,
-        presence_penalty: float | None = None,
-        response_format: OpenAIResponseFormatParam | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        tool_choice: str | dict[str, Any] | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        top_logprobs: int | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
-    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        self._lazy_initialize_client()
-        model_obj = await self._get_model(model)
-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            messages=messages,
-            frequency_penalty=frequency_penalty,
-            function_call=function_call,
-            functions=functions,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_completion_tokens=max_completion_tokens,
-            max_tokens=max_tokens,
-            n=n,
-            parallel_tool_calls=parallel_tool_calls,
-            presence_penalty=presence_penalty,
-            response_format=response_format,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
-            tools=tools,
-            top_logprobs=top_logprobs,
-            top_p=top_p,
-            user=user,
-        )
-        return await self.client.chat.completions.create(**params)  # type: ignore
```
llama_stack/providers/utils/inference/openai_mixin.py

```diff
@@ -67,6 +67,17 @@ class OpenAIMixin(ABC):
         """
         pass
 
+    def get_extra_client_params(self) -> dict[str, Any]:
+        """
+        Get any extra parameters to pass to the AsyncOpenAI client.
+
+        Child classes can override this method to provide additional parameters
+        such as timeout settings, proxies, etc.
+
+        :return: A dictionary of extra parameters
+        """
+        return {}
+
     @property
     def client(self) -> AsyncOpenAI:
         """
@@ -78,6 +89,7 @@ class OpenAIMixin(ABC):
         return AsyncOpenAI(
             api_key=self.get_api_key(),
             base_url=self.get_base_url(),
+            **self.get_extra_client_params(),
         )
 
     async def _get_provider_model_id(self, model: str) -> str:
@@ -124,10 +136,15 @@ class OpenAIMixin(ABC):
         """
         Direct OpenAI completion API call.
         """
-        if guided_choice is not None:
-            logger.warning("guided_choice is not supported by the OpenAI API. Ignoring.")
-        if prompt_logprobs is not None:
-            logger.warning("prompt_logprobs is not supported by the OpenAI API. Ignoring.")
+        # Handle parameters that are not supported by OpenAI API, but may be by the provider
+        #   prompt_logprobs is supported by vLLM
+        #   guided_choice is supported by vLLM
+        # TODO: test coverage
+        extra_body: dict[str, Any] = {}
+        if prompt_logprobs is not None and prompt_logprobs >= 0:
+            extra_body["prompt_logprobs"] = prompt_logprobs
+        if guided_choice:
+            extra_body["guided_choice"] = guided_choice
 
         # TODO: fix openai_completion to return type compatible with OpenAI's API response
         return await self.client.completions.create(  # type: ignore[no-any-return]
@@ -150,7 +167,8 @@ class OpenAIMixin(ABC):
                 top_p=top_p,
                 user=user,
                 suffix=suffix,
-            )
+            ),
+            extra_body=extra_body,
         )
 
     async def openai_chat_completion(
```
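The recordings below exercise the new extra_body path end to end. As a reference point, this is roughly what the passthrough permits from the caller's side (a sketch, not code from this commit; it assumes a vLLM server at localhost:8000 as used for the recordings, and the stock openai client):

```python
# Sketch: vLLM-only parameters ("guided_choice" here) ride along on an
# otherwise OpenAI-compatible completions call via extra_body. This mirrors
# the request captured in recording 6825a67ceccd.json below.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="fake")
    completion = await client.completions.create(
        model="Qwen/Qwen3-0.6B",
        prompt="I am feeling really sad today.",
        extra_body={"guided_choice": ["joy", "sadness"]},  # honored by vLLM, ignored/rejected by OpenAI itself
    )
    print(completion.choices[0].text)  # the recording below captured "sadness"


asyncio.run(main())
```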
tests/integration/recordings/responses/004d45cd1d80.json (new file, 3917 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/032f8c5a1289.json (new file, 66 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "messages": [
        {"role": "user", "content": [{"type": "text", "text": "Which planet has rings around it with a name starting with letter S?"}]}
      ],
      "stream": false,
      "temperature": 0.0,
      "max_tokens": 4096
    },
    "endpoint": "/v1/chat/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-307d8869f9a341439fef1e92274a89ec",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "<think>\nOkay, so the question is asking which planet has rings around it and the name starts with the letter S. Let me think. I know that there are many planets in our solar system, right? The ones we usually mention are Mercury, Venus, Earth, Mars, Jupiter, Saturn, etc. Now, the key here is that the planet has rings, and the name starts with S.\n\nFirst, I remember that Saturn is the planet with rings. But wait, the name starts with S, so that's correct. But let me make sure. Are there any other planets with rings that start with S? Like maybe something else? Let me think. Oh, right! There's also Neptune, but Neptune is a gas giant, not a planet with rings. Wait, no, Neptune does have rings, but they're not as prominent as Saturn's. So Saturn is the one with rings and the name starting with S. \n\nWait, but maybe I should double-check. Let me recall. The planets in order are Mercury, Venus, Earth, Mars, Jupiter, Saturn, and then Neptune. So Saturn comes after Jupiter. So yes, Saturn has rings. And the name starts with S. So the answer should be Saturn.\n</think>\n\nThe planet with rings around it and a name starting with the letter S is **Saturn**.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [],
              "reasoning_content": null
            },
            "stop_reason": null
          }
        ],
        "created": 1757524152,
        "model": "Qwen/Qwen3-0.6B",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {"completion_tokens": 274, "prompt_tokens": 22, "total_tokens": 296, "completion_tokens_details": null, "prompt_tokens_details": null},
        "prompt_logprobs": null
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/033340f58ebc.json (new file, 105 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "tools": [
        {
          "type": "function",
          "function": {
            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
              "type": "object",
              "properties": {
                "location": {"type": "string", "description": "The city and state (both required), e.g. San Francisco, CA."}
              },
              "required": ["location"]
            }
          }
        }
      ],
      "messages": [
        {"role": "system", "content": [{"type": "text", "text": "Pretend you are a weather assistant."}]},
        {"role": "user", "content": [{"type": "text", "text": "What's the weather like in San Francisco, CA?"}]}
      ],
      "stream": false,
      "temperature": 0.0,
      "max_tokens": 4096
    },
    "endpoint": "/v1/chat/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-3d55352696e445f598c3d881a1130454",
        "choices": [
          {
            "finish_reason": "tool_calls",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to use the get_weather function. The function requires the location parameter, which in this case is \"San Francisco, CA\". I should make sure to format the arguments correctly as a JSON object. Let me check the required parameters again to confirm. The location is required, so I'll include that. No other parameters are needed. Alright, I'll structure the tool call accordingly.\n</think>\n\n",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [
                {"id": "chatcmpl-tool-98de38728cf6486d925102e0515d6d8f", "function": {"arguments": "{\"location\": \"San Francisco, CA\"}", "name": "get_weather"}, "type": "function"}
              ],
              "reasoning_content": null
            },
            "stop_reason": null
          }
        ],
        "created": 1757524137,
        "model": "Qwen/Qwen3-0.6B",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {"completion_tokens": 120, "prompt_tokens": 185, "total_tokens": 305, "completion_tokens_details": null, "prompt_tokens_details": null},
        "prompt_logprobs": null
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/07eabb983d8e.json (new file, 1021 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/0dfab8a5deb8.json (new file, 2888 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/27463384d1a3.json (new file, 59 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "messages": [{"role": "user", "content": "Hello, world!"}],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-5434d48fcb354eb498e3909a87c4bf39",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "<think>\nOkay, the user just said \"Hello, world!\" so I need to respond in a friendly way. Let me start by acknowledging their input. Maybe say \"Hello, world!\" and then offer help. Keep it simple and positive. No need for any complex messages. Just a straightforward reply.\n</think>\n\nHello, world! \ud83d\ude0a How can I assist you today?",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [],
              "reasoning_content": null
            },
            "stop_reason": null
          }
        ],
        "created": 1757524166,
        "model": "Qwen/Qwen3-0.6B",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {"completion_tokens": 77, "prompt_tokens": 12, "total_tokens": 89, "completion_tokens_details": null, "prompt_tokens_details": null},
        "prompt_logprobs": null
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/299952f00fd8.json (new file, 9609 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/42a07c178a75.json (new file, 3750 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/50053de3a8a8.json (new file, 129 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "tools": [
        {
          "type": "function",
          "function": {
            "name": "get_object_namespace_list",
            "description": "Get the list of objects in a namespace",
            "parameters": {
              "type": "object",
              "properties": {
                "kind": {"type": "string", "description": "the type of object"},
                "namespace": {"type": "string", "description": "the name of the namespace"}
              },
              "required": ["kind", "namespace"]
            }
          }
        }
      ],
      "messages": [
        {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
        {"role": "user", "content": [{"type": "text", "text": "What pods are in the namespace openshift-lightspeed?"}]},
        {
          "role": "assistant",
          "content": [{"type": "text", "text": ""}],
          "tool_calls": [
            {"id": "1", "type": "function", "function": {"name": "get_object_namespace_list", "arguments": "{\"kind\": \"pod\", \"namespace\": \"openshift-lightspeed\"}"}}
          ]
        },
        {"role": "tool", "content": [{"type": "text", "text": "the objects are pod1, pod2, pod3"}]}
      ],
      "stream": false,
      "temperature": 0.0,
      "max_tokens": 4096
    },
    "endpoint": "/v1/chat/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-042380355dac4046b0510320198e5563",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "<think>\nOkay, the user asked for the pods in the namespace openshift-lightspeed. I called the get_object_namespace_list function with kind \"pod\" and namespace \"openshift-lightspeed\". The response came back with three objects: pod1, pod2, pod3. Now I need to present this information clearly.\n\nI should confirm the answer by listing each pod. Maybe mention that the list includes those specific pods. Keep it straightforward and make sure the user knows the result. No need for extra details since the tool response already provided the list. Just relay the information back in a friendly manner.\n</think>\n\nThe pods in the namespace openshift-lightspeed are: pod1, pod2, and pod3.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [],
              "reasoning_content": null
            },
            "stop_reason": null
          }
        ],
        "created": 1757524154,
        "model": "Qwen/Qwen3-0.6B",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {"completion_tokens": 148, "prompt_tokens": 256, "total_tokens": 404, "completion_tokens_details": null, "prompt_tokens_details": null},
        "prompt_logprobs": null
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/551066a5c7f4.json (new file, 70 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "Hello, world!",
      "stream": false,
      "extra_body": {"prompt_logprobs": 0}
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.completion.Completion",
      "__data__": {
        "id": "cmpl-f7da1cba36ba427c8acd9ad2f0722fa4",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "text": " I'm a 3-year-old girl who loves to play with my cat.",
            "stop_reason": null,
            "prompt_logprobs": [
              null,
              {"11": {"logprob": -8.8668212890625, "rank": 1031, "decoded_token": ","}},
              {"1879": {"logprob": -4.7821879386901855, "rank": 10, "decoded_token": "\u0120world"}},
              {"0": {"logprob": -1.6575366258621216, "rank": 2, "decoded_token": "!"}}
            ]
          }
        ],
        "created": 1757524144,
        "model": "Qwen/Qwen3-0.6B",
        "object": "text_completion",
        "system_fingerprint": null,
        "usage": {"completion_tokens": 16, "prompt_tokens": 4, "total_tokens": 20, "completion_tokens_details": null, "prompt_tokens_details": null}
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/592d9733fb28.json (new file, 1021 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/62b67ba9b8ea.json (new file, 124 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "messages": [
        {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."}]},
        {"role": "user", "content": [{"type": "text", "text": "Please give me information about Michael Jordan."}]}
      ],
      "extra_body": {
        "guided_json": {
          "$defs": {
            "NBAStats": {
              "properties": {
                "year_for_draft": {"title": "Year For Draft", "type": "integer"},
                "num_seasons_in_nba": {"title": "Num Seasons In Nba", "type": "integer"}
              },
              "required": ["year_for_draft", "num_seasons_in_nba"],
              "title": "NBAStats",
              "type": "object"
            }
          },
          "properties": {
            "first_name": {"title": "First Name", "type": "string"},
            "last_name": {"title": "Last Name", "type": "string"},
            "year_of_birth": {"title": "Year Of Birth", "type": "integer"},
            "nba_stats": {"$ref": "#/$defs/NBAStats"}
          },
          "required": ["first_name", "last_name", "year_of_birth", "nba_stats"],
          "title": "AnswerFormat",
          "type": "object"
        }
      },
      "stream": false,
      "temperature": 0.0,
      "max_tokens": 4096
    },
    "endpoint": "/v1/chat/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-ba863ae481964bcca497a7feba2a2689",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963, \"nba_stats\": {\"year_for_draft\": 1984, \"num_seasons_in_nba\": 15}}",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [],
              "reasoning_content": null
            },
            "stop_reason": null
          }
        ],
        "created": 1757524142,
        "model": "Qwen/Qwen3-0.6B",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {"completion_tokens": 61, "prompt_tokens": 51, "total_tokens": 112, "completion_tokens_details": null, "prompt_tokens_details": null},
        "prompt_logprobs": null
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/62b82c42a270.json (new file, 46 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "<|begin_of_text|>Complete the sentence using one word: Roses are red, violets are ",
      "stream": false,
      "temperature": 0.0,
      "max_tokens": 50
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.completion.Completion",
      "__data__": {
        "id": "cmpl-75aab3472ee24335962c490f4efdb884",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "text": " _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _",
            "stop_reason": null,
            "prompt_logprobs": null
          }
        ],
        "created": 1757524131,
        "model": "Qwen/Qwen3-0.6B",
        "object": "text_completion",
        "system_fingerprint": null,
        "usage": {"completion_tokens": 50, "prompt_tokens": 23, "total_tokens": 73, "completion_tokens_details": null, "prompt_tokens_details": null}
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/666d8719f984.json (new file, 8685 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/67c8e604fcd3.json (new file, 2684 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/6825a67ceccd.json (new file, 50 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "I am feeling really sad today.",
      "stream": false,
      "extra_body": {"guided_choice": ["joy", "sadness"]}
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.completion.Completion",
      "__data__": {
        "id": "cmpl-b4feb568934e44e7a8b7ac8071dce285",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "text": "sadness",
            "stop_reason": null,
            "prompt_logprobs": null
          }
        ],
        "created": 1757524114,
        "model": "Qwen/Qwen3-0.6B",
        "object": "text_completion",
        "system_fingerprint": null,
        "usage": {"completion_tokens": 3, "prompt_tokens": 7, "total_tokens": 10, "completion_tokens_details": null, "prompt_tokens_details": null}
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/6cf57d8dfcfc.json (new file, 86 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "<|begin_of_text|>Complete the sentence: Micheael Jordan is born in ",
      "logprobs": 1,
      "stream": false,
      "temperature": 0.0,
      "max_tokens": 5
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.completion.Completion",
      "__data__": {
        "id": "cmpl-b114bec84b5a430ba94520ac9e7a6d8d",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": {
              "text_offset": [0, 1, 2, 3, 4],
              "token_logprobs": [-0.03187580779194832, -0.014971747994422913, -0.3393988609313965, -1.2200194597244263, -0.762129545211792],
              "tokens": ["1", "9", "5", "8", ","],
              "top_logprobs": [
                {"1": -0.03187580779194832},
                {"9": -0.014971747994422913},
                {"5": -0.3393988609313965},
                {"8": -1.2200194597244263},
                {",": -0.762129545211792}
              ]
            },
            "text": "1958,",
            "stop_reason": null,
            "prompt_logprobs": null
          }
        ],
        "created": 1757524133,
        "model": "Qwen/Qwen3-0.6B",
        "object": "text_completion",
        "system_fingerprint": null,
        "usage": {"completion_tokens": 5, "prompt_tokens": 18, "total_tokens": 23, "completion_tokens_details": null, "prompt_tokens_details": null}
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/7ef63231b9f8.json (new file, 59 lines):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "messages": [
        {"role": "user", "content": "Which planet has rings around it with a name starting with letter S?"}
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-ab6b31bfc8244a48bee3f9ed8bff6455",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "<think>\nOkay, so the question is asking which planet has rings around it and the name starts with the letter S. Let me think. I know that there are many planets in our solar system, like Earth, Mars, Venus, Jupiter, etc. But the question specifically mentions rings and the name starting with S. \n\nFirst, I should recall the planets and their names. Let me list them in order of their distance from the Sun. From the Sun, we have Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune, and then Pluto. Wait, Pluto was once considered a planet but was reclassified. So maybe the question is about the planets that are now in order with the rings. \n\nNow, Saturn is the next one after Jupiter. Saturn is the planet that has rings around it. The name starts with S. So the answer would be Saturn. Let me double-check. Yes, Saturn is the planet with rings and the name starts with S. I don't think there's another planet with rings and a starting letter S. For example, maybe another planet in another system, but the question is about the solar system. So I think the answer is Saturn.\n</think>\n\nThe planet with rings around it and a name starting with the letter S is **Saturn**.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [],
              "reasoning_content": null
            },
            "stop_reason": null
          }
        ],
        "created": 1757524161,
        "model": "Qwen/Qwen3-0.6B",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {"completion_tokens": 266, "prompt_tokens": 22, "total_tokens": 288, "completion_tokens_details": null, "prompt_tokens_details": null},
        "prompt_logprobs": null
      }
    },
    "is_streaming": false
  }
}
```
tests/integration/recordings/responses/89b141855b81.json (new file, 4055 lines): diff suppressed because it is too large.
tests/integration/recordings/responses/8ea4d7e002fa.json (new file, 524 lines; streaming recording, shown here up to the point where this view is truncated):

```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "<|begin_of_text|>Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
      "stream": true,
      "temperature": 0.0,
      "max_tokens": 50,
      "stop": ["1963"]
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": ".", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": " <|", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "end", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "_of_t", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "e", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "xt|", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": ">\n", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "\n", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "*", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "*A", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": ".** Mich", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "ael Jor", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "dan ", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "was b", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "orn", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": " in ", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "the y", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B", "object": "text_completion", "system_fingerprint": null, "usage": null}},
      {"__type__": "openai.types.completion.Completion", "__data__": {"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8", "choices": [{"finish_reason": null, "index": 0, "logprobs": null, "text": "ear", "stop_reason": null}], "created": 1757524133, "model": "Qwen/Qwen3-0.6B",
```
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " ",
|
||||||
|
"stop_reason": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1757524133,
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "o",
|
||||||
|
"stop_reason": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1757524133,
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "f",
|
||||||
|
"stop_reason": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1757524133,
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " ",
|
||||||
|
"stop_reason": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1757524133,
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-8ca2b3d09fbe49f99b86bc496a6c4bf8",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "",
|
||||||
|
"stop_reason": "1963"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1757524133,
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": true
|
||||||
|
}
|
||||||
|
}
|
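The recording above captures a streamed `/v1/completions` call chunk by chunk; concatenating the per-chunk `text` fields rebuilds the full completion. For orientation, a minimal sketch of the kind of client call that produces such a stream, using the plain OpenAI SDK against the vLLM server from the commit message (the prompt below is illustrative, not the exact recorded one):

```
# Minimal sketch: replay a streamed completion against a local vLLM server.
# Assumes the Qwen/Qwen3-0.6B server from the commit message is on :8000.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake")

stream = client.completions.create(
    model="Qwen/Qwen3-0.6B",
    prompt="Michael Jordan was born in the year of ",  # hypothetical prompt
    stream=True,
    temperature=0.0,
)

# Each chunk mirrors one openai.types.completion.Completion entry in the
# recording; joining the choice texts reconstructs the full output.
text = "".join(chunk.choices[0].text for chunk in stream)
print(text)
```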
71 tests/integration/recordings/responses/934eabb2c7bd.json (new file)
@@ -0,0 +1,71 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "<|begin_of_text|>Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003.Please respond in JSON format with the schema: {\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"year_born\": {\"title\": \"Year Born\", \"type\": \"string\"}, \"year_retired\": {\"title\": \"Year Retired\", \"type\": \"string\"}}, \"required\": [\"name\", \"year_born\", \"year_retired\"], \"title\": \"AnswerFormat\", \"type\": \"object\"}",
      "extra_body": {
        "guided_json": {
          "properties": {
            "name": {
              "title": "Name",
              "type": "string"
            },
            "year_born": {
              "title": "Year Born",
              "type": "string"
            },
            "year_retired": {
              "title": "Year Retired",
              "type": "string"
            }
          },
          "required": [
            "name",
            "year_born",
            "year_retired"
          ],
          "title": "AnswerFormat",
          "type": "object"
        }
      },
      "stream": false,
      "temperature": 0.0,
      "max_tokens": 50
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.completion.Completion",
      "__data__": {
        "id": "cmpl-d0abbd0dc22c41a091a2a72ce913cbed",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "text": "{ \n \"name\": \"Michael Jordan\",\n \"year_born\": \"1963\",\n \"year_retired\": \"2003\"\n}",
            "stop_reason": null,
            "prompt_logprobs": null
          }
        ],
        "created": 1757524134,
        "model": "Qwen/Qwen3-0.6B",
        "object": "text_completion",
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 36,
          "prompt_tokens": 128,
          "total_tokens": 164,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
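This recording exercises vLLM's guided decoding: the OpenAI SDK passes the vLLM-specific `guided_json` field through `extra_body`, and the server constrains generation to the supplied JSON schema. A minimal sketch of the same call, with the schema taken directly from the recorded request:

```
# Minimal sketch of the guided-decoding request captured above. vLLM reads
# the JSON schema from extra_body["guided_json"] and constrains the output.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake")

schema = {
    "title": "AnswerFormat",
    "type": "object",
    "properties": {
        "name": {"title": "Name", "type": "string"},
        "year_born": {"title": "Year Born", "type": "string"},
        "year_retired": {"title": "Year Retired", "type": "string"},
    },
    "required": ["name", "year_born", "year_retired"],
}

response = client.completions.create(
    model="Qwen/Qwen3-0.6B",
    prompt="Michael Jordan was born in 1963. He played basketball for the "
    "Chicago Bulls. He retired in 2003. Please respond in JSON format.",
    extra_body={"guided_json": schema},
    temperature=0.0,
    max_tokens=50,
)
print(response.choices[0].text)  # e.g. {"name": "Michael Jordan", ...}
```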
197 tests/integration/recordings/responses/a1f9695ef183.json (new file)
@@ -0,0 +1,197 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "<|begin_of_text|>Complete the sentence: Micheael Jordan is born in ",
      "logprobs": 1,
      "stream": true,
      "temperature": 0.0,
      "max_tokens": 5
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": {
                "text_offset": [
                  0
                ],
                "token_logprobs": [
                  -0.031588248908519745
                ],
                "tokens": [
                  "1"
                ],
                "top_logprobs": [
                  {
                    "1": -0.031588248908519745
                  }
                ]
              },
              "text": "1",
              "stop_reason": null
            }
          ],
          "created": 1757524134,
          "model": "Qwen/Qwen3-0.6B",
          "object": "text_completion",
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": {
                "text_offset": [
                  1
                ],
                "token_logprobs": [
                  -0.01468715537339449
                ],
                "tokens": [
                  "9"
                ],
                "top_logprobs": [
                  {
                    "9": -0.01468715537339449
                  }
                ]
              },
              "text": "9",
              "stop_reason": null
            }
          ],
          "created": 1757524134,
          "model": "Qwen/Qwen3-0.6B",
          "object": "text_completion",
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": {
                "text_offset": [
                  2
                ],
                "token_logprobs": [
                  -0.30242931842803955
                ],
                "tokens": [
                  "5"
                ],
                "top_logprobs": [
                  {
                    "5": -0.30242931842803955
                  }
                ]
              },
              "text": "5",
              "stop_reason": null
            }
          ],
          "created": 1757524134,
          "model": "Qwen/Qwen3-0.6B",
          "object": "text_completion",
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": {
                "text_offset": [
                  3
                ],
                "token_logprobs": [
                  -1.2276304960250854
                ],
                "tokens": [
                  "8"
                ],
                "top_logprobs": [
                  {
                    "8": -1.2276304960250854
                  }
                ]
              },
              "text": "8",
              "stop_reason": null
            }
          ],
          "created": 1757524134,
          "model": "Qwen/Qwen3-0.6B",
          "object": "text_completion",
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-7968eb721ac34b348eb879f300052b1b",
          "choices": [
            {
              "finish_reason": "length",
              "index": 0,
              "logprobs": {
                "text_offset": [
                  4
                ],
                "token_logprobs": [
                  -0.7615041732788086
                ],
                "tokens": [
                  ","
                ],
                "top_logprobs": [
                  {
                    ",": -0.7615041732788086
                  }
                ]
              },
              "text": ",",
              "stop_reason": null
            }
          ],
          "created": 1757524134,
          "model": "Qwen/Qwen3-0.6B",
          "object": "text_completion",
          "system_fingerprint": null,
          "usage": null
        }
      }
    ],
    "is_streaming": true
  }
}
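This recording streams a completion with `logprobs=1`, so every chunk carries one token together with its log-probability, text offset, and top alternative. A minimal sketch of issuing and reading such a stream (the prompt is kept verbatim from the recording, including its typo, since it is what the fixture was keyed on):

```
# Minimal sketch of the logprobs request captured above: each streamed chunk
# carries one token plus its log-probability and text offset.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake")

stream = client.completions.create(
    model="Qwen/Qwen3-0.6B",
    prompt="Complete the sentence: Micheael Jordan is born in ",  # as recorded
    logprobs=1,
    stream=True,
    temperature=0.0,
    max_tokens=5,
)

for chunk in stream:
    choice = chunk.choices[0]
    if choice.logprobs is not None:
        print(choice.logprobs.tokens, choice.logprobs.token_logprobs)
```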
5747 tests/integration/recordings/responses/a5163d8d2d01.json (new file; diff suppressed because it is too large)
3509 tests/integration/recordings/responses/aacf9abc51d4.json (new file; diff suppressed because it is too large)
66 tests/integration/recordings/responses/c42baf6a3700.json (new file)
@@ -0,0 +1,66 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "messages": [
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": "Which planet do humans live on?"
            }
          ]
        }
      ],
      "stream": false,
      "temperature": 0.0,
      "max_tokens": 4096
    },
    "endpoint": "/v1/chat/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-f7cda312aefe4a4ca4c34e7342f23802",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "<think>\nOkay, the user is asking which planet humans live on. Let me think. I know that humans are on Earth. But maybe they're confused because sometimes people think of other planets. I should clarify that Earth is the only planet where humans have been found. Also, maybe mention that Earth is a unique planet with all the necessary conditions for life. I should keep it simple and direct.\n</think>\n\nHumans live on **Earth**. This planet is unique in having the conditions necessary for life, such as liquid water, breathable air, and suitable temperatures.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [],
              "reasoning_content": null
            },
            "stop_reason": null
          }
        ],
        "created": 1757524135,
        "model": "Qwen/Qwen3-0.6B",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 114,
          "prompt_tokens": 15,
          "total_tokens": 129,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        },
        "prompt_logprobs": null
      }
    },
    "is_streaming": false
  }
}
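Note that the user content in this recording is a list of typed parts rather than a bare string, a shape the OpenAI-compat path must pass through unchanged. A minimal sketch of the equivalent client call:

```
# Minimal sketch of the chat request captured above: user content is a list
# of typed parts ({"type": "text", ...}) instead of a plain string.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake")

response = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[
        {
            "role": "user",
            "content": [{"type": "text", "text": "Which planet do humans live on?"}],
        }
    ],
    temperature=0.0,
    max_tokens=4096,
)
print(response.choices[0].message.content)
```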
59 tests/integration/recordings/responses/e08e01e5652a.json (new file)
@@ -0,0 +1,59 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "messages": [
        {
          "role": "user",
          "content": "Which planet do humans live on?"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-8e11e56bbb9744e1ac92ae6af3fb5148",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "<think>\nOkay, the user is asking which planet humans live on. Hmm, I know that humans are on Earth, but I should make sure. Wait, Earth is our home. But maybe they're confused because sometimes people think of other planets. Let me check. The answer is Earth. But maybe I should explain why it's correct. Oh, right, Earth is the planet where we live. No other planets have been confirmed to have humans. So the answer is Earth.\n</think>\n\nHumans live on **Earth**.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [],
              "reasoning_content": null
            },
            "stop_reason": null
          }
        ],
        "created": 1757524156,
        "model": "Qwen/Qwen3-0.6B",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 107,
          "prompt_tokens": 15,
          "total_tokens": 122,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        },
        "prompt_logprobs": null
      }
    },
    "is_streaming": false
  }
}
3041 tests/integration/recordings/responses/f518ea4fde7d.json (new file; diff suppressed because it is too large)
45 tests/integration/recordings/responses/fadf16af6d35.json (new file)
@@ -0,0 +1,45 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
      "stream": false,
      "extra_body": {}
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.completion.Completion",
      "__data__": {
        "id": "cmpl-52758d074c7541f99998bf277e553e78",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "text": " ____. \n\nThe sentence is: \"Roses are red, violets",
            "stop_reason": null,
            "prompt_logprobs": null
          }
        ],
        "created": 1757524112,
        "model": "Qwen/Qwen3-0.6B",
        "object": "text_completion",
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 16,
          "prompt_tokens": 25,
          "total_tokens": 41,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
85 tests/integration/recordings/responses/fded9afd616c.json (new file)
@@ -0,0 +1,85 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:8000/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "Qwen/Qwen3-0.6B",
      "prompt": "Hello, world!",
      "stream": false,
      "extra_body": {
        "prompt_logprobs": 1
      }
    },
    "endpoint": "/v1/completions",
    "model": "Qwen/Qwen3-0.6B"
  },
  "response": {
    "body": {
      "__type__": "openai.types.completion.Completion",
      "__data__": {
        "id": "cmpl-df4fc1ac600a43778b6ad1e3781a4129",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "text": " I'm a 14-year-old girl with a passion for science and tech",
            "stop_reason": null,
            "prompt_logprobs": [
              null,
              {
                "11": {
                  "logprob": -8.8668212890625,
                  "rank": 1031,
                  "decoded_token": ","
                },
                "21806": {
                  "logprob": -4.8355712890625,
                  "rank": 1,
                  "decoded_token": "\u0120Answer"
                }
              },
              {
                "1879": {
                  "logprob": -4.7821879386901855,
                  "rank": 10,
                  "decoded_token": "\u0120world"
                },
                "358": {
                  "logprob": -0.9071879386901855,
                  "rank": 1,
                  "decoded_token": "\u0120I"
                }
              },
              {
                "0": {
                  "logprob": -1.6575366258621216,
                  "rank": 2,
                  "decoded_token": "!"
                },
                "13": {
                  "logprob": -1.4700366258621216,
                  "rank": 1,
                  "decoded_token": "."
                }
              }
            ]
          }
        ],
        "created": 1757524113,
        "model": "Qwen/Qwen3-0.6B",
        "object": "text_completion",
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 16,
          "prompt_tokens": 4,
          "total_tokens": 20,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
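`prompt_logprobs` is another vLLM extension: it is sent through `extra_body` and comes back on the choice as one entry per prompt token (with `null` for the first token), each mapping token ids to logprob, rank, and decoded text. A minimal sketch; since the response field sits outside the OpenAI schema, it is read defensively here:

```
# Minimal sketch of the prompt_logprobs request captured above. The field is
# vLLM-specific, so it travels in extra_body and comes back as an extra
# attribute on the choice object.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake")

response = client.completions.create(
    model="Qwen/Qwen3-0.6B",
    prompt="Hello, world!",
    extra_body={"prompt_logprobs": 1},
)
# Read the non-standard field without assuming the SDK modeled it.
print(getattr(response.choices[0], "prompt_logprobs", None))
```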
@@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "VLLM_URL": "http://localhost:8000/v1",
         },
         defaults={
-            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
+            "text_model": "vllm/Qwen/Qwen3-0.6B",
             "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
         },
     ),
@@ -11,7 +11,7 @@ import threading
 import time
 from http.server import BaseHTTPRequestHandler, HTTPServer
 from typing import Any
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
 
 import pytest
 from openai.types.chat.chat_completion_chunk import (
@@ -150,10 +150,12 @@ async def test_tool_call_response(vllm_inference_adapter):
     """Verify that tool call arguments from a CompletionMessage are correctly converted
     into the expected JSON format."""
 
-    # Patch the call to vllm so we can inspect the arguments sent were correct
-    with patch.object(
-        vllm_inference_adapter.client.chat.completions, "create", new_callable=AsyncMock
-    ) as mock_nonstream_completion:
+    # Patch the client property to avoid instantiating a real AsyncOpenAI client
+    with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock()
+        mock_create_client.return_value = mock_client
+
         messages = [
             SystemMessage(content="You are a helpful assistant"),
             UserMessage(content="How many?"),
@@ -179,7 +181,7 @@ async def test_tool_call_response(vllm_inference_adapter):
             tool_config=ToolConfig(tool_choice=ToolChoice.auto),
         )
 
-        assert mock_nonstream_completion.call_args.kwargs["messages"][2]["tool_calls"] == [
+        assert mock_client.chat.completions.create.call_args.kwargs["messages"][2]["tool_calls"] == [
             {
                 "id": "foo",
                 "type": "function",
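The rewritten tests patch the `client` property that `OpenAIMixin` now provides. A property can only be patched on the class, not on the instance, which is exactly what `PropertyMock` is for: the mock's `return_value` becomes the value attribute access yields. A self-contained sketch of the pattern (the `Adapter` class here is illustrative, not the real adapter):

```
# Standalone sketch of the PropertyMock pattern used by the updated tests.
from unittest.mock import MagicMock, PropertyMock, patch


class Adapter:
    @property
    def client(self):
        raise RuntimeError("would build a real AsyncOpenAI client")


adapter = Adapter()
with patch.object(Adapter, "client", new_callable=PropertyMock) as mock_client_prop:
    mock_client_prop.return_value = MagicMock(name="fake-openai-client")
    # Attribute access now returns the mock; no real client is constructed.
    assert adapter.client is mock_client_prop.return_value
```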
@@ -641,9 +643,7 @@ async def test_health_status_success(vllm_inference_adapter):
     This test verifies that the health method returns a HealthResponse with status OK, only
     when the connection to the vLLM server is successful.
     """
-    # Set vllm_inference_adapter.client to None to ensure _create_client is called
-    vllm_inference_adapter.client = None
-    with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
+    with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
         # Create mock client and models
         mock_client = MagicMock()
         mock_models = MagicMock()
@@ -674,8 +674,7 @@ async def test_health_status_failure(vllm_inference_adapter):
     This test verifies that the health method returns a HealthResponse with status ERROR
     and an appropriate error message when the connection to the vLLM server fails.
     """
-    vllm_inference_adapter.client = None
-    with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
+    with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
         # Create mock client and models
         mock_client = MagicMock()
         mock_models = MagicMock()