Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-10 05:24:39 +00:00)

Merge branch 'main' into use-openai-for-ollama

Commit 91fb6f42cb: 74 changed files with 8761 additions and 971 deletions
@@ -6,12 +6,25 @@

import time
import unicodedata

import pytest

from ..test_cases.test_case import TestCase


def _normalize_text(text: str) -> str:
    """
    Normalize Unicode text by removing diacritical marks for comparison.

    The test case streaming_01 expects the answer "Sol" for the question "What's the name of the Sun
    in latin?", but the model is returning "sōl" (with a macron over the 'o'), which is the correct
    Latin spelling. The test is failing because it's doing a simple case-insensitive string search
    for "sol" but the actual response contains the diacritical mark.
    """
    return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode("ascii").lower()


def provider_from_model(client_with_models, model_id):
    models = {m.identifier: m for m in client_with_models.models.list()}
    models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
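As a sanity check, the NFD round-trip above is easy to exercise on the failing case from the docstring (a standalone illustration, not part of the diff):

    import unicodedata

    def _normalize_text(text: str) -> str:
        return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode("ascii").lower()

    assert _normalize_text("s\u014dl") == "sol"  # NFD splits the macron off the "o"; the ASCII encode drops it
    assert "sol" in _normalize_text("The Latin name is s\u014dl.")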
@@ -42,6 +55,10 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
            "remote::groq",
            "remote::gemini",  # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
            "remote::anthropic",  # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
            "remote::azure",  # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation
            # does not work with the specified model, gpt-5-mini. Please choose different model and try
            # again. You can learn more about which models can be used with each operation here:
            # https://go.microsoft.com/fwlink/?linkid=2197993.'}}"}
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
@@ -157,7 +174,8 @@ def test_openai_completion_non_streaming_suffix(llama_stack_client, client_with_
    assert len(response.choices) > 0
    choice = response.choices[0]
    assert len(choice.text) > 5
-    assert "france" in choice.text.lower()
+    normalized_text = _normalize_text(choice.text)
+    assert "france" in normalized_text


@pytest.mark.parametrize(
@@ -248,7 +266,9 @@ def test_openai_chat_completion_non_streaming(compat_client, client_with_models,
    )
    message_content = response.choices[0].message.content.lower().strip()
    assert len(message_content) > 0
-    assert expected.lower() in message_content
+    normalized_expected = _normalize_text(expected)
+    normalized_content = _normalize_text(message_content)
+    assert normalized_expected in normalized_content


@pytest.mark.parametrize(
@@ -272,10 +292,13 @@ def test_openai_chat_completion_streaming(compat_client, client_with_models, tex
    )
    streamed_content = []
    for chunk in response:
-        if chunk.choices[0].delta.content:
+        # On some providers like Azure, the choices are empty on the first chunk, so we need to check for that
+        if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
            streamed_content.append(chunk.choices[0].delta.content.lower().strip())
    assert len(streamed_content) > 0
-    assert expected.lower() in "".join(streamed_content)
+    normalized_expected = _normalize_text(expected)
+    normalized_content = _normalize_text("".join(streamed_content))
+    assert normalized_expected in normalized_content


@pytest.mark.parametrize(
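The Azure recordings later in this diff show why this guard exists: the first streamed chunk is bookkeeping only, with "id": "" and "choices": []. A minimal defensive consumer under that assumption (a sketch, not code from the PR):

    response_id = None
    content = ""
    for chunk in response:
        if response_id is None and chunk.id:  # Azure's first chunk has an empty id
            response_id = chunk.id
        if chunk.choices and chunk.choices[0].delta.content:  # skip empty-choices chunks
            content += chunk.choices[0].delta.content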
@@ -308,8 +331,12 @@ def test_openai_chat_completion_streaming_with_n(compat_client, client_with_mode
                    streamed_content.get(choice.index, "") + choice.delta.content.lower().strip()
                )
    assert len(streamed_content) == 2
+    normalized_expected = _normalize_text(expected)
    for i, content in streamed_content.items():
-        assert expected.lower() in content, f"Choice {i}: Expected {expected.lower()} in {content}"
+        normalized_content = _normalize_text(content)
+        assert normalized_expected in normalized_content, (
+            f"Choice {i}: Expected {normalized_expected} in {normalized_content}"
+        )


@pytest.mark.parametrize(
@@ -339,9 +366,9 @@ def test_inference_store(compat_client, client_with_models, text_model_id, strea
        content = ""
        response_id = None
        for chunk in response:
-            if response_id is None:
+            if response_id is None and chunk.id:
                response_id = chunk.id
-            if chunk.choices[0].delta.content:
+            if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
    else:
        response_id = response.id
@@ -410,11 +437,12 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
        content = ""
        response_id = None
        for chunk in response:
-            if response_id is None:
+            if response_id is None and chunk.id:
                response_id = chunk.id
-            if delta := chunk.choices[0].delta:
-                if delta.content:
-                    content += delta.content
+            if chunk.choices and len(chunk.choices) > 0:
+                if delta := chunk.choices[0].delta:
+                    if delta.content:
+                        content += delta.content
    else:
        response_id = response.id
        content = response.choices[0].message.content
@@ -484,4 +512,5 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
        stream=False,
    )
    message_content = response.choices[0].message.content.lower().strip()
-    assert "hello world" in message_content
+    normalized_content = _normalize_text(message_content)
+    assert "hello world" in normalized_content
@@ -32,6 +32,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
            "remote::vertexai",
            "remote::groq",
            "remote::sambanova",
+            "remote::azure",
        )
        or "openai-compat" in provider.provider_type
    ):
@@ -44,7 +45,7 @@ def skip_if_model_doesnt_support_json_schema_structured_output(client_with_model
    provider_id = models[model_id].provider_id
    providers = {p.provider_id: p for p in client_with_models.providers.list()}
    provider = providers[provider_id]
-    if provider.provider_type in ("remote::sambanova",):
+    if provider.provider_type in ("remote::sambanova", "remote::azure"):
        pytest.skip(
            f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
        )
tests/integration/recordings/responses/0fda25b9241c.json (new file, +71)
@@ -0,0 +1,71 @@
{
  "request": {
    "method": "POST",
    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-5-mini",
      "messages": [
        {
          "role": "user",
          "content": "Which planet do humans live on?"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-5-mini"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-CECIXqfvjuluKkZtG3q2QJoSQhBU0",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "Humans live on Earth \u2014 the third planet from the Sun. It's the only known planet that naturally supports life, with a breathable atmosphere, liquid water, and temperatures suitable for living organisms.",
              "refusal": null,
              "role": "assistant",
              "annotations": [],
              "audio": null,
              "function_call": null,
              "tool_calls": null
            },
            "content_filter_results": {}
          }
        ],
        "created": 1757499901,
        "model": "gpt-5-mini-2025-08-07",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 112,
          "prompt_tokens": 13,
          "total_tokens": 125,
          "completion_tokens_details": {
            "accepted_prediction_tokens": 0,
            "audio_tokens": 0,
            "reasoning_tokens": 64,
            "rejected_prediction_tokens": 0
          },
          "prompt_tokens_details": {
            "audio_tokens": 0,
            "cached_tokens": 0
          }
        },
        "prompt_filter_results": [
          {
            "prompt_index": 0,
            "content_filter_results": {}
          }
        ]
      }
    },
    "is_streaming": false
  }
}
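Each of these recordings pairs a normalized request with the serialized response object, and the file name looks like a short hash of the request. A lookup along those lines (a speculative sketch of the record/replay idea, not the repo's actual implementation):

    import hashlib
    import json

    def replay_key(method: str, url: str, body: dict) -> str:
        # assumption: fixtures are addressed by a stable hash of the normalized request
        payload = json.dumps({"method": method, "url": url, "body": body}, sort_keys=True)
        return hashlib.sha256(payload.encode()).hexdigest()[:12]

    def load_recorded_response(path: str) -> dict:
        with open(path) as f:
            rec = json.load(f)
        return rec["response"]["body"]  # {"__type__": ..., "__data__": ...}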
tests/integration/recordings/responses/2b2ad549510d.json (new file, +448)
@@ -0,0 +1,448 @@
{
  "request": {
    "method": "POST",
    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-5-mini",
      "messages": [
        {
          "role": "user",
          "content": "Hello, world!"
        }
      ],
      "stream": true
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-5-mini"
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "",
          "choices": [],
          "created": 0,
          "model": "",
          "object": "",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null,
          "prompt_filter_results": [
            {
              "prompt_index": 0,
              "content_filter_results": {}
            }
          ]
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": "",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": "Hello",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": ",",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " world",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": "!",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " Hi",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " \u2014",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " how",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " can",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " I",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " help",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " you",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": " today",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": "?",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": "stop",
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499910,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      }
    ],
    "is_streaming": true
  }
}
tests/integration/recordings/responses/57b67d1b1a36.json (new file, +71)
@@ -0,0 +1,71 @@
{
  "request": {
    "method": "POST",
    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-5-mini",
      "messages": [
        {
          "role": "user",
          "content": "Which planet has rings around it with a name starting with letter S?"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-5-mini"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-CECIkT5cbqFazpungtewksVePcUNa",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "Saturn. It's the planet famous for its prominent ring system made of ice and rock.",
              "refusal": null,
              "role": "assistant",
              "annotations": [],
              "audio": null,
              "function_call": null,
              "tool_calls": null
            },
            "content_filter_results": {}
          }
        ],
        "created": 1757499914,
        "model": "gpt-5-mini-2025-08-07",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 156,
          "prompt_tokens": 20,
          "total_tokens": 176,
          "completion_tokens_details": {
            "accepted_prediction_tokens": 0,
            "audio_tokens": 0,
            "reasoning_tokens": 128,
            "rejected_prediction_tokens": 0
          },
          "prompt_tokens_details": {
            "audio_tokens": 0,
            "cached_tokens": 0
          }
        },
        "prompt_filter_results": [
          {
            "prompt_index": 0,
            "content_filter_results": {}
          }
        ]
      }
    },
    "is_streaming": false
  }
}
tests/integration/recordings/responses/8752115f8d0c.json (new file, +71)
@@ -0,0 +1,71 @@
{
  "request": {
    "method": "POST",
    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-5-mini",
      "messages": [
        {
          "role": "user",
          "content": "Hello, world!"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-5-mini"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-CECIuyylsMNXspa83k8LrD8SQadNY",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "Hello! \ud83d\udc4b How can I help you today \u2014 answer a question, write or edit something, debug code, brainstorm ideas, or anything else?",
              "refusal": null,
              "role": "assistant",
              "annotations": [],
              "audio": null,
              "function_call": null,
              "tool_calls": null
            },
            "content_filter_results": {}
          }
        ],
        "created": 1757499924,
        "model": "gpt-5-mini-2025-08-07",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 40,
          "prompt_tokens": 10,
          "total_tokens": 50,
          "completion_tokens_details": {
            "accepted_prediction_tokens": 0,
            "audio_tokens": 0,
            "reasoning_tokens": 0,
            "rejected_prediction_tokens": 0
          },
          "prompt_tokens_details": {
            "audio_tokens": 0,
            "cached_tokens": 0
          }
        },
        "prompt_filter_results": [
          {
            "prompt_index": 0,
            "content_filter_results": {}
          }
        ]
      }
    },
    "is_streaming": false
  }
}
tests/integration/recordings/responses/94d11daee205.json (new file, +1178): file diff suppressed because it is too large
tests/integration/recordings/responses/9f3d749cc1c8.json (new file, +1150): file diff suppressed because it is too large
tests/integration/recordings/responses/c791119e6359.json (new file, +98)
@@ -0,0 +1,98 @@
{
  "request": {
    "method": "POST",
    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-5-mini",
      "messages": [
        {
          "role": "user",
          "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
        }
      ],
      "stream": false,
      "tools": [
        {
          "type": "function",
          "function": {
            "name": "get_weather",
            "description": "Get the weather in a given city",
            "parameters": {
              "type": "object",
              "properties": {
                "city": {
                  "type": "string",
                  "description": "The city to get the weather for"
                }
              }
            }
          }
        }
      ]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-5-mini"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-CECIwq9Odd0mOJMmw7ytv8iEazH4H",
        "choices": [
          {
            "finish_reason": "tool_calls",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": null,
              "refusal": null,
              "role": "assistant",
              "annotations": [],
              "audio": null,
              "function_call": null,
              "tool_calls": [
                {
                  "id": "call_yw18spRc1jjUlEyabbXBhB33",
                  "function": {
                    "arguments": "{\"city\":\"Tokyo\"}",
                    "name": "get_weather"
                  },
                  "type": "function"
                }
              ]
            },
            "content_filter_results": {}
          }
        ],
        "created": 1757499926,
        "model": "gpt-5-mini-2025-08-07",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": null,
        "usage": {
          "completion_tokens": 88,
          "prompt_tokens": 151,
          "total_tokens": 239,
          "completion_tokens_details": {
            "accepted_prediction_tokens": 0,
            "audio_tokens": 0,
            "reasoning_tokens": 64,
            "rejected_prediction_tokens": 0
          },
          "prompt_tokens_details": {
            "audio_tokens": 0,
            "cached_tokens": 0
          }
        },
        "prompt_filter_results": [
          {
            "prompt_index": 0,
            "content_filter_results": {}
          }
        ]
      }
    },
    "is_streaming": false
  }
}
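The recorded request above can be reproduced with the plain OpenAI client; a sketch for orientation (assumes credentials and base_url are configured for the endpoint under test):

    from openai import OpenAI

    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[{"role": "user", "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."}],
        tools=[{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the weather in a given city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string", "description": "The city to get the weather for"}},
                },
            },
        }],
    )
    print(response.choices[0].message.tool_calls)  # the recording answers with a get_weather call for {"city": "Tokyo"}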
tests/integration/recordings/responses/d3e27b7234e2.json (new file, +2150): file diff suppressed because it is too large
tests/integration/recordings/responses/fb785db7fafd.json (new file, +310)
@@ -0,0 +1,310 @@
{
  "request": {
    "method": "POST",
    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-5-mini",
      "messages": [
        {
          "role": "user",
          "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
        }
      ],
      "stream": true,
      "tools": [
        {
          "type": "function",
          "function": {
            "name": "get_weather",
            "description": "Get the weather in a given city",
            "parameters": {
              "type": "object",
              "properties": {
                "city": {
                  "type": "string",
                  "description": "The city to get the weather for"
                }
              }
            }
          }
        }
      ]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-5-mini"
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "",
          "choices": [],
          "created": 0,
          "model": "",
          "object": "",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null,
          "prompt_filter_results": [
            {
              "prompt_index": 0,
              "content_filter_results": {}
            }
          ]
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": [
                  {
                    "index": 0,
                    "id": "call_TMbEoYn9q0ZKtoxav5LpD9Ts",
                    "function": {
                      "arguments": "",
                      "name": "get_weather"
                    },
                    "type": "function"
                  }
                ]
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499912,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": [
                  {
                    "index": 0,
                    "id": null,
                    "function": {
                      "arguments": "{\"",
                      "name": null
                    },
                    "type": null
                  }
                ]
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499912,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": [
                  {
                    "index": 0,
                    "id": null,
                    "function": {
                      "arguments": "city",
                      "name": null
                    },
                    "type": null
                  }
                ]
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499912,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": [
                  {
                    "index": 0,
                    "id": null,
                    "function": {
                      "arguments": "\":\"",
                      "name": null
                    },
                    "type": null
                  }
                ]
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499912,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": [
                  {
                    "index": 0,
                    "id": null,
                    "function": {
                      "arguments": "Tokyo",
                      "name": null
                    },
                    "type": null
                  }
                ]
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499912,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": [
                  {
                    "index": 0,
                    "id": null,
                    "function": {
                      "arguments": "\"}",
                      "name": null
                    },
                    "type": null
                  }
                ]
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499912,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": "tool_calls",
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499912,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      }
    ],
    "is_streaming": true
  }
}
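The chunks above show how a streamed tool call arrives: the first delta carries the call id and function name, and the JSON arguments then trickle in as string fragments ("{\"", "city", "\":\"", "Tokyo", "\"}") that a consumer must concatenate per tool-call index. A sketch of that assembly, assuming OpenAI-python chunk objects (illustrative, not code from this PR):

    def collect_tool_calls(chunks):
        calls = {}  # tool-call index -> accumulated id/name/arguments
        for chunk in chunks:
            if not chunk.choices:
                continue  # e.g. the bookkeeping chunk Azure sends first
            for tc in chunk.choices[0].delta.tool_calls or []:
                slot = calls.setdefault(tc.index, {"id": None, "name": None, "arguments": ""})
                slot["id"] = tc.id or slot["id"]
                if tc.function:
                    slot["name"] = tc.function.name or slot["name"]
                    slot["arguments"] += tc.function.arguments or ""
        return calls  # here: {0: {"id": "call_TMbEoYn9q0ZKtoxav5LpD9Ts", "name": "get_weather", "arguments": "{\"city\":\"Tokyo\"}"}}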
tests/integration/recordings/responses/ff3271401fb4.json (new file, +556)
@@ -0,0 +1,556 @@
{
  "request": {
    "method": "POST",
    "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-5-mini",
      "messages": [
        {
          "role": "user",
          "content": "What is the name of the US captial?"
        }
      ],
      "stream": true
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-5-mini"
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "",
          "choices": [],
          "created": 0,
          "model": "",
          "object": "",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null,
          "prompt_filter_results": [
            {
              "prompt_index": 0,
              "content_filter_results": {}
            }
          ]
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": "",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": "The",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " capital",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " of",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " the",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " United",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " States",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " is",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " Washington",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": ",",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " D",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": ".C",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": ".",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " (",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": "District",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " of",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": " Columbia",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": ").",
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
          "choices": [
            {
              "delta": {
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": null,
                "tool_calls": null
              },
              "finish_reason": "stop",
              "index": 0,
              "logprobs": null,
              "content_filter_results": {}
            }
          ],
          "created": 1757499916,
          "model": "gpt-5-mini-2025-08-07",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": null,
          "usage": null
        }
      }
    ],
    "is_streaming": true
  }
}
@@ -49,16 +49,13 @@ def setup_openai_telemetry_data(llama_stack_client, text_model_id):
        traces = llama_stack_client.telemetry.query_traces(limit=10)
        if len(traces) >= 5:  # 5 OpenAI completion traces
            break
-        time.sleep(1)
+        time.sleep(0.1)

    if len(traces) < 5:
        pytest.fail(
            f"Failed to create sufficient OpenAI completion telemetry data after 30s. Got {len(traces)} traces."
        )

-    # Wait for 5 seconds to ensure traces has completed logging
-    time.sleep(5)
-
    yield
@@ -185,11 +182,13 @@ def test_openai_completion_creates_telemetry(llama_stack_client, text_model_id):
    assert len(response.choices) > 0, "Response should have at least one choice"

-    # Wait for telemetry to be recorded
-    time.sleep(3)
-
    # Check that we have more traces now
-    final_traces = llama_stack_client.telemetry.query_traces(limit=20)
-    final_count = len(final_traces)
+    start_time = time.time()
+    while time.time() - start_time < 30:
+        final_traces = llama_stack_client.telemetry.query_traces(limit=20)
+        final_count = len(final_traces)
+        if final_count > initial_count:
+            break
+        time.sleep(0.1)

    # Should have at least as many traces as before (might have more due to other activity)
    assert final_count >= initial_count, "Should have at least as many traces after OpenAI call"
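The same fixed-sleep-to-polling rewrite recurs in the fixtures below; the pattern could be captured once in a small helper. A sketch (hypothetical helper, not part of the PR):

    import time

    def wait_for(predicate, timeout_s=30.0, interval_s=0.1):
        """Poll `predicate` until it returns truthy or the timeout elapses."""
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            if predicate():
                return True
            time.sleep(interval_s)
        return False

    # e.g.: wait_for(lambda: len(client.telemetry.query_traces(limit=10)) >= 5)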
@@ -42,14 +42,11 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
        traces = llama_stack_client.telemetry.query_traces(limit=10)
        if len(traces) >= 4:
            break
-        time.sleep(1)
+        time.sleep(0.1)

    if len(traces) < 4:
        pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")

-    # Wait for 5 seconds to ensure traces has completed logging
-    time.sleep(5)
-
    yield
@@ -46,10 +46,7 @@ def setup_telemetry_metrics_data(openai_client, client_with_models, text_model_i
            break
        except Exception:
            pass
-        time.sleep(1)
-
-    # Wait additional time to ensure all metrics are processed
-    time.sleep(5)
+        time.sleep(0.1)

    # Return the token lists for use in tests
    return {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": total_tokens}
@@ -183,6 +183,110 @@ def test_vector_db_insert_from_url_and_query(
    assert any("llama2" in chunk.content.lower() for chunk in response2.chunks)


def test_rag_tool_openai_apis(client_with_empty_registry, embedding_model_id, embedding_dimension):
    vector_db_id = "test_openai_vector_db"

    client_with_empty_registry.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model=embedding_model_id,
        embedding_dimension=embedding_dimension,
    )

    available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
    actual_vector_db_id = available_vector_dbs[0]

    # different document formats that should work with OpenAI APIs
    documents = [
        Document(
            document_id="text-doc",
            content="This is a plain text document about machine learning algorithms.",
            metadata={"type": "text", "category": "AI"},
        ),
        Document(
            document_id="url-doc",
            content="https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/chat.rst",
            mime_type="text/plain",
            metadata={"type": "url", "source": "pytorch"},
        ),
        Document(
            document_id="data-url-doc",
            content="data:text/plain;base64,VGhpcyBpcyBhIGRhdGEgVVJMIGRvY3VtZW50IGFib3V0IGRlZXAgbGVhcm5pbmcu",  # "This is a data URL document about deep learning."
            metadata={"type": "data_url", "encoding": "base64"},
        ),
    ]

    client_with_empty_registry.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=actual_vector_db_id,
        chunk_size_in_tokens=256,
    )

    files_list = client_with_empty_registry.files.list()
    assert len(files_list.data) >= len(documents), (
        f"Expected at least {len(documents)} files, got {len(files_list.data)}"
    )

    vector_store_files = client_with_empty_registry.vector_io.openai_list_files_in_vector_store(
        vector_store_id=actual_vector_db_id
    )
    assert len(vector_store_files.data) >= len(documents), f"Expected at least {len(documents)} files in vector store"

    response = client_with_empty_registry.tool_runtime.rag_tool.query(
        vector_db_ids=[actual_vector_db_id],
        content="Tell me about machine learning and deep learning",
    )

    assert_valid_text_response(response)
    content_text = " ".join([chunk.text for chunk in response.content]).lower()
    assert "machine learning" in content_text or "deep learning" in content_text


def test_rag_tool_exception_handling(client_with_empty_registry, embedding_model_id, embedding_dimension):
    vector_db_id = "test_exception_handling"

    client_with_empty_registry.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model=embedding_model_id,
        embedding_dimension=embedding_dimension,
    )

    available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
    actual_vector_db_id = available_vector_dbs[0]

    documents = [
        Document(
            document_id="valid-doc",
            content="This is a valid document that should be processed successfully.",
            metadata={"status": "valid"},
        ),
        Document(
            document_id="invalid-url-doc",
            content="https://nonexistent-domain-12345.com/invalid.txt",
            metadata={"status": "invalid_url"},
        ),
        Document(
            document_id="another-valid-doc",
            content="This is another valid document for testing resilience.",
            metadata={"status": "valid"},
        ),
    ]

    client_with_empty_registry.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=actual_vector_db_id,
        chunk_size_in_tokens=256,
    )

    response = client_with_empty_registry.tool_runtime.rag_tool.query(
        vector_db_ids=[actual_vector_db_id],
        content="valid document",
    )

    assert_valid_text_response(response)
    content_text = " ".join([chunk.text for chunk in response.content]).lower()
    assert "valid document" in content_text


def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_id, embedding_dimension):
    providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
    assert len(providers) > 0
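For reference, the base64 payload in the "data-url-doc" document above round-trips cleanly; a quick way to build such a data: URL (plain Python, shown for illustration only):

    import base64

    text = "This is a data URL document about deep learning."
    data_url = "data:text/plain;base64," + base64.b64encode(text.encode()).decode()
    assert data_url == "data:text/plain;base64,VGhpcyBpcyBhIGRhdGEgVVJMIGRvY3VtZW50IGFib3V0IGRlZXAgbGVhcm5pbmcu"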

@ -249,3 +353,107 @@ def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_id, embedding_dimension):
            "chunk_template": "This should raise a ValueError because it is missing the proper template variables",
        },
    )


def test_rag_tool_query_generation(client_with_empty_registry, embedding_model_id, embedding_dimension):
    vector_db_id = "test_query_generation_db"

    client_with_empty_registry.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model=embedding_model_id,
        embedding_dimension=embedding_dimension,
    )

    available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
    actual_vector_db_id = available_vector_dbs[0]

    documents = [
        Document(
            document_id="ai-doc",
            content="Artificial intelligence and machine learning are transforming technology.",
            metadata={"category": "AI"},
        ),
        Document(
            document_id="banana-doc",
            content="Don't bring a banana to a knife fight.",
            metadata={"category": "wisdom"},
        ),
    ]

    client_with_empty_registry.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=actual_vector_db_id,
        chunk_size_in_tokens=256,
    )

    response = client_with_empty_registry.tool_runtime.rag_tool.query(
        vector_db_ids=[actual_vector_db_id],
        content="Tell me about AI",
    )

    assert_valid_text_response(response)
    content_text = " ".join([chunk.text for chunk in response.content]).lower()
    assert "artificial intelligence" in content_text or "machine learning" in content_text


def test_rag_tool_pdf_data_url_handling(client_with_empty_registry, embedding_model_id, embedding_dimension):
    vector_db_id = "test_pdf_data_url_db"

    client_with_empty_registry.vector_dbs.register(
        vector_db_id=vector_db_id,
        embedding_model=embedding_model_id,
        embedding_dimension=embedding_dimension,
    )

    available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
    actual_vector_db_id = available_vector_dbs[0]
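
    # Minimal single-page PDF (generated by PyFPDF, per its /Producer metadata)
    # whose /Title is "This is a sample title." -- the query below searches for it.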
    sample_pdf = b"%PDF-1.3\n3 0 obj\n<</Type /Page\n/Parent 1 0 R\n/Resources 2 0 R\n/Contents 4 0 R>>\nendobj\n4 0 obj\n<</Filter /FlateDecode /Length 115>>\nstream\nx\x9c\x15\xcc1\x0e\x820\x18@\xe1\x9dS\xbcM]jk$\xd5\xd5(\x83!\x86\xa1\x17\xf8\xa3\xa5`LIh+\xd7W\xc6\xf7\r\xef\xc0\xbd\xd2\xaa\xb6,\xd5\xc5\xb1o\x0c\xa6VZ\xe3znn%\xf3o\xab\xb1\xe7\xa3:Y\xdc\x8bm\xeb\xf3&1\xc8\xd7\xd3\x97\xc82\xe6\x81\x87\xe42\xcb\x87Vb(\x12<\xdd<=}Jc\x0cL\x91\xee\xda$\xb5\xc3\xbd\xd7\xe9\x0f\x8d\x97 $\nendstream\nendobj\n1 0 obj\n<</Type /Pages\n/Kids [3 0 R ]\n/Count 1\n/MediaBox [0 0 595.28 841.89]\n>>\nendobj\n5 0 obj\n<</Type /Font\n/BaseFont /Helvetica\n/Subtype /Type1\n/Encoding /WinAnsiEncoding\n>>\nendobj\n2 0 obj\n<<\n/ProcSet [/PDF /Text /ImageB /ImageC /ImageI]\n/Font <<\n/F1 5 0 R\n>>\n/XObject <<\n>>\n>>\nendobj\n6 0 obj\n<<\n/Producer (PyFPDF 1.7.2 http://pyfpdf.googlecode.com/)\n/Title (This is a sample title.)\n/Author (Llama Stack Developers)\n/CreationDate (D:20250312165548)\n>>\nendobj\n7 0 obj\n<<\n/Type /Catalog\n/Pages 1 0 R\n/OpenAction [3 0 R /FitH null]\n/PageLayout /OneColumn\n>>\nendobj\nxref\n0 8\n0000000000 65535 f \n0000000272 00000 n \n0000000455 00000 n \n0000000009 00000 n \n0000000087 00000 n \n0000000359 00000 n \n0000000559 00000 n \n0000000734 00000 n \ntrailer\n<<\n/Size 8\n/Root 7 0 R\n/Info 6 0 R\n>>\nstartxref\n837\n%%EOF\n"

    import base64

    pdf_base64 = base64.b64encode(sample_pdf).decode("utf-8")
    pdf_data_url = f"data:application/pdf;base64,{pdf_base64}"

    documents = [
        Document(
            document_id="test-pdf-data-url",
            content=pdf_data_url,
            metadata={"type": "pdf", "source": "data_url"},
        ),
    ]

    client_with_empty_registry.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=actual_vector_db_id,
        chunk_size_in_tokens=256,
    )

    files_list = client_with_empty_registry.files.list()
    assert len(files_list.data) >= 1, "PDF should have been uploaded to Files API"

    pdf_file = None
    for file in files_list.data:
        if file.filename and "test-pdf-data-url" in file.filename:
            pdf_file = file
            break

    assert pdf_file is not None, "PDF file should be found in Files API"
    assert pdf_file.bytes == len(sample_pdf), f"File size should match original PDF ({len(sample_pdf)} bytes)"

    file_content = client_with_empty_registry.files.retrieve_content(pdf_file.id)
    assert file_content.startswith(b"%PDF-"), "Retrieved file should be a valid PDF"

    vector_store_files = client_with_empty_registry.vector_io.openai_list_files_in_vector_store(
        vector_store_id=actual_vector_db_id
    )
    assert len(vector_store_files.data) >= 1, "PDF should be attached to vector store"

    response = client_with_empty_registry.tool_runtime.rag_tool.query(
        vector_db_ids=[actual_vector_db_id],
        content="sample title",
    )

    assert_valid_text_response(response)
    content_text = " ".join([chunk.text for chunk in response.content]).lower()
    assert "sample title" in content_text or "title" in content_text

@ -6,16 +6,18 @@

import tempfile
from pathlib import Path
from unittest.mock import patch
from unittest.mock import AsyncMock, Mock, patch

import pytest
from openai import AsyncOpenAI
from openai import NOT_GIVEN, AsyncOpenAI
from openai.types.model import Model as OpenAIModel

# Import the real Pydantic response types instead of using Mocks
from llama_stack.apis.inference import (
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChoice,
    OpenAICompletion,
    OpenAIEmbeddingData,
    OpenAIEmbeddingsResponse,
    OpenAIEmbeddingUsage,

@ -153,24 +155,22 @@ class TestInferenceRecording:

    async def test_recording_mode(self, temp_storage_dir, real_openai_chat_response):
        """Test that recording mode captures and stores responses."""

        async def mock_create(*args, **kwargs):
            return real_openai_chat_response

        temp_storage_dir = temp_storage_dir / "test_recording_mode"
        with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
            with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
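            # Mocking the transport-level _post (rather than create) appears to
            # leave the recording wrapper installed on create() in place while
            # still avoiding a real HTTP call.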

                response = await client.chat.completions.create(
                    model="llama3.2:3b",
                    messages=[{"role": "user", "content": "Hello, how are you?"}],
                    temperature=0.7,
                    max_tokens=50,
                )
            response = await client.chat.completions.create(
                model="llama3.2:3b",
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                temperature=0.7,
                max_tokens=50,
                user=NOT_GIVEN,
            )

                # Verify the response was returned correctly
                assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
            # Verify the response was returned correctly
            assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
            client.chat.completions._post.assert_called_once()

        # Verify recording was stored
        storage = ResponseStorage(temp_storage_dir)

@ -178,40 +178,74 @@ class TestInferenceRecording:

    async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
        """Test that replay mode returns stored responses without making real calls."""

        async def mock_create(*args, **kwargs):
            return real_openai_chat_response

        temp_storage_dir = temp_storage_dir / "test_replay_mode"
        # First, record a response
        with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
            with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)

                response = await client.chat.completions.create(
                    model="llama3.2:3b",
                    messages=[{"role": "user", "content": "Hello, how are you?"}],
                    temperature=0.7,
                    max_tokens=50,
                )
            response = await client.chat.completions.create(
                model="llama3.2:3b",
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                temperature=0.7,
                max_tokens=50,
                user=NOT_GIVEN,
            )
            client.chat.completions._post.assert_called_once()

        # Now test replay mode - should not call the original method
        with patch("openai.resources.chat.completions.AsyncCompletions.create") as mock_create_patch:
            with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)

                response = await client.chat.completions.create(
                    model="llama3.2:3b",
                    messages=[{"role": "user", "content": "Hello, how are you?"}],
                    temperature=0.7,
                    max_tokens=50,
                )
            response = await client.chat.completions.create(
                model="llama3.2:3b",
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                temperature=0.7,
                max_tokens=50,
            )

                # Verify we got the recorded response
                assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
            # Verify we got the recorded response
            assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."

                # Verify the original method was NOT called
                mock_create_patch.assert_not_called()
            # Verify the original method was NOT called
            client.chat.completions._post.assert_not_called()

    async def test_replay_mode_models(self, temp_storage_dir):
        """Test that replay mode returns stored responses without making real model listing calls."""

        async def _async_iterator(models):
            for model in models:
                yield model

        models = [
            OpenAIModel(id="foo", created=1, object="model", owned_by="test"),
            OpenAIModel(id="bar", created=2, object="model", owned_by="test"),
        ]

        expected_ids = {m.id for m in models}

        temp_storage_dir = temp_storage_dir / "test_replay_mode_models"

        # baseline - mock works without recording
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        client.models._get_api_list = Mock(return_value=_async_iterator(models))
        assert {m.id async for m in client.models.list()} == expected_ids
        client.models._get_api_list.assert_called_once()

        # record the call
        with inference_recording(mode=InferenceMode.RECORD, storage_dir=temp_storage_dir):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.models._get_api_list = Mock(return_value=_async_iterator(models))
            assert {m.id async for m in client.models.list()} == expected_ids
            client.models._get_api_list.assert_called_once()

        # replay the call
        with inference_recording(mode=InferenceMode.REPLAY, storage_dir=temp_storage_dir):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.models._get_api_list = Mock(return_value=_async_iterator(models))
            assert {m.id async for m in client.models.list()} == expected_ids
            client.models._get_api_list.assert_not_called()

    async def test_replay_missing_recording(self, temp_storage_dir):
        """Test that replay mode fails when no recording is found."""

@ -228,36 +262,110 @@ class TestInferenceRecording:
    async def test_embeddings_recording(self, temp_storage_dir, real_embeddings_response):
        """Test recording and replay of embeddings calls."""

        async def mock_create(*args, **kwargs):
            return real_embeddings_response

        # baseline - mock works without recording
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
        response = await client.embeddings.create(
            model=real_embeddings_response.model,
            input=["Hello world", "Test embedding"],
            encoding_format=NOT_GIVEN,
        )
        assert len(response.data) == 2
        assert response.data[0].embedding == [0.1, 0.2, 0.3]
        client.embeddings._post.assert_called_once()

        temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
        # Record
        with patch("openai.resources.embeddings.AsyncEmbeddings.create", side_effect=mock_create):
            with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.embeddings._post = AsyncMock(return_value=real_embeddings_response)

                response = await client.embeddings.create(
                    model="nomic-embed-text", input=["Hello world", "Test embedding"]
                )
            response = await client.embeddings.create(
                model=real_embeddings_response.model,
                input=["Hello world", "Test embedding"],
                encoding_format=NOT_GIVEN,
                dimensions=NOT_GIVEN,
                user=NOT_GIVEN,
            )

                assert len(response.data) == 2
            assert len(response.data) == 2

        # Replay
        with patch("openai.resources.embeddings.AsyncEmbeddings.create") as mock_create_patch:
            with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.embeddings._post = AsyncMock(return_value=real_embeddings_response)

                response = await client.embeddings.create(
                    model="nomic-embed-text", input=["Hello world", "Test embedding"]
                )
            response = await client.embeddings.create(
                model=real_embeddings_response.model,
                input=["Hello world", "Test embedding"],
            )

                # Verify we got the recorded response
                assert len(response.data) == 2
                assert response.data[0].embedding == [0.1, 0.2, 0.3]
            # Verify we got the recorded response
            assert len(response.data) == 2
            assert response.data[0].embedding == [0.1, 0.2, 0.3]

                # Verify original method was not called
                mock_create_patch.assert_not_called()
            # Verify original method was not called
            client.embeddings._post.assert_not_called()

    async def test_completions_recording(self, temp_storage_dir):
        real_completions_response = OpenAICompletion(
            id="test_completion",
            object="text_completion",
            created=1234567890,
            model="llama3.2:3b",
            choices=[
                {
                    "text": "Hello! I'm doing well, thank you for asking.",
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": "stop",
                }
            ],
        )

        temp_storage_dir = temp_storage_dir / "test_completions_recording"

        # baseline - mock works without recording
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        client.completions._post = AsyncMock(return_value=real_completions_response)
        response = await client.completions.create(
            model=real_completions_response.model,
            prompt="Hello, how are you?",
            temperature=0.7,
            max_tokens=50,
            user=NOT_GIVEN,
        )
        assert response.choices[0].text == real_completions_response.choices[0].text
        client.completions._post.assert_called_once()

        # Record
        with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.completions._post = AsyncMock(return_value=real_completions_response)

            response = await client.completions.create(
                model=real_completions_response.model,
                prompt="Hello, how are you?",
                temperature=0.7,
                max_tokens=50,
                user=NOT_GIVEN,
            )

            assert response.choices[0].text == real_completions_response.choices[0].text
            client.completions._post.assert_called_once()

        # Replay
        with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
            client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
            client.completions._post = AsyncMock(return_value=real_completions_response)
            response = await client.completions.create(
                model=real_completions_response.model,
                prompt="Hello, how are you?",
                temperature=0.7,
                max_tokens=50,
            )
            assert response.choices[0].text == real_completions_response.choices[0].text
            client.completions._post.assert_not_called()

    async def test_live_mode(self, real_openai_chat_response):
        """Test that live mode passes through to original methods."""

@ -6,19 +6,15 @@

import asyncio
import json
import logging  # allow-direct-logging
import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch

import pytest
from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk as OpenAIChatCompletionChunk,
)
from openai.types.chat.chat_completion_chunk import (
    Choice as OpenAIChoice,
    Choice as OpenAIChoiceChunk,
)
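# The chunk-level Choice is renamed to OpenAIChoiceChunk so it no longer shadows
# llama_stack.apis.inference.OpenAIChoice, which this file now imports as well.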
from openai.types.chat.chat_completion_chunk import (
    ChoiceDelta as OpenAIChoiceDelta,

@ -35,6 +31,9 @@ from llama_stack.apis.inference import (
    ChatCompletionRequest,
    ChatCompletionResponseEventType,
    CompletionMessage,
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChoice,
    SystemMessage,
    ToolChoice,
    ToolConfig,

@ -61,41 +60,6 @@ from llama_stack.providers.remote.inference.vllm.vllm import (
# -v -s --tb=short --disable-warnings


class MockInferenceAdapterWithSleep:
    def __init__(self, sleep_time: int, response: dict[str, Any]):
        self.httpd = None

        class DelayedRequestHandler(BaseHTTPRequestHandler):
            # ruff: noqa: N802
            def do_POST(self):
                time.sleep(sleep_time)
                response_body = json.dumps(response).encode("utf-8")
                self.send_response(code=200)
                self.send_header("Content-Type", "application/json")
                self.send_header("Content-Length", len(response_body))
                self.end_headers()
                self.wfile.write(response_body)

        self.request_handler = DelayedRequestHandler

    def __enter__(self):
        httpd = HTTPServer(("", 0), self.request_handler)
        self.httpd = httpd
        host, port = httpd.server_address
        httpd_thread = threading.Thread(target=httpd.serve_forever)
        httpd_thread.daemon = True  # stop server if this thread terminates
        httpd_thread.start()

        config = VLLMInferenceAdapterConfig(url=f"http://{host}:{port}")
        inference_adapter = VLLMInferenceAdapter(config)
        return inference_adapter

    def __exit__(self, _exc_type, _exc_value, _traceback):
        if self.httpd:
            self.httpd.shutdown()
            self.httpd.server_close()


@pytest.fixture(scope="module")
def mock_openai_models_list():
    with patch("openai.resources.models.AsyncModels.list", new_callable=AsyncMock) as mock_list:

@ -150,10 +114,12 @@ async def test_tool_call_response(vllm_inference_adapter):
    """Verify that tool call arguments from a CompletionMessage are correctly converted
    into the expected JSON format."""

    # Patch the call to vllm so we can inspect the arguments sent were correct
    with patch.object(
        vllm_inference_adapter.client.chat.completions, "create", new_callable=AsyncMock
    ) as mock_nonstream_completion:
    # Patch the client property to avoid instantiating a real AsyncOpenAI client
    with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
        mock_client = MagicMock()
        mock_client.chat.completions.create = AsyncMock()
        mock_create_client.return_value = mock_client

        messages = [
            SystemMessage(content="You are a helpful assistant"),
            UserMessage(content="How many?"),

@ -179,7 +145,7 @@ async def test_tool_call_response(vllm_inference_adapter):
            tool_config=ToolConfig(tool_choice=ToolChoice.auto),
        )

        assert mock_nonstream_completion.call_args.kwargs["messages"][2]["tool_calls"] == [
        assert mock_client.chat.completions.create.call_args.kwargs["messages"][2]["tool_calls"] == [
            {
                "id": "foo",
                "type": "function",

@ -199,7 +165,7 @@ async def test_tool_call_delta_empty_tool_call_buf():

    async def mock_stream():
        delta = OpenAIChoiceDelta(content="", tool_calls=None)
        choices = [OpenAIChoice(delta=delta, finish_reason="stop", index=0)]
        choices = [OpenAIChoiceChunk(delta=delta, finish_reason="stop", index=0)]
        mock_chunk = OpenAIChatCompletionChunk(
            id="chunk-1",
            created=1,

@ -225,7 +191,7 @@ async def test_tool_call_delta_streaming_arguments_dict():
        model="foo",
        object="chat.completion.chunk",
        choices=[
            OpenAIChoice(
            OpenAIChoiceChunk(
                delta=OpenAIChoiceDelta(
                    content="",
                    tool_calls=[

@ -250,7 +216,7 @@ async def test_tool_call_delta_streaming_arguments_dict():
        model="foo",
        object="chat.completion.chunk",
        choices=[
            OpenAIChoice(
            OpenAIChoiceChunk(
                delta=OpenAIChoiceDelta(
                    content="",
                    tool_calls=[

@ -275,7 +241,9 @@ async def test_tool_call_delta_streaming_arguments_dict():
        model="foo",
        object="chat.completion.chunk",
        choices=[
            OpenAIChoice(delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0)
            OpenAIChoiceChunk(
                delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0
            )
        ],
    )
    for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]:

@ -299,7 +267,7 @@ async def test_multiple_tool_calls():
        model="foo",
        object="chat.completion.chunk",
        choices=[
            OpenAIChoice(
            OpenAIChoiceChunk(
                delta=OpenAIChoiceDelta(
                    content="",
                    tool_calls=[

@ -324,7 +292,7 @@ async def test_multiple_tool_calls():
        model="foo",
        object="chat.completion.chunk",
        choices=[
            OpenAIChoice(
            OpenAIChoiceChunk(
                delta=OpenAIChoiceDelta(
                    content="",
                    tool_calls=[

@ -349,7 +317,9 @@ async def test_multiple_tool_calls():
        model="foo",
        object="chat.completion.chunk",
        choices=[
            OpenAIChoice(delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0)
            OpenAIChoiceChunk(
                delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0
            )
        ],
    )
    for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]:

@ -393,59 +363,6 @@ async def test_process_vllm_chat_completion_stream_response_no_choices():
    assert chunks[0].event.event_type.value == "start"


@pytest.mark.allow_network
def test_chat_completion_doesnt_block_event_loop(caplog):
    loop = asyncio.new_event_loop()
    loop.set_debug(True)
    caplog.set_level(logging.WARNING)

    # Log when event loop is blocked for more than 500ms
    loop.slow_callback_duration = 0.5
    # Sleep for 500ms in our delayed http response
    sleep_time = 0.5

    mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm-inference")
    mock_response = {
        "id": "chatcmpl-abc123",
        "object": "chat.completion",
        "created": 1,
        "model": "mock-model",
        "choices": [
            {
                "message": {"content": ""},
                "logprobs": None,
                "finish_reason": "stop",
                "index": 0,
            }
        ],
    }

    async def do_chat_completion():
        await inference_adapter.chat_completion(
            "mock-model",
            [],
            stream=False,
            tools=None,
            tool_config=ToolConfig(tool_choice=ToolChoice.auto),
        )

    with MockInferenceAdapterWithSleep(sleep_time, mock_response) as inference_adapter:
        inference_adapter.model_store = AsyncMock()
        inference_adapter.model_store.get_model.return_value = mock_model
        loop.run_until_complete(inference_adapter.initialize())

        # Clear the logs so far and run the actual chat completion we care about
        caplog.clear()
        loop.run_until_complete(do_chat_completion())

        # Ensure we don't have any asyncio warnings in the captured log
        # records from our chat completion call. A message gets logged
        # here any time we exceed the slow_callback_duration configured
        # above.
        asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
        assert not asyncio_warnings


async def test_get_params_empty_tools(vllm_inference_adapter):
    request = ChatCompletionRequest(
        tools=[],

@ -641,9 +558,7 @@ async def test_health_status_success(vllm_inference_adapter):
    This test verifies that the health method returns a HealthResponse with status OK, only
    when the connection to the vLLM server is successful.
    """
    # Set vllm_inference_adapter.client to None to ensure _create_client is called
    vllm_inference_adapter.client = None
    with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
    with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
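        # The adapter now exposes `client` as a property, so the test mocks the
        # property itself instead of the old _create_client helper.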
        # Create mock client and models
        mock_client = MagicMock()
        mock_models = MagicMock()

@ -674,8 +589,7 @@ async def test_health_status_failure(vllm_inference_adapter):
    This test verifies that the health method returns a HealthResponse with status ERROR
    and an appropriate error message when the connection to the vLLM server fails.
    """
    vllm_inference_adapter.client = None
    with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
    with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
        # Create mock client and models
        mock_client = MagicMock()
        mock_models = MagicMock()

@ -697,3 +611,48 @@ async def test_health_status_failure(vllm_inference_adapter):
    assert "Health check failed: Connection failed" in health_response["message"]

    mock_models.list.assert_called_once()


async def test_openai_chat_completion_is_async(vllm_inference_adapter):
    """
    Verify that openai_chat_completion is async and doesn't block the event loop.

    To do this we mock the underlying inference with a sleep, start multiple
    inference calls in parallel, and ensure the total time taken is less
    than the sum of the individual sleep times.
    """
    sleep_time = 0.5

    async def mock_create(*args, **kwargs):
        await asyncio.sleep(sleep_time)
        return OpenAIChatCompletion(
            id="chatcmpl-abc123",
            created=1,
            model="mock-model",
            choices=[
                OpenAIChoice(
                    message=OpenAIAssistantMessageParam(
                        content="nothing interesting",
                    ),
                    finish_reason="stop",
                    index=0,
                )
            ],
        )

    async def do_inference():
        await vllm_inference_adapter.openai_chat_completion(
            "mock-model", messages=["one fish", "two fish"], stream=False
        )

    with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
        mock_client = MagicMock()
        mock_client.chat.completions.create = AsyncMock(side_effect=mock_create)
        mock_create_client.return_value = mock_client

        start_time = time.time()
        await asyncio.gather(do_inference(), do_inference(), do_inference(), do_inference())
        total_time = time.time() - start_time
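
        # Four sequential calls would take ~4 * sleep_time; finishing in under
        # 2 * sleep_time demonstrates the calls overlapped on the event loop.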
        assert mock_create_client.call_count == 4  # no cheating
        assert total_time < (sleep_time * 2), f"Total time taken: {total_time}s exceeded expected max"

53 tests/unit/providers/test_bedrock.py Normal file
@ -0,0 +1,53 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.remote.inference.bedrock.bedrock import (
    _get_region_prefix,
    _to_inference_profile_id,
)
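
# A brief note on these helpers, inferred from the assertions below: Bedrock
# cross-region inference profiles prefix the model ID with a region family
# ("us.", "eu.", "ap.", defaulting to "us."), and _to_inference_profile_id
# rewrites plain model IDs into profile IDs while leaving ARNs untouched.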


def test_region_prefixes():
    assert _get_region_prefix("us-east-1") == "us."
    assert _get_region_prefix("eu-west-1") == "eu."
    assert _get_region_prefix("ap-south-1") == "ap."
    assert _get_region_prefix("ca-central-1") == "us."

    # Test case insensitive
    assert _get_region_prefix("US-EAST-1") == "us."
    assert _get_region_prefix("EU-WEST-1") == "eu."
    assert _get_region_prefix("Ap-South-1") == "ap."

    # Test None region
    assert _get_region_prefix(None) == "us."


def test_model_id_conversion():
    # Basic conversion
    assert (
        _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0", "us-east-1") == "us.meta.llama3-1-70b-instruct-v1:0"
    )

    # Already has prefix
    assert (
        _to_inference_profile_id("us.meta.llama3-1-70b-instruct-v1:0", "us-east-1")
        == "us.meta.llama3-1-70b-instruct-v1:0"
    )

    # ARN should be returned unchanged
    arn = "arn:aws:bedrock:us-east-1:123456789012:inference-profile/us.meta.llama3-1-70b-instruct-v1:0"
    assert _to_inference_profile_id(arn, "us-east-1") == arn

    # ARN should be returned unchanged even without region
    assert _to_inference_profile_id(arn) == arn

    # Optional region parameter defaults to us-east-1
    assert _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0") == "us.meta.llama3-1-70b-instruct-v1:0"

    # Different regions work with optional parameter
    assert (
        _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0", "eu-west-1") == "eu.meta.llama3-1-70b-instruct-v1:0"
    )

@ -178,3 +178,41 @@ def test_content_from_data_and_mime_type_both_encodings_fail():
    # Should raise an exception instead of returning empty string
    with pytest.raises(UnicodeDecodeError):
        content_from_data_and_mime_type(data, mime_type)


async def test_memory_tool_error_handling():
    """Test that memory tool handles various failures gracefully without crashing."""
    from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
    from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl

    config = RagToolRuntimeConfig()
    memory_tool = MemoryToolRuntimeImpl(
        config=config,
        vector_io_api=AsyncMock(),
        inference_api=AsyncMock(),
        files_api=AsyncMock(),
    )

    docs = [
        RAGDocument(document_id="good_doc", content="Good content", metadata={}),
        RAGDocument(document_id="bad_url_doc", content=URL(uri="https://bad.url"), metadata={}),
        RAGDocument(document_id="another_good_doc", content="Another good content", metadata={}),
    ]

    mock_file1 = MagicMock()
    mock_file1.id = "file_good1"
    mock_file2 = MagicMock()
    mock_file2.id = "file_good2"
    memory_tool.files_api.openai_upload_file.side_effect = [mock_file1, mock_file2]
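
    # Make the URL fetch fail for bad_url_doc while the two plain-text
    # documents upload normally.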
    with patch("httpx.AsyncClient") as mock_client:
        mock_instance = AsyncMock()
        mock_instance.get.side_effect = Exception("Bad URL")
        mock_client.return_value.__aenter__.return_value = mock_instance

        # won't raise exception despite one document failing
        await memory_tool.insert(docs, "vector_store_123")

        # processed 2 documents successfully, skipped 1
        assert memory_tool.files_api.openai_upload_file.call_count == 2
        assert memory_tool.vector_io_api.openai_attach_file_to_vector_store.call_count == 2

@ -26,9 +26,9 @@ def test_generate_chunk_id():

    chunk_ids = sorted([chunk.chunk_id for chunk in chunks])
    assert chunk_ids == [
        "177a1368-f6a8-0c50-6e92-18677f2c3de3",
        "bc744db3-1b25-0a9c-cdff-b6ba3df73c36",
        "f68df25d-d9aa-ab4d-5684-64a233add20d",
        "31d1f9a3-c8d2-66e7-3c37-af2acd329778",
        "d07dade7-29c0-cda7-df29-0249a1dcbc3e",
        "d14f75a1-5855-7f72-2c78-d9fc4275a346",
    ]


@ -36,14 +36,14 @@ def test_generate_chunk_id_with_window():
    chunk = Chunk(content="test", metadata={"document_id": "doc-1"})
    chunk_id1 = generate_chunk_id("doc-1", chunk, chunk_window="0-1")
    chunk_id2 = generate_chunk_id("doc-1", chunk, chunk_window="1-2")
    assert chunk_id1 == "149018fe-d0eb-0f8d-5f7f-726bdd2aeedb"
    assert chunk_id2 == "4562c1ee-9971-1f3b-51a6-7d05e5211154"
    assert chunk_id1 == "8630321a-d9cb-2bb6-cd28-ebf68dafd866"
    assert chunk_id2 == "13a1c09a-cbda-b61a-2d1a-7baa90888685"


def test_chunk_id():
    # Test with existing chunk ID
    chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"})
    assert chunk_with_id.chunk_id == "84ededcc-b80b-a83e-1a20-ca6515a11350"
    assert chunk_with_id.chunk_id == "11704f92-42b6-61df-bf85-6473e7708fbd"

    # Test with document ID in metadata
    chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"})

@ -65,6 +65,9 @@ async def test_inference_store_pagination_basic():
        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
        await store.store_chat_completion(completion, input_messages)

    # Wait for all queued writes to complete
    await store.flush()

    # Test 1: First page with limit=2, descending order (default)
    result = await store.list_chat_completions(limit=2, order=Order.desc)
    assert len(result.data) == 2

@ -108,6 +111,9 @@ async def test_inference_store_pagination_ascending():
        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
        await store.store_chat_completion(completion, input_messages)

    # Wait for all queued writes to complete
    await store.flush()

    # Test ascending order pagination
    result = await store.list_chat_completions(limit=1, order=Order.asc)
    assert len(result.data) == 1

@ -143,6 +149,9 @@ async def test_inference_store_pagination_with_model_filter():
        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
        await store.store_chat_completion(completion, input_messages)

    # Wait for all queued writes to complete
    await store.flush()

    # Test pagination with model filter
    result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
    assert len(result.data) == 1

@ -190,6 +199,9 @@ async def test_inference_store_pagination_no_limit():
        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
        await store.store_chat_completion(completion, input_messages)

    # Wait for all queued writes to complete
    await store.flush()

    # Test without limit
    result = await store.list_chat_completions(order=Order.desc)
    assert len(result.data) == 2