diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 931c144af..5186a221e 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -9,6 +9,7 @@ import time import unicodedata import pytest +from pydantic import BaseModel from ..test_cases.test_case import TestCase @@ -62,6 +63,14 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id) pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.") +def skip_if_doesnt_support_completions_logprobs(client_with_models, model_id): + provider_type = provider_from_model(client_with_models, model_id).provider_type + if provider_type in ( + "remote::ollama", # logprobs is ignored + ): + pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions logprobs.") + + def skip_if_model_doesnt_support_suffix(client_with_models, model_id): # To test `fim` ( fill in the middle ) completion, we need to use a model that supports suffix. # Use this to specifically test this API functionality. @@ -205,28 +214,6 @@ def test_openai_completion_streaming(llama_stack_client, client_with_models, tex assert len(content_str) > 10 -@pytest.mark.parametrize( - "prompt_logprobs", - [ - 1, - 0, - ], -) -def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs): - skip_if_provider_isnt_vllm(client_with_models, text_model_id) - - prompt = "Hello, world!" - response = llama_stack_client.completions.create( - model=text_model_id, - prompt=prompt, - stream=False, - prompt_logprobs=prompt_logprobs, - ) - assert len(response.choices) > 0 - choice = response.choices[0] - assert len(choice.prompt_logprobs) > 0 - - def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id): skip_if_provider_isnt_vllm(client_with_models, text_model_id) @@ -518,3 +505,214 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi message_content = response.choices[0].message.content.lower().strip() normalized_content = _normalize_text(message_content) assert "hello world" in normalized_content + + +@pytest.mark.parametrize( + "test_case", + [ + "inference:completion:stop_sequence", + ], +) +def test_openai_completion_stop_sequence(client_with_models, openai_client, text_model_id, test_case): + skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id) + + tc = TestCase(test_case) + + response = openai_client.completions.create( + model=text_model_id, + prompt=tc["content"], + stop="1963", + stream=False, + ) + assert len(response.choices) > 0 + choice = response.choices[0] + assert "1963" not in choice.text + + response = openai_client.completions.create( + model=text_model_id, + prompt=tc["content"], + stop=["blathering", "1963"], + stream=False, + ) + assert len(response.choices) > 0 + choice = response.choices[0] + assert "1963" not in choice.text + + +@pytest.mark.parametrize( + "test_case", + [ + "inference:completion:log_probs", + ], +) +def test_openai_completion_logprobs(client_with_models, openai_client, text_model_id, test_case): + skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id) + skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id) + + tc = TestCase(test_case) + + response = openai_client.completions.create( + model=text_model_id, + 
prompt=tc["content"], + logprobs=5, + ) + assert len(response.choices) > 0 + choice = response.choices[0] + assert choice.text, "Response text should not be empty" + assert choice.logprobs, "Logprobs should not be empty" + logprobs = choice.logprobs + assert logprobs.token_logprobs, "Response tokens should not be empty" + assert len(logprobs.tokens) == len(logprobs.token_logprobs) + assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs) + for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)): + assert logprobs.top_logprobs[i][token] == prob + assert len(logprobs.top_logprobs[i]) == 5 + + +@pytest.mark.parametrize( + "test_case", + [ + "inference:completion:log_probs", + ], +) +def test_openai_completion_logprobs_streaming(client_with_models, openai_client, text_model_id, test_case): + skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id) + skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id) + + tc = TestCase(test_case) + + response = openai_client.completions.create( + model=text_model_id, + prompt=tc["content"], + logprobs=3, + stream=True, + max_tokens=5, + ) + for chunk in response: + choice = chunk.choices[0] + choice = response.choices[0] + if choice.text: # if there's a token, we expect logprobs + assert choice.logprobs, "Logprobs should not be empty" + logprobs = choice.logprobs + assert logprobs.token_logprobs, "Response tokens should not be empty" + assert len(logprobs.tokens) == len(logprobs.token_logprobs) + assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs) + for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)): + assert logprobs.top_logprobs[i][token] == prob + assert len(logprobs.top_logprobs[i]) == 3 + else: # no token, no logprobs + assert not choice.logprobs, "Logprobs should be empty" + + +@pytest.mark.parametrize( + "test_case", + [ + "inference:chat_completion:tool_calling", + ], +) +def test_openai_chat_completion_with_tools(openai_client, text_model_id, test_case): + tc = TestCase(test_case) + + response = openai_client.chat.completions.create( + model=text_model_id, + messages=tc["messages"], + tools=tc["tools"], + tool_choice="auto", + stream=False, + ) + assert len(response.choices) == 1 + assert len(response.choices[0].message.tool_calls) == 1 + tool_call = response.choices[0].message.tool_calls[0] + assert tool_call.function.name == tc["tools"][0]["function"]["name"] + assert "location" in tool_call.function.arguments + assert tc["expected"]["location"] in tool_call.function.arguments + + +@pytest.mark.parametrize( + "test_case", + [ + "inference:chat_completion:tool_calling", + ], +) +def test_openai_chat_completion_with_tools_and_streaming(openai_client, text_model_id, test_case): + tc = TestCase(test_case) + + response = openai_client.chat.completions.create( + model=text_model_id, + messages=tc["messages"], + tools=tc["tools"], + tool_choice="auto", + stream=True, + ) + # Accumulate tool calls from streaming chunks + tool_calls = [] + for chunk in response: + if chunk.choices and chunk.choices[0].delta.tool_calls: + for i, tc_delta in enumerate(chunk.choices[0].delta.tool_calls): + while len(tool_calls) <= i: + tool_calls.append({"function": {"name": "", "arguments": ""}}) + if tc_delta.function and tc_delta.function.name: + tool_calls[i]["function"]["name"] = tc_delta.function.name + if tc_delta.function and tc_delta.function.arguments: + tool_calls[i]["function"]["arguments"] += 
tc_delta.function.arguments + assert len(tool_calls) == 1 + tool_call = tool_calls[0] + assert tool_call["function"]["name"] == tc["tools"][0]["function"]["name"] + assert "location" in tool_call["function"]["arguments"] + assert tc["expected"]["location"] in tool_call["function"]["arguments"] + + +@pytest.mark.parametrize( + "test_case", + [ + "inference:chat_completion:tool_calling", + ], +) +def test_openai_chat_completion_with_tool_choice_none(openai_client, text_model_id, test_case): + tc = TestCase(test_case) + + response = openai_client.chat.completions.create( + model=text_model_id, + messages=tc["messages"], + tools=tc["tools"], + tool_choice="none", + stream=False, + ) + assert len(response.choices) == 1 + tool_calls = response.choices[0].message.tool_calls + assert tool_calls is None or len(tool_calls) == 0 + + +@pytest.mark.parametrize( + "test_case", + [ + "inference:chat_completion:structured_output", + ], +) +def test_openai_chat_completion_structured_output(openai_client, text_model_id, test_case): + # Note: Skip condition may need adjustment for OpenAI client + class AnswerFormat(BaseModel): + first_name: str + last_name: str + year_of_birth: int + + tc = TestCase(test_case) + + response = openai_client.chat.completions.create( + model=text_model_id, + messages=tc["messages"], + response_format={ + "type": "json_schema", + "json_schema": { + "name": "AnswerFormat", + "schema": AnswerFormat.model_json_schema(), + }, + }, + stream=False, + ) + print(response.choices[0].message.content) + answer = AnswerFormat.model_validate_json(response.choices[0].message.content) + expected = tc["expected"] + assert answer.first_name == expected["first_name"] + assert answer.last_name == expected["last_name"] + assert answer.year_of_birth == expected["year_of_birth"] diff --git a/tests/integration/recordings/responses/239f4768f5aa.json b/tests/integration/recordings/responses/239f4768f5aa.json new file mode 100644 index 000000000..ce540db3f --- /dev/null +++ b/tests/integration/recordings/responses/239f4768f5aa.json @@ -0,0 +1,89 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons." + }, + { + "role": "user", + "content": "Please give me information about Michael Jordan." 
+ } + ], + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "AnswerFormat", + "schema": { + "properties": { + "first_name": { + "title": "First Name", + "type": "string" + }, + "last_name": { + "title": "Last Name", + "type": "string" + }, + "year_of_birth": { + "title": "Year Of Birth", + "type": "integer" + } + }, + "required": [ + "first_name", + "last_name", + "year_of_birth" + ], + "title": "AnswerFormat", + "type": "object" + } + } + }, + "stream": false + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-433", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}\n\n \t\t\t\t\t\t\t\t\t\t\t \t\t ", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1758979490, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 31, + "prompt_tokens": 60, + "total_tokens": 91, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/2a5a4e821bc8.json b/tests/integration/recordings/responses/2a5a4e821bc8.json new file mode 100644 index 000000000..098e9e76d --- /dev/null +++ b/tests/integration/recordings/responses/2a5a4e821bc8.json @@ -0,0 +1,44 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "prompt": "Hello, world!", + "logprobs": false, + "stream": false, + "extra_body": {} + }, + "endpoint": "/v1/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-74", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "text": "Hello! How can I assist you today?" + } + ], + "created": 1758975636, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 10, + "prompt_tokens": 29, + "total_tokens": 39, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/38ea441b5f83.json b/tests/integration/recordings/responses/38ea441b5f83.json new file mode 100644 index 000000000..79886b389 --- /dev/null +++ b/tests/integration/recordings/responses/38ea441b5f83.json @@ -0,0 +1,92 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" 
+ } + ], + "stream": false, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." + } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-761", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null, + "message": { + "content": "", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": [ + { + "id": "call_cj8ownwc", + "function": { + "arguments": "{\"location\":\"San Francisco, CA\"}", + "name": "get_weather" + }, + "type": "function", + "index": 0 + } + ] + } + } + ], + "created": 1758975113, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 18, + "prompt_tokens": 185, + "total_tokens": 203, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/5b2088233334.json b/tests/integration/recordings/responses/5b2088233334.json new file mode 100644 index 000000000..8bce46b12 --- /dev/null +++ b/tests/integration/recordings/responses/5b2088233334.json @@ -0,0 +1,44 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "prompt": "Hello, world!", + "logprobs": true, + "stream": false, + "extra_body": {} + }, + "endpoint": "/v1/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-809", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "text": "Hello! It's nice to meet you. Is there anything I can help you with or would you like to chat?" + } + ], + "created": 1758975633, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 25, + "prompt_tokens": 29, + "total_tokens": 54, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/65c12de0a1db.json b/tests/integration/recordings/responses/65c12de0a1db.json new file mode 100644 index 000000000..e1c0fb8fc --- /dev/null +++ b/tests/integration/recordings/responses/65c12de0a1db.json @@ -0,0 +1,60 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" 
+ } + ], + "stream": false + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-123", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Hello! As of my knowledge cutoff on December 15th, I have the latest information for you. However, please note that my data may not be entirely up-to-date.\n\nCurrently, and based on historical climate patterns, it appears to be a partly cloudy day with mild temperatures in San Francisco, CA. Expect a temperature range of around 48\u00b0F (9\u00b0C) to 54\u00b0F (12\u00b0C). It's likely to be a breezy day, with winds blowing at about 13 mph (21 km/h).\n\nHowever, if I were to look into more recent weather patterns or forecasts, I would recommend checking the latest conditions directly from reliable sources such as the National Weather Service or local news outlets for more accurate and up-to-date information.\n\nPlease let me know how I can further assist you.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1758978071, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 163, + "prompt_tokens": 45, + "total_tokens": 208, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/a369881bb3a2.json b/tests/integration/recordings/responses/a369881bb3a2.json new file mode 100644 index 000000000..540a5e694 --- /dev/null +++ b/tests/integration/recordings/responses/a369881bb3a2.json @@ -0,0 +1,55 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "Test trace 0" + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-272", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "I'm happy to help you with a test. Since we are in the middle of a text-based conversation, I'll do my best to simulate a simple test tracing process.\n\n**Trace Test Results**\n\nTo perform this test, please follow these steps:\n\n1. Type \"test\" on command mode.\n2. 
Press Enter.\n\nNow, let's start tracing...\n\nTest Tracing Results:\nTest Case: General Functions\nTest Case Result: PASS\n\nSystem Response:\n\n```\n# System Boot Time: 2023-10-13T14:30:00\n# CPU Temperature: 35\u00b0C\n# Disk Space Available: 80%\n```\n\nNext Steps?\n\nType 'done' to exit the test, or 'run' for more tests.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1758978134, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 152, + "prompt_tokens": 29, + "total_tokens": 181, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/a46b77ffd494.json b/tests/integration/recordings/responses/a46b77ffd494.json new file mode 100644 index 000000000..dff3d3fd7 --- /dev/null +++ b/tests/integration/recordings/responses/a46b77ffd494.json @@ -0,0 +1,44 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963", + "stop": "1963", + "stream": false, + "extra_body": {} + }, + "endpoint": "/v1/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-183", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "text": "Michael Jordan was born in the year of " + } + ], + "created": 1758978053, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 11, + "prompt_tokens": 48, + "total_tokens": 59, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/c3dbccc5de74.json b/tests/integration/recordings/responses/c3dbccc5de74.json new file mode 100644 index 000000000..a2043db9a --- /dev/null +++ b/tests/integration/recordings/responses/c3dbccc5de74.json @@ -0,0 +1,112 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "Pretend you are a weather assistant." + }, + { + "role": "user", + "content": "What's the weather like in San Francisco, CA?" + } + ], + "stream": true, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." 
+ } + }, + "required": [ + "location" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-634", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_wubm4yax", + "function": { + "arguments": "{\"location\":\"San Francisco, CA\"}", + "name": "get_weather" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1758975115, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-634", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 1758975115, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/c8e196049fe4.json b/tests/integration/recordings/responses/c8e196049fe4.json new file mode 100644 index 000000000..3a1495f07 --- /dev/null +++ b/tests/integration/recordings/responses/c8e196049fe4.json @@ -0,0 +1,47 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963", + "stop": [ + "blathering", + "1963" + ], + "stream": false, + "extra_body": {} + }, + "endpoint": "/v1/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-381", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "text": "Michael Jordan was born in the year of " + } + ], + "created": 1758978056, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 11, + "prompt_tokens": 48, + "total_tokens": 59, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/cb1099daed49.json b/tests/integration/recordings/responses/cb1099daed49.json new file mode 100644 index 000000000..3c105cfe5 --- /dev/null +++ b/tests/integration/recordings/responses/cb1099daed49.json @@ -0,0 +1,55 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "Test trace 1" + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-122", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + 
"content": "It appears you're trying to initiate a conversation or test the functionality of this AI system. I'm happy to chat with you!\n\nWould you like to:\nA) Ask me a question on a specific topic\nB) Engage in a conversational dialogue on a topic of your choice\nC) Play a text-based game\nD) Test my language understanding capabilities\n\nPlease respond with the letter of your preferred activity.", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1758978142, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 85, + "prompt_tokens": 29, + "total_tokens": 114, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/d0ac68cbde69.json b/tests/integration/recordings/responses/d0ac68cbde69.json index 750c5c69b..b37962fb6 100644 --- a/tests/integration/recordings/responses/d0ac68cbde69.json +++ b/tests/integration/recordings/responses/d0ac68cbde69.json @@ -11,26 +11,7 @@ "body": { "__type__": "ollama._types.ProcessResponse", "__data__": { - "models": [ - { - "model": "llama3.2-vision:11b", - "name": "llama3.2-vision:11b", - "digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e", - "expires_at": "2025-09-03T11:51:35.966409-07:00", - "size": 12401209008, - "size_vram": 12401209008, - "details": { - "parent_model": "", - "format": "gguf", - "family": "mllama", - "families": [ - "mllama" - ], - "parameter_size": "10.7B", - "quantization_level": "Q4_K_M" - } - } - ] + "models": [] } }, "is_streaming": false diff --git a/tests/integration/telemetry/test_telemetry.py b/tests/integration/telemetry/test_telemetry.py new file mode 100644 index 000000000..aff337e35 --- /dev/null +++ b/tests/integration/telemetry/test_telemetry.py @@ -0,0 +1,187 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import time +from datetime import UTC, datetime +from uuid import uuid4 + +import pytest +from llama_stack_client import Agent + + +@pytest.fixture(scope="module", autouse=True) +def setup_telemetry_data(llama_stack_client, text_model_id): + """Setup fixture that creates telemetry data before tests run.""" + agent = Agent(llama_stack_client, model=text_model_id, instructions="You are a helpful assistant") + + session_id = agent.create_session(f"test-setup-session-{uuid4()}") + + messages = [ + "What is 2 + 2?", + "Tell me a short joke", + ] + + for msg in messages: + agent.create_turn( + messages=[{"role": "user", "content": msg}], + session_id=session_id, + stream=False, + ) + + for i in range(2): + llama_stack_client.chat.completions.create( + model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}] + ) + + start_time = time.time() + + while time.time() - start_time < 30: + traces = llama_stack_client.telemetry.query_traces(limit=10) + if len(traces) >= 4: + break + time.sleep(0.1) + + if len(traces) < 4: + pytest.fail(f"Failed to create sufficient telemetry data after 30s. 
Got {len(traces)} traces.") + + yield + + +def test_query_traces_basic(llama_stack_client): + """Test basic trace querying functionality with proper data validation.""" + all_traces = llama_stack_client.telemetry.query_traces(limit=5) + + assert isinstance(all_traces, list), "Should return a list of traces" + assert len(all_traces) >= 4, "Should have at least 4 traces from setup" + + # Verify trace structure and data quality + first_trace = all_traces[0] + assert hasattr(first_trace, "trace_id"), "Trace should have trace_id" + assert hasattr(first_trace, "start_time"), "Trace should have start_time" + assert hasattr(first_trace, "root_span_id"), "Trace should have root_span_id" + + # Validate trace_id is a valid UUID format + assert isinstance(first_trace.trace_id, str) and len(first_trace.trace_id) > 0, ( + "trace_id should be non-empty string" + ) + + # Validate start_time format and not in the future + now = datetime.now(UTC) + if isinstance(first_trace.start_time, str): + trace_time = datetime.fromisoformat(first_trace.start_time.replace("Z", "+00:00")) + else: + # start_time is already a datetime object + trace_time = first_trace.start_time + if trace_time.tzinfo is None: + trace_time = trace_time.replace(tzinfo=UTC) + + # Ensure trace time is not in the future (but allow any age in the past for persistent test data) + time_diff = (now - trace_time).total_seconds() + assert time_diff >= 0, f"Trace start_time should not be in the future, got {time_diff}s" + + # Validate root_span_id exists and is non-empty + assert isinstance(first_trace.root_span_id, str) and len(first_trace.root_span_id) > 0, ( + "root_span_id should be non-empty string" + ) + + # Test querying specific trace by ID + specific_trace = llama_stack_client.telemetry.get_trace(trace_id=first_trace.trace_id) + assert specific_trace.trace_id == first_trace.trace_id, "Retrieved trace should match requested ID" + assert specific_trace.start_time == first_trace.start_time, "Retrieved trace should have same start_time" + assert specific_trace.root_span_id == first_trace.root_span_id, "Retrieved trace should have same root_span_id" + + # Test pagination with proper validation + recent_traces = llama_stack_client.telemetry.query_traces(limit=3, offset=0) + assert len(recent_traces) <= 3, "Should return at most 3 traces when limit=3" + assert len(recent_traces) >= 1, "Should return at least 1 trace" + + # Verify all traces have required fields + for trace in recent_traces: + assert hasattr(trace, "trace_id") and trace.trace_id, "Each trace should have non-empty trace_id" + assert hasattr(trace, "start_time") and trace.start_time, "Each trace should have non-empty start_time" + assert hasattr(trace, "root_span_id") and trace.root_span_id, "Each trace should have non-empty root_span_id" + + +def test_query_spans_basic(llama_stack_client): + """Test basic span querying functionality with proper validation.""" + spans = llama_stack_client.telemetry.query_spans(attribute_filters=[], attributes_to_return=[]) + + assert isinstance(spans, list), "Should return a list of spans" + assert len(spans) >= 1, "Should have at least one span from setup" + + # Verify span structure and data quality + first_span = spans[0] + required_attrs = ["span_id", "name", "trace_id"] + for attr in required_attrs: + assert hasattr(first_span, attr), f"Span should have {attr} attribute" + assert getattr(first_span, attr), f"Span {attr} should not be empty" + + # Validate span data types and content + assert isinstance(first_span.span_id, str) and 
len(first_span.span_id) > 0, "span_id should be non-empty string" + assert isinstance(first_span.name, str) and len(first_span.name) > 0, "span name should be non-empty string" + assert isinstance(first_span.trace_id, str) and len(first_span.trace_id) > 0, "trace_id should be non-empty string" + + # Verify span belongs to a valid trace (test with traces we know exist) + all_traces = llama_stack_client.telemetry.query_traces(limit=10) + trace_ids = {t.trace_id for t in all_traces} + if first_span.trace_id in trace_ids: + trace = llama_stack_client.telemetry.get_trace(trace_id=first_span.trace_id) + assert trace is not None, "Should be able to retrieve trace for valid trace_id" + assert trace.trace_id == first_span.trace_id, "Trace ID should match span's trace_id" + + # Test with span filtering and validate results + filtered_spans = llama_stack_client.telemetry.query_spans( + attribute_filters=[{"key": "name", "op": "eq", "value": first_span.name}], + attributes_to_return=["name", "span_id"], + ) + assert isinstance(filtered_spans, list), "Should return a list with span name filter" + + # Validate filtered spans if filtering works + if len(filtered_spans) > 0: + for span in filtered_spans: + assert hasattr(span, "name"), "Filtered spans should have name attribute" + assert hasattr(span, "span_id"), "Filtered spans should have span_id attribute" + assert span.name == first_span.name, "Filtered spans should match the filter criteria" + assert isinstance(span.span_id, str) and len(span.span_id) > 0, "Filtered span_id should be valid" + + # Test that all spans have consistent structure + for span in spans: + for attr in required_attrs: + assert hasattr(span, attr) and getattr(span, attr), f"All spans should have non-empty {attr}" + + +def test_telemetry_pagination(llama_stack_client): + """Test pagination in telemetry queries.""" + # Get total count of traces + all_traces = llama_stack_client.telemetry.query_traces(limit=20) + total_count = len(all_traces) + assert total_count >= 4, "Should have at least 4 traces from setup" + + # Test trace pagination + page1 = llama_stack_client.telemetry.query_traces(limit=2, offset=0) + page2 = llama_stack_client.telemetry.query_traces(limit=2, offset=2) + + assert len(page1) == 2, "First page should have exactly 2 traces" + assert len(page2) >= 1, "Second page should have at least 1 trace" + + # Verify no overlap between pages + page1_ids = {t.trace_id for t in page1} + page2_ids = {t.trace_id for t in page2} + assert len(page1_ids.intersection(page2_ids)) == 0, "Pages should contain different traces" + + # Test ordering + ordered_traces = llama_stack_client.telemetry.query_traces(limit=5, order_by=["start_time"]) + assert len(ordered_traces) >= 4, "Should have at least 4 traces for ordering test" + + # Verify ordering by start_time + for i in range(len(ordered_traces) - 1): + current_time = ordered_traces[i].start_time + next_time = ordered_traces[i + 1].start_time + assert current_time <= next_time, f"Traces should be ordered by start_time: {current_time} > {next_time}" + + # Test limit behavior + limited = llama_stack_client.telemetry.query_traces(limit=3) + assert len(limited) == 3, "Should return exactly 3 traces when limit=3" diff --git a/tests/integration/test_cases/inference/chat_completion.json b/tests/integration/test_cases/inference/chat_completion.json index 203fc51a5..99add7b66 100644 --- a/tests/integration/test_cases/inference/chat_completion.json +++ b/tests/integration/test_cases/inference/chat_completion.json @@ -83,12 +83,19 @@ ], 
"tools": [ { - "tool_name": "get_weather", - "description": "Get the current weather", - "parameters": { - "location": { - "param_type": "string", - "description": "The city and state (both required), e.g. San Francisco, CA." + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." + } + }, + "required": ["location"] } } } @@ -116,12 +123,19 @@ ], "tools": [ { - "tool_name": "get_weather", - "description": "Get the current weather", - "parameters": { - "location": { - "param_type": "string", - "description": "The city and state (both required), e.g. San Francisco, CA." + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." + } + }, + "required": ["location"] } } } @@ -162,12 +176,19 @@ ], "tools": [ { - "tool_name": "get_weather", - "description": "Get the current weather", - "parameters": { - "location": { - "param_type": "string", - "description": "The city and state (both required), e.g. San Francisco, CA." + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." + } + }, + "required": ["location"] } } } @@ -192,66 +213,6 @@ ] } }, - "array_parameter": { - "data": { - "messages": [ - [ - { - "role": "user", - "content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id." - } - ] - ], - "tools": [ - { - "tool_name": "addProduct", - "description": "Get the current weather", - "parameters": { - "name": { - "param_type": "string", - "description": "Name of the product" - }, - "price": { - "param_type": "number", - "description": "Price of the product" - }, - "inStock": { - "param_type": "boolean", - "description": "Availability status of the product." - }, - "tags": { - "param_type": "list[str]", - "description": "List of product tags" - } - } - } - ], - "tool_responses": [ - { - "response": "{'response': 'Successfully added product with id: 123'}" - } - ], - "expected": [ - { - "num_tool_calls": 1, - "tool_name": "addProduct", - "tool_arguments": { - "name": "Widget", - "price": 19.99, - "inStock": true, - "tags": [ - "new", - "sale" - ] - } - }, - { - "num_tool_calls": 0, - "answer": "123" - } - ] - } - }, "sample_messages_tool_calling": { "data": { "messages": [ @@ -270,13 +231,19 @@ ], "tools": [ { - "tool_name": "get_weather", - "description": "Get the current weather", - "parameters": { - "location": { - "param_type": "string", - "description": "The city and state, e.g. San Francisco, CA", - "required": true + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state (both required), e.g. San Francisco, CA." 
+ } + }, + "required": ["location"] } } } @@ -343,18 +310,23 @@ ], "tools": [ { - "tool_name": "get_object_namespace_list", - "description": "Get the list of objects in a namespace", - "parameters": { - "kind": { - "param_type": "string", - "description": "the type of object", - "required": true - }, - "namespace": { - "param_type": "string", - "description": "the name of the namespace", - "required": true + "type": "function", + "function": { + "name": "get_object_namespace_list", + "description": "Get the list of objects in a namespace", + "parameters": { + "type": "object", + "properties": { + "kind": { + "type": "string", + "description": "the type of object" + }, + "namespace": { + "type": "string", + "description": "the name of the namespace" + } + }, + "required": ["kind", "namespace"] } } }