diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index 2d088f3df..0951feb14 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -231,7 +231,8 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
     # Use a fixed port for the OTEL collector so the server can connect to it
     COLLECTOR_PORT=4317
     export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
-    export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
+    # Disabled: https://github.com/llamastack/llama-stack/issues/4089
+    #export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
     export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
     export OTEL_BSP_SCHEDULE_DELAY="200"
     export OTEL_BSP_EXPORT_TIMEOUT="2000"
@@ -337,7 +338,8 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
     DOCKER_ENV_VARS=""
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
-    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
+    # Disabled: https://github.com/llamastack/llama-stack/issues/4089
+    #DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_SCHEDULE_DELAY=200"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_EXPORT_TIMEOUT=2000"
@@ -353,6 +355,10 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
     [ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
     [ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
 
+    if [[ "$TEST_SETUP" == "vllm" ]]; then
+        DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e VLLM_URL=http://localhost:8000/v1"
+    fi
+
     # Determine the actual image name (may have localhost/ prefix)
     IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
     if [[ -z "$IMAGE_NAME" ]]; then
diff --git a/tests/integration/inference/recordings/99bf0054f11a9c58c13a44f9cf962a706ebe85e2a5fe637ddad558cbaafe92d8.json b/tests/integration/inference/recordings/99bf0054f11a9c58c13a44f9cf962a706ebe85e2a5fe637ddad558cbaafe92d8.json
new file mode 100644
index 000000000..250e91c68
--- /dev/null
+++ b/tests/integration/inference/recordings/99bf0054f11a9c58c13a44f9cf962a706ebe85e2a5fe637ddad558cbaafe92d8.json
@@ -0,0 +1,103 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestMCPToolsInChatCompletion::test_mcp_tools_in_inference[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Calculate 5 + 3"
+        }
+      ],
+      "max_tokens": 4096,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "calculate",
+            "description": "",
+            "parameters": {
+              "properties": {
+                "x": {
+                  "title": "X",
+                  "type": "number"
+                },
+                "y": {
+                  "title": "Y",
+                  "type": "number"
+                },
+                "operation": {
+                  "title": "Operation",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "x",
+                "y",
+                "operation"
+              ],
+              "title": "calculateArguments",
+              "type": "object"
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-99bf0054f11a",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "\nOkay, the user wants to calculate 5 plus 3. Let me check the tools provided. The only function available is 'calculate', which requires x, y, and operation. The parameters are numbers and an operation. The user input is straightforward: 5 + 3. So I need to call the 'calculate' function with x=5, y=3, and operation='+'. That should give the correct result. I don't see any other parameters needed here. Just make sure the JSON is correctly formatted with the required fields.\n\n\n",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "chatcmpl-tool-6d1a92899a8246bb8fae5682dc08590c",
+                  "function": {
+                    "arguments": "{\"x\": 5, \"y\": 3, \"operation\": \"+\"}",
+                    "name": "calculate"
+                  },
+                  "type": "function"
+                }
+              ],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 144,
+          "prompt_tokens": 193,
+          "total_tokens": 337,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/telemetry/test_completions.py b/tests/integration/telemetry/test_completions.py
index 2b8835f6c..af073d8bc 100644
--- a/tests/integration/telemetry/test_completions.py
+++ b/tests/integration/telemetry/test_completions.py
@@ -12,9 +12,13 @@ before and after each test, ensuring test isolation.
 
 import json
 
+import pytest
+
 
 def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
     """Verify streaming adds chunk_count and __type__=async_generator."""
+
+    pytest.skip("Disabled: See https://github.com/llamastack/llama-stack/issues/4089")
     stream = llama_stack_client.chat.completions.create(
         model=text_model_id,
         messages=[{"role": "user", "content": "Test trace openai 1"}],
@@ -50,6 +54,7 @@ def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_mod
 
 def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
     """Comprehensive validation of telemetry data format including spans and metrics."""
+    pytest.skip("Disabled: See https://github.com/llamastack/llama-stack/issues/4089")
     response = llama_stack_client.chat.completions.create(
         model=text_model_id,
         messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}],