mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-03 09:53:45 +00:00

Merge remote-tracking branch 'upstream/main' into strip-telem

commit dfed6bf285
3 changed files with 116 additions and 2 deletions
@@ -231,7 +231,8 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
     # Use a fixed port for the OTEL collector so the server can connect to it
     COLLECTOR_PORT=4317
     export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
-    export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
+    # Disabled: https://github.com/llamastack/llama-stack/issues/4089
+    #export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
     export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
     export OTEL_BSP_SCHEDULE_DELAY="200"
     export OTEL_BSP_EXPORT_TIMEOUT="2000"
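For context: `OTEL_BSP_SCHEDULE_DELAY` and `OTEL_BSP_EXPORT_TIMEOUT` tune the OpenTelemetry BatchSpanProcessor (both in milliseconds), and the now-commented `OTEL_EXPORTER_OTLP_ENDPOINT` is what the OTLP exporter would read to find the collector. A minimal, assumed sketch of SDK-side wiring that honors these variables (illustrative only, not the server's actual startup code):

```python
# The OpenTelemetry SDK reads OTEL_* variables from the environment at
# startup, so the test script only needs to export them before launching.
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# OTLPSpanExporter honors OTEL_EXPORTER_OTLP_ENDPOINT / _PROTOCOL when set;
# BatchSpanProcessor honors OTEL_BSP_SCHEDULE_DELAY / OTEL_BSP_EXPORT_TIMEOUT.
provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))
trace.set_tracer_provider(provider)

with trace.get_tracer(__name__).start_as_current_span("smoke-test"):
    pass  # spans are batched and exported per the OTEL_BSP_* settings
```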
@@ -337,7 +338,8 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
     DOCKER_ENV_VARS=""
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
-    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
+    # Disabled: https://github.com/llamastack/llama-stack/issues/4089
+    #DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_SCHEDULE_DELAY=200"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_EXPORT_TIMEOUT=2000"
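A hedged aside, not part of the commit: accumulating `-e` flags in one shell string relies on word-splitting at expansion time. The same flags assembled as a list, sketched in Python with hypothetical names:

```python
def docker_env_flags(env: dict[str, str]) -> list[str]:
    """Expand {"KEY": "value"} into ["-e", "KEY=value", ...] for docker run."""
    flags: list[str] = []
    for key, value in env.items():
        flags += ["-e", f"{key}={value}"]
    return flags


# Values below mirror the hunk above; the config-type/interval pairs are
# taken from the script, the dict itself is illustrative.
flags = docker_env_flags({
    "LLAMA_STACK_TEST_STACK_CONFIG_TYPE": "server",
    "OTEL_METRIC_EXPORT_INTERVAL": "200",
    "OTEL_BSP_SCHEDULE_DELAY": "200",
    "OTEL_BSP_EXPORT_TIMEOUT": "2000",
})
print(flags)  # e.g. pass as ["docker", "run", *flags, image_name]
```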
@@ -353,6 +355,10 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
     [ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
     [ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
+
+    if [[ "$TEST_SETUP" == "vllm" ]]; then
+        DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e VLLM_URL=http://localhost:8000/v1"
+    fi
 
     # Determine the actual image name (may have localhost/ prefix)
     IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
     if [[ -z "$IMAGE_NAME" ]]; then
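The image lookup above greps `docker images` for a name ending in `distribution-$DISTRO:dev`, since locally built images may carry a `localhost/` prefix. A hedged Python sketch of the same resolution (the helper name is illustrative):

```python
import subprocess


def find_image(distro: str) -> str | None:
    """Return the first local image tagged distribution-<distro>:dev, if any."""
    out = subprocess.run(
        ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"],
        capture_output=True,
        text=True,
        check=True,
    ).stdout
    # Suffix match mirrors grep "distribution-$DISTRO:dev$" | head -1
    matches = [line for line in out.splitlines() if line.endswith(f"distribution-{distro}:dev")]
    return matches[0] if matches else None
```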
@@ -0,0 +1,103 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestMCPToolsInChatCompletion::test_mcp_tools_in_inference[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Calculate 5 + 3"
+        }
+      ],
+      "max_tokens": 4096,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "calculate",
+            "description": "",
+            "parameters": {
+              "properties": {
+                "x": {
+                  "title": "X",
+                  "type": "number"
+                },
+                "y": {
+                  "title": "Y",
+                  "type": "number"
+                },
+                "operation": {
+                  "title": "Operation",
+                  "type": "string"
+                }
+              },
+              "required": [
+                "x",
+                "y",
+                "operation"
+              ],
+              "title": "calculateArguments",
+              "type": "object"
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-99bf0054f11a",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, the user wants to calculate 5 plus 3. Let me check the tools provided. The only function available is 'calculate', which requires x, y, and operation. The parameters are numbers and an operation. The user input is straightforward: 5 + 3. So I need to call the 'calculate' function with x=5, y=3, and operation='+'. That should give the correct result. I don't see any other parameters needed here. Just make sure the JSON is correctly formatted with the required fields.\n</think>\n\n",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "chatcmpl-tool-6d1a92899a8246bb8fae5682dc08590c",
+                  "function": {
+                    "arguments": "{\"x\": 5, \"y\": 3, \"operation\": \"+\"}",
+                    "name": "calculate"
+                  },
+                  "type": "function"
+                }
+              ],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 144,
+          "prompt_tokens": 193,
+          "total_tokens": 337,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
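This new file is a test recording: a captured request (endpoint, model, body) paired with the response to replay when inference runs in replay mode. A hedged sketch of how a recording with this shape could be consumed; the helper names (`load_recording`, `replay`) are illustrative, not llama-stack APIs:

```python
import json
from pathlib import Path


def load_recording(path: Path) -> dict:
    return json.loads(path.read_text())


def replay(recording: dict, method: str, endpoint: str, body: dict) -> dict:
    """Return the stored response body if the request matches the recording."""
    req = recording["request"]
    if (method, endpoint) != (req["method"], req["endpoint"]):
        raise KeyError(f"no recording for {method} {endpoint}")
    if body.get("model") != req["body"]["model"]:
        raise KeyError("recorded request was for a different model")
    # __type__ names the class to rehydrate; __data__ holds its fields.
    return recording["response"]["body"]["__data__"]
```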
@@ -12,9 +12,13 @@ before and after each test, ensuring test isolation.
 
 import json
 
+import pytest
+
 
 def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
     """Verify streaming adds chunk_count and __type__=async_generator."""
+
+    pytest.skip("Disabled: See https://github.com/llamastack/llama-stack/issues/4089")
     stream = llama_stack_client.chat.completions.create(
         model=text_model_id,
         messages=[{"role": "user", "content": "Test trace openai 1"}],
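The commit skips at runtime by making `pytest.skip(...)` the first statement of each test body. An equivalent, more declarative form (not what the commit uses) is the skip marker, which reports the test as skipped without entering the body at all:

```python
import pytest


@pytest.mark.skip(reason="Disabled: See https://github.com/llamastack/llama-stack/issues/4089")
def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
    ...
```

The runtime form chosen here has the advantage that the skip can later be made conditional without touching the decorator.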
@@ -50,6 +54,7 @@ def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_mod
 def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
     """Comprehensive validation of telemetry data format including spans and metrics."""
 
+    pytest.skip("Disabled: See https://github.com/llamastack/llama-stack/issues/4089")
     response = llama_stack_client.chat.completions.create(
         model=text_model_id,
         messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}],