ported most test_text_completions and all test_telemetry tests

2025-10-03 19:57:35 +00:00 · 2025-09-27 07:14:16 -04:00 · 2025-09-27 07:14:16 -04:00 · 5cd36ff0dc
commit 5cd36ff0dc
parent 0e78cd5383
14 changed files with 1119 additions and 139 deletions
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -9,6 +9,7 @@ import time
 import unicodedata

 import pytest
+from pydantic import BaseModel

 from ..test_cases.test_case import TestCase

@ -62,6 +63,14 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")


+def skip_if_doesnt_support_completions_logprobs(client_with_models, model_id):
+    provider_type = provider_from_model(client_with_models, model_id).provider_type
+    if provider_type in (
+        "remote::ollama",  # logprobs is ignored
+    ):
+        pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions logprobs.")
+
+
 def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
    # To test `fim` ( fill in the middle ) completion, we need to use a model that supports suffix.
    # Use this to specifically test this API functionality.
@ -205,28 +214,6 @@ def test_openai_completion_streaming(llama_stack_client, client_with_models, tex
    assert len(content_str) > 10


-@pytest.mark.parametrize(
-    "prompt_logprobs",
-    [
-        1,
-        0,
-    ],
-)
-def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs):
-    skip_if_provider_isnt_vllm(client_with_models, text_model_id)
-
-    prompt = "Hello, world!"
-    response = llama_stack_client.completions.create(
-        model=text_model_id,
-        prompt=prompt,
-        stream=False,
-        prompt_logprobs=prompt_logprobs,
-    )
-    assert len(response.choices) > 0
-    choice = response.choices[0]
-    assert len(choice.prompt_logprobs) > 0
-
-
 def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id):
    skip_if_provider_isnt_vllm(client_with_models, text_model_id)

@ -518,3 +505,214 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
    message_content = response.choices[0].message.content.lower().strip()
    normalized_content = _normalize_text(message_content)
    assert "hello world" in normalized_content
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        "inference:completion:stop_sequence",
+    ],
+)
+def test_openai_completion_stop_sequence(client_with_models, openai_client, text_model_id, test_case):
+    skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
+
+    tc = TestCase(test_case)
+
+    response = openai_client.completions.create(
+        model=text_model_id,
+        prompt=tc["content"],
+        stop="1963",
+        stream=False,
+    )
+    assert len(response.choices) > 0
+    choice = response.choices[0]
+    assert "1963" not in choice.text
+
+    response = openai_client.completions.create(
+        model=text_model_id,
+        prompt=tc["content"],
+        stop=["blathering", "1963"],
+        stream=False,
+    )
+    assert len(response.choices) > 0
+    choice = response.choices[0]
+    assert "1963" not in choice.text
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        "inference:completion:log_probs",
+    ],
+)
+def test_openai_completion_logprobs(client_with_models, openai_client, text_model_id, test_case):
+    skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
+    skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
+
+    tc = TestCase(test_case)
+
+    response = openai_client.completions.create(
+        model=text_model_id,
+        prompt=tc["content"],
+        logprobs=5,
+    )
+    assert len(response.choices) > 0
+    choice = response.choices[0]
+    assert choice.text, "Response text should not be empty"
+    assert choice.logprobs, "Logprobs should not be empty"
+    logprobs = choice.logprobs
+    assert logprobs.token_logprobs, "Response tokens should not be empty"
+    assert len(logprobs.tokens) == len(logprobs.token_logprobs)
+    assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
+    for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
+        assert logprobs.top_logprobs[i][token] == prob
+        assert len(logprobs.top_logprobs[i]) == 5
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        "inference:completion:log_probs",
+    ],
+)
+def test_openai_completion_logprobs_streaming(client_with_models, openai_client, text_model_id, test_case):
+    skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
+    skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
+
+    tc = TestCase(test_case)
+
+    response = openai_client.completions.create(
+        model=text_model_id,
+        prompt=tc["content"],
+        logprobs=3,
+        stream=True,
+        max_tokens=5,
+    )
+    for chunk in response:
+        choice = chunk.choices[0]
+        choice = response.choices[0]
+        if choice.text:  # if there's a token, we expect logprobs
+            assert choice.logprobs, "Logprobs should not be empty"
+            logprobs = choice.logprobs
+            assert logprobs.token_logprobs, "Response tokens should not be empty"
+            assert len(logprobs.tokens) == len(logprobs.token_logprobs)
+            assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
+            for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
+                assert logprobs.top_logprobs[i][token] == prob
+                assert len(logprobs.top_logprobs[i]) == 3
+        else:  # no token, no logprobs
+            assert not choice.logprobs, "Logprobs should be empty"
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        "inference:chat_completion:tool_calling",
+    ],
+)
+def test_openai_chat_completion_with_tools(openai_client, text_model_id, test_case):
+    tc = TestCase(test_case)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=tc["messages"],
+        tools=tc["tools"],
+        tool_choice="auto",
+        stream=False,
+    )
+    assert len(response.choices) == 1
+    assert len(response.choices[0].message.tool_calls) == 1
+    tool_call = response.choices[0].message.tool_calls[0]
+    assert tool_call.function.name == tc["tools"][0]["function"]["name"]
+    assert "location" in tool_call.function.arguments
+    assert tc["expected"]["location"] in tool_call.function.arguments
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        "inference:chat_completion:tool_calling",
+    ],
+)
+def test_openai_chat_completion_with_tools_and_streaming(openai_client, text_model_id, test_case):
+    tc = TestCase(test_case)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=tc["messages"],
+        tools=tc["tools"],
+        tool_choice="auto",
+        stream=True,
+    )
+    # Accumulate tool calls from streaming chunks
+    tool_calls = []
+    for chunk in response:
+        if chunk.choices and chunk.choices[0].delta.tool_calls:
+            for i, tc_delta in enumerate(chunk.choices[0].delta.tool_calls):
+                while len(tool_calls) <= i:
+                    tool_calls.append({"function": {"name": "", "arguments": ""}})
+                if tc_delta.function and tc_delta.function.name:
+                    tool_calls[i]["function"]["name"] = tc_delta.function.name
+                if tc_delta.function and tc_delta.function.arguments:
+                    tool_calls[i]["function"]["arguments"] += tc_delta.function.arguments
+    assert len(tool_calls) == 1
+    tool_call = tool_calls[0]
+    assert tool_call["function"]["name"] == tc["tools"][0]["function"]["name"]
+    assert "location" in tool_call["function"]["arguments"]
+    assert tc["expected"]["location"] in tool_call["function"]["arguments"]
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        "inference:chat_completion:tool_calling",
+    ],
+)
+def test_openai_chat_completion_with_tool_choice_none(openai_client, text_model_id, test_case):
+    tc = TestCase(test_case)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=tc["messages"],
+        tools=tc["tools"],
+        tool_choice="none",
+        stream=False,
+    )
+    assert len(response.choices) == 1
+    tool_calls = response.choices[0].message.tool_calls
+    assert tool_calls is None or len(tool_calls) == 0
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        "inference:chat_completion:structured_output",
+    ],
+)
+def test_openai_chat_completion_structured_output(openai_client, text_model_id, test_case):
+    # Note: Skip condition may need adjustment for OpenAI client
+    class AnswerFormat(BaseModel):
+        first_name: str
+        last_name: str
+        year_of_birth: int
+
+    tc = TestCase(test_case)
+
+    response = openai_client.chat.completions.create(
+        model=text_model_id,
+        messages=tc["messages"],
+        response_format={
+            "type": "json_schema",
+            "json_schema": {
+                "name": "AnswerFormat",
+                "schema": AnswerFormat.model_json_schema(),
+            },
+        },
+        stream=False,
+    )
+    print(response.choices[0].message.content)
+    answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
+    expected = tc["expected"]
+    assert answer.first_name == expected["first_name"]
+    assert answer.last_name == expected["last_name"]
+    assert answer.year_of_birth == expected["year_of_birth"]
--- a/tests/integration/recordings/responses/239f4768f5aa.json
+++ b/tests/integration/recordings/responses/239f4768f5aa.json
@ -0,0 +1,89 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "system",
+          "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
+        },
+        {
+          "role": "user",
+          "content": "Please give me information about Michael Jordan."
+        }
+      ],
+      "response_format": {
+        "type": "json_schema",
+        "json_schema": {
+          "name": "AnswerFormat",
+          "schema": {
+            "properties": {
+              "first_name": {
+                "title": "First Name",
+                "type": "string"
+              },
+              "last_name": {
+                "title": "Last Name",
+                "type": "string"
+              },
+              "year_of_birth": {
+                "title": "Year Of Birth",
+                "type": "integer"
+              }
+            },
+            "required": [
+              "first_name",
+              "last_name",
+              "year_of_birth"
+            ],
+            "title": "AnswerFormat",
+            "type": "object"
+          }
+        }
+      },
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-433",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}\n\n   \t\t\t\t\t\t\t\t\t\t\t \t\t   ",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1758979490,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 31,
+          "prompt_tokens": 60,
+          "total_tokens": 91,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/2a5a4e821bc8.json
+++ b/tests/integration/recordings/responses/2a5a4e821bc8.json
@ -0,0 +1,44 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "prompt": "Hello, world!",
+      "logprobs": false,
+      "stream": false,
+      "extra_body": {}
+    },
+    "endpoint": "/v1/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "cmpl-74",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "text": "Hello! How can I assist you today?"
+          }
+        ],
+        "created": 1758975636,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "text_completion",
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 10,
+          "prompt_tokens": 29,
+          "total_tokens": 39,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/38ea441b5f83.json
+++ b/tests/integration/recordings/responses/38ea441b5f83.json
@ -0,0 +1,92 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "system",
+          "content": "Pretend you are a weather assistant."
+        },
+        {
+          "role": "user",
+          "content": "What's the weather like in San Francisco, CA?"
+        }
+      ],
+      "stream": false,
+      "tool_choice": "auto",
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the current weather",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
+                }
+              },
+              "required": [
+                "location"
+              ]
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-761",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "call_cj8ownwc",
+                  "function": {
+                    "arguments": "{\"location\":\"San Francisco, CA\"}",
+                    "name": "get_weather"
+                  },
+                  "type": "function",
+                  "index": 0
+                }
+              ]
+            }
+          }
+        ],
+        "created": 1758975113,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 18,
+          "prompt_tokens": 185,
+          "total_tokens": 203,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/5b2088233334.json
+++ b/tests/integration/recordings/responses/5b2088233334.json
@ -0,0 +1,44 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "prompt": "Hello, world!",
+      "logprobs": true,
+      "stream": false,
+      "extra_body": {}
+    },
+    "endpoint": "/v1/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "cmpl-809",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "text": "Hello! It's nice to meet you. Is there anything I can help you with or would you like to chat?"
+          }
+        ],
+        "created": 1758975633,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "text_completion",
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 25,
+          "prompt_tokens": 29,
+          "total_tokens": 54,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/65c12de0a1db.json
+++ b/tests/integration/recordings/responses/65c12de0a1db.json
@ -0,0 +1,60 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "system",
+          "content": "Pretend you are a weather assistant."
+        },
+        {
+          "role": "user",
+          "content": "What's the weather like in San Francisco, CA?"
+        }
+      ],
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-123",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "Hello! As of my knowledge cutoff on December 15th, I have the latest information for you. However, please note that my data may not be entirely up-to-date.\n\nCurrently, and based on historical climate patterns, it appears to be a partly cloudy day with mild temperatures in San Francisco, CA. Expect a temperature range of around 48\u00b0F (9\u00b0C) to 54\u00b0F (12\u00b0C). It's likely to be a breezy day, with winds blowing at about 13 mph (21 km/h).\n\nHowever, if I were to look into more recent weather patterns or forecasts, I would recommend checking the latest conditions directly from reliable sources such as the National Weather Service or local news outlets for more accurate and up-to-date information.\n\nPlease let me know how I can further assist you.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1758978071,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 163,
+          "prompt_tokens": 45,
+          "total_tokens": 208,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/a369881bb3a2.json
+++ b/tests/integration/recordings/responses/a369881bb3a2.json
@ -0,0 +1,55 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Test trace 0"
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-272",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "I'm happy to help you with a test. Since we are in the middle of a text-based conversation, I'll do my best to simulate a simple test tracing process.\n\n**Trace Test Results**\n\nTo perform this test, please follow these steps:\n\n1. Type \"test\" on command mode.\n2. Press Enter.\n\nNow, let's start tracing...\n\nTest Tracing Results:\nTest Case: General Functions\nTest Case Result: PASS\n\nSystem Response:\n\n```\n# System Boot Time: 2023-10-13T14:30:00\n# CPU Temperature: 35\u00b0C\n# Disk Space Available: 80%\n```\n\nNext Steps?\n\nType 'done' to exit the test, or 'run' for more tests.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1758978134,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 152,
+          "prompt_tokens": 29,
+          "total_tokens": 181,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/a46b77ffd494.json
+++ b/tests/integration/recordings/responses/a46b77ffd494.json
@ -0,0 +1,44 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
+      "stop": "1963",
+      "stream": false,
+      "extra_body": {}
+    },
+    "endpoint": "/v1/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "cmpl-183",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "text": "Michael Jordan was born in the year of "
+          }
+        ],
+        "created": 1758978053,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "text_completion",
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 11,
+          "prompt_tokens": 48,
+          "total_tokens": 59,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/c3dbccc5de74.json
+++ b/tests/integration/recordings/responses/c3dbccc5de74.json
@ -0,0 +1,112 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "system",
+          "content": "Pretend you are a weather assistant."
+        },
+        {
+          "role": "user",
+          "content": "What's the weather like in San Francisco, CA?"
+        }
+      ],
+      "stream": true,
+      "tool_choice": "auto",
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the current weather",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
+                }
+              },
+              "required": [
+                "location"
+              ]
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-634",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": "call_wubm4yax",
+                    "function": {
+                      "arguments": "{\"location\":\"San Francisco, CA\"}",
+                      "name": "get_weather"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 1758975115,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-634",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": "tool_calls",
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 1758975115,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      }
+    ],
+    "is_streaming": true
+  }
+}
--- a/tests/integration/recordings/responses/c8e196049fe4.json
+++ b/tests/integration/recordings/responses/c8e196049fe4.json
@ -0,0 +1,47 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
+      "stop": [
+        "blathering",
+        "1963"
+      ],
+      "stream": false,
+      "extra_body": {}
+    },
+    "endpoint": "/v1/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "cmpl-381",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "text": "Michael Jordan was born in the year of "
+          }
+        ],
+        "created": 1758978056,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "text_completion",
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 11,
+          "prompt_tokens": 48,
+          "total_tokens": 59,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/cb1099daed49.json
+++ b/tests/integration/recordings/responses/cb1099daed49.json
@ -0,0 +1,55 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Test trace 1"
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-122",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "It appears you're trying to initiate a conversation or test the functionality of this AI system. I'm happy to chat with you!\n\nWould you like to:\nA) Ask me a question on a specific topic\nB) Engage in a conversational dialogue on a topic of your choice\nC) Play a text-based game\nD) Test my language understanding capabilities\n\nPlease respond with the letter of your preferred activity.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1758978142,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 85,
+          "prompt_tokens": 29,
+          "total_tokens": 114,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/d0ac68cbde69.json
+++ b/tests/integration/recordings/responses/d0ac68cbde69.json
@ -11,26 +11,7 @@
    "body": {
      "__type__": "ollama._types.ProcessResponse",
      "__data__": {
-        "models": [
-          {
-            "model": "llama3.2-vision:11b",
-            "name": "llama3.2-vision:11b",
-            "digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
-            "expires_at": "2025-09-03T11:51:35.966409-07:00",
-            "size": 12401209008,
-            "size_vram": 12401209008,
-            "details": {
-              "parent_model": "",
-              "format": "gguf",
-              "family": "mllama",
-              "families": [
-                "mllama"
-              ],
-              "parameter_size": "10.7B",
-              "quantization_level": "Q4_K_M"
-            }
-          }
-        ]
+        "models": []
      }
    },
    "is_streaming": false
--- a/tests/integration/telemetry/test_telemetry.py
+++ b/tests/integration/telemetry/test_telemetry.py
@ -0,0 +1,187 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import time
+from datetime import UTC, datetime
+from uuid import uuid4
+
+import pytest
+from llama_stack_client import Agent
+
+
+@pytest.fixture(scope="module", autouse=True)
+def setup_telemetry_data(llama_stack_client, text_model_id):
+    """Setup fixture that creates telemetry data before tests run."""
+    agent = Agent(llama_stack_client, model=text_model_id, instructions="You are a helpful assistant")
+
+    session_id = agent.create_session(f"test-setup-session-{uuid4()}")
+
+    messages = [
+        "What is 2 + 2?",
+        "Tell me a short joke",
+    ]
+
+    for msg in messages:
+        agent.create_turn(
+            messages=[{"role": "user", "content": msg}],
+            session_id=session_id,
+            stream=False,
+        )
+
+    for i in range(2):
+        llama_stack_client.chat.completions.create(
+            model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
+        )
+
+    start_time = time.time()
+
+    while time.time() - start_time < 30:
+        traces = llama_stack_client.telemetry.query_traces(limit=10)
+        if len(traces) >= 4:
+            break
+        time.sleep(0.1)
+
+    if len(traces) < 4:
+        pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")
+
+    yield
+
+
+def test_query_traces_basic(llama_stack_client):
+    """Test basic trace querying functionality with proper data validation."""
+    all_traces = llama_stack_client.telemetry.query_traces(limit=5)
+
+    assert isinstance(all_traces, list), "Should return a list of traces"
+    assert len(all_traces) >= 4, "Should have at least 4 traces from setup"
+
+    # Verify trace structure and data quality
+    first_trace = all_traces[0]
+    assert hasattr(first_trace, "trace_id"), "Trace should have trace_id"
+    assert hasattr(first_trace, "start_time"), "Trace should have start_time"
+    assert hasattr(first_trace, "root_span_id"), "Trace should have root_span_id"
+
+    # Validate trace_id is a valid UUID format
+    assert isinstance(first_trace.trace_id, str) and len(first_trace.trace_id) > 0, (
+        "trace_id should be non-empty string"
+    )
+
+    # Validate start_time format and not in the future
+    now = datetime.now(UTC)
+    if isinstance(first_trace.start_time, str):
+        trace_time = datetime.fromisoformat(first_trace.start_time.replace("Z", "+00:00"))
+    else:
+        # start_time is already a datetime object
+        trace_time = first_trace.start_time
+        if trace_time.tzinfo is None:
+            trace_time = trace_time.replace(tzinfo=UTC)
+
+    # Ensure trace time is not in the future (but allow any age in the past for persistent test data)
+    time_diff = (now - trace_time).total_seconds()
+    assert time_diff >= 0, f"Trace start_time should not be in the future, got {time_diff}s"
+
+    # Validate root_span_id exists and is non-empty
+    assert isinstance(first_trace.root_span_id, str) and len(first_trace.root_span_id) > 0, (
+        "root_span_id should be non-empty string"
+    )
+
+    # Test querying specific trace by ID
+    specific_trace = llama_stack_client.telemetry.get_trace(trace_id=first_trace.trace_id)
+    assert specific_trace.trace_id == first_trace.trace_id, "Retrieved trace should match requested ID"
+    assert specific_trace.start_time == first_trace.start_time, "Retrieved trace should have same start_time"
+    assert specific_trace.root_span_id == first_trace.root_span_id, "Retrieved trace should have same root_span_id"
+
+    # Test pagination with proper validation
+    recent_traces = llama_stack_client.telemetry.query_traces(limit=3, offset=0)
+    assert len(recent_traces) <= 3, "Should return at most 3 traces when limit=3"
+    assert len(recent_traces) >= 1, "Should return at least 1 trace"
+
+    # Verify all traces have required fields
+    for trace in recent_traces:
+        assert hasattr(trace, "trace_id") and trace.trace_id, "Each trace should have non-empty trace_id"
+        assert hasattr(trace, "start_time") and trace.start_time, "Each trace should have non-empty start_time"
+        assert hasattr(trace, "root_span_id") and trace.root_span_id, "Each trace should have non-empty root_span_id"
+
+
+def test_query_spans_basic(llama_stack_client):
+    """Test basic span querying functionality with proper validation."""
+    spans = llama_stack_client.telemetry.query_spans(attribute_filters=[], attributes_to_return=[])
+
+    assert isinstance(spans, list), "Should return a list of spans"
+    assert len(spans) >= 1, "Should have at least one span from setup"
+
+    # Verify span structure and data quality
+    first_span = spans[0]
+    required_attrs = ["span_id", "name", "trace_id"]
+    for attr in required_attrs:
+        assert hasattr(first_span, attr), f"Span should have {attr} attribute"
+        assert getattr(first_span, attr), f"Span {attr} should not be empty"
+
+    # Validate span data types and content
+    assert isinstance(first_span.span_id, str) and len(first_span.span_id) > 0, "span_id should be non-empty string"
+    assert isinstance(first_span.name, str) and len(first_span.name) > 0, "span name should be non-empty string"
+    assert isinstance(first_span.trace_id, str) and len(first_span.trace_id) > 0, "trace_id should be non-empty string"
+
+    # Verify span belongs to a valid trace (test with traces we know exist)
+    all_traces = llama_stack_client.telemetry.query_traces(limit=10)
+    trace_ids = {t.trace_id for t in all_traces}
+    if first_span.trace_id in trace_ids:
+        trace = llama_stack_client.telemetry.get_trace(trace_id=first_span.trace_id)
+        assert trace is not None, "Should be able to retrieve trace for valid trace_id"
+        assert trace.trace_id == first_span.trace_id, "Trace ID should match span's trace_id"
+
+    # Test with span filtering and validate results
+    filtered_spans = llama_stack_client.telemetry.query_spans(
+        attribute_filters=[{"key": "name", "op": "eq", "value": first_span.name}],
+        attributes_to_return=["name", "span_id"],
+    )
+    assert isinstance(filtered_spans, list), "Should return a list with span name filter"
+
+    # Validate filtered spans if filtering works
+    if len(filtered_spans) > 0:
+        for span in filtered_spans:
+            assert hasattr(span, "name"), "Filtered spans should have name attribute"
+            assert hasattr(span, "span_id"), "Filtered spans should have span_id attribute"
+            assert span.name == first_span.name, "Filtered spans should match the filter criteria"
+            assert isinstance(span.span_id, str) and len(span.span_id) > 0, "Filtered span_id should be valid"
+
+    # Test that all spans have consistent structure
+    for span in spans:
+        for attr in required_attrs:
+            assert hasattr(span, attr) and getattr(span, attr), f"All spans should have non-empty {attr}"
+
+
+def test_telemetry_pagination(llama_stack_client):
+    """Test pagination in telemetry queries."""
+    # Get total count of traces
+    all_traces = llama_stack_client.telemetry.query_traces(limit=20)
+    total_count = len(all_traces)
+    assert total_count >= 4, "Should have at least 4 traces from setup"
+
+    # Test trace pagination
+    page1 = llama_stack_client.telemetry.query_traces(limit=2, offset=0)
+    page2 = llama_stack_client.telemetry.query_traces(limit=2, offset=2)
+
+    assert len(page1) == 2, "First page should have exactly 2 traces"
+    assert len(page2) >= 1, "Second page should have at least 1 trace"
+
+    # Verify no overlap between pages
+    page1_ids = {t.trace_id for t in page1}
+    page2_ids = {t.trace_id for t in page2}
+    assert len(page1_ids.intersection(page2_ids)) == 0, "Pages should contain different traces"
+
+    # Test ordering
+    ordered_traces = llama_stack_client.telemetry.query_traces(limit=5, order_by=["start_time"])
+    assert len(ordered_traces) >= 4, "Should have at least 4 traces for ordering test"
+
+    # Verify ordering by start_time
+    for i in range(len(ordered_traces) - 1):
+        current_time = ordered_traces[i].start_time
+        next_time = ordered_traces[i + 1].start_time
+        assert current_time <= next_time, f"Traces should be ordered by start_time: {current_time} > {next_time}"
+
+    # Test limit behavior
+    limited = llama_stack_client.telemetry.query_traces(limit=3)
+    assert len(limited) == 3, "Should return exactly 3 traces when limit=3"
--- a/tests/integration/test_cases/inference/chat_completion.json
+++ b/tests/integration/test_cases/inference/chat_completion.json
@ -83,12 +83,19 @@
      ],
      "tools": [
        {
-          "tool_name": "get_weather",
-          "description": "Get the current weather",
-          "parameters": {
-            "location": {
-              "param_type": "string",
-              "description": "The city and state (both required), e.g. San Francisco, CA."
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the current weather",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
+                }
+              },
+              "required": ["location"]
            }
          }
        }
@ -116,12 +123,19 @@
      ],
      "tools": [
        {
-          "tool_name": "get_weather",
-          "description": "Get the current weather",
-          "parameters": {
-            "location": {
-              "param_type": "string",
-              "description": "The city and state (both required), e.g. San Francisco, CA."
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the current weather",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
+                }
+              },
+              "required": ["location"]
            }
          }
        }
@ -162,12 +176,19 @@
      ],
      "tools": [
        {
-          "tool_name": "get_weather",
-          "description": "Get the current weather",
-          "parameters": {
-            "location": {
-              "param_type": "string",
-              "description": "The city and state (both required), e.g. San Francisco, CA."
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the current weather",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
+                }
+              },
+              "required": ["location"]
            }
          }
        }
@ -192,66 +213,6 @@
      ]
    }
  },
-  "array_parameter": {
-    "data": {
-      "messages": [
-        [
-          {
-            "role": "user",
-            "content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
-          }
-        ]
-      ],
-      "tools": [
-        {
-          "tool_name": "addProduct",
-          "description": "Get the current weather",
-          "parameters": {
-            "name": {
-              "param_type": "string",
-              "description": "Name of the product"
-            },
-            "price": {
-              "param_type": "number",
-              "description": "Price of the product"
-            },
-            "inStock": {
-              "param_type": "boolean",
-              "description": "Availability status of the product."
-            },
-            "tags": {
-              "param_type": "list[str]",
-              "description": "List of product tags"
-            }
-          }
-        }
-      ],
-      "tool_responses": [
-        {
-          "response": "{'response': 'Successfully added product with id: 123'}"
-        }
-      ],
-      "expected": [
-        {
-          "num_tool_calls": 1,
-          "tool_name": "addProduct",
-          "tool_arguments": {
-            "name": "Widget",
-            "price": 19.99,
-            "inStock": true,
-            "tags": [
-              "new",
-              "sale"
-            ]
-          }
-        },
-        {
-          "num_tool_calls": 0,
-          "answer": "123"
-        }
-      ]
-    }
-  },
  "sample_messages_tool_calling": {
    "data": {
      "messages": [
@ -270,13 +231,19 @@
      ],
      "tools": [
        {
-          "tool_name": "get_weather",
-          "description": "Get the current weather",
-          "parameters": {
-            "location": {
-              "param_type": "string",
-              "description": "The city and state, e.g. San Francisco, CA",
-              "required": true
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the current weather",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
+                }
+              },
+              "required": ["location"]
            }
          }
        }
@ -343,18 +310,23 @@
      ],
      "tools": [
        {
-          "tool_name": "get_object_namespace_list",
-          "description": "Get the list of objects in a namespace",
-          "parameters": {
-            "kind": {
-              "param_type": "string",
-              "description": "the type of object",
-              "required": true
-            },
-            "namespace": {
-              "param_type": "string",
-              "description": "the name of the namespace",
-              "required": true
+          "type": "function",
+          "function": {
+            "name": "get_object_namespace_list",
+            "description": "Get the list of objects in a namespace",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "kind": {
+                  "type": "string",
+                  "description": "the type of object"
+                },
+                "namespace": {
+                  "type": "string",
+                  "description": "the name of the namespace"
+                }
+              },
+              "required": ["kind", "namespace"]
            }
          }
        }