chore(recorder): add support for NOT_GIVEN (#3430)

# What does this PR do?

The recorder mocks the openai-python client interface. That interface accepts `NOT_GIVEN` as a sentinel value for
optional parameters a caller omits, and this change makes the recorder handle `NOT_GIVEN` properly.
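
For context, openai-python passes `NOT_GIVEN` for optional arguments the caller leaves out, so a recorder that serializes or hashes request kwargs has to treat it as "absent" rather than as a real value. The sketch below illustrates that normalization with a hypothetical `normalize_request_kwargs` helper; it is not the recorder code in this PR, just a minimal example of the behavior being tested.

```python
from openai import NOT_GIVEN


def normalize_request_kwargs(kwargs: dict) -> dict:
    """Drop NOT_GIVEN entries so a call that passes user=NOT_GIVEN records
    the same request as one that omits `user` entirely.

    Hypothetical helper for illustration only; the actual recorder may
    normalize requests differently.
    """
    return {key: value for key, value in kwargs.items() if value is not NOT_GIVEN}


# Both calls should map to the same recorded request.
assert normalize_request_kwargs({"model": "llama3.2:3b", "user": NOT_GIVEN}) == {"model": "llama3.2:3b"}
assert normalize_request_kwargs({"model": "llama3.2:3b"}) == {"model": "llama3.2:3b"}
```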


## Test Plan

CI (coverage for chat, completions, and embeddings)
Matthew Farrellee 2025-09-13 14:11:38 -04:00 committed by GitHub
parent 8cf2128b40
commit 6787755c0c
2 changed files with 67 additions and 3 deletions


@@ -9,7 +9,7 @@ from pathlib import Path
 from unittest.mock import AsyncMock, Mock, patch

 import pytest
-from openai import AsyncOpenAI
+from openai import NOT_GIVEN, AsyncOpenAI
 from openai.types.model import Model as OpenAIModel

 # Import the real Pydantic response types instead of using Mocks
@@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
+    OpenAICompletion,
     OpenAIEmbeddingData,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
@@ -170,6 +171,7 @@ class TestInferenceRecording:
                     messages=[{"role": "user", "content": "Hello, how are you?"}],
                     temperature=0.7,
                     max_tokens=50,
+                    user=NOT_GIVEN,
                 )

                 # Verify the response was returned correctly
@@ -198,6 +200,7 @@ class TestInferenceRecording:
                     messages=[{"role": "user", "content": "Hello, how are you?"}],
                     temperature=0.7,
                     max_tokens=50,
+                    user=NOT_GIVEN,
                 )

         # Now test replay mode - should not call the original method
@@ -281,7 +284,11 @@ class TestInferenceRecording:
                 client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                 response = await client.embeddings.create(
-                    model="nomic-embed-text", input=["Hello world", "Test embedding"]
+                    model=real_embeddings_response.model,
+                    input=["Hello world", "Test embedding"],
+                    encoding_format=NOT_GIVEN,
+                    dimensions=NOT_GIVEN,
+                    user=NOT_GIVEN,
                 )

                 assert len(response.data) == 2
@@ -292,7 +299,8 @@ class TestInferenceRecording:
                 client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                 response = await client.embeddings.create(
-                    model="nomic-embed-text", input=["Hello world", "Test embedding"]
+                    model=real_embeddings_response.model,
+                    input=["Hello world", "Test embedding"],
                 )

                 # Verify we got the recorded response
@@ -302,6 +310,57 @@ class TestInferenceRecording:
                 # Verify original method was not called
                 mock_create_patch.assert_not_called()

+    async def test_completions_recording(self, temp_storage_dir):
+        real_completions_response = OpenAICompletion(
+            id="test_completion",
+            object="text_completion",
+            created=1234567890,
+            model="llama3.2:3b",
+            choices=[
+                {
+                    "text": "Hello! I'm doing well, thank you for asking.",
+                    "index": 0,
+                    "logprobs": None,
+                    "finish_reason": "stop",
+                }
+            ],
+        )
+
+        async def mock_create(*args, **kwargs):
+            return real_completions_response
+
+        temp_storage_dir = temp_storage_dir / "test_completions_recording"
+
+        # Record
+        with patch(
+            "openai.resources.completions.AsyncCompletions.create", new_callable=AsyncMock, side_effect=mock_create
+        ):
+            with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
+                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+
+                response = await client.completions.create(
+                    model=real_completions_response.model,
+                    prompt="Hello, how are you?",
+                    temperature=0.7,
+                    max_tokens=50,
+                    user=NOT_GIVEN,
+                )
+
+                assert response.choices[0].text == real_completions_response.choices[0].text
+
+        # Replay
+        with patch("openai.resources.completions.AsyncCompletions.create") as mock_create_patch:
+            with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
+                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+
+                response = await client.completions.create(
+                    model=real_completions_response.model,
+                    prompt="Hello, how are you?",
+                    temperature=0.7,
+                    max_tokens=50,
+                )
+
+                assert response.choices[0].text == real_completions_response.choices[0].text
+
+                mock_create_patch.assert_not_called()
+
     async def test_live_mode(self, real_openai_chat_response):
         """Test that live mode passes through to original methods."""