Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)

Merge branch 'main' into add_max_tool_calls_v2
Commit afacd985a3: 97 changed files with 6976 additions and 255 deletions
@@ -211,3 +211,23 @@ def test_asymmetric_embeddings(llama_stack_client, embedding_model_id):
    assert query_response.embeddings is not None
```

## TypeScript Client Replays

TypeScript SDK tests can run alongside Python tests when testing against `server:<config>` stacks. Set `TS_CLIENT_PATH` to the path or version of `llama-stack-client-typescript` to enable:

```bash
# Use published npm package (responses suite)
TS_CLIENT_PATH=^0.3.2 scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt

# Use local checkout from ~/.cache (recommended for development)
git clone https://github.com/llamastack/llama-stack-client-typescript.git ~/.cache/llama-stack-client-typescript
TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt

# Run base suite with TypeScript tests
TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite base --setup ollama
```

TypeScript tests run immediately after Python tests pass, using the same replay fixtures. The mapping between Python suites/setups and TypeScript test files is defined in `tests/integration/client-typescript/suites.json`.

If `TS_CLIENT_PATH` is unset, TypeScript tests are skipped entirely.
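For reference, here is a minimal sketch of a mapping entry, mirroring the `tests/integration/client-typescript/suites.json` file added later in this change; `suite` and `setup` correspond to the `--suite` and `--setup` flags above, and `files` lists the Jest test files to run:

```json
[
  {
    "suite": "responses",
    "setup": "gpt",
    "files": ["__tests__/responses.test.ts"]
  }
]
```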
tests/integration/client-typescript/__tests__/inference.test.ts (new file, 104 lines)
@@ -0,0 +1,104 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/**
 * Integration tests for Inference API (Chat Completions).
 * Ported from: llama-stack/tests/integration/inference/test_openai_completion.py
 *
 * IMPORTANT: Test cases must match EXACTLY with Python tests to use recorded API responses.
 */

import { createTestClient, requireTextModel } from '../setup';

describe('Inference API - Chat Completions', () => {
  // Test cases matching llama-stack/tests/integration/test_cases/inference/chat_completion.json
  const chatCompletionTestCases = [
    {
      id: 'non_streaming_01',
      question: 'Which planet do humans live on?',
      expected: 'earth',
      testId:
        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_01]',
    },
    {
      id: 'non_streaming_02',
      question: 'Which planet has rings around it with a name starting with letter S?',
      expected: 'saturn',
      testId:
        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_02]',
    },
  ];

  const streamingTestCases = [
    {
      id: 'streaming_01',
      question: "What's the name of the Sun in latin?",
      expected: 'sol',
      testId:
        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_01]',
    },
    {
      id: 'streaming_02',
      question: 'What is the name of the US captial?',
      expected: 'washington',
      testId:
        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_02]',
    },
  ];

  test.each(chatCompletionTestCases)(
    'chat completion non-streaming: $id',
    async ({ question, expected, testId }) => {
      const client = createTestClient(testId);
      const textModel = requireTextModel();

      const response = await client.chat.completions.create({
        model: textModel,
        messages: [
          {
            role: 'user',
            content: question,
          },
        ],
        stream: false,
      });

      // Non-streaming responses have choices with message property
      const choice = response.choices[0];
      expect(choice).toBeDefined();
      if (!choice || !('message' in choice)) {
        throw new Error('Expected non-streaming response with message');
      }
      const content = choice.message.content;
      expect(content).toBeDefined();
      const messageContent = typeof content === 'string' ? content.toLowerCase().trim() : '';
      expect(messageContent.length).toBeGreaterThan(0);
      expect(messageContent).toContain(expected.toLowerCase());
    },
  );

  test.each(streamingTestCases)('chat completion streaming: $id', async ({ question, expected, testId }) => {
    const client = createTestClient(testId);
    const textModel = requireTextModel();

    const stream = await client.chat.completions.create({
      model: textModel,
      messages: [{ role: 'user', content: question }],
      stream: true,
    });

    const streamedContent: string[] = [];
    for await (const chunk of stream) {
      if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) {
        streamedContent.push(chunk.choices[0].delta.content);
      }
    }

    expect(streamedContent.length).toBeGreaterThan(0);
    const fullContent = streamedContent.join('').toLowerCase().trim();
    expect(fullContent).toContain(expected.toLowerCase());
  });
});
tests/integration/client-typescript/__tests__/responses.test.ts (new file, 132 lines)
@@ -0,0 +1,132 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/**
 * Integration tests for Responses API.
 * Ported from: llama-stack/tests/integration/responses/test_basic_responses.py
 *
 * IMPORTANT: Test cases and IDs must match EXACTLY with Python tests to use recorded API responses.
 */

import { createTestClient, requireTextModel, getResponseOutputText } from '../setup';

describe('Responses API - Basic', () => {
  // Test cases matching llama-stack/tests/integration/responses/fixtures/test_cases.py
  const basicTestCases = [
    {
      id: 'earth',
      input: 'Which planet do humans live on?',
      expected: 'earth',
      // Use client_with_models fixture to match non-streaming recordings
      testId:
        'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-earth]',
    },
    {
      id: 'saturn',
      input: 'Which planet has rings around it with a name starting with letter S?',
      expected: 'saturn',
      testId:
        'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-saturn]',
    },
  ];

  test.each(basicTestCases)('non-streaming basic response: $id', async ({ input, expected, testId }) => {
    // Create client with test_id for all requests
    const client = createTestClient(testId);
    const textModel = requireTextModel();

    // Create a response
    const response = await client.responses.create({
      model: textModel,
      input,
      stream: false,
    });

    // Verify response has content
    const outputText = getResponseOutputText(response).toLowerCase().trim();
    expect(outputText.length).toBeGreaterThan(0);
    expect(outputText).toContain(expected.toLowerCase());

    // Verify usage is reported
    expect(response.usage).toBeDefined();
    expect(response.usage!.input_tokens).toBeGreaterThan(0);
    expect(response.usage!.output_tokens).toBeGreaterThan(0);
    expect(response.usage!.total_tokens).toBe(response.usage!.input_tokens + response.usage!.output_tokens);

    // Verify stored response matches
    const retrievedResponse = await client.responses.retrieve(response.id);
    expect(getResponseOutputText(retrievedResponse)).toBe(getResponseOutputText(response));

    // Test follow-up with previous_response_id
    const nextResponse = await client.responses.create({
      model: textModel,
      input: 'Repeat your previous response in all caps.',
      previous_response_id: response.id,
    });
    const nextOutputText = getResponseOutputText(nextResponse).trim();
    expect(nextOutputText).toContain(expected.toUpperCase());
  });

  test.each(basicTestCases)('streaming basic response: $id', async ({ input, expected, testId }) => {
    // Modify test_id for streaming variant
    const streamingTestId = testId.replace(
      'test_response_non_streaming_basic',
      'test_response_streaming_basic',
    );
    const client = createTestClient(streamingTestId);
    const textModel = requireTextModel();

    // Create a streaming response
    const stream = await client.responses.create({
      model: textModel,
      input,
      stream: true,
    });

    const events: any[] = [];
    let responseId = '';

    for await (const chunk of stream) {
      events.push(chunk);

      if (chunk.type === 'response.created') {
        // Verify response.created is the first event
        expect(events.length).toBe(1);
        expect(chunk.response.status).toBe('in_progress');
        responseId = chunk.response.id;
      } else if (chunk.type === 'response.completed') {
        // Verify response.completed comes after response.created
        expect(events.length).toBeGreaterThanOrEqual(2);
        expect(chunk.response.status).toBe('completed');
        expect(chunk.response.id).toBe(responseId);

        // Verify content quality
        const outputText = getResponseOutputText(chunk.response).toLowerCase().trim();
        expect(outputText.length).toBeGreaterThan(0);
        expect(outputText).toContain(expected.toLowerCase());

        // Verify usage is reported
        expect(chunk.response.usage).toBeDefined();
        expect(chunk.response.usage!.input_tokens).toBeGreaterThan(0);
        expect(chunk.response.usage!.output_tokens).toBeGreaterThan(0);
        expect(chunk.response.usage!.total_tokens).toBe(
          chunk.response.usage!.input_tokens + chunk.response.usage!.output_tokens,
        );
      }
    }

    // Verify we got both events
    expect(events.length).toBeGreaterThanOrEqual(2);
    const firstEvent = events[0];
    const lastEvent = events[events.length - 1];
    expect(firstEvent.type).toBe('response.created');
    expect(lastEvent.type).toBe('response.completed');

    // Verify stored response matches streamed response
    const retrievedResponse = await client.responses.retrieve(responseId);
    expect(getResponseOutputText(retrievedResponse)).toBe(getResponseOutputText(lastEvent.response));
  });
});
tests/integration/client-typescript/jest.integration.config.js (new file, 31 lines)
@@ -0,0 +1,31 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
  preset: 'ts-jest/presets/default-esm',
  testEnvironment: 'node',
  extensionsToTreatAsEsm: ['.ts'],
  moduleNameMapper: {
    '^(\\.{1,2}/.*)\\.js$': '$1',
  },
  transform: {
    '^.+\\.tsx?$': [
      'ts-jest',
      {
        useESM: true,
        tsconfig: {
          module: 'ES2022',
          moduleResolution: 'bundler',
        },
      },
    ],
  },
  testMatch: ['<rootDir>/__tests__/**/*.test.ts'],
  setupFilesAfterEnv: ['<rootDir>/setup.ts'],
  testTimeout: 60000, // 60 seconds (integration tests can be slow)
  watchman: false, // Disable watchman to avoid permission issues
};
tests/integration/client-typescript/package-lock.json (generated, new file, 5507 lines)
File diff suppressed because it is too large.
tests/integration/client-typescript/package.json (new file, 18 lines)
@@ -0,0 +1,18 @@
{
  "name": "llama-stack-typescript-integration-tests",
  "version": "0.0.1",
  "private": true,
  "description": "TypeScript client integration tests for Llama Stack",
  "scripts": {
    "test": "node run-tests.js"
  },
  "devDependencies": {
    "@swc/core": "^1.3.102",
    "@swc/jest": "^0.2.29",
    "@types/jest": "^29.4.0",
    "@types/node": "^20.0.0",
    "jest": "^29.4.0",
    "ts-jest": "^29.1.0",
    "typescript": "^5.0.0"
  }
}
tests/integration/client-typescript/run-tests.js (new executable file, 63 lines)
@@ -0,0 +1,63 @@
#!/usr/bin/env node
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/**
 * Test runner that finds and executes TypeScript tests based on suite/setup mapping.
 * Called by integration-tests.sh via npm test.
 */

const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');

const suite = process.env.LLAMA_STACK_TEST_SUITE;
const setup = process.env.LLAMA_STACK_TEST_SETUP || '';

if (!suite) {
  console.error('Error: LLAMA_STACK_TEST_SUITE environment variable is required');
  process.exit(1);
}

// Read suites.json to find matching test files
const suitesPath = path.join(__dirname, 'suites.json');
if (!fs.existsSync(suitesPath)) {
  console.log(`No TypeScript tests configured (${suitesPath} not found)`);
  process.exit(0);
}

const suites = JSON.parse(fs.readFileSync(suitesPath, 'utf-8'));

// Find matching entry
let testFiles = [];
for (const entry of suites) {
  if (entry.suite !== suite) {
    continue;
  }
  const entrySetup = entry.setup || '';
  if (entrySetup && entrySetup !== setup) {
    continue;
  }
  testFiles = entry.files || [];
  break;
}

if (testFiles.length === 0) {
  console.log(`No TypeScript integration tests mapped for suite ${suite} (setup ${setup})`);
  process.exit(0);
}

console.log(`Running TypeScript tests for suite ${suite} (setup ${setup}): ${testFiles.join(', ')}`);

// Run Jest with the mapped test files
try {
  execSync(`npx jest --config jest.integration.config.js ${testFiles.join(' ')}`, {
    stdio: 'inherit',
    cwd: __dirname,
  });
} catch (error) {
  process.exit(error.status || 1);
}
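To illustrate the contract above, the runner can also be exercised by hand. A minimal sketch, assuming a stack is already serving at `TEST_API_BASE_URL` and that dependencies are installed in `tests/integration/client-typescript`; the specific URL and model values below are placeholders, and `scripts/integration-tests.sh` normally exports all of these variables for you:

```bash
cd tests/integration/client-typescript
npm install

# run-tests.js reads these to pick test files from suites.json
export LLAMA_STACK_TEST_SUITE=responses
export LLAMA_STACK_TEST_SETUP=gpt

# setup.ts additionally needs the target server and replay-mode info
# (example values; adjust to match your running stack and setup)
export TEST_API_BASE_URL=http://localhost:8321
export LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server
export LLAMA_STACK_TEST_TEXT_MODEL=openai/gpt-4o

# package.json maps `npm test` to `node run-tests.js`
npm test
```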
tests/integration/client-typescript/setup.ts (new file, 162 lines)
@@ -0,0 +1,162 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/**
 * Global setup for integration tests.
 * This file mimics pytest's fixture system by providing shared test configuration.
 */

import LlamaStackClient from 'llama-stack-client';

/**
 * Load test configuration from the Python setup system.
 * This reads setup definitions from tests/integration/suites.py via get_setup_env.py.
 */
function loadTestConfig() {
  const baseURL = process.env['TEST_API_BASE_URL'];
  const setupName = process.env['LLAMA_STACK_TEST_SETUP'];
  const textModel = process.env['LLAMA_STACK_TEST_TEXT_MODEL'];
  const embeddingModel = process.env['LLAMA_STACK_TEST_EMBEDDING_MODEL'];

  if (!baseURL) {
    throw new Error(
      'TEST_API_BASE_URL is required for integration tests. ' +
        'Run tests using: ./scripts/integration-tests.sh',
    );
  }

  return {
    baseURL,
    textModel,
    embeddingModel,
    setupName,
  };
}

// Read configuration from environment variables (set by scripts/integration-tests.sh)
export const TEST_CONFIG = loadTestConfig();

// Validate required configuration
beforeAll(() => {
  console.log('\n=== Integration Test Configuration ===');
  console.log(`Base URL: ${TEST_CONFIG.baseURL}`);
  console.log(`Setup: ${TEST_CONFIG.setupName || 'NOT SET'}`);
  console.log(
    `Text Model: ${TEST_CONFIG.textModel || 'NOT SET - tests requiring text model will be skipped'}`,
  );
  console.log(
    `Embedding Model: ${
      TEST_CONFIG.embeddingModel || 'NOT SET - tests requiring embedding model will be skipped'
    }`,
  );
  console.log('=====================================\n');
});

/**
 * Create a client instance for integration tests.
 * Mimics pytest's `llama_stack_client` fixture.
 *
 * @param testId - Test ID to send in X-LlamaStack-Provider-Data header for replay mode.
 *   Format: "tests/integration/responses/test_basic_responses.py::test_name[params]"
 */
export function createTestClient(testId?: string): LlamaStackClient {
  const headers: Record<string, string> = {};

  // In server mode with replay, send test ID for recording isolation
  if (process.env['LLAMA_STACK_TEST_STACK_CONFIG_TYPE'] === 'server' && testId) {
    headers['X-LlamaStack-Provider-Data'] = JSON.stringify({
      __test_id: testId,
    });
  }

  return new LlamaStackClient({
    baseURL: TEST_CONFIG.baseURL,
    timeout: 60000, // 60 seconds
    defaultHeaders: headers,
  });
}

/**
 * Skip test if required model is not configured.
 * Mimics pytest's `skip_if_no_model` autouse fixture.
 */
export function skipIfNoModel(modelType: 'text' | 'embedding'): typeof test {
  const model = modelType === 'text' ? TEST_CONFIG.textModel : TEST_CONFIG.embeddingModel;

  if (!model) {
    const envVar = modelType === 'text' ? 'LLAMA_STACK_TEST_TEXT_MODEL' : 'LLAMA_STACK_TEST_EMBEDDING_MODEL';
    const message = `Skipping: ${modelType} model not configured (set ${envVar})`;
    console.warn(message);
    return test.skip.bind(test) as typeof test;
  }

  return test;
}

/**
 * Get the configured text model, throwing if not set.
 * Use this in tests that absolutely require a text model.
 */
export function requireTextModel(): string {
  if (!TEST_CONFIG.textModel) {
    throw new Error(
      'LLAMA_STACK_TEST_TEXT_MODEL environment variable is required. ' +
        'Run tests using: ./scripts/integration-tests.sh',
    );
  }
  return TEST_CONFIG.textModel;
}

/**
 * Get the configured embedding model, throwing if not set.
 * Use this in tests that absolutely require an embedding model.
 */
export function requireEmbeddingModel(): string {
  if (!TEST_CONFIG.embeddingModel) {
    throw new Error(
      'LLAMA_STACK_TEST_EMBEDDING_MODEL environment variable is required. ' +
        'Run tests using: ./scripts/integration-tests.sh',
    );
  }
  return TEST_CONFIG.embeddingModel;
}

/**
 * Extracts aggregated text output from a ResponseObject.
 * This concatenates all text content from the response's output array.
 *
 * Copied from llama-stack-client's response-helpers until it's available in the published version.
 */
export function getResponseOutputText(response: any): string {
  const pieces: string[] = [];

  for (const output of response.output ?? []) {
    if (!output || output.type !== 'message') {
      continue;
    }

    const content = output.content;
    if (typeof content === 'string') {
      pieces.push(content);
      continue;
    }

    if (!Array.isArray(content)) {
      continue;
    }

    for (const item of content) {
      if (typeof item === 'string') {
        pieces.push(item);
        continue;
      }
      if (item && item.type === 'output_text' && 'text' in item && typeof item.text === 'string') {
        pieces.push(item.text);
      }
    }
  }

  return pieces.join('');
}
tests/integration/client-typescript/suites.json (new file, 12 lines)
@@ -0,0 +1,12 @@
[
  {
    "suite": "responses",
    "setup": "gpt",
    "files": ["__tests__/responses.test.ts"]
  },
  {
    "suite": "base",
    "setup": "ollama",
    "files": ["__tests__/inference.test.ts"]
  }
]
tests/integration/client-typescript/tsconfig.json (new file, 16 lines)
@@ -0,0 +1,16 @@
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ES2022",
    "lib": ["ES2022"],
    "moduleResolution": "bundler",
    "esModuleInterop": true,
    "allowSyntheticDefaultImports": true,
    "strict": true,
    "skipLibCheck": true,
    "resolveJsonModule": true,
    "types": ["jest", "node"]
  },
  "include": ["**/*.ts"],
  "exclude": ["node_modules"]
}
@@ -50,7 +50,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
        name="ollama",
        description="Local Ollama provider with text + safety models",
        env={
-            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "OLLAMA_URL": "http://0.0.0.0:11434/v1",
            "SAFETY_MODEL": "ollama/llama-guard3:1b",
        },
        defaults={
@@ -64,7 +64,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
        name="ollama",
        description="Local Ollama provider with a vision model",
        env={
-            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "OLLAMA_URL": "http://0.0.0.0:11434/v1",
        },
        defaults={
            "vision_model": "ollama/llama3.2-vision:11b",
@@ -75,7 +75,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
        name="ollama-postgres",
        description="Server-mode tests with Postgres-backed persistence",
        env={
-            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "OLLAMA_URL": "http://0.0.0.0:11434/v1",
            "SAFETY_MODEL": "ollama/llama-guard3:1b",
            "POSTGRES_HOST": "127.0.0.1",
            "POSTGRES_PORT": "5432",
@@ -0,0 +1,206 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Tests for making Safety API optional in meta-reference agents provider.

This test suite validates the changes introduced to fix issue #4165, which
allows running the meta-reference agents provider without the Safety API.
Safety API is now an optional dependency, and errors are raised at request time
when guardrails are explicitly requested without Safety API configured.
"""

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from llama_stack.core.datatypes import Api
from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
from llama_stack.providers.inline.agents.meta_reference import get_provider_impl
from llama_stack.providers.inline.agents.meta_reference.config import (
    AgentPersistenceConfig,
    MetaReferenceAgentsImplConfig,
)
from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
    run_guardrails,
)


@pytest.fixture
def mock_persistence_config():
    """Create a mock persistence configuration."""
    return AgentPersistenceConfig(
        agent_state=KVStoreReference(
            backend="kv_default",
            namespace="agents",
        ),
        responses=ResponsesStoreReference(
            backend="sql_default",
            table_name="responses",
        ),
    )


@pytest.fixture
def mock_deps():
    """Create mock dependencies for the agents provider."""
    # Create mock APIs
    inference_api = AsyncMock()
    vector_io_api = AsyncMock()
    tool_runtime_api = AsyncMock()
    tool_groups_api = AsyncMock()
    conversations_api = AsyncMock()

    return {
        Api.inference: inference_api,
        Api.vector_io: vector_io_api,
        Api.tool_runtime: tool_runtime_api,
        Api.tool_groups: tool_groups_api,
        Api.conversations: conversations_api,
    }


class TestProviderInitialization:
    """Test provider initialization with different safety API configurations."""

    async def test_initialization_with_safety_api_present(self, mock_persistence_config, mock_deps):
        """Test successful initialization when Safety API is configured."""
        config = MetaReferenceAgentsImplConfig(persistence=mock_persistence_config)

        # Add safety API to deps
        safety_api = AsyncMock()
        mock_deps[Api.safety] = safety_api

        # Mock the initialize method to avoid actual initialization
        with patch(
            "llama_stack.providers.inline.agents.meta_reference.agents.MetaReferenceAgentsImpl.initialize",
            new_callable=AsyncMock,
        ):
            # Should not raise any exception
            provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
            assert provider is not None

    async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps):
        """Test successful initialization when Safety API is not configured."""
        config = MetaReferenceAgentsImplConfig(persistence=mock_persistence_config)

        # Safety API is NOT in mock_deps - provider should still start
        # Mock the initialize method to avoid actual initialization
        with patch(
            "llama_stack.providers.inline.agents.meta_reference.agents.MetaReferenceAgentsImpl.initialize",
            new_callable=AsyncMock,
        ):
            # Should not raise any exception
            provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
            assert provider is not None
            assert provider.safety_api is None


class TestGuardrailsFunctionality:
    """Test run_guardrails function with optional safety API."""

    async def test_run_guardrails_with_none_safety_api(self):
        """Test that run_guardrails returns None when safety_api is None."""
        result = await run_guardrails(safety_api=None, messages="test message", guardrail_ids=["llama-guard"])
        assert result is None

    async def test_run_guardrails_with_empty_messages(self):
        """Test that run_guardrails returns None for empty messages."""
        # Test with None safety API
        result = await run_guardrails(safety_api=None, messages="", guardrail_ids=["llama-guard"])
        assert result is None

        # Test with mock safety API
        mock_safety_api = AsyncMock()
        result = await run_guardrails(safety_api=mock_safety_api, messages="", guardrail_ids=["llama-guard"])
        assert result is None

    async def test_run_guardrails_with_none_safety_api_ignores_guardrails(self):
        """Test that guardrails are skipped when safety_api is None, even if guardrail_ids are provided."""
        # Should not raise exception, just return None
        result = await run_guardrails(
            safety_api=None,
            messages="potentially harmful content",
            guardrail_ids=["llama-guard", "content-filter"],
        )
        assert result is None

    async def test_create_response_rejects_guardrails_without_safety_api(self, mock_persistence_config, mock_deps):
        """Test that create_openai_response raises error when guardrails requested but Safety API unavailable."""
        from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
            OpenAIResponsesImpl,
        )
        from llama_stack_api import ResponseGuardrailSpec

        # Create OpenAIResponsesImpl with no safety API
        with patch("llama_stack.providers.inline.agents.meta_reference.responses.openai_responses.ResponsesStore"):
            impl = OpenAIResponsesImpl(
                inference_api=mock_deps[Api.inference],
                tool_groups_api=mock_deps[Api.tool_groups],
                tool_runtime_api=mock_deps[Api.tool_runtime],
                responses_store=MagicMock(),
                vector_io_api=mock_deps[Api.vector_io],
                safety_api=None,  # No Safety API
                conversations_api=mock_deps[Api.conversations],
            )

            # Test with string guardrail
            with pytest.raises(ValueError) as exc_info:
                await impl.create_openai_response(
                    input="test input",
                    model="test-model",
                    guardrails=["llama-guard"],
                )
            assert "Cannot process guardrails: Safety API is not configured" in str(exc_info.value)

            # Test with ResponseGuardrailSpec
            with pytest.raises(ValueError) as exc_info:
                await impl.create_openai_response(
                    input="test input",
                    model="test-model",
                    guardrails=[ResponseGuardrailSpec(type="llama-guard")],
                )
            assert "Cannot process guardrails: Safety API is not configured" in str(exc_info.value)

    async def test_create_response_succeeds_without_guardrails_and_no_safety_api(
        self, mock_persistence_config, mock_deps
    ):
        """Test that create_openai_response works when no guardrails requested and Safety API unavailable."""
        from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
            OpenAIResponsesImpl,
        )

        # Create OpenAIResponsesImpl with no safety API
        with (
            patch("llama_stack.providers.inline.agents.meta_reference.responses.openai_responses.ResponsesStore"),
            patch.object(OpenAIResponsesImpl, "_create_streaming_response", new_callable=AsyncMock) as mock_stream,
        ):
            # Mock the streaming response to return a simple async generator
            async def mock_generator():
                yield MagicMock()

            mock_stream.return_value = mock_generator()

            impl = OpenAIResponsesImpl(
                inference_api=mock_deps[Api.inference],
                tool_groups_api=mock_deps[Api.tool_groups],
                tool_runtime_api=mock_deps[Api.tool_runtime],
                responses_store=MagicMock(),
                vector_io_api=mock_deps[Api.vector_io],
                safety_api=None,  # No Safety API
                conversations_api=mock_deps[Api.conversations],
            )

            # Should not raise when no guardrails requested
            # Note: This will still fail later in execution due to mocking, but should pass the validation
            try:
                await impl.create_openai_response(
                    input="test input",
                    model="test-model",
                    guardrails=None,  # No guardrails
                )
            except Exception as e:
                # Ensure the error is NOT about missing Safety API
                assert "Cannot process guardrails: Safety API is not configured" not in str(e)
@@ -120,7 +120,7 @@ from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInfere
            VLLMInferenceAdapter,
            "llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator",
            {
-                "url": "http://fake",
+                "base_url": "http://fake",
            },
        ),
    ],
@@ -153,7 +153,7 @@ def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_valid
    """Validate data for LiteLLM-based providers. Similar to test_openai_provider_data_used, but without the
    assumption that there is an OpenAI-compatible client object."""

-    inference_adapter = adapter_cls(config=config_cls())
+    inference_adapter = adapter_cls(config=config_cls(base_url="http://fake"))

    inference_adapter.__provider_spec__ = MagicMock()
    inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator
@@ -40,7 +40,7 @@ from llama_stack_api import (

@pytest.fixture(scope="function")
async def vllm_inference_adapter():
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    inference_adapter = VLLMInferenceAdapter(config=config)
    inference_adapter.model_store = AsyncMock()
    await inference_adapter.initialize()
@@ -204,7 +204,7 @@ async def test_vllm_completion_extra_body():
    via extra_body to the underlying OpenAI client through the InferenceRouter.
    """
    # Set up the vLLM adapter
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    vllm_adapter = VLLMInferenceAdapter(config=config)
    vllm_adapter.__provider_id__ = "vllm"
    await vllm_adapter.initialize()
@@ -277,7 +277,7 @@ async def test_vllm_chat_completion_extra_body():
    via extra_body to the underlying OpenAI client through the InferenceRouter for chat completion.
    """
    # Set up the vLLM adapter
-    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    config = VLLMInferenceAdapterConfig(base_url="http://mocked.localhost:12345")
    vllm_adapter = VLLMInferenceAdapter(config=config)
    vllm_adapter.__provider_id__ = "vllm"
    await vllm_adapter.initialize()
@@ -146,7 +146,7 @@ async def test_hosted_model_not_in_endpoint_mapping():

async def test_self_hosted_ignores_endpoint():
    adapter = create_adapter(
-        config=NVIDIAConfig(url="http://localhost:8000", api_key=None),
+        config=NVIDIAConfig(base_url="http://localhost:8000", api_key=None),
        rerank_endpoints={"test-model": "https://model.endpoint/rerank"},  # This should be ignored for self-hosted.
    )
    mock_session = MockSession(MockResponse())
@@ -4,8 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

+from typing import get_args, get_origin
+
import pytest
-from pydantic import BaseModel
+from pydantic import BaseModel, HttpUrl

from llama_stack.core.distribution import get_provider_registry, providable_apis
from llama_stack.core.utils.dynamic import instantiate_class_type
@@ -41,3 +43,55 @@ class TestProviderConfigurations:

        sample_config = config_type.sample_run_config(__distro_dir__="foobarbaz")
        assert isinstance(sample_config, dict), f"{config_class_name}.sample_run_config() did not return a dict"

    def test_remote_inference_url_standardization(self):
        """Verify all remote inference providers use standardized base_url configuration."""
        provider_registry = get_provider_registry()
        inference_providers = provider_registry.get("inference", {})

        # Filter for remote providers only
        remote_providers = {k: v for k, v in inference_providers.items() if k.startswith("remote::")}

        failures = []
        for provider_type, provider_spec in remote_providers.items():
            try:
                config_class_name = provider_spec.config_class
                config_type = instantiate_class_type(config_class_name)

                # Check that config has base_url field (not url)
                if hasattr(config_type, "model_fields"):
                    fields = config_type.model_fields

                    # Should NOT have 'url' field (old pattern)
                    if "url" in fields:
                        failures.append(
                            f"{provider_type}: Uses deprecated 'url' field instead of 'base_url'. "
                            f"Please rename to 'base_url' for consistency."
                        )

                    # Should have 'base_url' field with HttpUrl | None type
                    if "base_url" in fields:
                        field_info = fields["base_url"]
                        annotation = field_info.annotation

                        # Check if it's HttpUrl or HttpUrl | None
                        # get_origin() returns Union for (X | Y), None for plain types
                        # get_args() returns the types inside Union, e.g. (HttpUrl, NoneType)
                        is_valid = False
                        if get_origin(annotation) is not None:  # It's a Union/Optional
                            if HttpUrl in get_args(annotation):
                                is_valid = True
                        elif annotation == HttpUrl:  # Plain HttpUrl without | None
                            is_valid = True

                        if not is_valid:
                            failures.append(
                                f"{provider_type}: base_url field has incorrect type annotation. "
                                f"Expected 'HttpUrl | None', got '{annotation}'"
                            )

            except Exception as e:
                failures.append(f"{provider_type}: Error checking URL standardization: {str(e)}")

        if failures:
            pytest.fail("URL standardization violations found:\n" + "\n".join(f" - {f}" for f in failures))
@@ -5,7 +5,7 @@
# the root directory of this source tree.

from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
-from llama_stack_api import Chunk, ChunkMetadata
+from llama_stack_api import Chunk, ChunkMetadata, VectorStoreFileObject

# This test is a unit test for the chunk_utils.py helpers. This should only contain
# tests which are specific to this file. More general (API-level) tests should be placed in
@@ -78,3 +78,77 @@ def test_chunk_serialization():
    serialized_chunk = chunk.model_dump()
    assert serialized_chunk["chunk_id"] == "test-chunk-id"
    assert "chunk_id" in serialized_chunk


def test_vector_store_file_object_attributes_validation():
    """Test VectorStoreFileObject validates and sanitizes attributes at input boundary."""
    # Test with metadata containing lists, nested dicts, and primitives
    from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto

    file_obj = VectorStoreFileObject(
        id="file-123",
        attributes={
            "tags": ["transformers", "h100-compatible", "region:us"],  # List -> string
            "model_name": "granite-3.3-8b",  # String preserved
            "score": 0.95,  # Float preserved
            "active": True,  # Bool preserved
            "count": 42,  # Int -> float
            "nested": {"key": "value"},  # Dict filtered out
        },
        chunking_strategy=VectorStoreChunkingStrategyAuto(),
        created_at=1234567890,
        status="completed",
        vector_store_id="vs-123",
    )

    # Lists converted to comma-separated strings
    assert file_obj.attributes["tags"] == "transformers, h100-compatible, region:us"
    # Primitives preserved
    assert file_obj.attributes["model_name"] == "granite-3.3-8b"
    assert file_obj.attributes["score"] == 0.95
    assert file_obj.attributes["active"] is True
    assert file_obj.attributes["count"] == 42.0  # int -> float
    # Complex types filtered out
    assert "nested" not in file_obj.attributes


def test_vector_store_file_object_attributes_constraints():
    """Test VectorStoreFileObject enforces OpenAPI constraints on attributes."""
    from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto

    # Test max 16 properties
    many_attrs = {f"key{i}": f"value{i}" for i in range(20)}
    file_obj = VectorStoreFileObject(
        id="file-123",
        attributes=many_attrs,
        chunking_strategy=VectorStoreChunkingStrategyAuto(),
        created_at=1234567890,
        status="completed",
        vector_store_id="vs-123",
    )
    assert len(file_obj.attributes) == 16  # Max 16 properties

    # Test max 64 char keys are filtered
    long_key_attrs = {"a" * 65: "value", "valid_key": "value"}
    file_obj = VectorStoreFileObject(
        id="file-124",
        attributes=long_key_attrs,
        chunking_strategy=VectorStoreChunkingStrategyAuto(),
        created_at=1234567890,
        status="completed",
        vector_store_id="vs-123",
    )
    assert "a" * 65 not in file_obj.attributes
    assert "valid_key" in file_obj.attributes

    # Test max 512 char string values are truncated
    long_value_attrs = {"key": "x" * 600}
    file_obj = VectorStoreFileObject(
        id="file-125",
        attributes=long_value_attrs,
        chunking_strategy=VectorStoreChunkingStrategyAuto(),
        created_at=1234567890,
        status="completed",
        vector_store_id="vs-123",
    )
    assert len(file_obj.attributes["key"]) == 512