llama-stack-mirror/tests/integration/recordings/responses/a98eecadddc8.json
Matthew Farrellee f4ab154ade
Some checks failed
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Update ReadTheDocs / update-readthedocs (push) Failing after 3s
UI Tests / ui-tests (22) (push) Successful in 43s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 3s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
API Conformance Tests / check-schema-compatibility (push) Successful in 7s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Pre-commit / pre-commit (push) Successful in 1m21s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Python Package Build Test / build (3.12) (push) Failing after 2s
Python Package Build Test / build (3.13) (push) Failing after 2s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 3s
Test External API and Providers / test-external (venv) (push) Failing after 5s
feat: add dynamic model registration support to TGI inference (#3417)
# What does this PR do?

adds dynamic model support to TGI

add new overwrite_completion_id feature to OpenAIMixin to deal with TGI
always returning id=""

## Test Plan

tgi: `docker run --gpus all --shm-size 1g -p 8080:80 -v /data:/data
ghcr.io/huggingface/text-generation-inference --model-id
Qwen/Qwen3-0.6B`

stack: `TGI_URL=http://localhost:8080 uv run llama stack build
--image-type venv --distro ci-tests --run`

test: `./scripts/integration-tests.sh --stack-config
http://localhost:8321 --setup tgi --subdirs inference --pattern openai`
2025-09-15 15:52:40 -04:00

366 lines
10 KiB
JSON

{
"request": {
"method": "POST",
"url": "http://localhost:8080/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": true,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "{",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": " \"",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "c",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "ity",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "\":",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": " \"",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "Tok",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "yo",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "0",
"function": {
"arguments": "\"}",
"name": null
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1757550392,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "3.3.5-dev0-sha-1b90c50",
"usage": null
}
}
],
"is_streaming": true
}
}