feat: add dynamic model registration support to TGI inference

add new overwrite_completion_id feature to OpenAIMixin to deal with TGI always returning id=""

test with -

tgi: `docker run --gpus all --shm-size 1g -p 8080:80 -v /data:/data ghcr.io/huggingface/text-generation-inference --model-id Qwen/Qwen3-0.6B`

stack: `TGI_URL=http://localhost:8080 uv run llama stack build --image-type venv --distro ci-tests --run`

test: `./scripts/integration-tests.sh --stack-config http://localhost:8321 --setup tgi --subdirs inference --pattern openai`
This commit is contained in:
Matthew Farrellee 2025-09-11 02:02:02 -04:00
parent d15368a302
commit c3fc859257
14 changed files with 12218 additions and 20 deletions

View file

@ -48,7 +48,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
"remote::nvidia",
"remote::runpod",
"remote::sambanova",
"remote::tgi",
"remote::vertexai",
# {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
# or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
@ -96,6 +95,7 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
"remote::vertexai",
# Error code: 400 - [{'error': {'code': 400, 'message': 'Unable to submit request because candidateCount must be 1 but
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
"remote::tgi", # TGI ignores n param silently
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
@ -110,7 +110,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
"remote::cerebras",
"remote::databricks",
"remote::runpod",
"remote::tgi",
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.")