feat: add dynamic model registration support to TGI inference

add new overwrite_completion_id feature to OpenAIMixin to deal with TGI always returning id="" test with - tgi: `docker run --gpus all --shm-size 1g -p 8080:80 -v /data:/data ghcr.io/huggingface/text-generation-inference --model-id Qwen/Qwen3-0.6B` stack: `TGI_URL=http://localhost:8080 uv run llama stack build --image-type venv --distro ci-tests --run` test: `./scripts/integration-tests.sh --stack-config http://localhost:8321 --setup tgi --subdirs inference --pattern openai`
2025-10-05 12:21:52 +00:00 · 2025-09-11 02:02:02 -04:00 · 2025-09-11 02:02:02 -04:00 · c3fc859257
commit c3fc859257
parent d15368a302
14 changed files with 12218 additions and 20 deletions
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -48,7 +48,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        "remote::nvidia",
        "remote::runpod",
        "remote::sambanova",
-        "remote::tgi",
        "remote::vertexai",
        # {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
        # or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
@ -96,6 +95,7 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
        "remote::vertexai",
        #  Error code: 400 - [{'error': {'code': 400, 'message': 'Unable to submit request because candidateCount must be 1 but
        #  the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
+        "remote::tgi",  # TGI ignores n param silently
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")

@ -110,7 +110,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
        "remote::cerebras",
        "remote::databricks",
        "remote::runpod",
-        "remote::tgi",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.")