mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
feat: add dynamic model registration support to TGI inference (#3417)
Some checks failed
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Update ReadTheDocs / update-readthedocs (push) Failing after 3s
UI Tests / ui-tests (22) (push) Successful in 43s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 3s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
API Conformance Tests / check-schema-compatibility (push) Successful in 7s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Pre-commit / pre-commit (push) Successful in 1m21s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Python Package Build Test / build (3.12) (push) Failing after 2s
Python Package Build Test / build (3.13) (push) Failing after 2s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 3s
Test External API and Providers / test-external (venv) (push) Failing after 5s
Some checks failed
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Update ReadTheDocs / update-readthedocs (push) Failing after 3s
UI Tests / ui-tests (22) (push) Successful in 43s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 3s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
API Conformance Tests / check-schema-compatibility (push) Successful in 7s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Pre-commit / pre-commit (push) Successful in 1m21s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Python Package Build Test / build (3.12) (push) Failing after 2s
Python Package Build Test / build (3.13) (push) Failing after 2s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 3s
Test External API and Providers / test-external (venv) (push) Failing after 5s
# What does this PR do? adds dynamic model support to TGI add new overwrite_completion_id feature to OpenAIMixin to deal with TGI always returning id="" ## Test Plan tgi: `docker run --gpus all --shm-size 1g -p 8080:80 -v /data:/data ghcr.io/huggingface/text-generation-inference --model-id Qwen/Qwen3-0.6B` stack: `TGI_URL=http://localhost:8080 uv run llama stack build --image-type venv --distro ci-tests --run` test: `./scripts/integration-tests.sh --stack-config http://localhost:8321 --setup tgi --subdirs inference --pattern openai`
This commit is contained in:
parent
ab321739f2
commit
f4ab154ade
14 changed files with 12218 additions and 20 deletions
|
@ -48,7 +48,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
|
|||
"remote::nvidia",
|
||||
"remote::runpod",
|
||||
"remote::sambanova",
|
||||
"remote::tgi",
|
||||
"remote::vertexai",
|
||||
# {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos,
|
||||
# or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}}
|
||||
|
@ -96,6 +95,7 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
|
|||
"remote::vertexai",
|
||||
# Error code: 400 - [{'error': {'code': 400, 'message': 'Unable to submit request because candidateCount must be 1 but
|
||||
# the entered value was 2. Update the candidateCount value and try again.', 'status': 'INVALID_ARGUMENT'}
|
||||
"remote::tgi", # TGI ignores n param silently
|
||||
):
|
||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")
|
||||
|
||||
|
@ -110,7 +110,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
|
|||
"remote::cerebras",
|
||||
"remote::databricks",
|
||||
"remote::runpod",
|
||||
"remote::tgi",
|
||||
):
|
||||
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.")
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue