Merge branch 'main' into henrytu/cerebras-integration

2025-12-18 16:29:47 +00:00 · 2024-12-02 10:57:59 -05:00 · 2024-12-02 10:57:59 -05:00 · c29e3271d3
commit c29e3271d3
parent 659764b91f fe48b9fb8c
38 changed files with 523 additions and 139 deletions
--- a/llama_stack/providers/tests/inference/fixtures.py
+++ b/llama_stack/providers/tests/inference/fixtures.py
@@ -21,6 +21,7 @@ from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
+from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.providers.tests.resolver import construct_stack_for_test
@@ -172,6 +173,22 @@ def inference_nvidia() -> ProviderFixture:
    )


+@pytest.fixture(scope="session")
+def inference_tgi() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="tgi",
+                provider_type="remote::tgi",
+                config=TGIImplConfig(
+                    url=get_env_or_fail("TGI_URL"),
+                    api_token=os.getenv("TGI_API_TOKEN", None),
+                ).model_dump(),
+            )
+        ],
+    )
+
+
 def get_model_short_name(model_name: str) -> str:
    """Convert model name to a short test identifier.

@@ -207,6 +224,7 @@ INFERENCE_FIXTURES = [
    "bedrock",
    "cerebras",
    "nvidia",
+    "tgi",
 ]