Merge branch 'main' into groq

2025-12-17 16:12:46 +00:00 · 2024-11-26 12:28:31 -06:00 · 2024-11-26 12:28:31 -06:00 · bc427b3081
commit bc427b3081
parent 7d7d1e6ea1 d3956a1d22
9 changed files with 43 additions and 8 deletions
--- a/llama_stack/distribution/utils/model_utils.py
+++ b/llama_stack/distribution/utils/model_utils.py
@@ -4,11 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import os
+from pathlib import Path

 from .config_dirs import DEFAULT_CHECKPOINT_DIR


 def model_local_dir(descriptor: str) -> str:
-    path = os.path.join(DEFAULT_CHECKPOINT_DIR, descriptor)
-    return path.replace(":", "-")
+    return str(Path(DEFAULT_CHECKPOINT_DIR) / (descriptor.replace(":", "-")))
--- a/llama_stack/providers/inline/datasetio/__init__.py
+++ b/llama_stack/providers/inline/datasetio/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
+++ b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
@@ -10,7 +10,7 @@ from llama_stack.apis.scoring_functions import ScoringFn

 answer_correctness_fn_def = ScoringFn(
    identifier="braintrust::answer-correctness",
-    description="Test whether an output is factual, compared to an original (`expected`) value. One of Braintrust LLM basd scorer https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
+    description="Scores the correctness of the answer based on the ground truth.. One of Braintrust LLM basd scorer https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
    params=None,
    provider_id="braintrust",
    provider_resource_id="answer-correctness",
--- a/llama_stack/providers/remote/datasetio/__init__.py
+++ b/llama_stack/providers/remote/datasetio/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/providers/remote/datasetio/huggingface/config.py
+++ b/llama_stack/providers/remote/datasetio/huggingface/config.py
@@ -3,12 +3,13 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from pydantic import BaseModel
+
 from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
 from llama_stack.providers.utils.kvstore.config import (
    KVStoreConfig,
    SqliteKVStoreConfig,
 )
-from pydantic import BaseModel


 class HuggingfaceDatasetIOConfig(BaseModel):
--- a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -9,6 +9,7 @@ from llama_stack.apis.datasetio import *  # noqa: F403


 import datasets as hf_datasets
+
 from llama_stack.providers.datatypes import DatasetsProtocolPrivate
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_url
 from llama_stack.providers.utils.kvstore import kvstore_impl
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -89,8 +89,9 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
+        model = await self.model_store.get_model(model_id)
        request = CompletionRequest(
-            model=model_id,
+            model=model.provider_resource_id,
            content=content,
            sampling_params=sampling_params,
            response_format=response_format,
@@ -194,8 +195,9 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
+        model = await self.model_store.get_model(model_id)
        request = ChatCompletionRequest(
-            model=model_id,
+            model=model.provider_resource_id,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools or [],
@@ -249,7 +251,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):

    def _get_params(self, request: ChatCompletionRequest) -> dict:
        prompt, input_tokens = chat_completion_request_to_model_input_info(
-            request, self.formatter
+            request, self.register_helper.get_llama_model(request.model), self.formatter
        )
        return dict(
            prompt=prompt,
--- a/llama_stack/providers/tests/inference/fixtures.py
+++ b/llama_stack/providers/tests/inference/fixtures.py
@@ -20,6 +20,7 @@ from llama_stack.providers.remote.inference.bedrock import BedrockConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
+from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.providers.remote.inference.groq import GroqImplConfig
@@ -172,6 +173,21 @@ def inference_groq() -> ProviderFixture:
        ),
    )

+@pytest.fixture(scope="session")
+def inference_tgi() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="tgi",
+                provider_type="remote::tgi",
+                config=TGIImplConfig(
+                    url=get_env_or_fail("TGI_URL"),
+                    api_token=os.getenv("TGI_API_TOKEN", None),
+                ).model_dump(),
+            )
+        ],
+    )
+

 def get_model_short_name(model_name: str) -> str:
    """Convert model name to a short test identifier.
@@ -208,6 +224,7 @@ INFERENCE_FIXTURES = [
    "bedrock",
    "nvidia",
    "groq",
+    "tgi",
 ]


--- a/llama_stack/providers/utils/scoring/__init__.py
+++ b/llama_stack/providers/utils/scoring/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.