From 34be07e0dfb5c3f66854970e65b4d5591242f9ee Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Sun, 24 Nov 2024 14:18:59 -0800
Subject: [PATCH 1/6] Ensure model_local_dir does not mangle "C:\" on Windows

---
 llama_stack/distribution/utils/model_utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llama_stack/distribution/utils/model_utils.py b/llama_stack/distribution/utils/model_utils.py
index e104965a5..abd0dc087 100644
--- a/llama_stack/distribution/utils/model_utils.py
+++ b/llama_stack/distribution/utils/model_utils.py
@@ -4,11 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import os
+from pathlib import Path
 
 from .config_dirs import DEFAULT_CHECKPOINT_DIR
 
 
 def model_local_dir(descriptor: str) -> str:
-    path = os.path.join(DEFAULT_CHECKPOINT_DIR, descriptor)
-    return path.replace(":", "-")
+    return str(Path(DEFAULT_CHECKPOINT_DIR) / (descriptor.replace(":", "-")))

From 60cb7f64affb1306be9dc072bb69ea1b05361b91 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Mon, 25 Nov 2024 09:42:27 -0800
Subject: [PATCH 2/6] add missing __init__

---
 llama_stack/providers/utils/scoring/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 llama_stack/providers/utils/scoring/__init__.py

diff --git a/llama_stack/providers/utils/scoring/__init__.py b/llama_stack/providers/utils/scoring/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/providers/utils/scoring/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

From de7af28756e6558fae2679b8034d4664cd1ce776 Mon Sep 17 00:00:00 2001
From: Dinesh Yeduguru
Date: Mon, 25 Nov 2024 13:17:02 -0800
Subject: [PATCH 3/6] Tgi fixture (#519)

# What does this PR do?

* Add a test fixture for tgi
* Fixes the logic to correctly pass the llama model for chat completion

Fixes #514

## Test Plan

pytest -k "tgi" llama_stack/providers/tests/inference/test_text_inference.py --env TGI_URL=http://localhost:$INFERENCE_PORT --env TGI_API_TOKEN=$HF_TOKEN
---
 .../providers/remote/inference/tgi/tgi.py |  8 +++++---
 .../providers/tests/inference/fixtures.py | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index 621188284..01981c62b 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -89,8 +89,9 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
+        model = await self.model_store.get_model(model_id)
         request = CompletionRequest(
-            model=model_id,
+            model=model.provider_resource_id,
             content=content,
             sampling_params=sampling_params,
             response_format=response_format,
@@ -194,8 +195,9 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
+        model = await self.model_store.get_model(model_id)
         request = ChatCompletionRequest(
-            model=model_id,
+            model=model.provider_resource_id,
             messages=messages,
             sampling_params=sampling_params,
             tools=tools or [],
@@ -249,7 +251,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
     def _get_params(self, request: ChatCompletionRequest) -> dict:
         prompt, input_tokens = chat_completion_request_to_model_input_info(
-            request, self.formatter
+            request, self.register_helper.get_llama_model(request.model), self.formatter
         )
         return dict(
             prompt=prompt,
diff --git a/llama_stack/providers/tests/inference/fixtures.py b/llama_stack/providers/tests/inference/fixtures.py
index 2007818e5..a427eef12 100644
--- a/llama_stack/providers/tests/inference/fixtures.py
+++ b/llama_stack/providers/tests/inference/fixtures.py
@@ -20,6 +20,7 @@ from llama_stack.providers.remote.inference.bedrock import BedrockConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
+from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.providers.tests.resolver import construct_stack_for_test
@@ -156,6 +157,22 @@ def inference_nvidia() -> ProviderFixture:
     )
 
 
+@pytest.fixture(scope="session")
+def inference_tgi() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="tgi",
+                provider_type="remote::tgi",
+                config=TGIImplConfig(
+                    url=get_env_or_fail("TGI_URL"),
+                    api_token=os.getenv("TGI_API_TOKEN", None),
+                ).model_dump(),
+            )
+        ],
+    )
+
+
 def get_model_short_name(model_name: str) -> str:
     """Convert model name to a short test identifier.
 
@@ -190,6 +207,7 @@ INFERENCE_FIXTURES = [
     "remote",
     "bedrock",
     "nvidia",
+    "tgi",
 ]
 
 

From bbd81231ce4032a6cfc8f7fb2df0b258a003cc31 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Mon, 25 Nov 2024 17:23:27 -0800
Subject: [PATCH 4/6] add missing __init__

---
 llama_stack/providers/inline/datasetio/__init__.py | 5 +++++
 llama_stack/providers/remote/datasetio/__init__.py | 5 +++++
 2 files changed, 10 insertions(+)
 create mode 100644 llama_stack/providers/inline/datasetio/__init__.py
 create mode 100644 llama_stack/providers/remote/datasetio/__init__.py

diff --git a/llama_stack/providers/inline/datasetio/__init__.py b/llama_stack/providers/inline/datasetio/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/providers/inline/datasetio/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/providers/remote/datasetio/__init__.py b/llama_stack/providers/remote/datasetio/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/providers/remote/datasetio/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

From 2936133f95b5b5bb90e34e27630643434c53a7da Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Mon, 25 Nov 2024 18:55:54 -0800
Subject: [PATCH 5/6] precommit

---
 llama_stack/providers/remote/datasetio/huggingface/config.py | 3 ++-
 .../providers/remote/datasetio/huggingface/huggingface.py    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/datasetio/huggingface/config.py b/llama_stack/providers/remote/datasetio/huggingface/config.py
index 46470ce49..1cdae0625 100644
--- a/llama_stack/providers/remote/datasetio/huggingface/config.py
+++ b/llama_stack/providers/remote/datasetio/huggingface/config.py
@@ -3,12 +3,13 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from pydantic import BaseModel
+
 from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
 from llama_stack.providers.utils.kvstore.config import (
     KVStoreConfig,
     SqliteKVStoreConfig,
 )
-from pydantic import BaseModel
 
 class HuggingfaceDatasetIOConfig(BaseModel):
diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index 8d34df672..c2e4506bf 100644
--- a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -9,6 +9,7 @@
 from llama_stack.apis.datasetio import *  # noqa: F403
 
 import datasets as hf_datasets
+
 from llama_stack.providers.datatypes import DatasetsProtocolPrivate
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_url
 from llama_stack.providers.utils.kvstore import kvstore_impl

From d3956a1d22bbd480f4e14fd3f79b01cab7a23661 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Mon, 25 Nov 2024 22:02:45 -0800
Subject: [PATCH 6/6] fix description

---
 .../scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
index 554590f12..dc5df8e78 100644
--- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
+++ b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
@@ -10,7 +10,7 @@ from llama_stack.apis.scoring_functions import ScoringFn
 
 answer_correctness_fn_def = ScoringFn(
     identifier="braintrust::answer-correctness",
-    description="Test whether an output is factual, compared to an original (`expected`) value. One of Braintrust LLM basd scorer https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
+    description="Scores the correctness of the answer based on the ground truth. One of the Braintrust LLM-based scorers: https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
     params=None,
     provider_id="braintrust",
     provider_resource_id="answer-correctness",
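
A minimal sketch (not part of the patch series) of why the PATCH 1/6 change matters. The checkpoint directory and the descriptor below are hypothetical values; `ntpath` and `PureWindowsPath` are used so the Windows path behavior reproduces on any platform:

```python
from ntpath import join as windows_join  # os.path.join semantics on Windows
from pathlib import PureWindowsPath

DEFAULT_CHECKPOINT_DIR = r"C:\Users\me\.llama\checkpoints"  # hypothetical
descriptor = "Llama3.2-3B-Instruct:int4"  # hypothetical descriptor with ":"

# Old behavior: join first, then strip ":" from the whole path. This also
# rewrites the drive-colon, mangling "C:\" into "C-\".
old = windows_join(DEFAULT_CHECKPOINT_DIR, descriptor).replace(":", "-")
print(old)  # C-\Users\me\.llama\checkpoints\Llama3.2-3B-Instruct-int4

# New behavior: sanitize only the descriptor, then join, leaving the drive
# prefix intact.
new = str(PureWindowsPath(DEFAULT_CHECKPOINT_DIR) / descriptor.replace(":", "-"))
print(new)  # C:\Users\me\.llama\checkpoints\Llama3.2-3B-Instruct-int4
```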
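Likewise, a sketch of the model-id resolution that PATCH 3/6 adds to the TGI adapter, using stand-in types (`Model`, `FakeModelStore`, and the model names are hypothetical; the real `model_store` is supplied by the llama-stack routing layer):

```python
import asyncio
from dataclasses import dataclass


@dataclass
class Model:
    identifier: str            # stack-level alias clients pass as model_id
    provider_resource_id: str  # name the backing TGI server actually serves


class FakeModelStore:
    """Stand-in for the adapter's model_store."""

    def __init__(self, models: list[Model]) -> None:
        self._by_id = {m.identifier: m for m in models}

    async def get_model(self, model_id: str) -> Model:
        return self._by_id[model_id]


async def main() -> None:
    store = FakeModelStore(
        [Model("Llama3.1-8B-Instruct", "meta-llama/Llama-3.1-8B-Instruct")]
    )
    # Before the fix the adapter forwarded the alias itself; after it, the
    # alias is resolved and the provider-side name goes into the request.
    model = await store.get_model("Llama3.1-8B-Instruct")
    print(model.provider_resource_id)  # meta-llama/Llama-3.1-8B-Instruct


asyncio.run(main())
```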