Redact sensitive information from configs when printing, etc.

This commit is contained in:
Ashwin Bharambe 2025-01-02 11:40:48 -08:00
parent d9f75cc98f
commit e3f187fb83
13 changed files with 54 additions and 21 deletions

View file

@ -7,7 +7,7 @@
from typing import Optional
from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, SecretStr
@json_schema_type
@ -15,7 +15,7 @@ class TGIImplConfig(BaseModel):
url: str = Field(
description="The URL for the TGI serving endpoint",
)
api_token: Optional[str] = Field(
api_token: Optional[SecretStr] = Field(
default=None,
description="A bearer token if your TGI endpoint is protected.",
)
@ -32,7 +32,7 @@ class InferenceEndpointImplConfig(BaseModel):
endpoint_name: str = Field(
description="The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided.",
)
api_token: Optional[str] = Field(
api_token: Optional[SecretStr] = Field(
default=None,
description="Your Hugging Face user access token (will default to locally saved token if not provided)",
)
@ -55,7 +55,7 @@ class InferenceAPIImplConfig(BaseModel):
huggingface_repo: str = Field(
description="The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct')",
)
api_token: Optional[str] = Field(
api_token: Optional[SecretStr] = Field(
default=None,
description="Your Hugging Face user access token (will default to locally saved token if not provided)",
)

View file

@ -290,7 +290,9 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
class TGIAdapter(_HfAdapter):
    """Hugging Face adapter that talks to a TGI serving endpoint by URL."""

    async def initialize(self, config: TGIImplConfig) -> None:
        """Create the async inference client and cache endpoint metadata.

        Args:
            config: TGI settings; ``url`` is the serving endpoint and
                ``api_token`` is an optional bearer token wrapped in a
                ``SecretStr``.
        """
        log.info(f"Initializing TGI client with url={config.url}")
        # api_token defaults to None for unprotected endpoints, so only unwrap
        # the SecretStr when one was actually configured; calling
        # get_secret_value() on None would raise AttributeError.
        token = (
            config.api_token.get_secret_value()
            if config.api_token is not None
            else None
        )
        self.client = AsyncInferenceClient(model=config.url, token=token)
        endpoint_info = await self.client.get_endpoint_info()
        self.max_tokens = endpoint_info["max_total_tokens"]
        self.model_id = endpoint_info["model_id"]
@ -299,7 +301,7 @@ class TGIAdapter(_HfAdapter):
class InferenceAPIAdapter(_HfAdapter):
async def initialize(self, config: InferenceAPIImplConfig) -> None:
self.client = AsyncInferenceClient(
model=config.huggingface_repo, token=config.api_token
model=config.huggingface_repo, token=config.api_token.get_secret_value()
)
endpoint_info = await self.client.get_endpoint_info()
self.max_tokens = endpoint_info["max_total_tokens"]
@ -309,7 +311,7 @@ class InferenceAPIAdapter(_HfAdapter):
class InferenceEndpointAdapter(_HfAdapter):
async def initialize(self, config: InferenceEndpointImplConfig) -> None:
# Get the inference endpoint details
api = HfApi(token=config.api_token)
api = HfApi(token=config.api_token.get_secret_value())
endpoint = api.get_inference_endpoint(config.endpoint_name)
# Wait for the endpoint to be ready (if not already)