mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 14:08:00 +00:00
Fix Dell distro and TGI inference
This commit is contained in:
parent
9b70bb9d4b
commit
ad66968909
2 changed files with 4 additions and 1 deletions
|
@ -23,6 +23,9 @@ providers:
|
||||||
provider_type: remote::chromadb
|
provider_type: remote::chromadb
|
||||||
config:
|
config:
|
||||||
url: ${env.CHROMA_URL}
|
url: ${env.CHROMA_URL}
|
||||||
|
kvstore:
|
||||||
|
type: sqlite
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_id: llama-guard
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
|
|
|
@ -309,7 +309,7 @@ class TGIAdapter(_HfAdapter):
|
||||||
raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
|
raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
|
||||||
log.info(f"Initializing TGI client with url={config.url}")
|
log.info(f"Initializing TGI client with url={config.url}")
|
||||||
self.client = AsyncInferenceClient(
|
self.client = AsyncInferenceClient(
|
||||||
model=config.url,
|
model=config.url,provider="hf-inference"
|
||||||
)
|
)
|
||||||
endpoint_info = await self.client.get_endpoint_info()
|
endpoint_info = await self.client.get_endpoint_info()
|
||||||
self.max_tokens = endpoint_info["max_total_tokens"]
|
self.max_tokens = endpoint_info["max_total_tokens"]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue