mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-08-02 08:44:44 +00:00

all distros

This commit is contained in:
parent a097bfa761
commit 7103892f54

16 changed files with 79 additions and 62 deletions
bedrock template (the capture dropped the file headers; each diff below is labeled with the distribution its contents identify):

@@ -6,6 +6,9 @@
 from pathlib import Path
 
+from llama_stack.distribution.datatypes import Provider
+
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -20,9 +23,15 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "bedrock"
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     return DistributionTemplate(
-        name="bedrock",
+        name=name,
         distro_type="self_hosted",
         description="Use AWS Bedrock for running LLM inference and safety",
         docker_image=None,
@@ -30,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
         providers=providers,
         default_models=[],
         run_configs={
-            "run.yaml": RunConfigSettings(),
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "memory": [memory_provider],
+                },
+            ),
         },
         run_config_env_vars={
             "LLAMASTACK_PORT": (
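Every template diff in this commit repeats the same pattern, so it is worth spelling out once. A minimal sketch, assuming only the imports the diff itself adds; the helper name faiss_memory_provider is ours, not the repo's:

from llama_stack.distribution.datatypes import Provider
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig


def faiss_memory_provider(name: str) -> Provider:
    # The same Provider each template now inlines: an inline faiss memory
    # provider whose sample config is rooted at distributions/<name>.
    return Provider(
        provider_id="faiss",
        provider_type="inline::faiss",
        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
    )

The `name` variable introduced in each template feeds both this provider's config path and the `name=name` argument to DistributionTemplate, so the distro name is stated once per file.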
bedrock run.yaml:

@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: bedrock
     provider_type: remote::bedrock
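The run.yaml context above shows what FaissImplConfig.sample_run_config(f"distributions/bedrock") evidently expands to. A sketch of that shape, inferred from the generated YAML rather than from the config class itself:

def faiss_sample_config_shape(distro_dir: str) -> dict:
    # Inferred shape only: the real FaissImplConfig may build this through
    # nested config objects rather than a plain dict.
    return {
        "kvstore": {
            "type": "sqlite",
            "namespace": None,
            "db_path": "${env.SQLITE_STORE_DIR:~/.llama/" + distro_dir + "}/faiss_store.db",
        }
    }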
fireworks template:

@@ -9,6 +9,7 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
 
@@ -27,11 +28,18 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
 
+    name = "fireworks"
+
     inference_provider = Provider(
         provider_id="fireworks",
         provider_type="remote::fireworks",
         config=FireworksImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     core_model_to_hf_repo = {
         m.descriptor(): m.huggingface_repo for m in all_registered_models()
@@ -45,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate:
     ]
 
     return DistributionTemplate(
-        name="fireworks",
+        name=name,
         distro_type="self_hosted",
         description="Use Fireworks.AI for running LLM inference",
         docker_image=None,
@@ -56,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=default_models,
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
fireworks run.yaml:

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
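The db_path values in these run.yaml files use a ${env.VAR:default} syntax. A small sketch of how such a resolver could behave, as our reading of the syntax rather than llama-stack's actual implementation:

import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")


def substitute_env(value: str) -> str:
    # Replace each ${env.VAR:default} with the environment value, falling
    # back to the default after the colon when VAR is unset.
    def repl(match: re.Match) -> str:
        var, default = match.group(1), match.group(2)
        return os.environ.get(var, default or "")

    return _ENV_PATTERN.sub(repl, value)


# With SQLITE_STORE_DIR unset, the fireworks db_path above resolves to
# "~/.llama/distributions/fireworks/faiss_store.db".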
hf-endpoint template:

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -20,12 +21,17 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "hf-endpoint"
     inference_provider = Provider(
         provider_id="hf-endpoint",
         provider_type="remote::hf::endpoint",
         config=InferenceEndpointImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-endpoint",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
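The run.yaml diffs suggest what provider_overrides does: the override list replaces the generated providers for that API wholesale, which is why chromadb and pgvector disappear from the memory section once the faiss override is in place. A simplified model of that merge, assumed rather than taken from the actual RunConfigSettings logic:

def apply_provider_overrides(defaults: dict, overrides: dict) -> dict:
    # Whole-list replacement per API: an overridden API keeps only the
    # providers named in its override entry.
    merged = dict(defaults)
    merged.update(overrides)
    return merged


# apply_provider_overrides(
#     {"memory": ["faiss", "chromadb", "pgvector"], "inference": ["hf-endpoint"]},
#     {"memory": ["faiss"]},
# ) == {"memory": ["faiss"], "inference": ["hf-endpoint"]}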
hf-endpoint run.yaml:

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
hf-serverless template:

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -21,11 +22,17 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
 
+    name = "hf-serverless"
     inference_provider = Provider(
         provider_id="hf-serverless",
         provider_type="remote::hf::serverless",
         config=InferenceAPIImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-serverless",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
hf-serverless run.yaml:

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
meta-reference-gpu template:

@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference",
@@ -33,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -44,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="meta-reference-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -54,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
meta-reference-gpu run.yaml:

@@ -27,12 +27,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
meta-reference-quantized-gpu template:

@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceQuantizedInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-quantized-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference-quantized",
@@ -33,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
         provider_id="meta-reference-inference",
     )
     return DistributionTemplate(
-        name="meta-reference-quantized-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -49,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
meta-reference-quantized-gpu run.yaml:

@@ -29,12 +29,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
remote-vllm run.yaml:

@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
remote-vllm template:

@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate:
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
     }
-
+    name = "remote-vllm"
     inference_provider = Provider(
         provider_id="vllm-inference",
         provider_type="remote::vllm",
@@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.VLLM_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="remote-vllm",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
tgi run.yaml:

@@ -25,12 +25,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
tgi template:

@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -22,7 +23,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "tgi"
     inference_provider = Provider(
         provider_id="tgi-inference",
         provider_type="remote::tgi",
@@ -30,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.TGI_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -41,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="tgi",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) TGI server for running LLM inference",
         docker_image=None,
@@ -52,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
            ),