mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
# What does this PR do? The builtin implementation of the code interpreter is not robust and has a really weak sandboxing shell (the `bubblewrap` container). Given that better MCP code interpreter servers are becoming available, we should use them instead of baking an implementation into the Stack and expanding the vulnerability surface to the rest of the Stack. This PR only does the removal. We will add examples of how to integrate with MCPs in subsequent PRs. ## Test Plan Existing tests.
147 lines
5.1 KiB
Python
147 lines
5.1 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from pathlib import Path
|
|
|
|
from llama_stack.apis.models.models import ModelType
|
|
from llama_stack.distribution.datatypes import (
|
|
ModelInput,
|
|
Provider,
|
|
ShieldInput,
|
|
ToolGroupInput,
|
|
)
|
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
|
SentenceTransformersInferenceConfig,
|
|
)
|
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
|
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
|
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
|
|
|
|
|
def get_distribution_template() -> DistributionTemplate:
    """Assemble the ``DistributionTemplate`` for the "tgi" distribution.

    The template declares the provider types offered for each API and
    defines two run configurations:

    * ``run.yaml`` -- the main TGI inference provider plus the
      sentence-transformers embedding provider, backed by a FAISS vector
      store.
    * ``run-with-safety.yaml`` -- the main TGI inference provider plus a
      second TGI provider (``tgi-safety``) serving the safety model, with a
      shield registered under the safety model's id.

    Returns:
        The populated ``DistributionTemplate``.
    """
    distro_name = "tgi"

    # Provider types available for each API in this distribution.
    available_providers = {
        "inference": ["remote::tgi", "inline::sentence-transformers"],
        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
    }

    # --- Providers -------------------------------------------------------
    # NOTE(review): the "${env.NAME}" strings look like placeholders that are
    # substituted from the environment when the run config is loaded --
    # confirm against the config loader.
    tgi_provider = Provider(
        provider_id="tgi-inference",
        provider_type="remote::tgi",
        config=TGIImplConfig.sample_run_config(url="${env.TGI_URL}"),
    )
    tgi_safety_provider = Provider(
        provider_id="tgi-safety",
        provider_type="remote::tgi",
        config=TGIImplConfig.sample_run_config(url="${env.TGI_SAFETY_URL}"),
    )
    sentence_transformers_provider = Provider(
        provider_id="sentence-transformers",
        provider_type="inline::sentence-transformers",
        config=SentenceTransformersInferenceConfig.sample_run_config(),
    )
    faiss_provider = Provider(
        provider_id="faiss",
        provider_type="inline::faiss",
        config=FaissVectorIOConfig.sample_run_config(
            f"~/.llama/distributions/{distro_name}"
        ),
    )

    # --- Models ----------------------------------------------------------
    llm_model = ModelInput(
        model_id="${env.INFERENCE_MODEL}",
        provider_id="tgi-inference",
    )
    minilm_embedding_model = ModelInput(
        model_id="all-MiniLM-L6-v2",
        provider_id="sentence-transformers",
        model_type=ModelType.embedding,
        metadata={"embedding_dimension": 384},
    )
    guard_model = ModelInput(
        model_id="${env.SAFETY_MODEL}",
        provider_id="tgi-safety",
    )

    # --- Tool groups (the same list is handed to both run configs) -------
    tool_groups = [
        ToolGroupInput(toolgroup_id="builtin::websearch", provider_id="tavily-search"),
        ToolGroupInput(toolgroup_id="builtin::rag", provider_id="rag-runtime"),
    ]

    # --- Run configurations ----------------------------------------------
    base_run_config = RunConfigSettings(
        provider_overrides={
            "inference": [tgi_provider, sentence_transformers_provider],
            "vector_io": [faiss_provider],
        },
        default_models=[llm_model, minilm_embedding_model],
        default_tool_groups=tool_groups,
    )
    # This variant swaps the embedding provider/model for the safety ones;
    # it does not register the sentence-transformers provider or model.
    safety_run_config = RunConfigSettings(
        provider_overrides={
            "inference": [tgi_provider, tgi_safety_provider],
            "vector_io": [faiss_provider],
        },
        default_models=[llm_model, guard_model],
        default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
        default_tool_groups=tool_groups,
    )

    # Each entry is a (value, description) pair; the first element is
    # presumably the default used when the variable is unset -- TODO confirm.
    env_vars = {
        "LLAMA_STACK_PORT": (
            "8321",
            "Port for the Llama Stack distribution server",
        ),
        "INFERENCE_MODEL": (
            "meta-llama/Llama-3.2-3B-Instruct",
            "Inference model loaded into the TGI server",
        ),
        "TGI_URL": (
            "http://127.0.0.1:8080/v1",
            "URL of the TGI server with the main inference model",
        ),
        "TGI_SAFETY_URL": (
            "http://127.0.0.1:8081/v1",
            "URL of the TGI server with the safety model",
        ),
        "SAFETY_MODEL": (
            "meta-llama/Llama-Guard-3-1B",
            "Name of the safety (Llama-Guard) model to use",
        ),
    }

    return DistributionTemplate(
        name=distro_name,
        distro_type="self_hosted",
        description="Use (an external) TGI server for running LLM inference",
        container_image=None,
        # The doc template sits next to this module on disk.
        template_path=Path(__file__).parent / "doc_template.md",
        providers=available_providers,
        run_configs={
            "run.yaml": base_run_config,
            "run-with-safety.yaml": safety_run_config,
        },
        run_config_env_vars=env_vars,
    )
|