Mirror of https://github.com/meta-llama/llama-stack.git (synced 2026-01-01 01:00:00 +00:00)

Commit f5edd07b29: Merge branch 'main' into patch-1
91 changed files with 995 additions and 632 deletions
@@ -6,54 +6,65 @@
from typing import Literal, Union

-from llama_models.schema_utils import register_schema
+from llama_models.schema_utils import json_schema_type, register_schema
from pydantic import BaseModel, Field
from typing_extensions import Annotated


+@json_schema_type
class StringType(BaseModel):
    type: Literal["string"] = "string"


+@json_schema_type
class NumberType(BaseModel):
    type: Literal["number"] = "number"


+@json_schema_type
class BooleanType(BaseModel):
    type: Literal["boolean"] = "boolean"


+@json_schema_type
class ArrayType(BaseModel):
    type: Literal["array"] = "array"


+@json_schema_type
class ObjectType(BaseModel):
    type: Literal["object"] = "object"


+@json_schema_type
class JsonType(BaseModel):
    type: Literal["json"] = "json"


+@json_schema_type
class UnionType(BaseModel):
    type: Literal["union"] = "union"


+@json_schema_type
class ChatCompletionInputType(BaseModel):
    # expects List[Message] for messages
    type: Literal["chat_completion_input"] = "chat_completion_input"


+@json_schema_type
class CompletionInputType(BaseModel):
    # expects InterleavedTextMedia for content
    type: Literal["completion_input"] = "completion_input"


+@json_schema_type
class AgentTurnInputType(BaseModel):
    # expects List[Message] for messages (may also include attachments?)
    type: Literal["agent_turn_input"] = "agent_turn_input"


+@json_schema_type
class DialogType(BaseModel):
    # expects List[Message] for messages
    # this type semantically contains the output label whereas ChatCompletionInputType does not
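Note (illustration, not part of the diff): each of these models carries a Literal "type" tag, the usual Pydantic pattern for a discriminated union. A minimal sketch of how such tagged models are typically combined is shown below; the union name and its two members are assumptions for illustration, since the actual union definition is truncated in this hunk.

    # Illustrative sketch only.
    from typing import Literal, Union

    from pydantic import BaseModel, Field
    from typing_extensions import Annotated

    class StringType(BaseModel):
        type: Literal["string"] = "string"

    class NumberType(BaseModel):
        type: Literal["number"] = "number"

    # "type" acts as the discriminator, so {"type": "number"} parses as NumberType.
    ParamType = Annotated[Union[StringType, NumberType], Field(discriminator="type")]

    class Parameter(BaseModel):
        param_type: ParamType

    print(Parameter(param_type={"type": "number"}).param_type)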
@@ -182,8 +182,8 @@ def _generate_run_config(
    """
    apis = list(build_config.distribution_spec.providers.keys())
    run_config = StackRunConfig(
-        docker_image=(
-            image_name if build_config.image_type == ImageType.docker.value else None
+        container_image=(
+            image_name if build_config.image_type == ImageType.container.value else None
        ),
        image_name=image_name,
        apis=apis,

@@ -238,7 +238,7 @@ def _run_stack_build_command_from_build_config(
    image_name: Optional[str] = None,
    template_name: Optional[str] = None,
) -> None:
-    if build_config.image_type == ImageType.docker.value:
+    if build_config.image_type == ImageType.container.value:
        if template_name:
            image_name = f"distribution-{template_name}"
        else:
@@ -47,8 +47,8 @@ class StackBuild(Subcommand):
        self.parser.add_argument(
            "--image-type",
            type=str,
-            help="Image Type to use for the build. This can be either conda or docker. If not specified, will use the image type from the template config.",
-            choices=["conda", "docker", "venv"],
+            help="Image Type to use for the build. This can be either conda or container or venv. If not specified, will use the image type from the template config.",
+            choices=["conda", "container", "venv"],
            default="conda",
        )
@@ -27,7 +27,7 @@ class StackConfigure(Subcommand):
        self.parser.add_argument(
            "config",
            type=str,
-            help="Path to the build config file (e.g. ~/.llama/builds/<image_type>/<name>-build.yaml). For docker, this could also be the name of the docker image. ",
+            help="Path to the build config file (e.g. ~/.llama/builds/<image_type>/<name>-build.yaml). For container, this could also be the name of the container image. ",
        )

        self.parser.add_argument(
@@ -92,9 +92,9 @@ class StackRun(Subcommand):
        )

        if not config_file.exists() and not has_yaml_suffix:
-            # check if it's a build config saved to docker dir
+            # check if it's a build config saved to container dir
            config_file = Path(
-                BUILDS_BASE_DIR / ImageType.docker.value / f"{args.config}-run.yaml"
+                BUILDS_BASE_DIR / ImageType.container.value / f"{args.config}-run.yaml"
            )

        if not config_file.exists() and not has_yaml_suffix:

@@ -115,12 +115,12 @@ class StackRun(Subcommand):
        config_dict = yaml.safe_load(config_file.read_text())
        config = parse_and_maybe_upgrade_config(config_dict)

-        if config.docker_image:
+        if config.container_image:
            script = (
                importlib.resources.files("llama_stack")
                / "distribution/start_container.sh"
            )
-            run_args = [script, config.docker_image]
+            run_args = [script, config.container_image]
        else:
            current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
            image_name = args.image_name or current_conda_env
@@ -38,7 +38,7 @@ SERVER_DEPENDENCIES = [


class ImageType(Enum):
-    docker = "docker"
+    container = "container"
    conda = "conda"
    venv = "venv"


@@ -77,8 +77,8 @@ def get_provider_dependencies(

        provider_spec = providers_for_api[provider_type]
        deps.extend(provider_spec.pip_packages)
-        if provider_spec.docker_image:
-            raise ValueError("A stack's dependencies cannot have a docker image")
+        if provider_spec.container_image:
+            raise ValueError("A stack's dependencies cannot have a container image")

    normal_deps = []
    special_deps = []

@@ -109,23 +109,25 @@ def build_image(
    image_name: str,
    template_name: Optional[str] = None,
):
-    docker_image = build_config.distribution_spec.docker_image or "python:3.10-slim"
+    container_image = (
+        build_config.distribution_spec.container_image or "python:3.10-slim"
+    )

    normal_deps, special_deps = get_provider_dependencies(
        build_config.distribution_spec.providers
    )
    normal_deps += SERVER_DEPENDENCIES

-    if build_config.image_type == ImageType.docker.value:
+    if build_config.image_type == ImageType.container.value:
        script = str(
            importlib.resources.files("llama_stack") / "distribution/build_container.sh"
        )
        args = [
            script,
            image_name,
-            docker_image,
+            container_image,
            str(build_file_path),
-            str(BUILDS_BASE_DIR / ImageType.docker.value),
+            str(BUILDS_BASE_DIR / ImageType.container.value),
            " ".join(normal_deps),
        ]
    elif build_config.image_type == ImageType.conda.value:
@@ -13,7 +13,7 @@ PYPI_VERSION=${PYPI_VERSION:-}
BUILD_PLATFORM=${BUILD_PLATFORM:-}

if [ "$#" -lt 4 ]; then
-  echo "Usage: $0 <build_name> <docker_base> <pip_dependencies> [<special_pip_deps>]" >&2
+  echo "Usage: $0 <build_name> <container_base> <pip_dependencies> [<special_pip_deps>]" >&2
  echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn' " >&2
  exit 1
fi

@@ -24,7 +24,7 @@ set -euo pipefail

build_name="$1"
image_name="distribution-$build_name"
-docker_base=$2
+container_base=$2
build_file_path=$3
host_build_dir=$4
pip_dependencies=$5

@@ -36,14 +36,14 @@ NC='\033[0m' # No Color

SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR"))
-DOCKER_BINARY=${DOCKER_BINARY:-docker}
-DOCKER_OPTS=${DOCKER_OPTS:-}
+CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
+CONTAINER_OPTS=${CONTAINER_OPTS:-}

TEMP_DIR=$(mktemp -d)

-add_to_docker() {
+add_to_container() {
  local input
-  output_file="$TEMP_DIR/Dockerfile"
+  output_file="$TEMP_DIR/Containerfile"
  if [ -t 0 ]; then
    printf '%s\n' "$1" >>"$output_file"
  else

@@ -53,9 +53,9 @@ add_to_docker() {
}

# Update and install UBI9 components if UBI9 base image is used
-if [[ $docker_base == *"registry.access.redhat.com/ubi9"* ]]; then
-  add_to_docker << EOF
-FROM $docker_base
+if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
+  add_to_container << EOF
+FROM $container_base
WORKDIR /app

RUN microdnf -y update && microdnf install -y iputils net-tools wget \

@@ -64,8 +64,8 @@ RUN microdnf -y update && microdnf install -y iputils net-tools wget \

EOF
else
-  add_to_docker << EOF
-FROM $docker_base
+  add_to_container << EOF
+FROM $container_base
WORKDIR /app

RUN apt-get update && apt-get install -y \

@@ -82,7 +82,7 @@ fi
# Add pip dependencies first since llama-stack is what will change most often
# so we can reuse layers.
if [ -n "$pip_dependencies" ]; then
-  add_to_docker << EOF
+  add_to_container << EOF
RUN pip install --no-cache $pip_dependencies
EOF
fi

@@ -90,7 +90,7 @@ fi
if [ -n "$special_pip_deps" ]; then
  IFS='#' read -ra parts <<<"$special_pip_deps"
  for part in "${parts[@]}"; do
-    add_to_docker <<EOF
+    add_to_container <<EOF
RUN pip install --no-cache $part
EOF
  done

@@ -108,16 +108,16 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
  # Install in editable format. We will mount the source code into the container
  # so that changes will be reflected in the container without having to do a
  # rebuild. This is just for development convenience.
-  add_to_docker << EOF
+  add_to_container << EOF
RUN pip install --no-cache -e $stack_mount
EOF
else
  if [ -n "$TEST_PYPI_VERSION" ]; then
    # these packages are damaged in test-pypi, so install them first
-    add_to_docker << EOF
+    add_to_container << EOF
RUN pip install fastapi libcst
EOF
-    add_to_docker << EOF
+    add_to_container << EOF
RUN pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
  llama-models==$TEST_PYPI_VERSION llama-stack-client==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION

@@ -128,7 +128,7 @@ EOF
    else
      SPEC_VERSION="llama-stack"
    fi
-    add_to_docker << EOF
+    add_to_container << EOF
RUN pip install --no-cache $SPEC_VERSION
EOF
  fi

@@ -140,14 +140,14 @@ if [ -n "$LLAMA_MODELS_DIR" ]; then
    exit 1
  fi

-  add_to_docker << EOF
+  add_to_container << EOF
RUN pip uninstall -y llama-models
RUN pip install --no-cache $models_mount

EOF
fi

-add_to_docker << EOF
+add_to_container << EOF

# This would be good in production but for debugging flexibility lets not add it right now
# We need a more solid production ready entrypoint.sh anyway

@@ -156,8 +156,8 @@ ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--templat

EOF

-printf "Dockerfile created successfully in $TEMP_DIR/Dockerfile\n\n"
-cat $TEMP_DIR/Dockerfile
+printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n"
+cat $TEMP_DIR/Containerfile
printf "\n"

mounts=""

@@ -170,7 +170,7 @@ fi

if command -v selinuxenabled &>/dev/null && selinuxenabled; then
  # Disable SELinux labels -- we don't want to relabel the llama-stack source dir
-  DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
+  CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi

# Set version tag based on PyPI version

@@ -200,7 +200,7 @@ else
fi

set -x
-$DOCKER_BINARY build $DOCKER_OPTS $PLATFORM -t $image_tag -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts
+$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag -f "$TEMP_DIR/Containerfile" "$REPO_DIR" $mounts

# clean up tmp/configs
set +x
@@ -6,8 +6,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-DOCKER_BINARY=${DOCKER_BINARY:-docker}
-DOCKER_OPTS=${DOCKER_OPTS:-}
+CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
+CONTAINER_OPTS=${CONTAINER_OPTS:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}

set -euo pipefail

@@ -24,13 +24,13 @@ if [ $# -lt 2 ]; then
  exit 1
fi

-docker_image="$1"
+container_image="$1"
host_build_dir="$2"
container_build_dir="/app/builds"

if command -v selinuxenabled &> /dev/null && selinuxenabled; then
  # Disable SELinux labels
-  DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
+  CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi

mounts=""

@@ -39,9 +39,9 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
fi

set -x
-$DOCKER_BINARY run $DOCKER_OPTS -it \
+$CONTAINER_BINARY run $CONTAINER_OPTS -it \
  --entrypoint "/usr/local/bin/llama" \
  -v $host_build_dir:$container_build_dir \
  $mounts \
-  $docker_image \
+  $container_image \
  stack configure ./llamastack-build.yaml --output-dir $container_build_dir
@@ -73,7 +73,7 @@ class AutoRoutedProviderSpec(ProviderSpec):
    provider_type: str = "router"
    config_class: str = ""

-    docker_image: Optional[str] = None
+    container_image: Optional[str] = None
    routing_table_api: Api
    module: str
    provider_data_validator: Optional[str] = Field(

@@ -89,7+89,7 @@ class AutoRoutedProviderSpec(ProviderSpec):
class RoutingTableProviderSpec(ProviderSpec):
    provider_type: str = "routing_table"
    config_class: str = ""
-    docker_image: Optional[str] = None
+    container_image: Optional[str] = None

    router_api: Api
    module: str

@@ -101,7 +101,7 @@ class DistributionSpec(BaseModel):
        default="",
        description="Description of the distribution",
    )
-    docker_image: Optional[str] = None
+    container_image: Optional[str] = None
    providers: Dict[str, Union[str, List[str]]] = Field(
        default_factory=dict,
        description="""

@@ -127,9 +127,9 @@ Reference to the distribution this package refers to. For unregistered (adhoc) p
this could be just a hash
    """,
    )
-    docker_image: Optional[str] = Field(
+    container_image: Optional[str] = Field(
        default=None,
-        description="Reference to the docker image if this package refers to a container",
+        description="Reference to the container image if this package refers to a container",
    )
    apis: List[str] = Field(
        default_factory=list,

@@ -168,5 +168,5 @@ class BuildConfig(BaseModel):
    )
    image_type: str = Field(
        default="conda",
-        description="Type of package to build (conda | docker | venv)",
+        description="Type of package to build (conda | container | venv)",
    )
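Note (illustration, not part of the diff): for orientation, a minimal sketch of the renamed field on the config side is shown below. The class names and the reduced field set are stand-ins, not the full models from this file.

    # Illustrative stand-ins only.
    from typing import Optional

    from pydantic import BaseModel, Field

    class DistributionSpecLike(BaseModel):
        description: str = ""
        container_image: Optional[str] = None  # was docker_image before this change

    class BuildConfigLike(BaseModel):
        image_type: str = Field(
            default="conda",
            description="Type of package to build (conda | container | venv)",
        )

    print(BuildConfigLike(image_type="container"))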
@@ -145,7 +145,9 @@ async def resolve_impls(
            log.warning(
                f"Provider `{provider.provider_type}` for API `{api}` is deprecated and will be removed in a future release: {p.deprecation_warning}",
            )
-        p.deps__ = [a.value for a in p.api_dependencies]
+        p.deps__ = [a.value for a in p.api_dependencies] + [
+            a.value for a in p.optional_api_dependencies
+        ]
        spec = ProviderWithSpec(
            spec=p,
            **(provider.model_dump()),

@@ -229,6 +231,9 @@ async def resolve_impls(
    inner_impls_by_provider_id = {f"inner-{x.value}": {} for x in router_apis}
    for api_str, provider in sorted_providers:
        deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        for a in provider.spec.optional_api_dependencies:
+            if a in impls:
+                deps[a] = impls[a]

        inner_impls = {}
        if isinstance(provider.spec, RoutingTableProviderSpec):

@@ -265,7 +270,7 @@ def topological_sort(
                deps.append(dep)

        for dep in deps:
-            if dep not in visited:
+            if dep not in visited and dep in providers_with_specs:
                dfs((dep, providers_with_specs[dep]), visited, stack)

        stack.append(api_str)
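Note (illustration, not part of the diff): a minimal standalone sketch of the guarded depth-first topological sort is shown below, with a simplified signature assumed for illustration. Skipping dependencies that have no configured provider is what keeps an unconfigured optional API from raising a KeyError.

    # Simplified sketch, not the resolver's actual implementation.
    from typing import Dict, List, Set

    def topological_sort(providers_with_specs: Dict[str, List[str]]) -> List[str]:
        def dfs(api: str, visited: Set[str], stack: List[str]) -> None:
            visited.add(api)
            for dep in providers_with_specs[api]:
                if dep not in visited and dep in providers_with_specs:
                    dfs(dep, visited, stack)
            stack.append(api)

        visited: Set[str] = set()
        stack: List[str] = []
        for api in providers_with_specs:
            if api not in visited:
                dfs(api, visited, stack)
        return stack

    # "telemetry" optionally depends on "datasetio", which is not configured here;
    # the extra guard simply skips it instead of failing.
    print(topological_sort({"inference": [], "telemetry": ["datasetio", "inference"]}))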
@@ -6,8 +6,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-DOCKER_BINARY=${DOCKER_BINARY:-docker}
-DOCKER_OPTS=${DOCKER_OPTS:-}
+CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
+CONTAINER_OPTS=${CONTAINER_OPTS:-}
LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}

@@ -31,7 +31,7 @@ if [ $# -lt 3 ]; then
fi

build_name="$1"
-docker_image="localhost/distribution-$build_name"
+container_image="localhost/distribution-$build_name"
shift

yaml_config="$1"

@@ -64,7 +64,7 @@ set -x

if command -v selinuxenabled &> /dev/null && selinuxenabled; then
  # Disable SELinux labels
-  DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
+  CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi

mounts=""

@@ -73,7 +73,7 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
fi
if [ -n "$LLAMA_CHECKPOINT_DIR" ]; then
  mounts="$mounts -v $LLAMA_CHECKPOINT_DIR:/root/.llama"
-  DOCKER_OPTS="$DOCKER_OPTS --gpus=all"
+  CONTAINER_OPTS="$CONTAINER_OPTS --gpus=all"
fi

version_tag="latest"

@@ -85,11 +85,11 @@ elif [ -n "$TEST_PYPI_VERSION" ]; then
  version_tag="test-$TEST_PYPI_VERSION"
fi

-$DOCKER_BINARY run $DOCKER_OPTS -it \
+$CONTAINER_BINARY run $CONTAINER_OPTS -it \
  -p $port:$port \
  $env_vars \
  -v "$yaml_config:/app/config.yaml" \
  $mounts \
  --env LLAMA_STACK_PORT=$port \
  --entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
-  $docker_image:$version_tag
+  $container_image:$version_tag
@@ -14,6 +14,6 @@ def datasets():
    datasets_info = {
        d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()
    }

-    selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
-    st.json(datasets_info[selected_dataset], expanded=True)
+    if len(datasets_info) > 0:
+        selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
+        st.json(datasets_info[selected_dataset], expanded=True)
@@ -16,7 +16,8 @@ def eval_tasks():
        d.identifier: d.to_dict() for d in llama_stack_api.client.eval_tasks.list()
    }

-    selected_eval_task = st.selectbox(
-        "Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect"
-    )
-    st.json(eval_tasks_info[selected_eval_task], expanded=True)
+    if len(eval_tasks_info) > 0:
+        selected_eval_task = st.selectbox(
+            "Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect"
+        )
+        st.json(eval_tasks_info[selected_eval_task], expanded=True)
@@ -10,11 +10,17 @@ from modules.api import llama_stack_api

def providers():
    st.header("🔍 API Providers")
-    apis_providers_info = llama_stack_api.client.providers.list()
-    # selected_api = st.selectbox("Select an API", list(apis_providers_info.keys()))
-    for api in apis_providers_info.keys():
+    apis_providers_lst = llama_stack_api.client.providers.list()
+    api_to_providers = {}
+    for api_provider in apis_providers_lst:
+        if api_provider.api in api_to_providers:
+            api_to_providers[api_provider.api].append(api_provider)
+        else:
+            api_to_providers[api_provider.api] = [api_provider]
+
+    for api in api_to_providers.keys():
        st.markdown(f"###### {api}")
-        st.dataframe([p.to_dict() for p in apis_providers_info[api]], width=500)
+        st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500)


providers()
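Note (illustration, not part of the diff): the grouping loop above could be written more compactly with collections.defaultdict. The snippet below only demonstrates the same grouping over stand-in data; the SimpleNamespace objects are placeholders for whatever llama_stack_api.client.providers.list() actually returns.

    from collections import defaultdict
    from types import SimpleNamespace

    # Stand-ins for the listed provider objects, which expose an "api" attribute.
    apis_providers_lst = [
        SimpleNamespace(api="inference", provider_id="ollama"),
        SimpleNamespace(api="memory", provider_id="faiss"),
    ]

    api_to_providers = defaultdict(list)
    for api_provider in apis_providers_lst:
        api_to_providers[api_provider.api].append(api_provider)
    print(dict(api_to_providers))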
@@ -121,7 +121,7 @@ if prompt := st.chat_input("Example: What is Llama Stack?"):
        if stream:
            for chunk in response:
                if chunk.event.event_type == "progress":
-                    full_response += chunk.event.delta
+                    full_response += chunk.event.delta.text
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        else:
@@ -44,14 +44,21 @@ def rag_chat_page():
        ]

        providers = llama_stack_api.client.providers.list()
+        memory_provider = None
+        for x in providers:
+            if x.api == "memory":
+                memory_provider = x.provider_id
+
        llama_stack_api.client.memory_banks.register(
            memory_bank_id=memory_bank_name,  # Use the user-provided name
            params={
                "memory_bank_type": "vector",
                "embedding_model": "all-MiniLM-L6-v2",
                "chunk_size_in_tokens": 512,
                "overlap_size_in_tokens": 64,
            },
-            provider_id=providers["memory"][0].provider_id,
+            provider_id=memory_provider,
        )

        # insert documents using the custom bank name

@@ -69,9 +76,6 @@ def rag_chat_page():
        "Select Memory Banks",
        memory_banks,
    )
-    memory_bank_configs = [
-        {"bank_id": bank_id, "type": "vector"} for bank_id in selected_memory_banks
-    ]

    available_models = llama_stack_api.client.models.list()
    available_models = [

@@ -133,14 +137,13 @@ def rag_chat_page():
        sampling_params={
            "strategy": strategy,
        },
-        tools=[
-            {
-                "type": "memory",
-                "memory_bank_configs": memory_bank_configs,
-                "query_generator_config": {"type": "default", "sep": " "},
-                "max_tokens_in_context": 4096,
-                "max_chunks": 10,
-            }
+        toolgroups=[
+            dict(
+                name="builtin::memory",
+                args={
+                    "memory_bank_ids": [bank_id for bank_id in selected_memory_banks],
+                },
+            )
        ],
        tool_choice="auto",
        tool_prompt_format="json",

@@ -179,7 +182,7 @@ def rag_chat_page():
        retrieval_response = ""
        for log in EventLogger().log(response):
            log.print()
-            if log.role == "memory_retrieval":
+            if log.role == "tool_execution":
                retrieval_response += log.content.replace("====", "").strip()
                retrieval_message_placeholder.info(retrieval_response)
            else:
@@ -96,6 +96,9 @@ class ProviderSpec(BaseModel):
        default_factory=list,
        description="Higher-level API surfaces may depend on other providers to provide their functionality",
    )
+    optional_api_dependencies: List[Api] = Field(
+        default_factory=list,
+    )
    deprecation_warning: Optional[str] = Field(
        default=None,
        description="If this provider is deprecated, specify the warning message here",

@@ -147,11 +150,11 @@ class InlineProviderSpec(ProviderSpec):
        default_factory=list,
        description="The pip dependencies needed for this implementation",
    )
-    docker_image: Optional[str] = Field(
+    container_image: Optional[str] = Field(
        default=None,
        description="""
-The docker image to use for this implementation. If one is provided, pip_packages will be ignored.
-If a provider depends on other providers, the dependencies MUST NOT specify a docker image.
+The container image to use for this implementation. If one is provided, pip_packages will be ignored.
+If a provider depends on other providers, the dependencies MUST NOT specify a container image.
""",
    )
    module: str = Field(

@@ -194,7 +197,7 @@ API responses, specify the adapter here.
    )

    @property
-    def docker_image(self) -> Optional[str]:
+    def container_image(self) -> Optional[str]:
        return None

    @property
@@ -72,7 +72,7 @@ def is_tracing_enabled(tracer):
class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
    def __init__(self, config: TelemetryConfig, deps: Dict[str, Any]) -> None:
        self.config = config
-        self.datasetio_api = deps[Api.datasetio]
+        self.datasetio_api = deps.get(Api.datasetio)

        resource = Resource.create(
            {
@@ -24,7 +24,7 @@ def available_providers() -> List[ProviderSpec]:
                "opentelemetry-sdk",
                "opentelemetry-exporter-otlp-proto-http",
            ],
-            api_dependencies=[Api.datasetio],
+            optional_api_dependencies=[Api.datasetio],
            module="llama_stack.providers.inline.telemetry.meta_reference",
            config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
        ),
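Note (illustration, not part of the diff): taken together, the ProviderSpec, resolver, telemetry adapter, and registry hunks introduce an optional-dependency pattern. The sketch below restates it with simplified names and plain strings in place of the Api enum; it is an assumption-laden illustration, not the stack's implementation.

    # Simplified sketch of the optional-dependency pattern.
    from typing import Any, Dict, List, Optional

    def build_deps(required: List[str], optional: List[str], impls: Dict[str, Any]) -> Dict[str, Any]:
        deps = {a: impls[a] for a in required}  # a missing required dep raises KeyError
        for a in optional:
            if a in impls:                      # optional deps are injected only when configured
                deps[a] = impls[a]
        return deps

    class TelemetryLikeAdapter:
        def __init__(self, deps: Dict[str, Any]) -> None:
            self.datasetio_api: Optional[Any] = deps.get("datasetio")

        def save_spans_to_dataset(self) -> None:
            if self.datasetio_api is None:
                raise RuntimeError("DatasetIO API not available")

    adapter = TelemetryLikeAdapter(build_deps([], ["datasetio"], {"inference": object()}))
    print(adapter.datasetio_api)  # None: the optional API was not configured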
@@ -176,7 +176,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
        media_present = request_has_media(request)
        if isinstance(request, ChatCompletionRequest):
            if media_present:
                # vllm does not seem to work well with image urls, so we download the images
                input_dict["messages"] = [
                    await convert_message_to_openai_dict(m, download=True)
                    for m in request.messages
@@ -6,15 +6,15 @@

from typing import AsyncGenerator, Dict, List, Optional

-from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import (
+from llama_models.datatypes import (
    GreedySamplingStrategy,
    SamplingParams,
    StopReason,
    TopKSamplingStrategy,
    TopPSamplingStrategy,
)

+from llama_models.llama3.api.chat_format import ChatFormat
+from llama_models.llama3.api.datatypes import StopReason
from pydantic import BaseModel

from llama_stack.apis.common.content_types import (
@@ -188,7 +188,7 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
async def convert_image_content_to_url(
    media: ImageContentItem, download: bool = False, include_format: bool = True
) -> str:
-    if media.url and not download:
+    if media.url and (not download or media.url.uri.startswith("data")):
        return media.url.uri

    content, format = await localize_image_content(media)
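Note (illustration, not part of the diff): the extra condition keeps inline data URIs from being fetched as if they were remote URLs. A tiny standalone rendering of just that check is shown below; the helper name is made up for the example.

    # Simplified stand-in for the URL check in convert_image_content_to_url().
    def should_return_uri_directly(uri: str, download: bool) -> bool:
        return not download or uri.startswith("data")

    print(should_return_uri_directly("data:image/png;base64,AAAA", download=True))   # True
    print(should_return_uri_directly("https://example.com/cat.png", download=True))  # False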
@@ -22,6 +22,9 @@ class TelemetryDatasetMixin:
        dataset_id: str,
        max_depth: Optional[int] = None,
    ) -> None:
+        if self.datasetio_api is None:
+            raise RuntimeError("DatasetIO API not available")
+
        spans = await self.query_spans(
            attribute_filters=attribute_filters,
            attributes_to_return=attributes_to_save,
@@ -30,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "bedrock"

@@ -70,7 +71,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use AWS Bedrock for running LLM inference and safety",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,

@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -81,6 +81,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
@@ -92,7 +92,7 @@ def get_distribution_template() -> DistributionTemplate:
        name="cerebras",
        distro_type="self_hosted",
        description="Use Cerebras for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,
@@ -2,7 +2,7 @@ version: '2'
name: experimental-post-training
distribution_spec:
  description: Experimental template for post training
-  docker_image: null
+  container_image: null
  providers:
    inference:
    - inline::meta-reference

@@ -1,6 +1,6 @@
version: '2'
image_name: experimental-post-training
-docker_image: null
+container_image: null
conda_env: experimental-post-training
apis:
- agents
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -39,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }

@@ -98,7 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use Fireworks.AI for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,

@@ -92,6 +92,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -34,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "hf-endpoint"

@@ -88,7 +89,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=None,
        providers=providers,
        default_models=[inference_model, safety_model],

@@ -91,6 +91,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -34,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }

@@ -89,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=None,
        providers=providers,
        default_models=[inference_model, safety_model],

@@ -91,6 +91,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -38,6 +38,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "meta-reference-gpu"

@@ -93,6 +93,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db

@@ -87,6 +87,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -33,6 +33,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    default_tool_groups = [

@@ -89,6 +89,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db
@@ -26,4 +26,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -29,6 +29,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }

@@ -68,7 +69,7 @@ def get_distribution_template() -> DistributionTemplate:
        name="nvidia",
        distro_type="remote_hosted",
        description="Use NVIDIA NIM for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,

@@ -83,6 +83,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
@@ -90,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use (an external) Ollama server for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=[inference_model, safety_model],
@@ -12,6 +12,15 @@ distribution_spec:
  - inline::llama-guard
  agents:
  - inline::meta-reference
+  eval:
+  - inline::meta-reference
+  datasetio:
+  - remote::huggingface
+  - inline::localfs
+  scoring:
+  - inline::basic
+  - inline::llm-as-judge
+  - inline::braintrust
  telemetry:
  - inline::meta-reference
  tool_runtime:

@@ -19,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -2,9 +2,12 @@ version: '2'
image_name: remote-vllm
apis:
- agents
+- datasetio
+- eval
- inference
- memory
- safety
+- scoring
- telemetry
- tool_runtime
providers:

@@ -44,6 +47,28 @@ providers:
      type: sqlite
      namespace: null
      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -68,6 +93,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db

@@ -2,9 +2,12 @@ version: '2'
image_name: remote-vllm
apis:
- agents
+- datasetio
+- eval
- inference
- memory
- safety
+- scoring
- telemetry
- tool_runtime
providers:

@@ -38,6 +41,28 @@ providers:
      type: sqlite
      namespace: null
      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference

@@ -62,6 +87,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
@@ -27,12 +27,16 @@ def get_distribution_template() -> DistributionTemplate:
        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "telemetry": ["inline::meta-reference"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "remote-vllm"
@@ -37,7 +37,7 @@ class RunConfigSettings(BaseModel):
        self,
        name: str,
        providers: Dict[str, List[str]],
-        docker_image: Optional[str] = None,
+        container_image: Optional[str] = None,
    ) -> StackRunConfig:
        provider_registry = get_provider_registry()

@@ -83,7 +83,7 @@ class RunConfigSettings(BaseModel):

        return StackRunConfig(
            image_name=name,
-            docker_image=docker_image,
+            container_image=container_image,
            apis=apis,
            providers=provider_configs,
            metadata_store=SqliteKVStoreConfig.sample_run_config(

@@ -112,7 +112,7 @@ class DistributionTemplate(BaseModel):

    # Optional configuration
    run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
-    docker_image: Optional[str] = None
+    container_image: Optional[str] = None

    default_models: Optional[List[ModelInput]] = None

@@ -121,7 +121,7 @@ class DistributionTemplate(BaseModel):
            name=self.name,
            distribution_spec=DistributionSpec(
                description=self.description,
-                docker_image=self.docker_image,
+                container_image=self.container_image,
                providers=self.providers,
            ),
            image_type="conda",  # default to conda, can be overridden

@@ -169,7 +169,7 @@ class DistributionTemplate(BaseModel):

        for yaml_pth, settings in self.run_configs.items():
            run_config = settings.run_config(
-                self.name, self.providers, self.docker_image
+                self.name, self.providers, self.container_image
            )
            with open(yaml_output_dir / yaml_pth, "w") as f:
                yaml.safe_dump(
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db

@@ -85,6 +85,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db

@@ -36,6 +36,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "tgi"

@@ -92,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use (an external) TGI server for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=[inference_model, safety_model],
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -92,6 +92,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db

@@ -86,6 +86,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db

@@ -39,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }
    name = "together"

@@ -96,7 +97,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use Together.AI for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=default_models,
@@ -28,4 +28,5 @@ distribution_spec:
  - remote::tavily-search
  - inline::code-interpreter
  - inline::memory-runtime
+  - remote::model-context-protocol
image_type: conda

@@ -89,6 +89,9 @@ providers:
  - provider_id: memory-runtime
    provider_type: inline::memory-runtime
    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db

@@ -33,6 +33,7 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::tavily-search",
            "inline::code-interpreter",
            "inline::memory-runtime",
+            "remote::model-context-protocol",
        ],
    }

@@ -84,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
        name=name,
        distro_type="self_hosted",
        description="Use a built-in vLLM engine for running LLM inference",
-        docker_image=None,
+        container_image=None,
        template_path=None,
        providers=providers,
        default_models=[inference_model],