Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-30 07:39:38 +00:00)
Commit 4c045d9ed9 (parent 17172a8bf9): Some more toolchain -> stack changes
14 changed files with 38 additions and 43 deletions
@@ -107,7 +107,7 @@ class StackConfigure(Subcommand):
         if run_config_file.exists():
             cprint(
-                f"Configuration already exists for {build_config.name}. Will overwrite...",
+                f"Configuration already exists at `{str(run_config_file)}`. Will overwrite...",
                 "yellow",
                 attrs=["bold"],
             )

@@ -3,8 +3,8 @@ distribution_spec:
   description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
     inference: meta-reference
-    memory: meta-reference-faiss
+    memory: meta-reference
     safety: meta-reference
     agents: meta-reference
-    telemetry: console
+    telemetry: meta-reference
 image_type: conda

@@ -3,8 +3,8 @@ distribution_spec:
   description: Use Fireworks.ai for running LLM inference
   providers:
     inference: remote::fireworks
-    memory: meta-reference-faiss
+    memory: meta-reference
     safety: meta-reference
     agents: meta-reference
-    telemetry: console
+    telemetry: meta-reference
 image_type: conda

@@ -3,8 +3,8 @@ distribution_spec:
   description: Like local, but use ollama for running LLM inference
   providers:
     inference: remote::ollama
-    memory: meta-reference-faiss
+    memory: meta-reference
     safety: meta-reference
     agents: meta-reference
-    telemetry: console
+    telemetry: meta-reference
 image_type: conda

@@ -3,8 +3,8 @@ distribution_spec:
   description: Use TGI (local or with Hugging Face Inference Endpoints for running LLM inference. When using HF Inference Endpoints, you must provide the name of the endpoint).
   providers:
     inference: remote::tgi
-    memory: meta-reference-faiss
+    memory: meta-reference
     safety: meta-reference
     agents: meta-reference
-    telemetry: console
+    telemetry: meta-reference
 image_type: conda

@@ -3,8 +3,8 @@ distribution_spec:
   description: Use Together.ai for running LLM inference
   providers:
     inference: remote::together
-    memory: meta-reference-faiss
+    memory: meta-reference
     safety: meta-reference
     agents: meta-reference
-    telemetry: console
+    telemetry: meta-reference
 image_type: conda

@@ -3,8 +3,8 @@ distribution_spec:
   description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
     inference: meta-reference
-    memory: meta-reference-faiss
+    memory: meta-reference
     safety: meta-reference
     agents: meta-reference
-    telemetry: console
+    telemetry: meta-reference
 image_type: docker

@@ -7,11 +7,11 @@
 # the root directory of this source tree.

 LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-}
-LLAMA_TOOLCHAIN_DIR=${LLAMA_TOOLCHAIN_DIR:-}
+LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}

-if [ -n "$LLAMA_TOOLCHAIN_DIR" ]; then
-  echo "Using llama-stack-dir=$LLAMA_TOOLCHAIN_DIR"
+if [ -n "$LLAMA_STACK_DIR" ]; then
+  echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
 fi
 if [ -n "$LLAMA_MODELS_DIR" ]; then
   echo "Using llama-models-dir=$LLAMA_MODELS_DIR"

@@ -81,14 +81,14 @@ ensure_conda_env_python310() {
     pip install --extra-index-url https://test.pypi.org/simple/ llama-models==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION $pip_dependencies
   else
     # Re-installing llama-stack in the new conda environment
-    if [ -n "$LLAMA_TOOLCHAIN_DIR" ]; then
-      if [ ! -d "$LLAMA_TOOLCHAIN_DIR" ]; then
-        printf "${RED}Warning: LLAMA_TOOLCHAIN_DIR is set but directory does not exist: $LLAMA_TOOLCHAIN_DIR${NC}\n" >&2
+    if [ -n "$LLAMA_STACK_DIR" ]; then
+      if [ ! -d "$LLAMA_STACK_DIR" ]; then
+        printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: $LLAMA_STACK_DIR${NC}\n" >&2
         exit 1
       fi

-      printf "Installing from LLAMA_TOOLCHAIN_DIR: $LLAMA_TOOLCHAIN_DIR\n"
-      pip install --no-cache-dir -e "$LLAMA_TOOLCHAIN_DIR"
+      printf "Installing from LLAMA_STACK_DIR: $LLAMA_STACK_DIR\n"
+      pip install --no-cache-dir -e "$LLAMA_STACK_DIR"
     else
       pip install --no-cache-dir llama-stack
     fi

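Note: callers that previously exported LLAMA_TOOLCHAIN_DIR to get an editable install must now export LLAMA_STACK_DIR. A hypothetical invocation of the conda build script with the renamed variable (the script name and its arguments below are placeholders, not taken from this diff):

    export LLAMA_STACK_DIR=$HOME/src/llama-stack     # local checkout, installed with pip -e
    export LLAMA_MODELS_DIR=$HOME/src/llama-models   # optional local llama-models checkout
    bash build_conda_env.sh my-env my-build ""       # placeholder script name and arguments
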
@@ -1,7 +1,7 @@
 #!/bin/bash

 LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-}
-LLAMA_TOOLCHAIN_DIR=${LLAMA_TOOLCHAIN_DIR:-}
+LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}

 if [ "$#" -ne 4 ]; then

@@ -55,15 +55,15 @@ RUN apt-get update && apt-get install -y \

 EOF

-toolchain_mount="/app/llama-stack-source"
+stack_mount="/app/llama-stack-source"
 models_mount="/app/llama-models-source"

-if [ -n "$LLAMA_TOOLCHAIN_DIR" ]; then
-  if [ ! -d "$LLAMA_TOOLCHAIN_DIR" ]; then
-    echo "${RED}Warning: LLAMA_TOOLCHAIN_DIR is set but directory does not exist: $LLAMA_TOOLCHAIN_DIR${NC}" >&2
+if [ -n "$LLAMA_STACK_DIR" ]; then
+  if [ ! -d "$LLAMA_STACK_DIR" ]; then
+    echo "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: $LLAMA_STACK_DIR${NC}" >&2
     exit 1
   fi
-  add_to_docker "RUN pip install $toolchain_mount"
+  add_to_docker "RUN pip install $stack_mount"
 else
   add_to_docker "RUN pip install llama-stack"
 fi

@@ -90,7 +90,7 @@ add_to_docker <<EOF
 # This would be good in production but for debugging flexibility lets not add it right now
 # We need a more solid production ready entrypoint.sh anyway
 #
-# ENTRYPOINT ["python", "-m", "llama_stack.distribution.server"]
+# ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server"]

 EOF

@@ -101,8 +101,8 @@ cat $TEMP_DIR/Dockerfile
 printf "\n"

 mounts=""
-if [ -n "$LLAMA_TOOLCHAIN_DIR" ]; then
-  mounts="$mounts -v $(readlink -f $LLAMA_TOOLCHAIN_DIR):$toolchain_mount"
+if [ -n "$LLAMA_STACK_DIR" ]; then
+  mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):$stack_mount"
 fi
 if [ -n "$LLAMA_MODELS_DIR" ]; then
   mounts="$mounts -v $(readlink -f $LLAMA_MODELS_DIR):$models_mount"

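Note: a small sketch of what the renamed mount variable expands to when LLAMA_STACK_DIR points at a local checkout (the paths below are hypothetical):

    LLAMA_STACK_DIR=~/src/llama-stack
    stack_mount="/app/llama-stack-source"
    echo "-v $(readlink -f $LLAMA_STACK_DIR):$stack_mount"
    # prints something like: -v /home/user/src/llama-stack:/app/llama-stack-source
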
@@ -81,11 +81,13 @@ def configure_api_providers(
             )
             config.provider_map[api_str] = routing_entries
         else:
-            provider_spec = all_providers[api][spec.providers[api_str]]
+            p = spec.providers[api_str]
+            print(f"Configuring provider `{p}`...")
+            provider_spec = all_providers[api][p]
             config_type = instantiate_class_type(provider_spec.config_class)
             cfg = prompt_for_config(config_type, None)
             config.provider_map[api_str] = GenericProviderConfig(
-                provider_id=spec.providers[api_str],
+                provider_id=p,
                 config=cfg.dict(),
             )

@@ -3,10 +3,3 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-
-import fire
-
-from .server import main
-
-if __name__ == "__main__":
-    fire.Fire(main)

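Note: this hunk removes what appears to be the package-level entrypoint (the fire-based `from .server import main` shim; the file is not named in what survives of this diff), leaving only the license header. The server is now launched through the server.server module directly, as the hunks below show. A minimal sanity check, assuming llama-stack is installed and the package layout implied by this diff:

    python -c "import llama_stack.distribution.server.server" \
      && echo "server module importable"
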
@@ -47,8 +47,8 @@ from llama_stack.providers.utils.telemetry.tracing import (
 )
 from llama_stack.distribution.datatypes import * # noqa: F403

-from .distribution import api_endpoints, api_providers
-from .dynamic import instantiate_provider
+from llama_stack.distribution.distribution import api_endpoints, api_providers
+from llama_stack.distribution.utils.dynamic import instantiate_provider


 def is_async_iterator_type(typ):

@@ -37,6 +37,6 @@ eval "$(conda shell.bash hook)"
 conda deactivate && conda activate "$env_name"

 $CONDA_PREFIX/bin/python \
-    -m llama_stack.distribution.server \
+    -m llama_stack.distribution.server.server \
     --yaml_config "$yaml_config" \
     --port "$port" "$@"

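Note: the relocated module can also be run by hand outside the helper script; the --yaml_config and --port flags come from the script above, while the environment name and config path below are placeholders:

    conda activate my-llama-env                      # placeholder environment name
    python -m llama_stack.distribution.server.server \
      --yaml_config ./my-build-run.yaml \
      --port 5000
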
@@ -38,6 +38,6 @@ podman run -it \
   -p $port:$port \
   -v "$yaml_config:/app/config.yaml" \
   $docker_image \
-  python -m llama_stack.distribution.server \
+  python -m llama_stack.distribution.server.server \
   --yaml_config /app/config.yaml \
   --port $port "$@"