Merge branch 'main' into aslib-useful-init-error

This commit is contained in:
Matthew Farrellee 2025-07-29 17:42:45 -04:00 committed by GitHub
commit 6092341d90
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
106 changed files with 34234 additions and 1610 deletions

View file

@ -18,10 +18,6 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
# mounting is not supported by docker buildx, so we use COPY instead
USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
# Mount command for cache container .cache, can be overridden by the user if needed
MOUNT_CACHE=${MOUNT_CACHE:-"--mount=type=cache,id=llama-stack-cache,target=/root/.cache"}
# Path to the run.yaml file in the container
RUN_CONFIG_PATH=/app/run.yaml
@ -176,18 +172,13 @@ RUN pip install uv
EOF
fi
# Set the link mode to copy so that uv doesn't attempt to symlink to the cache directory
add_to_container << EOF
ENV UV_LINK_MODE=copy
EOF
# Add pip dependencies first since llama-stack is what will change most often
# so we can reuse layers.
if [ -n "$normal_deps" ]; then
read -ra pip_args <<< "$normal_deps"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container << EOF
RUN $MOUNT_CACHE uv pip install $quoted_deps
RUN uv pip install --no-cache $quoted_deps
EOF
fi
@ -197,7 +188,7 @@ if [ -n "$optional_deps" ]; then
read -ra pip_args <<< "$part"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container <<EOF
RUN $MOUNT_CACHE uv pip install $quoted_deps
RUN uv pip install --no-cache $quoted_deps
EOF
done
fi
@ -208,10 +199,10 @@ if [ -n "$external_provider_deps" ]; then
read -ra pip_args <<< "$part"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container <<EOF
RUN $MOUNT_CACHE uv pip install $quoted_deps
RUN uv pip install --no-cache $quoted_deps
EOF
add_to_container <<EOF
RUN python3 - <<PYTHON | $MOUNT_CACHE uv pip install -r -
RUN python3 - <<PYTHON | uv pip install --no-cache -r -
import importlib
import sys
@ -293,7 +284,7 @@ COPY $dir $mount_point
EOF
fi
add_to_container << EOF
RUN $MOUNT_CACHE uv pip install -e $mount_point
RUN uv pip install --no-cache -e $mount_point
EOF
}
@ -308,10 +299,10 @@ else
if [ -n "$TEST_PYPI_VERSION" ]; then
# these packages are damaged in test-pypi, so install them first
add_to_container << EOF
RUN $MOUNT_CACHE uv pip install fastapi libcst
RUN uv pip install --no-cache fastapi libcst
EOF
add_to_container << EOF
RUN $MOUNT_CACHE uv pip install --extra-index-url https://test.pypi.org/simple/ \
RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
--index-strategy unsafe-best-match \
llama-stack==$TEST_PYPI_VERSION
@ -323,7 +314,7 @@ EOF
SPEC_VERSION="llama-stack"
fi
add_to_container << EOF
RUN $MOUNT_CACHE uv pip install $SPEC_VERSION
RUN uv pip install --no-cache $SPEC_VERSION
EOF
fi
fi

View file

@ -79,11 +79,9 @@ class InferenceRouter(Inference):
async def initialize(self) -> None:
logger.debug("InferenceRouter.initialize")
pass
async def shutdown(self) -> None:
logger.debug("InferenceRouter.shutdown")
pass
async def register_model(
self,

View file

@ -94,6 +94,7 @@ RESOURCES = [
REGISTRY_REFRESH_INTERVAL_SECONDS = 300
REGISTRY_REFRESH_TASK = None
TEST_RECORDING_CONTEXT = None
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
@ -307,6 +308,15 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
async def construct_stack(
run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None
) -> dict[Api, Any]:
if "LLAMA_STACK_TEST_INFERENCE_MODE" in os.environ:
from llama_stack.testing.inference_recorder import setup_inference_recording
global TEST_RECORDING_CONTEXT
TEST_RECORDING_CONTEXT = setup_inference_recording()
if TEST_RECORDING_CONTEXT:
TEST_RECORDING_CONTEXT.__enter__()
logger.info(f"Inference recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")
dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
policy = run_config.server.auth.access_policy if run_config.server.auth else []
impls = await resolve_impls(
@ -352,6 +362,13 @@ async def shutdown_stack(impls: dict[Api, Any]):
except (Exception, asyncio.CancelledError) as e:
logger.exception(f"Failed to shutdown {impl_name}: {e}")
global TEST_RECORDING_CONTEXT
if TEST_RECORDING_CONTEXT:
try:
TEST_RECORDING_CONTEXT.__exit__(None, None, None)
except Exception as e:
logger.error(f"Error during inference recording cleanup: {e}")
global REGISTRY_REFRESH_TASK
if REGISTRY_REFRESH_TASK:
REGISTRY_REFRESH_TASK.cancel()