#!/usr/bin/env bash
# Run the CubeSandbox one-click installer, then run quickcheck.sh.
# Called by cube-install.service (Type=oneshot) after docker.service and
# cube-xfs-mount.service are both active.
#
# Environment read by this script:
#   CUBE_MIRROR           "cn" (default) fetches the installer from cnb.cool;
#                         any other value fetches it from GitHub.
#   CUBE_FORCE_REINSTALL  "1" re-runs the installer even when an existing
#                         installation is detected.
#
# Strict mode: abort on unhandled errors, unset variables and failures in any
# pipeline stage.
set -euo pipefail

# Write an informational message to stdout, tagged with the script name.
# Arguments: all arguments are joined with spaces into a single message.
log() {
  local message="$*"
  printf '[cube-install] %s\n' "$message"
}
# Write an error message to stderr, tagged with the script name and "ERROR:".
# Arguments: all arguments are joined with spaces into a single message.
err() {
  local message="$*"
  printf '[cube-install] ERROR: %s\n' "$message" >&2
}
# Installation layout: the installer places everything under INSTALL_PREFIX,
# and the two helper scripts below are resolved relative to it.
readonly INSTALL_PREFIX="/usr/local/services/cubetoolbox"
readonly QUICKCHECK="${INSTALL_PREFIX}/scripts/one-click/quickcheck.sh"
readonly UP_SCRIPT="${INSTALL_PREFIX}/scripts/one-click/up-with-deps.sh"

# Mirror selection, overridable through CUBE_MIRROR (defaults to "cn").
# Deliberately NOT readonly: the installer is launched with a temporary
# `MIRROR=cn` prefix assignment, which bash rejects for readonly variables.
MIRROR="${CUBE_MIRROR:-cn}"

# Upstream locations of the online installer, one per mirror.
readonly INSTALLER_URL_CN="https://cnb.cool/CubeSandbox/CubeSandbox/-/git/raw/master/deploy/one-click/online-install.sh"
readonly INSTALLER_URL_GH="https://github.com/tencentcloud/CubeSandbox/raw/master/deploy/one-click/online-install.sh"

# /dev/kvm sanity — required by the MicroVM hypervisor.
# Fail fast with an actionable message when the character device is missing,
# before any time is spent waiting for docker or downloading the installer.
if [[ ! -c /dev/kvm ]]; then
  err "/dev/kvm is not available inside the container."
  err "Ensure the compose stack passes --device /dev/kvm and nested virt is enabled on the host."
  exit 1
fi
# The listing is only for the log (shows owner/mode of the device node).
log "KVM device present: $(ls -l /dev/kvm)"

# Wait for dockerd (started by docker.service) to be ready before install.sh
# tries to pull MySQL / Redis / CubeProxy images.
# Polls `docker info` up to docker_wait_attempts times, docker_wait_delay
# seconds apart, and aborts the script if the daemon never answers.
docker_wait_attempts=60
docker_wait_delay=2
docker_ready=0

log "Waiting for docker daemon ..."
for ((attempt = 1; attempt <= docker_wait_attempts; attempt++)); do
  if docker info >/dev/null 2>&1; then
    docker_ready=1
    log "docker ready."
    break
  fi
  sleep "$docker_wait_delay"
done

# The flag records the loop outcome, so no extra `docker info` probe is needed.
if [[ "$docker_ready" != "1" ]]; then
  err "docker daemon not ready after $((docker_wait_attempts * docker_wait_delay)) s"
  exit 1
fi

# Redirect TMPDIR to the 50 GB XFS volume.
# /tmp is only 256 MB (tmpfs) and mounted noexec — both cause install failures:
#   - curl: (23) Failure writing output to destination (out of space)
#   - extracted scripts fail to execute (noexec mount flag)
mkdir -p /data/tmp
export TMPDIR=/data/tmp
# -P forces one output record per filesystem, so the second line always
# carries the "Avail" column even when the device name is long.
log "TMPDIR set to $TMPDIR ($(df -hP "$TMPDIR" | awk 'NR==2{print $4}') free)"

# Set CAROOT so mkcert can find / create the local CA directory on every boot.
# Without this, up-cube-proxy.sh calls `mkcert -install` which exits with:
#   "ERROR: failed to find the default CA location"
# Because up-with-deps.sh runs under set -euo pipefail, that failure aborts
# the entire script before any compute services (network-agent, CubeAPI, etc.)
# are started. Persisting the CA on /data (named volume) means the cert is
# re-used across container restarts rather than regenerated each time.
export CAROOT=/data/mkcert-ca
mkdir -p "$CAROOT"
log "CAROOT set to $CAROOT"

# Run the upstream one-click installer on first boot; on subsequent boots
# just re-launch all services via up-with-deps.sh.
# An existing install is detected by an executable quickcheck.sh;
# CUBE_FORCE_REINSTALL=1 forces the installer path regardless.
if [[ -x "$QUICKCHECK" && "${CUBE_FORCE_REINSTALL:-0}" != "1" ]]; then
  log "CubeSandbox already installed at $INSTALL_PREFIX — starting services."
  if [[ ! -x "$UP_SCRIPT" ]]; then
    err "up-with-deps.sh not found at $UP_SCRIPT — reinstall required"
    exit 1
  fi
  # Best effort on purpose: a non-zero exit is only logged, because the
  # quickcheck retry loop that follows is the real health gate.
  ONE_CLICK_TOOLBOX_ROOT="$INSTALL_PREFIX" \
    ONE_CLICK_RUNTIME_ENV_FILE="${INSTALL_PREFIX}/.one-click.env" \
    bash "$UP_SCRIPT" \
    || log "WARNING: up-with-deps.sh exited non-zero; services may still be starting"
else
  log "Running CubeSandbox one-click installer (mirror=$MIRROR) ..."
  if [[ "$MIRROR" == "cn" ]]; then
    installer_url="$INSTALLER_URL_CN"
  else
    installer_url="$INSTALLER_URL_GH"
  fi

  # Download the installer completely before running it. Piping curl straight
  # into bash would let bash execute a truncated script if the transfer broke
  # midway; pipefail only reports that failure after the damage is done.
  installer_script="$(mktemp "${TMPDIR:-/tmp}/cube-online-install.XXXXXX")"
  if ! curl -fsSL -o "$installer_script" "$installer_url"; then
    rm -f -- "$installer_script"
    err "failed to download installer from $installer_url"
    exit 1
  fi

  # Capture the status instead of letting set -e abort, so the temporary
  # file is always removed. MIRROR is only passed to the installer for "cn".
  installer_rc=0
  if [[ "$MIRROR" == "cn" ]]; then
    MIRROR=cn bash "$installer_script" || installer_rc=$?
  else
    bash "$installer_script" || installer_rc=$?
  fi
  rm -f -- "$installer_script"

  if [[ "$installer_rc" -ne 0 ]]; then
    err "one-click installer exited with status $installer_rc"
    exit "$installer_rc"
  fi
fi

# Run quickcheck.sh with retries — network-agent initialises 500 tap interfaces
# which takes ~2 minutes; we retry every 30 s for up to 10 minutes.
# The script exits 1 if quickcheck.sh is missing or never succeeds.
if [[ ! -x "$QUICKCHECK" ]]; then
  err "quickcheck.sh not found at $QUICKCHECK — install may have failed."
  exit 1
fi

QUICKCHECK_PASSED=0
quickcheck_attempts=20
quickcheck_delay=30

log "Running quickcheck.sh (retrying up to 10 min for network-agent tap init) ..."
for ((attempt = 1; attempt <= quickcheck_attempts; attempt++)); do
  # stderr is merged into stdout so all quickcheck output lands in one stream.
  if ONE_CLICK_TOOLBOX_ROOT="$INSTALL_PREFIX" \
    ONE_CLICK_RUNTIME_ENV_FILE="${INSTALL_PREFIX}/.one-click.env" \
    "$QUICKCHECK" 2>&1; then
    QUICKCHECK_PASSED=1
    break
  fi
  # Only sleep when another attempt will follow; waiting after the final
  # failure would just delay the error report.
  if ((attempt < quickcheck_attempts)); then
    log "quickcheck attempt $attempt/$quickcheck_attempts failed — retrying in $quickcheck_delay s ..."
    sleep "$quickcheck_delay"
  else
    log "quickcheck attempt $attempt/$quickcheck_attempts failed — no retries left."
  fi
done

if [[ "$QUICKCHECK_PASSED" != "1" ]]; then
  err "quickcheck.sh never passed after $quickcheck_attempts attempts — CubeSandbox is unhealthy."
  exit 1
fi

# Ensure containerd-shim-cube-rs is on Cubelet's clean PATH.
# up.sh/up-with-deps.sh launch Cubelet with:
#   PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Cubelet resolves runtime shims from that PATH, so it cannot find
# containerd-shim-cube-rs unless it is symlinked into one of those dirs.
# The symlink is (re)created on every boot (both after a fresh install and
# after the restart path) whenever the shim binary exists; otherwise only a
# warning is logged and the script continues.
SHIM_SRC="${INSTALL_PREFIX}/cube-shim/bin/containerd-shim-cube-rs"
SHIM_DST="/usr/local/bin/containerd-shim-cube-rs"
if [[ -x "$SHIM_SRC" ]]; then
  # -n replaces an existing link at SHIM_DST itself rather than following it.
  ln -sfn "$SHIM_SRC" "$SHIM_DST"
  log "containerd-shim-cube-rs linked: $SHIM_DST -> $SHIM_SRC"
else
  log "WARNING: $SHIM_SRC not found — Cubelet will not be able to start MicroVMs"
fi

# Restart Cubelet now that network-agent is confirmed ready.
# On first startup the Cubelet process begins before network-agent has finished
# initialising its 500 TAP interfaces (~2 min). This causes the
# io.cubelet.images-service.v1 plugin to fail with:
#   "network-agent health check failed ... context deadline exceeded"
# leaving the gRPC cubelet.services.images.v1.Images service unregistered.
# When CubeMaster later tries to distribute a template artifact to the node it
# gets back gRPC Unimplemented and the build fails.
# Restarting Cubelet here — after quickcheck has confirmed network-agent is up —
# allows the images-service plugin to load successfully on the second boot.
CUBELET_BIN="${INSTALL_PREFIX}/Cubelet/bin/cubelet"
CUBELET_CFG="${INSTALL_PREFIX}/Cubelet/config/config.toml"
CUBELET_DYN="${INSTALL_PREFIX}/Cubelet/dynamicconf/conf.yaml"
CUBELET_LOG="/data/log/Cubelet/Cubelet-req.log"

if [[ -x "$CUBELET_BIN" ]]; then
  log "Restarting Cubelet so images-service plugin loads against ready network-agent ..."

  # pkill exits non-zero when nothing matched; that is fine here.
  pkill -f "${CUBELET_BIN}" 2>/dev/null || true

  # Wait until the old process is really gone (up to 10 s) instead of assuming
  # a fixed delay is enough; starting a second instance while the first is
  # still shutting down would leave the two competing.
  for ((waited = 0; waited < 10; waited++)); do
    sleep 1
    pgrep -f "${CUBELET_BIN}" >/dev/null 2>&1 || break
  done
  if pgrep -f "${CUBELET_BIN}" >/dev/null 2>&1; then
    log "WARNING: Cubelet still running 10 s after SIGTERM — sending SIGKILL"
    pkill -KILL -f "${CUBELET_BIN}" 2>/dev/null || true
    sleep 1
  fi

  mkdir -p "$(dirname "$CUBELET_LOG")"
  # Detach from this script and append stdout/stderr to the Cubelet log.
  nohup "$CUBELET_BIN" \
    --config "$CUBELET_CFG" \
    --dynamic-conf-path "$CUBELET_DYN" \
    >>"$CUBELET_LOG" 2>&1 &
  CUBELET_PID=$!

  log "Cubelet restarted (PID ${CUBELET_PID}) — waiting 10 s for boot ..."
  sleep 10
  # kill -0 only probes whether the PID is still alive; no signal is sent.
  if kill -0 "$CUBELET_PID" 2>/dev/null; then
    log "Cubelet is running."
  else
    log "WARNING: Cubelet PID ${CUBELET_PID} exited — check ${CUBELET_LOG}."
  fi
fi

# Final summary: health endpoints and log locations for the started services.
log "==================== CubeSandbox is up ===================="
log " CubeAPI: http://127.0.0.1:3000/health"
log " CubeMaster: http://127.0.0.1:8089/notify/health"
log " network-agent http://127.0.0.1:19090/healthz"
log " Logs: /data/log/{CubeAPI,CubeMaster,Cubelet}/"
log "==========================================================="