feat: add TurboOCR
This commit is contained in:
@@ -32,6 +32,7 @@ These services require building custom Docker images from source.
|
|||||||
|
|
||||||
| Service | Version |
|
| Service | Version |
|
||||||
| ------------------------------------------- | ------- |
|
| ------------------------------------------- | ------- |
|
||||||
|
| [CubeSandbox](./builds/cube-sandbox) | 0.1.7 |
|
||||||
| [Debian DinD](./builds/debian-dind) | 0.1.2 |
|
| [Debian DinD](./builds/debian-dind) | 0.1.2 |
|
||||||
| [DeerFlow](./builds/deer-flow) | 2.0 |
|
| [DeerFlow](./builds/deer-flow) | 2.0 |
|
||||||
| [goose](./builds/goose) | 1.18.0 |
|
| [goose](./builds/goose) | 1.18.0 |
|
||||||
@@ -121,7 +122,7 @@ These services require building custom Docker images from source.
|
|||||||
| [Minecraft Bedrock Server](./src/minecraft-bedrock-server) | latest |
|
| [Minecraft Bedrock Server](./src/minecraft-bedrock-server) | latest |
|
||||||
| [MinIO](./src/minio) | 0.20260202 |
|
| [MinIO](./src/minio) | 0.20260202 |
|
||||||
| [MLflow](./src/mlflow) | v2.20.2 |
|
| [MLflow](./src/mlflow) | v2.20.2 |
|
||||||
| [MoltBot](./apps/moltbot) | main |
|
| [OpenClaw](./apps/openclaw) | 2026.2.3 |
|
||||||
| [MongoDB ReplicaSet Single](./src/mongodb-replicaset-single) | 8.2.3 |
|
| [MongoDB ReplicaSet Single](./src/mongodb-replicaset-single) | 8.2.3 |
|
||||||
| [MongoDB ReplicaSet](./src/mongodb-replicaset) | 8.2.3 |
|
| [MongoDB ReplicaSet](./src/mongodb-replicaset) | 8.2.3 |
|
||||||
| [MongoDB Standalone](./src/mongodb-standalone) | 8.2.3 |
|
| [MongoDB Standalone](./src/mongodb-standalone) | 8.2.3 |
|
||||||
@@ -140,6 +141,7 @@ These services require building custom Docker images from source.
|
|||||||
| [Ollama](./src/ollama) | 0.14.3 |
|
| [Ollama](./src/ollama) | 0.14.3 |
|
||||||
| [Open WebUI](./src/open-webui) | main |
|
| [Open WebUI](./src/open-webui) | main |
|
||||||
| [Phoenix (Arize)](./src/phoenix) | 13.19.2 |
|
| [Phoenix (Arize)](./src/phoenix) | 13.19.2 |
|
||||||
|
| [Pingap](./src/pingap) | 0.12.7-full |
|
||||||
| [Pingora Proxy Manager](./src/pingora-proxy-manager) | v1.0.3 |
|
| [Pingora Proxy Manager](./src/pingora-proxy-manager) | v1.0.3 |
|
||||||
| [Open WebUI Rust](./src/open-webui-rust) | latest |
|
| [Open WebUI Rust](./src/open-webui-rust) | latest |
|
||||||
| [OpenCode](./src/opencode) | 1.1.27 |
|
| [OpenCode](./src/opencode) | 1.1.27 |
|
||||||
@@ -185,6 +187,7 @@ These services require building custom Docker images from source.
|
|||||||
| [TiKV](./src/tikv) | v8.5.0 |
|
| [TiKV](./src/tikv) | v8.5.0 |
|
||||||
| [Trigger.dev](./src/trigger-dev) | v4.2.0 |
|
| [Trigger.dev](./src/trigger-dev) | v4.2.0 |
|
||||||
| [TrailBase](./src/trailbase) | 0.22.4 |
|
| [TrailBase](./src/trailbase) | 0.22.4 |
|
||||||
|
| [TurboOCR](./src/turboocr) | v2.1.1 |
|
||||||
| [Valkey Cluster](./src/valkey-cluster) | 8.0 |
|
| [Valkey Cluster](./src/valkey-cluster) | 8.0 |
|
||||||
| [Valkey](./src/valkey) | 8.0 |
|
| [Valkey](./src/valkey) | 8.0 |
|
||||||
| [Verdaccio](./src/verdaccio) | 6.1.2 |
|
| [Verdaccio](./src/verdaccio) | 6.1.2 |
|
||||||
|
|||||||
+4
-1
@@ -32,6 +32,7 @@ docker compose exec redis redis-cli ping
|
|||||||
|
|
||||||
| 服务 | 版本 |
|
| 服务 | 版本 |
|
||||||
| ------------------------------------------- | ------- |
|
| ------------------------------------------- | ------- |
|
||||||
|
| [CubeSandbox](./builds/cube-sandbox) | 0.1.7 |
|
||||||
| [Debian DinD](./builds/debian-dind) | 0.1.2 |
|
| [Debian DinD](./builds/debian-dind) | 0.1.2 |
|
||||||
| [DeerFlow](./builds/deer-flow) | 2.0 |
|
| [DeerFlow](./builds/deer-flow) | 2.0 |
|
||||||
| [goose](./builds/goose) | 1.18.0 |
|
| [goose](./builds/goose) | 1.18.0 |
|
||||||
@@ -121,7 +122,7 @@ docker compose exec redis redis-cli ping
|
|||||||
| [Minecraft Bedrock Server](./src/minecraft-bedrock-server) | latest |
|
| [Minecraft Bedrock Server](./src/minecraft-bedrock-server) | latest |
|
||||||
| [MinIO](./src/minio) | 0.20260202 |
|
| [MinIO](./src/minio) | 0.20260202 |
|
||||||
| [MLflow](./src/mlflow) | v2.20.2 |
|
| [MLflow](./src/mlflow) | v2.20.2 |
|
||||||
| [MoltBot](./apps/moltbot) | main |
|
| [OpenClaw](./apps/openclaw) | 2026.2.3 |
|
||||||
| [MongoDB ReplicaSet Single](./src/mongodb-replicaset-single) | 8.2.3 |
|
| [MongoDB ReplicaSet Single](./src/mongodb-replicaset-single) | 8.2.3 |
|
||||||
| [MongoDB ReplicaSet](./src/mongodb-replicaset) | 8.2.3 |
|
| [MongoDB ReplicaSet](./src/mongodb-replicaset) | 8.2.3 |
|
||||||
| [MongoDB Standalone](./src/mongodb-standalone) | 8.2.3 |
|
| [MongoDB Standalone](./src/mongodb-standalone) | 8.2.3 |
|
||||||
@@ -140,6 +141,7 @@ docker compose exec redis redis-cli ping
|
|||||||
| [Ollama](./src/ollama) | 0.14.3 |
|
| [Ollama](./src/ollama) | 0.14.3 |
|
||||||
| [Open WebUI](./src/open-webui) | main |
|
| [Open WebUI](./src/open-webui) | main |
|
||||||
| [Phoenix (Arize)](./src/phoenix) | 13.19.2 |
|
| [Phoenix (Arize)](./src/phoenix) | 13.19.2 |
|
||||||
|
| [Pingap](./src/pingap) | 0.12.7-full |
|
||||||
| [Pingora Proxy Manager](./src/pingora-proxy-manager) | v1.0.3 |
|
| [Pingora Proxy Manager](./src/pingora-proxy-manager) | v1.0.3 |
|
||||||
| [Open WebUI Rust](./src/open-webui-rust) | latest |
|
| [Open WebUI Rust](./src/open-webui-rust) | latest |
|
||||||
| [OpenCode](./src/opencode) | 1.1.27 |
|
| [OpenCode](./src/opencode) | 1.1.27 |
|
||||||
@@ -185,6 +187,7 @@ docker compose exec redis redis-cli ping
|
|||||||
| [TiKV](./src/tikv) | v8.5.0 |
|
| [TiKV](./src/tikv) | v8.5.0 |
|
||||||
| [Trigger.dev](./src/trigger-dev) | v4.2.0 |
|
| [Trigger.dev](./src/trigger-dev) | v4.2.0 |
|
||||||
| [TrailBase](./src/trailbase) | 0.22.4 |
|
| [TrailBase](./src/trailbase) | 0.22.4 |
|
||||||
|
| [TurboOCR](./src/turboocr) | v2.1.1 |
|
||||||
| [Valkey Cluster](./src/valkey-cluster) | 8.0 |
|
| [Valkey Cluster](./src/valkey-cluster) | 8.0 |
|
||||||
| [Valkey](./src/valkey) | 8.0 |
|
| [Valkey](./src/valkey) | 8.0 |
|
||||||
| [Verdaccio](./src/verdaccio) | 6.1.2 |
|
| [Verdaccio](./src/verdaccio) | 6.1.2 |
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ services:
|
|||||||
- NANOBOT_GATEWAY__PORT=${GATEWAY_PORT:-18790}
|
- NANOBOT_GATEWAY__PORT=${GATEWAY_PORT:-18790}
|
||||||
command: ${NANOBOT_COMMAND:-gateway}
|
command: ${NANOBOT_COMMAND:-gateway}
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [CMD, python, -c, import sys; sys.exit(0)]
|
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:18790/')"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ x-defaults: &defaults
|
|||||||
services:
|
services:
|
||||||
openclaw-gateway:
|
openclaw-gateway:
|
||||||
<<: *defaults
|
<<: *defaults
|
||||||
image: ${GLOBAL_REGISTRY:-ghcr.io}/openclaw/openclaw:${OPENCLAW_VERSION:-2026.2.3}
|
image: ${GLOBAL_REGISTRY:-ghcr.io/}openclaw/openclaw:${OPENCLAW_VERSION:-2026.2.3}
|
||||||
environment:
|
environment:
|
||||||
- TZ=${TZ:-UTC}
|
- TZ=${TZ:-UTC}
|
||||||
- HOME=/home/node
|
- HOME=/home/node
|
||||||
@@ -60,7 +60,8 @@ services:
|
|||||||
|
|
||||||
openclaw-cli:
|
openclaw-cli:
|
||||||
<<: *defaults
|
<<: *defaults
|
||||||
image: ${GLOBAL_REGISTRY:-ghcr.io}/openclaw/openclaw:${OPENCLAW_VERSION:-2026.2.3}
|
restart: 'no'
|
||||||
|
image: ${GLOBAL_REGISTRY:-ghcr.io/}openclaw/openclaw:${OPENCLAW_VERSION:-2026.2.3}
|
||||||
environment:
|
environment:
|
||||||
- TZ=${TZ:-UTC}
|
- TZ=${TZ:-UTC}
|
||||||
- HOME=/home/node
|
- HOME=/home/node
|
||||||
@@ -70,8 +71,8 @@ services:
|
|||||||
- CLAUDE_WEB_SESSION_KEY=${CLAUDE_WEB_SESSION_KEY:-}
|
- CLAUDE_WEB_SESSION_KEY=${CLAUDE_WEB_SESSION_KEY:-}
|
||||||
- CLAUDE_WEB_COOKIE=${CLAUDE_WEB_COOKIE:-}
|
- CLAUDE_WEB_COOKIE=${CLAUDE_WEB_COOKIE:-}
|
||||||
volumes:
|
volumes:
|
||||||
- moltbot_config:/home/node/.clawdbot
|
- openclaw_config:/home/node/.openclaw
|
||||||
- moltbot_workspace:/home/node/clawd
|
- openclaw_workspace:/home/node/openclaw-workspace
|
||||||
stdin_open: true
|
stdin_open: true
|
||||||
tty: true
|
tty: true
|
||||||
entrypoint: [node, dist/index.js]
|
entrypoint: [node, dist/index.js]
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
STIRLING_VERSION="latest"
|
STIRLING_VERSION="latest"
|
||||||
|
|
||||||
# Port override
|
# Port override
|
||||||
PORT_OVERRIDE=8080
|
STIRLING_PORT_OVERRIDE=8080
|
||||||
|
|
||||||
# Security settings
|
# Security settings
|
||||||
ENABLE_SECURITY="false"
|
ENABLE_SECURITY="false"
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ This service deploys Stirling-PDF, a locally hosted web-based PDF manipulation t
|
|||||||
| Variable Name | Description | Default Value |
|
| Variable Name | Description | Default Value |
|
||||||
| -------------------- | ------------------------------------- | -------------- |
|
| -------------------- | ------------------------------------- | -------------- |
|
||||||
| STIRLING_VERSION | Stirling-PDF image version | `latest` |
|
| STIRLING_VERSION | Stirling-PDF image version | `latest` |
|
||||||
| PORT_OVERRIDE | Host port mapping | `8080` |
|
| STIRLING_PORT_OVERRIDE | Host port mapping | `8080` |
|
||||||
| ENABLE_SECURITY | Enable security features | `false` |
|
| ENABLE_SECURITY | Enable security features | `false` |
|
||||||
| ENABLE_LOGIN | Enable login functionality | `false` |
|
| ENABLE_LOGIN | Enable login functionality | `false` |
|
||||||
| INITIAL_USERNAME | Initial admin username | `admin` |
|
| INITIAL_USERNAME | Initial admin username | `admin` |
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
| 变量名 | 说明 | 默认值 |
|
| 变量名 | 说明 | 默认值 |
|
||||||
| -------------------- | ---------------------- | -------------- |
|
| -------------------- | ---------------------- | -------------- |
|
||||||
| STIRLING_VERSION | Stirling-PDF 镜像版本 | `latest` |
|
| STIRLING_VERSION | Stirling-PDF 镜像版本 | `latest` |
|
||||||
| PORT_OVERRIDE | 主机端口映射 | `8080` |
|
| STIRLING_PORT_OVERRIDE | 主机端口映射 | `8080` |
|
||||||
| ENABLE_SECURITY | 启用安全功能 | `false` |
|
| ENABLE_SECURITY | 启用安全功能 | `false` |
|
||||||
| ENABLE_LOGIN | 启用登录功能 | `false` |
|
| ENABLE_LOGIN | 启用登录功能 | `false` |
|
||||||
| INITIAL_USERNAME | 初始管理员用户名 | `admin` |
|
| INITIAL_USERNAME | 初始管理员用户名 | `admin` |
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ services:
|
|||||||
<<: *defaults
|
<<: *defaults
|
||||||
image: ${GLOBAL_REGISTRY:-}stirlingtools/stirling-pdf:${STIRLING_VERSION:-latest}
|
image: ${GLOBAL_REGISTRY:-}stirlingtools/stirling-pdf:${STIRLING_VERSION:-latest}
|
||||||
ports:
|
ports:
|
||||||
- '${PORT_OVERRIDE:-8080}:8080'
|
- '${STIRLING_PORT_OVERRIDE:-8080}:8080'
|
||||||
volumes:
|
volumes:
|
||||||
- stirling_trainingData:/usr/share/tessdata
|
- stirling_trainingData:/usr/share/tessdata
|
||||||
- stirling_configs:/configs
|
- stirling_configs:/configs
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
# --- Image / build ---
|
||||||
|
# Override prefix when pushing to a private registry (e.g. registry.example.com/)
|
||||||
|
GLOBAL_REGISTRY=
|
||||||
|
# Tag of the locally built image
|
||||||
|
CUBE_SANDBOX_VERSION=0.1.7
|
||||||
|
# Base image for the wrapper container.
|
||||||
|
# Default works globally. In mainland China, override with a regional mirror:
|
||||||
|
# UBUNTU_IMAGE=docker.m.daocloud.io/library/ubuntu:22.04
|
||||||
|
# UBUNTU_IMAGE=ccr.ccs.tencentyun.com/library/ubuntu:22.04
|
||||||
|
UBUNTU_IMAGE=ubuntu:22.04
|
||||||
|
|
||||||
|
# --- Runtime ---
|
||||||
|
# Timezone inside the container
|
||||||
|
TZ=Asia/Shanghai
|
||||||
|
|
||||||
|
# Mirror used by the upstream installer:
|
||||||
|
# cn -> https://cnb.cool/CubeSandbox + Tencent Cloud container registry (recommended in China)
|
||||||
|
# gh -> https://github.com (slower in China but works elsewhere)
|
||||||
|
CUBE_MIRROR=cn
|
||||||
|
|
||||||
|
# Size of the XFS-formatted loop file mounted at /data/cubelet inside the
|
||||||
|
# container. install.sh hard-requires XFS; the file lives on the cube_data
|
||||||
|
# named volume so it persists across container restarts.
|
||||||
|
CUBE_XFS_SIZE=50G
|
||||||
|
|
||||||
|
# Set to 1 to force re-running install.sh on next start
|
||||||
|
CUBE_FORCE_REINSTALL=0
|
||||||
|
|
||||||
|
# --- Resources ---
|
||||||
|
# CubeSandbox runs MySQL + Redis + CubeProxy + CoreDNS + CubeMaster + CubeAPI +
|
||||||
|
# Cubelet + network-agent inside the wrapper container, then spawns MicroVMs.
|
||||||
|
# Give it enough headroom; 16 GiB / 8 vCPU is a comfortable single-node default.
|
||||||
|
CUBE_CPU_LIMIT=8
|
||||||
|
CUBE_MEMORY_LIMIT=16G
|
||||||
|
CUBE_CPU_RESERVATION=2
|
||||||
|
CUBE_MEMORY_RESERVATION=8G
|
||||||
@@ -0,0 +1,134 @@
|
|||||||
|
# CubeSandbox in a privileged systemd+DinD container.
|
||||||
|
#
|
||||||
|
# CubeSandbox's official install.sh is designed for bare metal / VMs and
|
||||||
|
# requires a running systemd (it registers all services as systemd units).
|
||||||
|
# This image therefore runs systemd as PID 1 rather than tini.
|
||||||
|
#
|
||||||
|
# UBUNTU_IMAGE may be overridden to use a regional mirror, e.g.:
|
||||||
|
# docker.m.daocloud.io/library/ubuntu:22.04 (China DaoCloud mirror)
|
||||||
|
# ccr.ccs.tencentyun.com/library/ubuntu:22.04 (Tencent Cloud mirror)
|
||||||
|
ARG UBUNTU_IMAGE=ubuntu:22.04
|
||||||
|
FROM ${UBUNTU_IMAGE}
|
||||||
|
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive \
|
||||||
|
LANG=C.UTF-8 \
|
||||||
|
LC_ALL=C.UTF-8
|
||||||
|
|
||||||
|
# Core system deps + systemd as the container init system.
|
||||||
|
# deploy/one-click/install.sh requires: tar, rg (ripgrep), ss (iproute2),
|
||||||
|
# bash, curl, sed, pgrep (procps), date, docker, python3, ip (iproute2), awk (gawk).
|
||||||
|
# Plus DinD prerequisites: iptables, ca-certificates, gnupg.
|
||||||
|
# Plus xfsprogs for the XFS-backed /data/cubelet (install.sh hard requirement).
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
systemd \
|
||||||
|
systemd-sysv \
|
||||||
|
dbus \
|
||||||
|
ca-certificates \
|
||||||
|
curl \
|
||||||
|
gnupg \
|
||||||
|
lsb-release \
|
||||||
|
bash \
|
||||||
|
tar \
|
||||||
|
ripgrep \
|
||||||
|
iproute2 \
|
||||||
|
procps \
|
||||||
|
gawk \
|
||||||
|
sed \
|
||||||
|
python3 \
|
||||||
|
python3-pip \
|
||||||
|
iptables \
|
||||||
|
kmod \
|
||||||
|
xfsprogs \
|
||||||
|
e2fsprogs \
|
||||||
|
util-linux \
|
||||||
|
file \
|
||||||
|
less \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Mask systemd units that are irrelevant or will fail in a container context.
|
||||||
|
RUN for unit in \
|
||||||
|
getty@tty1.service \
|
||||||
|
apt-daily.service \
|
||||||
|
apt-daily-upgrade.service \
|
||||||
|
apt-daily.timer \
|
||||||
|
apt-daily-upgrade.timer \
|
||||||
|
motd-news.service \
|
||||||
|
motd-news.timer \
|
||||||
|
systemd-networkd.service \
|
||||||
|
systemd-networkd-wait-online.service \
|
||||||
|
systemd-udevd.service \
|
||||||
|
systemd-udevd-control.socket \
|
||||||
|
systemd-udevd-kernel.socket \
|
||||||
|
systemd-logind.service \
|
||||||
|
e2scrub_reap.service \
|
||||||
|
apparmor.service; do \
|
||||||
|
ln -sf /dev/null "/etc/systemd/system/${unit}"; \
|
||||||
|
done
|
||||||
|
|
||||||
|
# Install Docker CE + Compose plugin from the official Docker apt repository.
|
||||||
|
RUN install -m 0755 -d /etc/apt/keyrings \
|
||||||
|
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
|
||||||
|
| gpg --dearmor -o /etc/apt/keyrings/docker.gpg \
|
||||||
|
&& chmod a+r /etc/apt/keyrings/docker.gpg \
|
||||||
|
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \
|
||||||
|
https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo $VERSION_CODENAME) stable" \
|
||||||
|
> /etc/apt/sources.list.d/docker.list \
|
||||||
|
&& apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends \
|
||||||
|
docker-ce \
|
||||||
|
docker-ce-cli \
|
||||||
|
containerd.io \
|
||||||
|
docker-buildx-plugin \
|
||||||
|
docker-compose-plugin \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Configure Docker daemon defaults.
|
||||||
|
RUN mkdir -p /etc/docker && printf '%s\n' \
|
||||||
|
'{' \
|
||||||
|
' "log-driver": "json-file",' \
|
||||||
|
' "log-opts": { "max-size": "50m", "max-file": "3" },' \
|
||||||
|
' "storage-driver": "overlay2"' \
|
||||||
|
'}' > /etc/docker/daemon.json
|
||||||
|
|
||||||
|
# Install E2B Python SDK so smoke tests can run from inside the container
|
||||||
|
# without polluting the WSL2 host with pip packages.
|
||||||
|
RUN pip3 install --no-cache-dir --break-system-packages \
|
||||||
|
e2b-code-interpreter==1.0.* \
|
||||||
|
requests \
|
||||||
|
|| pip3 install --no-cache-dir \
|
||||||
|
e2b-code-interpreter==1.0.* \
|
||||||
|
requests
|
||||||
|
|
||||||
|
# Persistent locations the installer writes to.
|
||||||
|
VOLUME ["/var/lib/docker", "/data", "/usr/local/services/cubetoolbox"]
|
||||||
|
|
||||||
|
# Helper scripts for the bootstrap flow.
|
||||||
|
COPY cube-init.sh /usr/local/bin/cube-init.sh
|
||||||
|
COPY cube-xfs-setup.sh /usr/local/bin/cube-xfs-setup.sh
|
||||||
|
COPY cube-install.sh /usr/local/bin/cube-install.sh
|
||||||
|
RUN chmod +x \
|
||||||
|
/usr/local/bin/cube-init.sh \
|
||||||
|
/usr/local/bin/cube-xfs-setup.sh \
|
||||||
|
/usr/local/bin/cube-install.sh
|
||||||
|
|
||||||
|
# Systemd service units for the CubeSandbox bootstrap sequence.
|
||||||
|
COPY cube-xfs-mount.service /etc/systemd/system/cube-xfs-mount.service
|
||||||
|
COPY cube-install.service /etc/systemd/system/cube-install.service
|
||||||
|
|
||||||
|
# Enable services by creating the wanted-by symlinks that systemctl enable
|
||||||
|
# would create (systemctl cannot run during a Docker image build).
|
||||||
|
RUN mkdir -p /etc/systemd/system/multi-user.target.wants \
|
||||||
|
&& ln -sf /etc/systemd/system/cube-xfs-mount.service \
|
||||||
|
/etc/systemd/system/multi-user.target.wants/cube-xfs-mount.service \
|
||||||
|
&& ln -sf /etc/systemd/system/cube-install.service \
|
||||||
|
/etc/systemd/system/multi-user.target.wants/cube-install.service \
|
||||||
|
&& ln -sf /lib/systemd/system/docker.service \
|
||||||
|
/etc/systemd/system/multi-user.target.wants/docker.service \
|
||||||
|
&& ln -sf /lib/systemd/system/containerd.service \
|
||||||
|
/etc/systemd/system/multi-user.target.wants/containerd.service
|
||||||
|
|
||||||
|
# cube-init.sh captures CUBE_* and TZ env vars from the container runtime
|
||||||
|
# into /etc/cube-sandbox.env (readable by systemd EnvironmentFile=), then
|
||||||
|
# execs /lib/systemd/systemd as PID 1.
|
||||||
|
ENTRYPOINT ["/usr/local/bin/cube-init.sh"]
|
||||||
|
CMD ["/lib/systemd/systemd"]
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
# CubeSandbox
|
||||||
|
|
||||||
|
Run [TencentCloud CubeSandbox](https://github.com/TencentCloud/CubeSandbox) — a KVM-based MicroVM sandbox compatible with the E2B SDK — entirely inside a single privileged Docker container, without modifying the host system.
|
||||||
|
|
||||||
|
## Why this is unusual
|
||||||
|
|
||||||
|
CubeSandbox is **not** a containerized project upstream. Its core components (Cubelet, network-agent, cube-shim, cube-runtime, CubeAPI, CubeMaster) ship as host binaries and the official `install.sh` writes them to `/usr/local/services/cubetoolbox`, then starts them as native processes that talk to the host containerd.
|
||||||
|
|
||||||
|
This stack runs the **entire installer inside one privileged container** that:
|
||||||
|
|
||||||
|
1. Runs its own `dockerd` (Docker-in-Docker) for MySQL / Redis / CubeProxy / CoreDNS dependencies.
|
||||||
|
2. Creates an XFS-formatted loop volume at `/data/cubelet` (install.sh hard-requires XFS).
|
||||||
|
3. Executes the upstream [`online-install.sh`](https://github.com/TencentCloud/CubeSandbox/blob/master/deploy/one-click/online-install.sh) on first boot.
|
||||||
|
4. Tails logs to keep the container alive.
|
||||||
|
|
||||||
|
The result is essentially a **single-node CubeSandbox appliance container** suitable for evaluating CubeSandbox without changing your host.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- Built on Ubuntu 22.04 (the project's primary test environment)
|
||||||
|
- Self-contained: no host packages installed, no host paths mounted
|
||||||
|
- KVM passed through via `/dev/kvm`
|
||||||
|
- Persistent volumes for installed binaries, sandbox data, and DinD storage
|
||||||
|
- Health check covering CubeAPI, CubeMaster, and network-agent
|
||||||
|
- China-mainland mirror (`MIRROR=cn`) used by default
|
||||||
|
- Smoke-test script included (`smoke-test.sh`)
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Linux host (or WSL2 with KVM passthrough) with `/dev/kvm` available to Docker
|
||||||
|
- Nested virtualization enabled (Intel VT-x / AMD-V exposed)
|
||||||
|
- cgroup v2 (modern kernels — Debian 12+, Ubuntu 22.04+, kernel 5.10+)
|
||||||
|
- ≥ 16 GiB RAM and ≥ 8 vCPU recommended (8 GiB is the upstream minimum)
|
||||||
|
- ≥ 60 GiB free disk for the XFS loop file + Docker image layers
|
||||||
|
- Outbound internet to download the install bundle (~hundreds of MB) and Docker images
|
||||||
|
|
||||||
|
> On WSL2: confirm `/dev/kvm` is present (`ls -l /dev/kvm`) and your user is in the `kvm` group on the host distro.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
1. Copy the example environment file (optional — defaults work):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Build and start (the first run downloads the CubeSandbox bundle and several Docker images — expect 5–20 minutes):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Watch the bootstrap log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose logs -f cube-sandbox
|
||||||
|
```
|
||||||
|
|
||||||
|
Wait for the `==================== CubeSandbox is up ====================` banner.
|
||||||
|
|
||||||
|
4. Verify all services are healthy:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://127.0.0.1:3000/health && echo # CubeAPI
|
||||||
|
curl -fsS http://127.0.0.1:8089/notify/health && echo # CubeMaster
|
||||||
|
curl -fsS http://127.0.0.1:19090/healthz && echo # network-agent
|
||||||
|
```
|
||||||
|
|
||||||
|
5. (Optional) Run the smoke test:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash smoke-test.sh # Health checks only
|
||||||
|
SKIP_TEMPLATE_BUILD=1 bash smoke-test.sh # Skip the slow template build
|
||||||
|
```
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
Because the container uses `network_mode: host`, all CubeSandbox HTTP endpoints are reachable directly on the host loopback:
|
||||||
|
|
||||||
|
| Service | URL |
|
||||||
|
| ------------- | ------------------------------------ |
|
||||||
|
| CubeAPI | `http://127.0.0.1:3000` |
|
||||||
|
| CubeMaster | `http://127.0.0.1:8089` |
|
||||||
|
| network-agent | `http://127.0.0.1:19090` |
|
||||||
|
|
||||||
|
The CubeAPI exposes the E2B-compatible REST surface; point the [`e2b` Python SDK](https://e2b.dev) at `http://127.0.0.1:3000` to create sandboxes.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Key environment variables (see `.env.example` for the full list):
|
||||||
|
|
||||||
|
| Variable | Description | Default |
|
||||||
|
| -------------------------- | ------------------------------------------------------------ | ---------------- |
|
||||||
|
| `GLOBAL_REGISTRY` | Image registry prefix when pushing to a private registry | _(empty)_ |
|
||||||
|
| `CUBE_SANDBOX_VERSION` | Tag of the locally built wrapper image | `0.1.7` |
|
||||||
|
| `UBUNTU_IMAGE` | Base Ubuntu version | `22.04` |
|
||||||
|
| `TZ` | Container timezone | `Asia/Shanghai` |
|
||||||
|
| `CUBE_MIRROR` | Installer mirror — `cn` (China CDN) or `gh` (GitHub) | `cn` |
|
||||||
|
| `CUBE_XFS_SIZE` | Size of the XFS loop file backing `/data/cubelet` | `50G` |
|
||||||
|
| `CUBE_FORCE_REINSTALL` | Set to `1` to re-run `install.sh` on next start | `0` |
|
||||||
|
| `CUBE_CPU_LIMIT` | CPU limit | `8` |
|
||||||
|
| `CUBE_MEMORY_LIMIT` | Memory limit | `16G` |
|
||||||
|
| `CUBE_CPU_RESERVATION` | CPU reservation | `2` |
|
||||||
|
| `CUBE_MEMORY_RESERVATION` | Memory reservation | `8G` |
|
||||||
|
|
||||||
|
## Storage
|
||||||
|
|
||||||
|
Three named volumes hold persistent state — your installed CubeSandbox survives `docker compose down && up`:
|
||||||
|
|
||||||
|
| Volume | Path inside container | Purpose |
|
||||||
|
| --------------- | ----------------------------------- | -------------------------------------------------- |
|
||||||
|
| `cube_dind_data` | `/var/lib/docker` | DinD daemon images / containers / volumes |
|
||||||
|
| `cube_data` | `/data` | XFS loop image, `/data/cubelet`, sandbox disks, logs |
|
||||||
|
| `cube_toolbox` | `/usr/local/services/cubetoolbox` | Installed CubeSandbox binaries and scripts |
|
||||||
|
|
||||||
|
To wipe everything and reinstall from scratch:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose down -v
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Considerations
|
||||||
|
|
||||||
|
⚠️ This stack is **highly privileged by design**. Only run it in trusted environments.
|
||||||
|
|
||||||
|
- `privileged: true` — required to mount the XFS loop volume, manage TAP interfaces, and run KVM
|
||||||
|
- `network_mode: host` — required so Cubelet can register the node IP and manage host TAP interfaces
|
||||||
|
- `cgroup: host` — required for the in-container `dockerd` to share the host's cgroup v2 hierarchy
|
||||||
|
- `/dev/kvm` and `/dev/net/tun` are passed through
|
||||||
|
|
||||||
|
These permissions are equivalent to what `online-install.sh` would request if it were run directly on your host. The advantage of the container wrapper is that all installer side-effects are confined to the three named volumes above, so removing the stack leaves no host residue.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
- **`/dev/kvm not found`** — the host does not expose KVM to Docker. On WSL2, confirm nested virtualization is enabled and the kernel exposes `/dev/kvm`. On bare metal, ensure VT-x / AMD-V is enabled in BIOS.
|
||||||
|
- **First boot hangs at "Running CubeSandbox one-click installer"** — the installer is downloading the bundle (~hundreds of MB) and pulling several Docker images. Check progress with `docker compose logs -f cube-sandbox`.
|
||||||
|
- **`quickcheck.sh reported issues`** — open a shell in the container and inspect logs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec cube-sandbox bash
|
||||||
|
ls /data/log/
|
||||||
|
tail -f /data/log/CubeAPI/*.log
|
||||||
|
```
|
||||||
|
- **Re-run the installer cleanly** — set `CUBE_FORCE_REINSTALL=1` in `.env` and `docker compose up -d --force-recreate`.
|
||||||
|
|
||||||
|
## Project Information
|
||||||
|
|
||||||
|
- Upstream: https://github.com/TencentCloud/CubeSandbox
|
||||||
|
- License: upstream project is Apache-2.0; this configuration is provided as-is for the Compose Anything project.
|
||||||
@@ -0,0 +1,151 @@
|
|||||||
|
# CubeSandbox
|
||||||
|
|
||||||
|
在单个特权 Docker 容器内完整运行 [腾讯云 CubeSandbox](https://github.com/TencentCloud/CubeSandbox)——一个基于 KVM、兼容 E2B SDK 的 MicroVM 沙箱——无需修改宿主系统。
|
||||||
|
|
||||||
|
## 为什么这个栈与众不同
|
||||||
|
|
||||||
|
CubeSandbox 上游**并不是**一个容器化项目。它的核心组件(Cubelet、network-agent、cube-shim、cube-runtime、CubeAPI、CubeMaster)以宿主机二进制形式分发,官方 `install.sh` 会把它们写入 `/usr/local/services/cubetoolbox`,然后作为本机进程启动并与宿主 containerd 集成。
|
||||||
|
|
||||||
|
本栈把**整个安装器塞进一个特权容器**:
|
||||||
|
|
||||||
|
1. 容器内自起一个 `dockerd`(Docker-in-Docker),用于运行 MySQL / Redis / CubeProxy / CoreDNS 等依赖。
|
||||||
|
2. 在 `/data/cubelet` 创建一个 XFS 格式的 loop 卷(install.sh 强制要求 XFS)。
|
||||||
|
3. 首次启动时执行上游的 [`online-install.sh`](https://github.com/TencentCloud/CubeSandbox/blob/master/deploy/one-click/online-install.sh)。
|
||||||
|
4. 通过 tail 日志保持容器存活。
|
||||||
|
|
||||||
|
最终得到一个**单节点 CubeSandbox 一体化容器**,方便在不改动宿主的前提下评估 CubeSandbox。
|
||||||
|
|
||||||
|
## 特性
|
||||||
|
|
||||||
|
- 基于 Ubuntu 22.04(项目主要测试环境)
|
||||||
|
- 自包含:不安装宿主机软件包,不挂载宿主路径
|
||||||
|
- 通过 `/dev/kvm` 透传 KVM
|
||||||
|
- 三个持久化命名卷分别保存安装产物、沙箱数据和 DinD 存储
|
||||||
|
- 健康检查覆盖 CubeAPI、CubeMaster、network-agent
|
||||||
|
- 默认使用国内镜像 (`MIRROR=cn`)
|
||||||
|
- 内置冒烟测试脚本(`smoke-test.sh`)
|
||||||
|
|
||||||
|
## 环境要求
|
||||||
|
|
||||||
|
- Linux 宿主(或开启 KVM 透传的 WSL2),`/dev/kvm` 对 Docker 可见
|
||||||
|
- 已开启嵌套虚拟化(暴露 Intel VT-x / AMD-V)
|
||||||
|
- cgroup v2(现代内核——Debian 12+、Ubuntu 22.04+、kernel 5.10+)
|
||||||
|
- 推荐 ≥ 16 GiB 内存、≥ 8 vCPU(上游最低 8 GiB)
|
||||||
|
- 至少 60 GiB 空闲磁盘,用于 XFS loop 文件 + Docker 镜像层
|
||||||
|
- 可访问外网,用于下载安装包(数百 MB)和 Docker 镜像
|
||||||
|
|
||||||
|
> WSL2 用户:先确认 `/dev/kvm` 存在(`ls -l /dev/kvm`),并且当前用户在宿主发行版的 `kvm` 组中。
|
||||||
|
|
||||||
|
## 快速开始
|
||||||
|
|
||||||
|
1. 复制示例环境文件(可选,默认值即可使用):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
2. 构建并启动(首次运行会下载 CubeSandbox 安装包和若干 Docker 镜像,预计 5-20 分钟):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
3. 观察启动日志:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose logs -f cube-sandbox
|
||||||
|
```
|
||||||
|
|
||||||
|
等待出现 `==================== CubeSandbox is up ====================` 横幅。
|
||||||
|
|
||||||
|
4. 验证所有服务健康:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -fsS http://127.0.0.1:3000/health && echo # CubeAPI
|
||||||
|
curl -fsS http://127.0.0.1:8089/notify/health && echo # CubeMaster
|
||||||
|
curl -fsS http://127.0.0.1:19090/healthz && echo # network-agent
|
||||||
|
```
|
||||||
|
|
||||||
|
5. (可选)运行冒烟测试:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash smoke-test.sh # 仅做健康检查
|
||||||
|
SKIP_TEMPLATE_BUILD=1 bash smoke-test.sh # 跳过较慢的模板构建步骤
|
||||||
|
```
|
||||||
|
|
||||||
|
## 服务端点
|
||||||
|
|
||||||
|
由于容器使用 `network_mode: host`,CubeSandbox 的所有 HTTP 端点都直接暴露在宿主回环地址上:
|
||||||
|
|
||||||
|
| 服务 | URL |
|
||||||
|
| ------------- | ------------------------------------ |
|
||||||
|
| CubeAPI | `http://127.0.0.1:3000` |
|
||||||
|
| CubeMaster | `http://127.0.0.1:8089` |
|
||||||
|
| network-agent | `http://127.0.0.1:19090` |
|
||||||
|
|
||||||
|
CubeAPI 暴露兼容 E2B 的 REST 接口;将 [`e2b` Python SDK](https://e2b.dev) 指向 `http://127.0.0.1:3000` 即可创建沙箱。
|
||||||
|
|
||||||
|
## 配置项
|
||||||
|
|
||||||
|
主要环境变量(完整列表见 `.env.example`):
|
||||||
|
|
||||||
|
| 变量 | 描述 | 默认值 |
|
||||||
|
| -------------------------- | --------------------------------------------------- | --------------- |
|
||||||
|
| `GLOBAL_REGISTRY` | 推送到私有仓库时使用的镜像前缀 | _(空)_ |
|
||||||
|
| `CUBE_SANDBOX_VERSION` | 本地构建的封装镜像 tag | `0.1.7` |
|
||||||
|
| `UBUNTU_IMAGE` | 基础 Ubuntu 版本 | `22.04` |
|
||||||
|
| `TZ` | 容器时区 | `Asia/Shanghai` |
|
||||||
|
| `CUBE_MIRROR` | 安装器镜像源——`cn`(国内 CDN)或 `gh`(GitHub) | `cn` |
|
||||||
|
| `CUBE_XFS_SIZE` | `/data/cubelet` 背后 XFS loop 文件大小 | `50G` |
|
||||||
|
| `CUBE_FORCE_REINSTALL` | 设为 `1` 时下次启动会重跑 `install.sh` | `0` |
|
||||||
|
| `CUBE_CPU_LIMIT` | CPU 上限 | `8` |
|
||||||
|
| `CUBE_MEMORY_LIMIT` | 内存上限 | `16G` |
|
||||||
|
| `CUBE_CPU_RESERVATION` | CPU 预留 | `2` |
|
||||||
|
| `CUBE_MEMORY_RESERVATION` | 内存预留 | `8G` |
|
||||||
|
|
||||||
|
## 存储
|
||||||
|
|
||||||
|
三个命名卷保存所有持久化状态——`docker compose down && up` 不会丢失安装:
|
||||||
|
|
||||||
|
| 卷 | 容器内路径 | 用途 |
|
||||||
|
| ---------------- | ----------------------------------- | --------------------------------------------------- |
|
||||||
|
| `cube_dind_data` | `/var/lib/docker` | DinD 守护进程的镜像 / 容器 / 卷 |
|
||||||
|
| `cube_data` | `/data` | XFS loop 文件、`/data/cubelet`、沙箱磁盘、日志 |
|
||||||
|
| `cube_toolbox` | `/usr/local/services/cubetoolbox` | 已安装的 CubeSandbox 二进制和脚本 |
|
||||||
|
|
||||||
|
完全清空并从头重装:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose down -v
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
## 安全说明
|
||||||
|
|
||||||
|
⚠️ 本栈**按设计是高特权的**,仅在受信环境中使用。
|
||||||
|
|
||||||
|
- `privileged: true`——挂载 XFS loop 卷、管理 TAP 接口、运行 KVM 所必需
|
||||||
|
- `network_mode: host`——Cubelet 注册节点 IP、管理宿主 TAP 接口所必需
|
||||||
|
- `cgroup: host`——容器内的 `dockerd` 共享宿主 cgroup v2 层级所必需
|
||||||
|
- 透传 `/dev/kvm` 和 `/dev/net/tun`
|
||||||
|
|
||||||
|
这些权限等同于直接在宿主上运行 `online-install.sh` 所需的权限。容器封装的好处在于:所有安装副作用都被限制在上述三个命名卷内,删除本栈不会在宿主上留下任何残留。
|
||||||
|
|
||||||
|
## 故障排查
|
||||||
|
|
||||||
|
- **`/dev/kvm not found`**:宿主未对 Docker 暴露 KVM。WSL2 用户请确认嵌套虚拟化已启用且内核暴露 `/dev/kvm`;裸金属用户请在 BIOS 中启用 VT-x / AMD-V。
|
||||||
|
- **首次启动卡在 "Running CubeSandbox one-click installer"**:安装器正在下载安装包(数百 MB)并拉取若干 Docker 镜像。用 `docker compose logs -f cube-sandbox` 查看进度。
|
||||||
|
- **`quickcheck.sh reported issues`**:进入容器查看日志:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec cube-sandbox bash
|
||||||
|
ls /data/log/
|
||||||
|
tail -f /data/log/CubeAPI/*.log
|
||||||
|
```
|
||||||
|
|
||||||
|
- **干净重跑安装**:在 `.env` 中设置 `CUBE_FORCE_REINSTALL=1`,然后 `docker compose up -d --force-recreate`。
|
||||||
|
|
||||||
|
## 项目信息
|
||||||
|
|
||||||
|
- 上游项目:https://github.com/TencentCloud/CubeSandbox
|
||||||
|
- 许可证:上游项目采用 Apache-2.0;本配置以 as-is 形式提供给 Compose Anything 项目使用。
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/env bash
# Thin PID-1 wrapper: snapshot container runtime env vars into a file that
# systemd EnvironmentFile= can read, prepare two mounts, then exec systemd
# (or whatever CMD was given) as PID 1.
#
# Runs BEFORE systemd, so it must stay minimal and must not depend on any
# CubeSandbox service being available.
set -euo pipefail

# Snapshot CUBE_* and TZ vars into /etc/cube-sandbox.env so that
# cube-xfs-mount.service and cube-install.service can load them via
# EnvironmentFile=/etc/cube-sandbox.env.
capture_runtime_env() {
  install -m 0644 /dev/null /etc/cube-sandbox.env
  printenv | grep -E '^(CUBE_|TZ=)' >> /etc/cube-sandbox.env 2>/dev/null || true
}

# Mount the BPF filesystem required by network-agent eBPF map pinning.
# /sys/fs/bpf is not auto-mounted in Docker containers even when the kernel
# supports BPF; without it network-agent crashes on startup with
# "not on a bpf filesystem" and then a nil-pointer panic.
mount_bpf_fs() {
  if ! mountpoint -q /sys/fs/bpf 2>/dev/null; then
    mkdir -p /sys/fs/bpf
    mount -t bpf none /sys/fs/bpf 2>/dev/null \
      || echo "[cube-init] WARNING: could not mount BPF filesystem; network-agent may fail" >&2
  fi
}

# Redirect CubeMaster's rootfs artifact workspace to the persistent data
# volume. Template builds export the sandbox image into a tar (often > 2 GB)
# before converting it to an ext4 disk image; /tmp is only a 2 GB tmpfs wiped
# on every restart, while /data (a named Docker volume) has 50+ GB and is
# persistent.
#
# A bind mount is used instead of a symlink: CubeMaster's Go startup code
# calls os.RemoveAll + os.MkdirAll on this path, which would silently replace
# a symlink with a real tmpfs directory. A bind-mount point returns EBUSY on
# removal, keeping the mount intact so all writes land on /data.
bind_artifact_workspace() {
  mkdir -p /data/cubemaster-rootfs-artifacts
  mkdir -p /tmp/cubemaster-rootfs-artifacts
  if ! mountpoint -q /tmp/cubemaster-rootfs-artifacts 2>/dev/null; then
    mount --bind /data/cubemaster-rootfs-artifacts /tmp/cubemaster-rootfs-artifacts \
      || echo "[cube-init] WARNING: bind mount for cubemaster-rootfs-artifacts failed; writes may fill tmpfs" >&2
  fi
}

capture_runtime_env
mount_bpf_fs
bind_artifact_workspace

# Hand off to systemd (or whatever CMD was passed to the container).
exec "$@"
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
[Unit]
Description=CubeSandbox one-click installer
# Requires both the XFS volume and dockerd to be ready before running.
# install.sh will pull Docker images (MySQL, Redis, CubeProxy, CoreDNS)
# and then register Cubelet / CubeAPI / CubeMaster / network-agent as
# systemd units via `systemctl enable --now`.
After=docker.service cube-xfs-mount.service
Requires=docker.service cube-xfs-mount.service

[Service]
# oneshot + RemainAfterExit: the unit stays "active" after the install
# script finishes, so dependents see a stable state.
Type=oneshot
RemainAfterExit=yes
# Leading "-" means: silently skip if the env file does not exist yet.
EnvironmentFile=-/etc/cube-sandbox.env
ExecStart=/usr/local/bin/cube-install.sh
# First boot downloads ~400 MB + pulls several Docker images; allow 30 min.
TimeoutStartSec=1800
# Retry on transient network failures (e.g. download interrupted).
# NOTE(review): some systemd versions refuse Restart= values other than "no"
# for Type=oneshot services (notably when combined with RemainAfterExit=yes)
# — verify this unit loads cleanly on the target systemd version.
Restart=on-failure
RestartSec=30s
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target
|
||||||
@@ -0,0 +1,160 @@
|
|||||||
|
#!/usr/bin/env bash
# Run the CubeSandbox one-click installer, then run quickcheck.sh.
# Called by cube-install.service (Type=oneshot) after docker.service and
# cube-xfs-mount.service are both active.
set -euo pipefail

# Prefixed loggers: log() to stdout, err() to stderr (both end up in journald).
log() { printf '[cube-install] %s\n' "$*"; }
err() { printf '[cube-install] ERROR: %s\n' "$*" >&2; }

INSTALL_PREFIX="/usr/local/services/cubetoolbox"
QUICKCHECK="${INSTALL_PREFIX}/scripts/one-click/quickcheck.sh"
UP_SCRIPT="${INSTALL_PREFIX}/scripts/one-click/up-with-deps.sh"
# cn = cnb.cool mirror (fast in mainland China), anything else = GitHub raw.
MIRROR="${CUBE_MIRROR:-cn}"
INSTALLER_URL_CN="https://cnb.cool/CubeSandbox/CubeSandbox/-/git/raw/master/deploy/one-click/online-install.sh"
INSTALLER_URL_GH="https://github.com/tencentcloud/CubeSandbox/raw/master/deploy/one-click/online-install.sh"

# /dev/kvm sanity — required by the MicroVM hypervisor.
if [ ! -c /dev/kvm ]; then
  err "/dev/kvm is not available inside the container."
  err "Ensure the compose stack passes --device /dev/kvm and nested virt is enabled on the host."
  exit 1
fi
log "KVM device present: $(ls -l /dev/kvm)"

# Wait for dockerd (started by docker.service) to be ready before install.sh
# tries to pull MySQL / Redis / CubeProxy images. 60 attempts x 2 s = 120 s.
log "Waiting for docker daemon ..."
for i in $(seq 1 60); do
  if docker info >/dev/null 2>&1; then
    log "docker ready."
    break
  fi
  sleep 2
done
if ! docker info >/dev/null 2>&1; then
  err "docker daemon not ready after 120 s"
  exit 1
fi

# Redirect TMPDIR to the 50 GB XFS volume.
# /tmp is only 256 MB (tmpfs) and mounted noexec — both cause install failures:
#   - curl: (23) Failure writing output to destination (out of space)
#   - extracted scripts fail to execute (noexec mount flag)
mkdir -p /data/tmp
export TMPDIR=/data/tmp
log "TMPDIR set to $TMPDIR ($(df -h /data/tmp | awk 'NR==2{print $4}') free)"

# Set CAROOT so mkcert can find / create the local CA directory on every boot.
# Without this, up-cube-proxy.sh calls `mkcert -install` which exits with:
#   "ERROR: failed to find the default CA location"
# Because up-with-deps.sh runs under set -euo pipefail, that failure aborts
# the entire script before any compute services (network-agent, CubeAPI, etc.)
# are started. Persisting the CA on /data (named volume) means the cert is
# re-used across container restarts rather than regenerated each time.
export CAROOT=/data/mkcert-ca
mkdir -p "$CAROOT"
log "CAROOT set to $CAROOT"

# Run the upstream one-click installer on first boot; on subsequent boots
# just re-launch all services via up-with-deps.sh. An executable quickcheck.sh
# is used as the "already installed" marker.
if [ -x "$QUICKCHECK" ] && [ "${CUBE_FORCE_REINSTALL:-0}" != "1" ]; then
  log "CubeSandbox already installed at $INSTALL_PREFIX — starting services."
  if [ ! -x "$UP_SCRIPT" ]; then
    err "up-with-deps.sh not found at $UP_SCRIPT — reinstall required"
    exit 1
  fi
  # Best-effort: services may still be coming up, so a non-zero exit here is
  # downgraded to a warning; quickcheck below is the real health gate.
  ONE_CLICK_TOOLBOX_ROOT="$INSTALL_PREFIX" \
    ONE_CLICK_RUNTIME_ENV_FILE="${INSTALL_PREFIX}/.one-click.env" \
    bash "$UP_SCRIPT" \
    || log "WARNING: up-with-deps.sh exited non-zero; services may still be starting"
else
  log "Running CubeSandbox one-click installer (mirror=$MIRROR) ..."
  if [ "$MIRROR" = "cn" ]; then
    curl -fsSL "$INSTALLER_URL_CN" | MIRROR=cn bash
  else
    curl -fsSL "$INSTALLER_URL_GH" | bash
  fi
fi

# Run quickcheck.sh with retries — network-agent initialises 500 tap interfaces
# which takes ~2 minutes; we retry every 30 s for up to 10 minutes.
QUICKCHECK_PASSED=0
if [ -x "$QUICKCHECK" ]; then
  log "Running quickcheck.sh (retrying up to 10 min for network-agent tap init) ..."
  for i in $(seq 1 20); do
    if ONE_CLICK_TOOLBOX_ROOT="$INSTALL_PREFIX" \
      ONE_CLICK_RUNTIME_ENV_FILE="${INSTALL_PREFIX}/.one-click.env" \
      "$QUICKCHECK" 2>&1; then
      QUICKCHECK_PASSED=1
      break
    fi
    log "quickcheck attempt $i/20 failed — retrying in 30 s ..."
    sleep 30
  done
else
  err "quickcheck.sh not found at $QUICKCHECK — install may have failed."
  exit 1
fi

if [ "$QUICKCHECK_PASSED" != "1" ]; then
  err "quickcheck.sh never passed after 20 attempts — CubeSandbox is unhealthy."
  exit 1
fi

# Ensure containerd-shim-cube-rs is on Cubelet's clean PATH.
# up.sh/up-with-deps.sh launch Cubelet with:
#   PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Cubelet resolves runtime shims from that PATH, so it cannot find
# containerd-shim-cube-rs unless it is symlinked into one of those dirs.
# We create the symlink unconditionally on every boot (both after fresh
# install and after the restart path) so Cubelet can start sandboxes.
SHIM_SRC="${INSTALL_PREFIX}/cube-shim/bin/containerd-shim-cube-rs"
SHIM_DST="/usr/local/bin/containerd-shim-cube-rs"
if [ -x "$SHIM_SRC" ]; then
  ln -sf "$SHIM_SRC" "$SHIM_DST"
  log "containerd-shim-cube-rs linked: $SHIM_DST -> $SHIM_SRC"
else
  log "WARNING: $SHIM_SRC not found — Cubelet will not be able to start MicroVMs"
fi

# Restart Cubelet now that network-agent is confirmed ready.
# On first startup the Cubelet process begins before network-agent has finished
# initialising its 500 TAP interfaces (~2 min). This causes the
# io.cubelet.images-service.v1 plugin to fail with:
#   "network-agent health check failed ... context deadline exceeded"
# leaving the gRPC cubelet.services.images.v1.Images service unregistered.
# When CubeMaster later tries to distribute a template artifact to the node it
# gets back gRPC Unimplemented and the build fails.
# Restarting Cubelet here — after quickcheck has confirmed network-agent is up —
# allows the images-service plugin to load successfully on the second boot.
CUBELET_BIN="${INSTALL_PREFIX}/Cubelet/bin/cubelet"
CUBELET_CFG="${INSTALL_PREFIX}/Cubelet/config/config.toml"
CUBELET_DYN="${INSTALL_PREFIX}/Cubelet/dynamicconf/conf.yaml"
CUBELET_LOG="/data/log/Cubelet/Cubelet-req.log"

if [ -x "$CUBELET_BIN" ]; then
  log "Restarting Cubelet so images-service plugin loads against ready network-agent ..."
  # NOTE(review): pkill -f matches anywhere in the full command line; any
  # process whose cmdline contains this path is killed — confirm no unrelated
  # matches exist on the node.
  pkill -f "${CUBELET_BIN}" 2>/dev/null || true
  sleep 2
  mkdir -p "$(dirname "$CUBELET_LOG")"
  # NOTE(review): the restarted Cubelet runs under nohup, outside systemd
  # supervision — presumably acceptable for this stack; confirm a crash later
  # is surfaced by the compose healthcheck.
  nohup "$CUBELET_BIN" \
    --config "$CUBELET_CFG" \
    --dynamic-conf-path "$CUBELET_DYN" \
    >>"$CUBELET_LOG" 2>&1 &
  CUBELET_PID=$!
  log "Cubelet restarted (PID ${CUBELET_PID}) — waiting 10 s for boot ..."
  sleep 10
  if kill -0 "$CUBELET_PID" 2>/dev/null; then
    log "Cubelet is running."
  else
    log "WARNING: Cubelet PID ${CUBELET_PID} exited — check ${CUBELET_LOG}."
  fi
fi

log "==================== CubeSandbox is up ===================="
log " CubeAPI: http://127.0.0.1:3000/health"
log " CubeMaster: http://127.0.0.1:8089/notify/health"
log " network-agent http://127.0.0.1:19090/healthz"
log " Logs: /data/log/{CubeAPI,CubeMaster,Cubelet}/"
log "==========================================================="
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
[Unit]
Description=CubeSandbox XFS loop volume mount
# Must run before dockerd and the installer because install.sh validates that
# /data/cubelet is an XFS filesystem before proceeding.
# DefaultDependencies=no lets this very-early unit order itself freely,
# constrained only by the explicit Before=/After= below.
DefaultDependencies=no
Before=cube-install.service docker.service
After=local-fs.target

[Service]
# oneshot + RemainAfterExit: stays "active" after the mount script exits,
# so cube-install.service's Requires= dependency is satisfied.
Type=oneshot
RemainAfterExit=yes
# Leading "-" means: silently skip if the env file does not exist yet.
EnvironmentFile=-/etc/cube-sandbox.env
ExecStart=/usr/local/bin/cube-xfs-setup.sh
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env bash
# Create and mount the XFS-formatted loop volume at /data/cubelet.
# Called by cube-xfs-mount.service (Type=oneshot) before docker.service starts.
#
# install.sh hard-requires that /data/cubelet is on an XFS filesystem;
# it validates this with `df -T /data/cubelet | grep -q xfs`.
set -euo pipefail

log() { printf '[cube-xfs] %s\n' "$*"; }

# All three knobs may be overridden via /etc/cube-sandbox.env
# (loaded through EnvironmentFile= in cube-xfs-mount.service).
CUBE_DATA_DIR="${CUBE_DATA_DIR:-/data/cubelet}"     # mount point
CUBE_XFS_IMG="${CUBE_XFS_IMG:-/data/cubelet.img}"   # backing loop file
CUBE_XFS_SIZE="${CUBE_XFS_SIZE:-50G}"               # size of the loop file

mkdir -p /data "$CUBE_DATA_DIR"

# Idempotence: if the mount point already reports XFS, a previous boot
# (or an earlier retry of this unit) has done the work.
current_fs="$(stat -fc %T "$CUBE_DATA_DIR" 2>/dev/null || echo unknown)"
if [ "$current_fs" = "xfs" ]; then
  log "Already mounted: $CUBE_DATA_DIR ($current_fs) — nothing to do."
  exit 0
fi

log "Preparing XFS loop volume at $CUBE_XFS_IMG (size=$CUBE_XFS_SIZE) ..."
if [ ! -f "$CUBE_XFS_IMG" ]; then
  # fallocate requires extent-preallocation support in the backing filesystem;
  # fall back to a sparse file via truncate where it is unsupported.
  fallocate -l "$CUBE_XFS_SIZE" "$CUBE_XFS_IMG" 2>/dev/null \
    || truncate -s "$CUBE_XFS_SIZE" "$CUBE_XFS_IMG"
  # If mkfs fails, remove the image: leaving an unformatted file behind would
  # make every later boot skip this branch and then fail at mount time.
  if ! mkfs.xfs -q -f "$CUBE_XFS_IMG"; then
    rm -f "$CUBE_XFS_IMG"
    log "ERROR: mkfs.xfs failed on $CUBE_XFS_IMG — image removed so the next boot retries."
    exit 1
  fi
  log "Formatted $CUBE_XFS_IMG as XFS."
fi

mount -o loop "$CUBE_XFS_IMG" "$CUBE_DATA_DIR"
log "Mounted $CUBE_DATA_DIR ($(stat -fc %T "$CUBE_DATA_DIR"))."
|
||||||
@@ -0,0 +1,110 @@
|
|||||||
|
# CubeSandbox running inside a privileged systemd+DinD container.
#
# WHY THIS LOOKS UNUSUAL
# ----------------------
# CubeSandbox is NOT a containerized project upstream. Its core components
# (Cubelet, network-agent, cube-shim, CubeAPI, CubeMaster) ship as host
# binaries, and the official install.sh registers them as systemd units and
# manages them with systemctl.
#
# To run it purely with Docker without modifying the WSL2 host, this stack:
#   1. Runs systemd as PID 1 inside a privileged container so that
#      install.sh can call systemctl enable / start / status normally.
#   2. Runs its own dockerd (DinD) for MySQL / Redis / CoreDNS / CubeProxy.
#   3. Mounts an XFS loop volume at /data/cubelet (install.sh hard-requires XFS).
#   4. Executes the upstream online-install.sh via cube-install.service.
#
# The /run and /run/lock paths are tmpfs so systemd can write its runtime
# state (PID files, socket files, etc.) during the container lifetime.
# stop_signal RTMIN+3 is the standard graceful-shutdown signal for systemd.

# Shared defaults, merged into each service via the YAML merge key below.
x-defaults: &defaults
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      max-size: 100m
      max-file: '3'

services:
  cube-sandbox:
    <<: *defaults
    image: ${GLOBAL_REGISTRY:-}compose-anything/cube-sandbox:${CUBE_SANDBOX_VERSION:-0.1.7}
    build:
      context: .
      dockerfile: Dockerfile
      args:
        - UBUNTU_IMAGE=${UBUNTU_IMAGE:-ubuntu:22.04}

    # CubeSandbox needs:
    #   - /dev/kvm for the MicroVM hypervisor
    #   - /dev/net/tun for cube TAP interfaces
    #   - SYS_ADMIN/NET_ADMIN to mount the XFS loop volume and create TAPs
    #   - Its own dockerd for MySQL / Redis / CubeProxy / CoreDNS
    #   - systemd as PID 1 so install.sh can register and start services
    # The simplest correct configuration is privileged + host network.
    privileged: true
    network_mode: host

    devices:
      - /dev/kvm:/dev/kvm
      - /dev/net/tun:/dev/net/tun

    # cgroupns:host lets the in-container systemd + dockerd share the host's
    # (i.e. WSL2's) cgroup v2 hierarchy directly — more reliable than private.
    cgroup: host

    # systemd needs to write its runtime state to /run; use tmpfs so it does
    # not leak across container restarts and does not consume the named volumes.
    # /tmp gets the exec flag because installer steps execute extracted scripts.
    tmpfs:
      - /run:size=100m
      - /run/lock:size=10m
      - /tmp:size=2g,exec

    # SIGRTMIN+3 is the proper graceful-shutdown signal for systemd.
    stop_signal: RTMIN+3

    environment:
      - TZ=${TZ:-Asia/Shanghai}
      # cn = pull installer + images via the cnb.cool / Tencent Cloud mirror
      # gh = pull from raw.githubusercontent.com (slower in mainland China)
      - CUBE_MIRROR=${CUBE_MIRROR:-cn}
      # Size of the XFS loop file that backs /data/cubelet
      - CUBE_XFS_SIZE=${CUBE_XFS_SIZE:-50G}
      # Set to 1 to re-run install.sh even if a previous install is detected
      - CUBE_FORCE_REINSTALL=${CUBE_FORCE_REINSTALL:-0}

    volumes:
      # DinD docker daemon storage (images for MySQL, Redis, CoreDNS, CubeProxy)
      - cube_dind_data:/var/lib/docker
      # XFS loop image + mounted /data/cubelet + cube-shim disks + logs
      - cube_data:/data
      # Installed CubeSandbox binaries & scripts
      - cube_toolbox:/usr/local/services/cubetoolbox

    # No `ports:` block — we use network_mode: host so the CubeAPI on
    # 127.0.0.1:3000 inside the container is the same socket as
    # 127.0.0.1:3000 on the WSL2 host.

    # One check covering all three daemons: CubeAPI, CubeMaster, network-agent.
    healthcheck:
      test:
        - CMD-SHELL
        - "curl -fsS http://127.0.0.1:3000/health && curl -fsS http://127.0.0.1:8089/notify/health && curl -fsS http://127.0.0.1:19090/healthz"
      interval: 30s
      timeout: 15s
      retries: 5
      start_period: 600s # First boot downloads ~400 MB + Docker images; be generous.

    deploy:
      resources:
        limits:
          cpus: '${CUBE_CPU_LIMIT:-8}'
          memory: ${CUBE_MEMORY_LIMIT:-16G}
        reservations:
          cpus: '${CUBE_CPU_RESERVATION:-2}'
          memory: ${CUBE_MEMORY_RESERVATION:-8G}

volumes:
  cube_dind_data:
  cube_data:
  cube_toolbox:
|
||||||
@@ -0,0 +1,112 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Basic E2B SDK integration test against a local CubeSandbox instance.
|
||||||
|
|
||||||
|
Runs three checks:
|
||||||
|
1. Sandbox creation (debug=True → API at http://localhost:3000)
|
||||||
|
2. Code execution and output validation
|
||||||
|
3. Sandbox teardown
|
||||||
|
|
||||||
|
Usage (inside the cube-sandbox container):
|
||||||
|
python3 /root/e2b-test.py
|
||||||
|
|
||||||
|
Exit codes:
|
||||||
|
0 all tests passed
|
||||||
|
1 any test failed
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# ANSI-colored status prefixes used by every test line this script prints.
PASS = "\033[1;32m[ OK ]\033[0m"
FAIL = "\033[1;31m[FAIL]\033[0m"
INFO = "\033[1;36m[INFO]\033[0m"


def check(label: str, cond: bool, detail: str = "") -> bool:
    """Print a colored PASS/FAIL line for *label* and return *cond* unchanged.

    On failure, *detail* (when non-empty) is appended after ``': '``.
    """
    if not cond:
        suffix = ": " + detail if detail else ""
        print(f"{FAIL} {label}{suffix}")
        return cond
    print(f"{PASS} {label}")
    return cond
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run the E2B SDK integration checks against a local CubeSandbox.

    Returns 0 when every check passed, 1 otherwise. A sandbox-creation
    failure short-circuits the remaining tests (they need a live sandbox).
    """
    ok = True

    # ------------------------------------------------------------------ #
    # 1. Import                                                          #
    # ------------------------------------------------------------------ #
    print(f"{INFO} Importing e2b_code_interpreter …")
    try:
        from e2b_code_interpreter import Sandbox  # type: ignore
    except ImportError as exc:
        print(f"{FAIL} import failed: {exc}")
        return 1
    ok &= check("e2b_code_interpreter imported", True)

    # ------------------------------------------------------------------ #
    # 2. Create sandbox                                                  #
    # ------------------------------------------------------------------ #
    print(f"\n{INFO} Creating sandbox (debug=True → http://localhost:3000) …")
    sb = None
    try:
        # debug=True makes the SDK target http://localhost:3000 instead of
        # the E2B cloud and http://localhost:<port> for the envd connection.
        sb = Sandbox(debug=True, api_key="local-test", timeout=120)
        ok &= check("Sandbox created", sb is not None, f"id={sb.sandbox_id if sb else '?'}")
        print(f"    sandbox_id = {sb.sandbox_id}")
    except Exception as exc:
        ok &= check("Sandbox created", False, str(exc))
        print(f"\n{INFO} Skipping remaining tests (sandbox creation failed)")
        # ok is False here (the check above recorded the failure), so this
        # returns 1; written as a conditional for symmetry with the exit path.
        return 0 if ok else 1

    # ------------------------------------------------------------------ #
    # 3. Execute code                                                    #
    # ------------------------------------------------------------------ #
    print(f"\n{INFO} Running code inside sandbox …")
    try:
        result = sb.run_code('print("Hello from CubeSandbox!")')
        expected = "Hello from CubeSandbox!"
        output = (result.text or "").strip()
        ok &= check("Code executed without error", not result.error,
                    str(result.error) if result.error else "")
        ok &= check("Output matches expected", output == expected,
                    f"got {output!r}")
    except Exception as exc:
        ok &= check("Code execution", False, str(exc))

    # ------------------------------------------------------------------ #
    # 4. Multi-line / stateful execution                                 #
    # ------------------------------------------------------------------ #
    # Two separate run_code calls share one kernel: x defined in the first
    # cell must be visible in the second.
    print(f"\n{INFO} Running stateful multi-cell execution …")
    try:
        sb.run_code("x = 40 + 2")
        result2 = sb.run_code("print(x)")
        output2 = (result2.text or "").strip()
        ok &= check("Stateful multi-cell execution", output2 == "42",
                    f"got {output2!r}")
    except Exception as exc:
        ok &= check("Stateful multi-cell execution", False, str(exc))

    # ------------------------------------------------------------------ #
    # 5. Kill sandbox                                                    #
    # ------------------------------------------------------------------ #
    print(f"\n{INFO} Killing sandbox …")
    try:
        sb.kill()
        ok &= check("Sandbox killed", True)
    except Exception as exc:
        ok &= check("Sandbox killed", False, str(exc))

    # ------------------------------------------------------------------ #
    # Summary                                                            #
    # ------------------------------------------------------------------ #
    print()
    if ok:
        print(f"{PASS} All E2B SDK tests passed")
    else:
        print(f"{FAIL} Some E2B SDK tests FAILED")
    return 0 if ok else 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
@@ -0,0 +1,104 @@
|
|||||||
|
#!/usr/bin/env bash
# Smoke test for a running CubeSandbox stack.
#
# Run from the WSL2 host or from inside the cube-sandbox container - both work
# because the container uses network_mode: host.
#
# Steps:
#   1. Health-check all CubeSandbox services
#   2. (Optional, slow) Build a code-interpreter template from a public image
#   3. Create a sandbox via the E2B-compatible REST API, run a tiny payload,
#      then destroy it
#
# Skip the slow template-build step with: SKIP_TEMPLATE_BUILD=1 ./smoke-test.sh
set -euo pipefail

# cubemastercli is installed to a non-standard prefix; add it to PATH so this
# script works both when run inside the container and from the WSL2 host.
export PATH="/usr/local/services/cubetoolbox/CubeMaster/bin:${PATH:-}"

CUBE_API="${CUBE_API:-http://127.0.0.1:3000}"
CUBE_MASTER="${CUBE_MASTER:-http://127.0.0.1:8089}"
CUBE_NETAGENT="${CUBE_NETAGENT:-http://127.0.0.1:19090}"

ok() { printf '\033[1;32m[ OK ]\033[0m %s\n' "$*"; }
fail() { printf '\033[1;31m[FAIL]\033[0m %s\n' "$*" >&2; exit 1; }
info() { printf '\033[1;36m[INFO]\033[0m %s\n' "$*"; }

#-------------------------------------------------------------------
# 1. Health checks (matches what install.sh's quickcheck.sh verifies)
#-------------------------------------------------------------------
info "Health: CubeAPI"
curl -fsS "${CUBE_API}/health" >/dev/null && ok "CubeAPI /health" || fail "CubeAPI /health"
echo

info "Health: CubeMaster"
curl -fsS "${CUBE_MASTER}/notify/health" >/dev/null && ok "CubeMaster /notify/health" || fail "CubeMaster /notify/health"

info "Health: network-agent"
curl -fsS "${CUBE_NETAGENT}/healthz" >/dev/null && ok "network-agent /healthz" || fail "network-agent /healthz"
curl -fsS "${CUBE_NETAGENT}/readyz" >/dev/null && ok "network-agent /readyz" || fail "network-agent /readyz"

#-------------------------------------------------------------------
# 2. Optional: build a sandbox template
#-------------------------------------------------------------------
TEMPLATE_ID="${CUBE_TEMPLATE_ID:-}"

if [ -z "$TEMPLATE_ID" ] && [ "${SKIP_TEMPLATE_BUILD:-0}" != "1" ]; then
  info "No CUBE_TEMPLATE_ID provided; building one from ccr.ccs.tencentyun.com/ags-image/sandbox-code:latest"
  info "(this can take 5-15 minutes; set SKIP_TEMPLATE_BUILD=1 to skip and only run health checks)"

  if ! command -v cubemastercli >/dev/null 2>&1; then
    # cubemastercli lives inside the container; exec into it.
    # NOTE(review): `docker compose ps` resolves the project from the current
    # directory — run this script from the stack's compose directory.
    CUBE_CTR="$(docker compose ps -q cube-sandbox 2>/dev/null || true)"
    [ -z "$CUBE_CTR" ] && fail "cube-sandbox container not running and cubemastercli not on PATH"
    CMC="docker exec -i $CUBE_CTR cubemastercli"
  else
    CMC="cubemastercli"
  fi

  JOB_OUT="$($CMC tpl create-from-image \
    --image ccr.ccs.tencentyun.com/ags-image/sandbox-code:latest \
    --writable-layer-size 1G \
    --expose-port 49999 \
    --expose-port 49983 \
    --probe 49999 2>&1)"
  echo "$JOB_OUT"
  # Extract only the id token: grep isolates "job_id<sep><id>", sed strips the
  # key and separator. (The previous awk '{print $NF}' returned the whole
  # "job_id=<id>" token for '='-separated output, and the wrong field when the
  # id was not last on the line.)
  JOB_ID="$(echo "$JOB_OUT" | grep -oE 'job_id[=: ]+[A-Za-z0-9_-]+' | head -1 | sed -E 's/^job_id[=: ]+//')"
  [ -z "$JOB_ID" ] && fail "could not parse job_id from output"
  info "Watching job $JOB_ID ..."
  $CMC tpl watch --job-id "$JOB_ID"
  # Extract template_id from the create-from-image output (it's on the first few
  # lines) rather than re-querying the list — list ordering is not guaranteed and
  # could return a FAILED entry as the last line. Same key-stripping approach
  # as job_id so '='- and ':'-separated formats both parse correctly.
  TEMPLATE_ID="$(echo "$JOB_OUT" | grep -oE 'template_id[=: ]+[A-Za-z0-9_-]+' | head -1 | sed -E 's/^template_id[=: ]+//')"
  [ -z "$TEMPLATE_ID" ] && fail "could not determine template id after build"
  ok "Template built: $TEMPLATE_ID"
elif [ -z "$TEMPLATE_ID" ]; then
  info "Skipping sandbox lifecycle test (no CUBE_TEMPLATE_ID and SKIP_TEMPLATE_BUILD=1)"
  ok "Health checks passed - CubeSandbox stack is up"
  exit 0
fi

#-------------------------------------------------------------------
# 3. Create -> inspect -> destroy a sandbox via REST
#-------------------------------------------------------------------
info "Creating sandbox from template $TEMPLATE_ID ..."
RESP="$(curl -fsS -X POST "${CUBE_API}/sandboxes" \
  -H 'Authorization: Bearer dummy' \
  -H 'Content-Type: application/json' \
  -d "{\"templateID\":\"${TEMPLATE_ID}\"}")"
SANDBOX_ID="$(echo "$RESP" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("sandboxID",""))')"
[ -z "$SANDBOX_ID" ] && fail "no sandboxID in response: $RESP"
ok "Created sandbox $SANDBOX_ID"

info "Inspecting sandbox ..."
curl -fsS "${CUBE_API}/sandboxes/${SANDBOX_ID}" -H 'Authorization: Bearer dummy' \
  | python3 -m json.tool
ok "Sandbox is queryable"

info "Destroying sandbox ..."
curl -fsS -X DELETE "${CUBE_API}/sandboxes/${SANDBOX_ID}" -H 'Authorization: Bearer dummy' >/dev/null
ok "Sandbox destroyed"

ok "All smoke tests passed"
|
||||||
@@ -28,7 +28,7 @@ services:
|
|||||||
- pingap
|
- pingap
|
||||||
- --autoreload
|
- --autoreload
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [CMD-SHELL, "bash -c 'echo > /dev/tcp/localhost/80'"]
|
test: ["CMD-SHELL", "bash -c 'echo >/dev/tcp/localhost/80' || exit 1"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
|||||||
@@ -0,0 +1,59 @@
|
|||||||
|
# TurboOCR image version
|
||||||
|
# See https://github.com/aiptimizer/TurboOCR/releases for available tags
|
||||||
|
TURBOOCR_VERSION="v2.1.1"
|
||||||
|
|
||||||
|
# Language bundle (leave empty for latin / English-default)
|
||||||
|
# Supported: latin, chinese, greek, eslav, arabic, korean, thai
|
||||||
|
TURBOOCR_LANG=""
|
||||||
|
|
||||||
|
# When TURBOOCR_LANG=chinese, set to 1 to use the 84MB PP-OCRv5 server rec
|
||||||
|
# instead of the 16MB mobile rec (higher accuracy, more VRAM)
|
||||||
|
TURBOOCR_SERVER=""
|
||||||
|
|
||||||
|
# Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto-detect
|
||||||
|
TURBOOCR_PIPELINE_POOL_SIZE=""
|
||||||
|
|
||||||
|
# Disable PP-DocLayoutV3 layout detection model (1 = disable, saves ~300-500 MB VRAM)
|
||||||
|
TURBOOCR_DISABLE_LAYOUT=0
|
||||||
|
|
||||||
|
# Default PDF extraction mode
|
||||||
|
# ocr - render + full OCR (safest, immune to text-layer attacks)
|
||||||
|
# geometric - PDFium text layer only (~10x faster, but trusts PDF content)
|
||||||
|
# auto - per-page text layer if available, else OCR
|
||||||
|
# auto_verified - OCR + cross-check against text layer
|
||||||
|
TURBOOCR_PDF_MODE="ocr"
|
||||||
|
|
||||||
|
# Skip angle classifier (1 = skip, ~0.4ms latency savings)
|
||||||
|
TURBOOCR_DISABLE_ANGLE_CLS=0
|
||||||
|
|
||||||
|
# Max detection input size in pixels
|
||||||
|
TURBOOCR_DET_MAX_SIDE=960
|
||||||
|
|
||||||
|
# PDF render parallelism
|
||||||
|
TURBOOCR_PDF_DAEMONS=16
|
||||||
|
TURBOOCR_PDF_WORKERS=4
|
||||||
|
|
||||||
|
# Maximum pages allowed per PDF request
|
||||||
|
TURBOOCR_MAX_PDF_PAGES=2000
|
||||||
|
|
||||||
|
# Log level: debug / info / warn / error
|
||||||
|
TURBOOCR_LOG_LEVEL="info"
|
||||||
|
|
||||||
|
# Log format: json (structured) / text (human-readable)
|
||||||
|
TURBOOCR_LOG_FORMAT="json"
|
||||||
|
|
||||||
|
# Host port mappings
|
||||||
|
TURBOOCR_HTTP_PORT_OVERRIDE=8000
|
||||||
|
TURBOOCR_GRPC_PORT_OVERRIDE=50051
|
||||||
|
|
||||||
|
# Resource limits
|
||||||
|
TURBOOCR_CPU_LIMIT=8.0
|
||||||
|
TURBOOCR_MEMORY_LIMIT=12G
|
||||||
|
TURBOOCR_CPU_RESERVATION=2.0
|
||||||
|
TURBOOCR_MEMORY_RESERVATION=4G
|
||||||
|
|
||||||
|
# Number of NVIDIA GPUs to reserve
|
||||||
|
TURBOOCR_GPU_COUNT=1
|
||||||
|
|
||||||
|
# Shared memory size for the container
|
||||||
|
TURBOOCR_SHM_SIZE=2g
|
||||||
@@ -0,0 +1,119 @@
|
|||||||
|
# TurboOCR
|
||||||
|
|
||||||
|
[English](./README.md) | [中文](./README.zh.md)
|
||||||
|
|
||||||
|
This service deploys [TurboOCR](https://github.com/aiptimizer/TurboOCR), a GPU-accelerated OCR server built on C++ / CUDA / TensorRT / PP-OCRv5. It exposes both an HTTP API and a gRPC API from a single binary that share the same GPU pipeline pool, with Prometheus metrics built in.
|
||||||
|
|
||||||
|
## Services
|
||||||
|
|
||||||
|
- `turboocr`: TurboOCR HTTP (port 8000) + gRPC (port 50051) inference server
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Linux host with NVIDIA driver 595 or newer
|
||||||
|
- Turing or newer GPU (RTX 20-series / GTX 16-series and up)
|
||||||
|
- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed and configured for Docker
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
| Variable Name | Description | Default Value |
|
||||||
|
| ----------------------------- | --------------------------------------------------------------------------------- | ------------- |
|
||||||
|
| `TURBOOCR_VERSION` | TurboOCR image version | `v2.1.1` |
|
||||||
|
| `TURBOOCR_LANG` | Language bundle: `latin`, `chinese`, `greek`, `eslav`, `arabic`, `korean`, `thai` | `""` (latin) |
|
||||||
|
| `TURBOOCR_SERVER` | With `chinese`, set to `1` for the 84 MB server rec | `""` |
|
||||||
|
| `TURBOOCR_PIPELINE_POOL_SIZE` | Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto | `""` |
|
||||||
|
| `TURBOOCR_DISABLE_LAYOUT` | Disable layout detection model (saves ~300-500 MB VRAM) | `0` |
|
||||||
|
| `TURBOOCR_PDF_MODE` | Default PDF mode: `ocr` / `geometric` / `auto` / `auto_verified` | `ocr` |
|
||||||
|
| `TURBOOCR_DISABLE_ANGLE_CLS` | Skip angle classifier (~0.4 ms savings) | `0` |
|
||||||
|
| `TURBOOCR_DET_MAX_SIDE` | Max detection input size in pixels | `960` |
|
||||||
|
| `TURBOOCR_PDF_DAEMONS` | PDF render daemons | `16` |
|
||||||
|
| `TURBOOCR_PDF_WORKERS` | PDF worker threads | `4` |
|
||||||
|
| `TURBOOCR_MAX_PDF_PAGES` | Maximum pages per PDF request | `2000` |
|
||||||
|
| `TURBOOCR_LOG_LEVEL` | Log level: `debug` / `info` / `warn` / `error` | `info` |
|
||||||
|
| `TURBOOCR_LOG_FORMAT` | Log format: `json` / `text` | `json` |
|
||||||
|
| `TURBOOCR_HTTP_PORT_OVERRIDE` | Host port for HTTP API | `8000` |
|
||||||
|
| `TURBOOCR_GRPC_PORT_OVERRIDE` | Host port for gRPC API | `50051` |
|
||||||
|
| `TURBOOCR_CPU_LIMIT` | CPU limit | `8.0` |
|
||||||
|
| `TURBOOCR_MEMORY_LIMIT`       | Memory limit                                                                      | `12G`         |
| `TURBOOCR_CPU_RESERVATION`    | CPU reservation                                                                   | `2.0`         |
| `TURBOOCR_MEMORY_RESERVATION` | Memory reservation                                                                | `4G`          |
|
||||||
|
| `TURBOOCR_GPU_COUNT` | Number of NVIDIA GPUs to reserve | `1` |
|
||||||
|
| `TURBOOCR_SHM_SIZE` | Shared memory size | `2g` |
|
||||||
|
|
||||||
|
Copy `.env.example` to `.env` and override only the variables you need to change.
|
||||||
|
|
||||||
|
## Volumes
|
||||||
|
|
||||||
|
- `turboocr_trt_cache`: Caches TensorRT engines built from ONNX on first start. Must be a **named** volume — a bind-mount of an empty host directory would shadow the baked-in language bundles and the server would fail to load models.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Start TurboOCR
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
The first start builds TensorRT engines from ONNX. Build time depends on your GPU: roughly 5 minutes on high-end desktop GPUs and 20–30 minutes on laptop GPUs. The container may report `unhealthy` while compilation is in progress — this is normal. Once the build finishes the server starts and the container transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds.
|
||||||
|
|
||||||
|
### Endpoints
|
||||||
|
|
||||||
|
- HTTP API: <http://localhost:8000>
|
||||||
|
- gRPC API: `localhost:50051`
|
||||||
|
- Health: <http://localhost:8000/health>
|
||||||
|
- Readiness: <http://localhost:8000/health/ready>
|
||||||
|
- Metrics (Prometheus): <http://localhost:8000/metrics>
|
||||||
|
|
||||||
|
### Test the API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Image — raw bytes (fastest path)
|
||||||
|
curl -X POST http://localhost:8000/ocr/raw \
|
||||||
|
--data-binary @document.png \
|
||||||
|
-H "Content-Type: image/png"
|
||||||
|
|
||||||
|
# Image — base64 JSON
|
||||||
|
curl -X POST http://localhost:8000/ocr \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"image":"'$(base64 -w0 document.png)'"}'
|
||||||
|
|
||||||
|
# PDF — raw bytes
|
||||||
|
curl -X POST http://localhost:8000/ocr/pdf \
|
||||||
|
--data-binary @document.pdf
|
||||||
|
|
||||||
|
# PDF with layout detection enabled
|
||||||
|
curl -X POST "http://localhost:8000/ocr/pdf?layout=1&mode=auto" \
|
||||||
|
--data-binary @document.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Important:** Use HTTP keep-alive. Sending many short-lived connections (e.g. one `curl` per request in a loop) can overwhelm the server. Standard HTTP client libraries (`requests.Session`, `aiohttp`, Go `http.Client`, etc.) reuse connections by default.
|
||||||
|
|
||||||
|
### Switching Languages
|
||||||
|
|
||||||
|
Edit `.env` and restart:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
TURBOOCR_LANG=chinese
|
||||||
|
TURBOOCR_SERVER=1 # optional: use the 84 MB Chinese server rec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
All language bundles are baked into the image at build time (SHA256-verified from the pinned PP-OCRv5 release). No runtime downloads.
|
||||||
|
|
||||||
|
## Performance Tuning
|
||||||
|
|
||||||
|
- **GPU pipelines** — set `TURBOOCR_PIPELINE_POOL_SIZE` based on available VRAM (~1.4 GB each)
|
||||||
|
- **Layout overhead** — `?layout=1` reduces throughput by ~20%; set `TURBOOCR_DISABLE_LAYOUT=1` to skip loading the model entirely
|
||||||
|
- **Shared memory** — increase `TURBOOCR_SHM_SIZE` if you process very large PDFs
|
||||||
|
|
||||||
|
## Security Notes
|
||||||
|
|
||||||
|
- The API has no authentication by default. Put a reverse proxy (nginx, Caddy) in front for production.
|
||||||
|
- The default PDF mode is `ocr`, which only trusts pixel data and is safe for untrusted PDF uploads.
|
||||||
|
- Do **not** set `TURBOOCR_PDF_MODE` to `geometric` or `auto` globally if you accept PDFs from untrusted sources — a malicious PDF can embed invisible text or remap glyphs to inject arbitrary strings into the text layer.
|
||||||
|
- Use `auto_verified` for higher accuracy on trusted documents; it cross-checks the native text layer against OCR results.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
TurboOCR is licensed under the MIT License. See the [TurboOCR GitHub repository](https://github.com/aiptimizer/TurboOCR) for details.
|
||||||
@@ -0,0 +1,119 @@
|
|||||||
|
# TurboOCR
|
||||||
|
|
||||||
|
[English](./README.md) | [中文](./README.zh.md)
|
||||||
|
|
||||||
|
此服务用于部署 [TurboOCR](https://github.com/aiptimizer/TurboOCR),一个基于 C++ / CUDA / TensorRT / PP-OCRv5 的 GPU 加速 OCR 服务器。单一二进制同时提供 HTTP 与 gRPC 两套接口,共享同一个 GPU 流水线池,并内置 Prometheus 指标。
|
||||||
|
|
||||||
|
## 服务
|
||||||
|
|
||||||
|
- `turboocr`:TurboOCR HTTP(端口 8000)+ gRPC(端口 50051)推理服务
|
||||||
|
|
||||||
|
## 运行要求
|
||||||
|
|
||||||
|
- Linux 主机,NVIDIA 驱动 595 或更高版本
|
||||||
|
- Turing 及以上架构 GPU(RTX 20 系列 / GTX 16 系列及更新)
|
||||||
|
- 已安装并配置好 [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
|
||||||
|
|
||||||
|
## 环境变量
|
||||||
|
|
||||||
|
| 变量名 | 说明 | 默认值 |
|
||||||
|
| ----------------------------- | ------------------------------------------------------------------------ | ------------- |
|
||||||
|
| `TURBOOCR_VERSION` | TurboOCR 镜像版本 | `v2.1.1` |
|
||||||
|
| `TURBOOCR_LANG` | 语言包:`latin`、`chinese`、`greek`、`eslav`、`arabic`、`korean`、`thai` | `""`(latin) |
|
||||||
|
| `TURBOOCR_SERVER` | 当 `chinese` 时,设为 `1` 使用 84 MB 服务端识别模型 | `""` |
|
||||||
|
| `TURBOOCR_PIPELINE_POOL_SIZE` | 并发 GPU 流水线数(每条约 1.4 GB 显存),留空则自动 | `""` |
|
||||||
|
| `TURBOOCR_DISABLE_LAYOUT` | 禁用版面检测模型(节省约 300-500 MB 显存) | `0` |
|
||||||
|
| `TURBOOCR_PDF_MODE` | PDF 默认模式:`ocr` / `geometric` / `auto` / `auto_verified` | `ocr` |
|
||||||
|
| `TURBOOCR_DISABLE_ANGLE_CLS` | 跳过方向分类器(约节省 0.4 ms) | `0` |
|
||||||
|
| `TURBOOCR_DET_MAX_SIDE` | 检测输入最大尺寸(像素) | `960` |
|
||||||
|
| `TURBOOCR_PDF_DAEMONS` | PDF 渲染守护进程数 | `16` |
|
||||||
|
| `TURBOOCR_PDF_WORKERS` | PDF 工作线程数 | `4` |
|
||||||
|
| `TURBOOCR_MAX_PDF_PAGES` | 单次 PDF 请求最大页数 | `2000` |
|
||||||
|
| `TURBOOCR_LOG_LEVEL` | 日志级别:`debug` / `info` / `warn` / `error` | `info` |
|
||||||
|
| `TURBOOCR_LOG_FORMAT` | 日志格式:`json` / `text` | `json` |
|
||||||
|
| `TURBOOCR_HTTP_PORT_OVERRIDE` | HTTP API 主机端口 | `8000` |
|
||||||
|
| `TURBOOCR_GRPC_PORT_OVERRIDE` | gRPC API 主机端口 | `50051` |
|
||||||
|
| `TURBOOCR_CPU_LIMIT` | CPU 限制 | `8.0` |
|
||||||
|
| `TURBOOCR_MEMORY_LIMIT`       | 内存限制                                                                  | `12G`         |
| `TURBOOCR_CPU_RESERVATION`    | CPU 预留                                                                  | `2.0`         |
| `TURBOOCR_MEMORY_RESERVATION` | 内存预留                                                                  | `4G`          |
|
||||||
|
| `TURBOOCR_GPU_COUNT` | 预留的 NVIDIA GPU 数量 | `1` |
|
||||||
|
| `TURBOOCR_SHM_SIZE` | 共享内存大小 | `2g` |
|
||||||
|
|
||||||
|
复制 `.env.example` 为 `.env`,仅覆盖你需要修改的变量。
|
||||||
|
|
||||||
|
## 卷
|
||||||
|
|
||||||
|
- `turboocr_trt_cache`:缓存首次启动时由 ONNX 构建出的 TensorRT 引擎。必须使用**命名卷**,如果绑定挂载一个空的主机目录,会覆盖镜像内置的语言包,导致服务无法加载模型。
|
||||||
|
|
||||||
|
## 使用方法
|
||||||
|
|
||||||
|
### 启动 TurboOCR
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
首次启动需要从 ONNX 构建 TensorRT 引擎,耗时因 GPU 而异:高端桌面 GPU 约 5 分钟,笔记本 GPU 约 20–30 分钟。编译期间容器可能显示 `unhealthy`,这属于正常现象——构建完成后服务会自动启动并切换为 `healthy`。后续重启会复用缓存的引擎,几乎瞬间完成。
|
||||||
|
|
||||||
|
### 访问端点
|
||||||
|
|
||||||
|
- HTTP API:<http://localhost:8000>
|
||||||
|
- gRPC API:`localhost:50051`
|
||||||
|
- 健康检查:<http://localhost:8000/health>
|
||||||
|
- 就绪检查:<http://localhost:8000/health/ready>
|
||||||
|
- Prometheus 指标:<http://localhost:8000/metrics>
|
||||||
|
|
||||||
|
### 测试 API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 图片 —— 原始字节(最快路径)
|
||||||
|
curl -X POST http://localhost:8000/ocr/raw \
|
||||||
|
--data-binary @document.png \
|
||||||
|
-H "Content-Type: image/png"
|
||||||
|
|
||||||
|
# 图片 —— base64 JSON
|
||||||
|
curl -X POST http://localhost:8000/ocr \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"image":"'$(base64 -w0 document.png)'"}'
|
||||||
|
|
||||||
|
# PDF —— 原始字节
|
||||||
|
curl -X POST http://localhost:8000/ocr/pdf \
|
||||||
|
--data-binary @document.pdf
|
||||||
|
|
||||||
|
# PDF 启用版面检测
|
||||||
|
curl -X POST "http://localhost:8000/ocr/pdf?layout=1&mode=auto" \
|
||||||
|
--data-binary @document.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
> **重要提示**:请使用 HTTP keep-alive。如果在循环中频繁建立短连接(例如每次请求一个 `curl`),可能会压垮服务。标准 HTTP 客户端库(`requests.Session`、`aiohttp`、Go `http.Client` 等)默认会复用连接。
|
||||||
|
|
||||||
|
### 切换语言
|
||||||
|
|
||||||
|
修改 `.env` 后重启:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
TURBOOCR_LANG=chinese
|
||||||
|
TURBOOCR_SERVER=1 # 可选:使用 84 MB 的中文服务端识别模型
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
所有语言包都在构建镜像时打包进来(基于固定版本的 PP-OCRv5 发布,并校验 SHA256),运行时无需联网下载。
|
||||||
|
|
||||||
|
## 性能调优
|
||||||
|
|
||||||
|
- **GPU 流水线**:根据显存大小设置 `TURBOOCR_PIPELINE_POOL_SIZE`(每条约 1.4 GB)
|
||||||
|
- **版面开销**:`?layout=1` 会使吞吐下降约 20%;设置 `TURBOOCR_DISABLE_LAYOUT=1` 可完全跳过模型加载
|
||||||
|
- **共享内存**:处理超大 PDF 时可增加 `TURBOOCR_SHM_SIZE`
|
||||||
|
|
||||||
|
## 安全说明
|
||||||
|
|
||||||
|
- API 默认无身份认证。生产环境请在前面套一层反向代理(nginx、Caddy 等)。
|
||||||
|
- PDF 默认模式为 `ocr`,只信任像素数据,可安全处理不可信来源的 PDF 上传。
|
||||||
|
- 如果你的服务接收不可信来源的 PDF,**不要**将 `TURBOOCR_PDF_MODE` 全局设为 `geometric` 或 `auto`:恶意 PDF 可以嵌入隐形文字、重映射 ToUnicode 字符或在文本层注入任意字符串。
|
||||||
|
- 在可信文档场景下可使用 `auto_verified` 模式,会先做 OCR,再用文本层与之对照校验。
|
||||||
|
|
||||||
|
## 许可证
|
||||||
|
|
||||||
|
TurboOCR 采用 MIT 许可证。详情请参见 [TurboOCR GitHub 仓库](https://github.com/aiptimizer/TurboOCR)。
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
# Shared defaults merged into the turboocr service below.
x-defaults: &defaults
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      max-size: 100m
      max-file: '3'

services:
  turboocr:
    <<: *defaults
    image: ${GLOBAL_REGISTRY:-ghcr.io/}aiptimizer/turboocr:${TURBOOCR_VERSION:-v2.1.1}
    ports:
      - '${TURBOOCR_HTTP_PORT_OVERRIDE:-8000}:8000'
      - '${TURBOOCR_GRPC_PORT_OVERRIDE:-50051}:50051'
    volumes:
      # Named volume caches TensorRT engines built from ONNX on first start (~90s).
      # Must be a named volume - bind-mounting an empty host dir would shadow the
      # baked-in language bundles and prevent the server from loading models.
      - turboocr_trt_cache:/home/ocr/.cache/turbo-ocr
    environment:
      - TZ=${TZ:-UTC}
      # Language bundle: latin (default), chinese, greek, eslav, arabic, korean, thai
      - OCR_LANG=${TURBOOCR_LANG:-}
      # Set to 1 with OCR_LANG=chinese to use the 84MB server rec instead of 16MB mobile
      - OCR_SERVER=${TURBOOCR_SERVER:-}
      # Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto
      - PIPELINE_POOL_SIZE=${TURBOOCR_PIPELINE_POOL_SIZE:-}
      # Set to 1 to disable PP-DocLayoutV3 layout detection (saves ~300-500 MB VRAM)
      - DISABLE_LAYOUT=${TURBOOCR_DISABLE_LAYOUT:-0}
      # Default PDF mode: ocr (safest) / geometric / auto / auto_verified
      - ENABLE_PDF_MODE=${TURBOOCR_PDF_MODE:-ocr}
      # Skip angle classifier (~0.4ms savings)
      - DISABLE_ANGLE_CLS=${TURBOOCR_DISABLE_ANGLE_CLS:-0}
      # Max detection input size
      - DET_MAX_SIDE=${TURBOOCR_DET_MAX_SIDE:-960}
      # PDF render parallelism
      - PDF_DAEMONS=${TURBOOCR_PDF_DAEMONS:-16}
      - PDF_WORKERS=${TURBOOCR_PDF_WORKERS:-4}
      # Maximum pages per PDF request
      - MAX_PDF_PAGES=${TURBOOCR_MAX_PDF_PAGES:-2000}
      # Log level: debug / info / warn / error
      - LOG_LEVEL=${TURBOOCR_LOG_LEVEL:-info}
      # Log format: json (structured) / text (human-readable)
      - LOG_FORMAT=${TURBOOCR_LOG_FORMAT:-json}
    healthcheck:
      # Every element quoted so no flow-sequence scalar is subject to implicit typing.
      test: ['CMD', 'curl', '-fsS', 'http://localhost:8000/health']
      interval: 30s
      timeout: 10s
      retries: 5
      # First start builds TensorRT engines from ONNX. Build time varies by GPU:
      # ~5 min on high-end desktop GPUs, 20-30 min on laptop GPUs. The container
      # may show "unhealthy" during compilation but will become healthy once done.
      # Subsequent restarts reuse the cached engines and start in seconds.
      start_period: 30m
    deploy:
      resources:
        limits:
          # Quoted: Compose expects `cpus` as a string, and an unquoted default
          # like 8.0 would be read as a YAML float by generic parsers.
          cpus: '${TURBOOCR_CPU_LIMIT:-8.0}'
          memory: ${TURBOOCR_MEMORY_LIMIT:-12G}
        reservations:
          cpus: '${TURBOOCR_CPU_RESERVATION:-2.0}'
          memory: ${TURBOOCR_MEMORY_RESERVATION:-4G}
          devices:
            - driver: nvidia
              count: ${TURBOOCR_GPU_COUNT:-1}
              capabilities: [gpu]
    # shm_size is a service-level key (not part of deploy.resources).
    shm_size: ${TURBOOCR_SHM_SIZE:-2g}

volumes:
  turboocr_trt_cache:
|
||||||
Reference in New Issue
Block a user