feat: add TurboOCR

Sun-ZhenXing
2026-04-28 10:05:39 +08:00
parent 3483dd80f0
commit ce16588916
25 changed files with 1460 additions and 12 deletions
+4 -1
@@ -32,6 +32,7 @@ These services require building custom Docker images from source.
 | Service | Version |
 | ------------------------------------------- | ------- |
+| [CubeSandbox](./builds/cube-sandbox) | 0.1.7 |
 | [Debian DinD](./builds/debian-dind) | 0.1.2 |
 | [DeerFlow](./builds/deer-flow) | 2.0 |
 | [goose](./builds/goose) | 1.18.0 |
@@ -121,7 +122,7 @@ These services require building custom Docker images from source.
 | [Minecraft Bedrock Server](./src/minecraft-bedrock-server) | latest |
 | [MinIO](./src/minio) | 0.20260202 |
 | [MLflow](./src/mlflow) | v2.20.2 |
-| [MoltBot](./apps/moltbot) | main |
+| [OpenClaw](./apps/openclaw) | 2026.2.3 |
 | [MongoDB ReplicaSet Single](./src/mongodb-replicaset-single) | 8.2.3 |
 | [MongoDB ReplicaSet](./src/mongodb-replicaset) | 8.2.3 |
 | [MongoDB Standalone](./src/mongodb-standalone) | 8.2.3 |
@@ -140,6 +141,7 @@ These services require building custom Docker images from source.
 | [Ollama](./src/ollama) | 0.14.3 |
 | [Open WebUI](./src/open-webui) | main |
 | [Phoenix (Arize)](./src/phoenix) | 13.19.2 |
+| [Pingap](./src/pingap) | 0.12.7-full |
 | [Pingora Proxy Manager](./src/pingora-proxy-manager) | v1.0.3 |
 | [Open WebUI Rust](./src/open-webui-rust) | latest |
 | [OpenCode](./src/opencode) | 1.1.27 |
@@ -185,6 +187,7 @@ These services require building custom Docker images from source.
 | [TiKV](./src/tikv) | v8.5.0 |
 | [Trigger.dev](./src/trigger-dev) | v4.2.0 |
 | [TrailBase](./src/trailbase) | 0.22.4 |
+| [TurboOCR](./src/turboocr) | v2.1.1 |
 | [Valkey Cluster](./src/valkey-cluster) | 8.0 |
 | [Valkey](./src/valkey) | 8.0 |
 | [Verdaccio](./src/verdaccio) | 6.1.2 |
+4 -1
@@ -32,6 +32,7 @@ docker compose exec redis redis-cli ping
 | Service | Version |
 | ------------------------------------------- | ------- |
+| [CubeSandbox](./builds/cube-sandbox) | 0.1.7 |
 | [Debian DinD](./builds/debian-dind) | 0.1.2 |
 | [DeerFlow](./builds/deer-flow) | 2.0 |
 | [goose](./builds/goose) | 1.18.0 |
@@ -121,7 +122,7 @@ docker compose exec redis redis-cli ping
 | [Minecraft Bedrock Server](./src/minecraft-bedrock-server) | latest |
 | [MinIO](./src/minio) | 0.20260202 |
 | [MLflow](./src/mlflow) | v2.20.2 |
-| [MoltBot](./apps/moltbot) | main |
+| [OpenClaw](./apps/openclaw) | 2026.2.3 |
 | [MongoDB ReplicaSet Single](./src/mongodb-replicaset-single) | 8.2.3 |
 | [MongoDB ReplicaSet](./src/mongodb-replicaset) | 8.2.3 |
 | [MongoDB Standalone](./src/mongodb-standalone) | 8.2.3 |
@@ -140,6 +141,7 @@ docker compose exec redis redis-cli ping
 | [Ollama](./src/ollama) | 0.14.3 |
 | [Open WebUI](./src/open-webui) | main |
 | [Phoenix (Arize)](./src/phoenix) | 13.19.2 |
+| [Pingap](./src/pingap) | 0.12.7-full |
 | [Pingora Proxy Manager](./src/pingora-proxy-manager) | v1.0.3 |
 | [Open WebUI Rust](./src/open-webui-rust) | latest |
 | [OpenCode](./src/opencode) | 1.1.27 |
@@ -185,6 +187,7 @@ docker compose exec redis redis-cli ping
 | [TiKV](./src/tikv) | v8.5.0 |
 | [Trigger.dev](./src/trigger-dev) | v4.2.0 |
 | [TrailBase](./src/trailbase) | 0.22.4 |
+| [TurboOCR](./src/turboocr) | v2.1.1 |
 | [Valkey Cluster](./src/valkey-cluster) | 8.0 |
 | [Valkey](./src/valkey) | 8.0 |
 | [Verdaccio](./src/verdaccio) | 6.1.2 |
+1 -1
@@ -57,7 +57,7 @@ services:
       - NANOBOT_GATEWAY__PORT=${GATEWAY_PORT:-18790}
     command: ${NANOBOT_COMMAND:-gateway}
     healthcheck:
-      test: [CMD, python, -c, import sys; sys.exit(0)]
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:18790/')"]
       interval: 30s
       timeout: 10s
       retries: 3
+5 -4
@@ -13,7 +13,7 @@ x-defaults: &defaults
 services:
   openclaw-gateway:
     <<: *defaults
-    image: ${GLOBAL_REGISTRY:-ghcr.io}/openclaw/openclaw:${OPENCLAW_VERSION:-2026.2.3}
+    image: ${GLOBAL_REGISTRY:-ghcr.io/}openclaw/openclaw:${OPENCLAW_VERSION:-2026.2.3}
     environment:
       - TZ=${TZ:-UTC}
       - HOME=/home/node
@@ -60,7 +60,8 @@ services:
   openclaw-cli:
     <<: *defaults
-    image: ${GLOBAL_REGISTRY:-ghcr.io}/openclaw/openclaw:${OPENCLAW_VERSION:-2026.2.3}
+    restart: 'no'
+    image: ${GLOBAL_REGISTRY:-ghcr.io/}openclaw/openclaw:${OPENCLAW_VERSION:-2026.2.3}
     environment:
       - TZ=${TZ:-UTC}
       - HOME=/home/node
@@ -70,8 +71,8 @@ services:
       - CLAUDE_WEB_SESSION_KEY=${CLAUDE_WEB_SESSION_KEY:-}
       - CLAUDE_WEB_COOKIE=${CLAUDE_WEB_COOKIE:-}
     volumes:
-      - moltbot_config:/home/node/.clawdbot
-      - moltbot_workspace:/home/node/clawd
+      - openclaw_config:/home/node/.openclaw
+      - openclaw_workspace:/home/node/openclaw-workspace
     stdin_open: true
     tty: true
     entrypoint: [node, dist/index.js]
+1 -1
@@ -2,7 +2,7 @@
 STIRLING_VERSION="latest"
 # Port override
-PORT_OVERRIDE=8080
+STIRLING_PORT_OVERRIDE=8080
 # Security settings
 ENABLE_SECURITY="false"
+1 -1
@@ -13,7 +13,7 @@ This service deploys Stirling-PDF, a locally hosted web-based PDF manipulation t
 | Variable Name | Description | Default Value |
 | -------------------- | ------------------------------------- | -------------- |
 | STIRLING_VERSION | Stirling-PDF image version | `latest` |
-| PORT_OVERRIDE | Host port mapping | `8080` |
+| STIRLING_PORT_OVERRIDE | Host port mapping | `8080` |
 | ENABLE_SECURITY | Enable security features | `false` |
 | ENABLE_LOGIN | Enable login functionality | `false` |
 | INITIAL_USERNAME | Initial admin username | `admin` |
+1 -1
@@ -13,7 +13,7 @@
 | Variable Name | Description | Default Value |
 | -------------------- | ---------------------- | -------------- |
 | STIRLING_VERSION | Stirling-PDF image version | `latest` |
-| PORT_OVERRIDE | Host port mapping | `8080` |
+| STIRLING_PORT_OVERRIDE | Host port mapping | `8080` |
 | ENABLE_SECURITY | Enable security features | `false` |
 | ENABLE_LOGIN | Enable login functionality | `false` |
 | INITIAL_USERNAME | Initial admin username | `admin` |
+1 -1
@@ -11,7 +11,7 @@ services:
     <<: *defaults
     image: ${GLOBAL_REGISTRY:-}stirlingtools/stirling-pdf:${STIRLING_VERSION:-latest}
     ports:
-      - '${PORT_OVERRIDE:-8080}:8080'
+      - '${STIRLING_PORT_OVERRIDE:-8080}:8080'
     volumes:
       - stirling_trainingData:/usr/share/tessdata
       - stirling_configs:/configs
+36
@@ -0,0 +1,36 @@
# --- Image / build ---
# Override prefix when pushing to a private registry (e.g. registry.example.com/)
GLOBAL_REGISTRY=
# Tag of the locally built image
CUBE_SANDBOX_VERSION=0.1.7
# Base image for the wrapper container.
# Default works globally. In mainland China, override with a regional mirror:
# UBUNTU_IMAGE=docker.m.daocloud.io/library/ubuntu:22.04
# UBUNTU_IMAGE=ccr.ccs.tencentyun.com/library/ubuntu:22.04
UBUNTU_IMAGE=ubuntu:22.04
# --- Runtime ---
# Timezone inside the container
TZ=Asia/Shanghai
# Mirror used by the upstream installer:
# cn -> https://cnb.cool/CubeSandbox + Tencent Cloud container registry (recommended in China)
# gh -> https://github.com (slower in China but works elsewhere)
CUBE_MIRROR=cn
# Size of the XFS-formatted loop file mounted at /data/cubelet inside the
# container. install.sh hard-requires XFS; the file lives on the cube_data
# named volume so it persists across container restarts.
CUBE_XFS_SIZE=50G
# Set to 1 to force re-running install.sh on next start
CUBE_FORCE_REINSTALL=0
# --- Resources ---
# CubeSandbox runs MySQL + Redis + CubeProxy + CoreDNS + CubeMaster + CubeAPI +
# Cubelet + network-agent inside the wrapper container, then spawns MicroVMs.
# Give it enough headroom; 16 GiB / 8 vCPU is a comfortable single-node default.
CUBE_CPU_LIMIT=8
CUBE_MEMORY_LIMIT=16G
CUBE_CPU_RESERVATION=2
CUBE_MEMORY_RESERVATION=8G
+134
@@ -0,0 +1,134 @@
# CubeSandbox in a privileged systemd+DinD container.
#
# CubeSandbox's official install.sh is designed for bare metal / VMs and
# requires a running systemd (it registers all services as systemd units).
# This image therefore runs systemd as PID 1 rather than tini.
#
# UBUNTU_IMAGE may be overridden to use a regional mirror, e.g.:
# docker.m.daocloud.io/library/ubuntu:22.04 (China DaoCloud mirror)
# ccr.ccs.tencentyun.com/library/ubuntu:22.04 (Tencent Cloud mirror)
ARG UBUNTU_IMAGE=ubuntu:22.04
FROM ${UBUNTU_IMAGE}
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8
# Core system deps + systemd as the container init system.
# deploy/one-click/install.sh requires: tar, rg (ripgrep), ss (iproute2),
# bash, curl, sed, pgrep (procps), date, docker, python3, ip (iproute2), awk (gawk).
# Plus DinD prerequisites: iptables, ca-certificates, gnupg.
# Plus xfsprogs for the XFS-backed /data/cubelet (install.sh hard requirement).
RUN apt-get update && apt-get install -y --no-install-recommends \
systemd \
systemd-sysv \
dbus \
ca-certificates \
curl \
gnupg \
lsb-release \
bash \
tar \
ripgrep \
iproute2 \
procps \
gawk \
sed \
python3 \
python3-pip \
iptables \
kmod \
xfsprogs \
e2fsprogs \
util-linux \
file \
less \
&& rm -rf /var/lib/apt/lists/*
# Mask systemd units that are irrelevant or will fail in a container context.
RUN for unit in \
getty@tty1.service \
apt-daily.service \
apt-daily-upgrade.service \
apt-daily.timer \
apt-daily-upgrade.timer \
motd-news.service \
motd-news.timer \
systemd-networkd.service \
systemd-networkd-wait-online.service \
systemd-udevd.service \
systemd-udevd-control.socket \
systemd-udevd-kernel.socket \
systemd-logind.service \
e2scrub_reap.service \
apparmor.service; do \
ln -sf /dev/null "/etc/systemd/system/${unit}"; \
done
# Install Docker CE + Compose plugin from the official Docker apt repository.
RUN install -m 0755 -d /etc/apt/keyrings \
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
| gpg --dearmor -o /etc/apt/keyrings/docker.gpg \
&& chmod a+r /etc/apt/keyrings/docker.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \
https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo $VERSION_CODENAME) stable" \
> /etc/apt/sources.list.d/docker.list \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
docker-ce \
docker-ce-cli \
containerd.io \
docker-buildx-plugin \
docker-compose-plugin \
&& rm -rf /var/lib/apt/lists/*
# Configure Docker daemon defaults.
RUN mkdir -p /etc/docker && printf '%s\n' \
'{' \
' "log-driver": "json-file",' \
' "log-opts": { "max-size": "50m", "max-file": "3" },' \
' "storage-driver": "overlay2"' \
'}' > /etc/docker/daemon.json
# Install E2B Python SDK so smoke tests can run from inside the container
# without polluting the WSL2 host with pip packages.
RUN pip3 install --no-cache-dir --break-system-packages \
e2b-code-interpreter==1.0.* \
requests \
|| pip3 install --no-cache-dir \
e2b-code-interpreter==1.0.* \
requests
# Persistent locations the installer writes to.
VOLUME ["/var/lib/docker", "/data", "/usr/local/services/cubetoolbox"]
# Helper scripts for the bootstrap flow.
COPY cube-init.sh /usr/local/bin/cube-init.sh
COPY cube-xfs-setup.sh /usr/local/bin/cube-xfs-setup.sh
COPY cube-install.sh /usr/local/bin/cube-install.sh
RUN chmod +x \
/usr/local/bin/cube-init.sh \
/usr/local/bin/cube-xfs-setup.sh \
/usr/local/bin/cube-install.sh
# Systemd service units for the CubeSandbox bootstrap sequence.
COPY cube-xfs-mount.service /etc/systemd/system/cube-xfs-mount.service
COPY cube-install.service /etc/systemd/system/cube-install.service
# Enable services by creating the wanted-by symlinks that systemctl enable
# would create (systemctl cannot run during a Docker image build).
RUN mkdir -p /etc/systemd/system/multi-user.target.wants \
&& ln -sf /etc/systemd/system/cube-xfs-mount.service \
/etc/systemd/system/multi-user.target.wants/cube-xfs-mount.service \
&& ln -sf /etc/systemd/system/cube-install.service \
/etc/systemd/system/multi-user.target.wants/cube-install.service \
&& ln -sf /lib/systemd/system/docker.service \
/etc/systemd/system/multi-user.target.wants/docker.service \
&& ln -sf /lib/systemd/system/containerd.service \
/etc/systemd/system/multi-user.target.wants/containerd.service
# cube-init.sh captures CUBE_* and TZ env vars from the container runtime
# into /etc/cube-sandbox.env (readable by systemd EnvironmentFile=), then
# execs /lib/systemd/systemd as PID 1.
ENTRYPOINT ["/usr/local/bin/cube-init.sh"]
CMD ["/lib/systemd/systemd"]
+150
@@ -0,0 +1,150 @@
# CubeSandbox
Run [TencentCloud CubeSandbox](https://github.com/TencentCloud/CubeSandbox) — a KVM-based MicroVM sandbox compatible with the E2B SDK — entirely inside a single privileged Docker container, without modifying the host system.
## Why this is unusual
CubeSandbox is **not** a containerized project upstream. Its core components (Cubelet, network-agent, cube-shim, cube-runtime, CubeAPI, CubeMaster) ship as host binaries and the official `install.sh` writes them to `/usr/local/services/cubetoolbox`, then starts them as native processes that talk to the host containerd.
This stack runs the **entire installer inside one privileged container** that:
1. Runs its own `dockerd` (Docker-in-Docker) for MySQL / Redis / CubeProxy / CoreDNS dependencies.
2. Creates an XFS-formatted loop volume at `/data/cubelet` (install.sh hard-requires XFS).
3. Executes the upstream [`online-install.sh`](https://github.com/TencentCloud/CubeSandbox/blob/master/deploy/one-click/online-install.sh) on first boot.
4. Tails logs to keep the container alive.
The result is essentially a **single-node CubeSandbox appliance container** suitable for evaluating CubeSandbox without changing your host.
## Features
- Built on Ubuntu 22.04 (the project's primary test environment)
- Self-contained: no host packages installed, no host paths mounted
- KVM passed through via `/dev/kvm`
- Persistent volumes for installed binaries, sandbox data, and DinD storage
- Health check covering CubeAPI, CubeMaster, and network-agent
- China-mainland mirror used by default (`CUBE_MIRROR=cn`)
- Smoke-test script included (`smoke-test.sh`)
## Requirements
- Linux host (or WSL2 with KVM passthrough) with `/dev/kvm` available to Docker
- Nested virtualization enabled (Intel VT-x / AMD-V exposed)
- cgroup v2 (modern kernels — Debian 12+, Ubuntu 22.04+, kernel 5.10+)
- ≥ 16 GiB RAM and ≥ 8 vCPU recommended (8 GiB is the upstream minimum)
- ≥ 60 GiB free disk for the XFS loop file + Docker image layers
- Outbound internet to download the install bundle (~hundreds of MB) and Docker images
> On WSL2: confirm `/dev/kvm` is present (`ls -l /dev/kvm`) and your user is in the `kvm` group on the host distro.
## Quick Start
1. Copy the example environment file (optional — defaults work):
```bash
cp .env.example .env
```
2. Build and start (the first run downloads the CubeSandbox bundle and several Docker images — expect 5-20 minutes):
```bash
docker compose up -d --build
```
3. Watch the bootstrap log:
```bash
docker compose logs -f cube-sandbox
```
Wait for the `==================== CubeSandbox is up ====================` banner.
4. Verify all services are healthy:
```bash
curl -fsS http://127.0.0.1:3000/health && echo # CubeAPI
curl -fsS http://127.0.0.1:8089/notify/health && echo # CubeMaster
curl -fsS http://127.0.0.1:19090/healthz && echo # network-agent
```
5. (Optional) Run the smoke test:
```bash
bash smoke-test.sh                       # Full run (health checks + template build + sandbox lifecycle)
SKIP_TEMPLATE_BUILD=1 bash smoke-test.sh # Health checks only (skips the slow template build)
```
## Endpoints
Because the container uses `network_mode: host`, all CubeSandbox HTTP endpoints are reachable directly on the host loopback:
| Service | URL |
| ------------- | ------------------------------------ |
| CubeAPI | `http://127.0.0.1:3000` |
| CubeMaster | `http://127.0.0.1:8089` |
| network-agent | `http://127.0.0.1:19090` |
The CubeAPI exposes the E2B-compatible REST surface; point the [`e2b` Python SDK](https://e2b.dev) at `http://127.0.0.1:3000` to create sandboxes.
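A minimal sketch of that flow, mirroring what this stack's `e2b-test.py` does (`debug=True` points the SDK at `http://localhost:3000` instead of the E2B cloud; the `api_key` value is an arbitrary placeholder, since the local API does not validate it):
```python
from e2b_code_interpreter import Sandbox

# debug=True targets the local CubeAPI at http://localhost:3000
# rather than the E2B cloud.
sb = Sandbox(debug=True, api_key="local-test", timeout=120)
result = sb.run_code('print("Hello from CubeSandbox!")')
print(result.text)  # -> Hello from CubeSandbox!
sb.kill()
```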
## Configuration
Key environment variables (see `.env.example` for the full list):
| Variable | Description | Default |
| -------------------------- | ------------------------------------------------------------ | ---------------- |
| `GLOBAL_REGISTRY` | Image registry prefix when pushing to a private registry | _(empty)_ |
| `CUBE_SANDBOX_VERSION` | Tag of the locally built wrapper image | `0.1.7` |
| `UBUNTU_IMAGE`             | Base image for the wrapper container                         | `ubuntu:22.04`  |
| `TZ` | Container timezone | `Asia/Shanghai` |
| `CUBE_MIRROR` | Installer mirror — `cn` (China CDN) or `gh` (GitHub) | `cn` |
| `CUBE_XFS_SIZE` | Size of the XFS loop file backing `/data/cubelet` | `50G` |
| `CUBE_FORCE_REINSTALL` | Set to `1` to re-run `install.sh` on next start | `0` |
| `CUBE_CPU_LIMIT` | CPU limit | `8` |
| `CUBE_MEMORY_LIMIT` | Memory limit | `16G` |
| `CUBE_CPU_RESERVATION` | CPU reservation | `2` |
| `CUBE_MEMORY_RESERVATION` | Memory reservation | `8G` |
## Storage
Three named volumes hold persistent state — your installed CubeSandbox survives `docker compose down && up`:
| Volume | Path inside container | Purpose |
| --------------- | ----------------------------------- | -------------------------------------------------- |
| `cube_dind_data` | `/var/lib/docker` | DinD daemon images / containers / volumes |
| `cube_data` | `/data` | XFS loop image, `/data/cubelet`, sandbox disks, logs |
| `cube_toolbox` | `/usr/local/services/cubetoolbox` | Installed CubeSandbox binaries and scripts |
To wipe everything and reinstall from scratch:
```bash
docker compose down -v
docker compose up -d --build
```
## Security Considerations
⚠️ This stack is **highly privileged by design**. Only run it in trusted environments.
- `privileged: true` — required to mount the XFS loop volume, manage TAP interfaces, and run KVM
- `network_mode: host` — required so Cubelet can register the node IP and manage host TAP interfaces
- `cgroup: host` — required for the in-container `dockerd` to share the host's cgroup v2 hierarchy
- `/dev/kvm` and `/dev/net/tun` are passed through
These permissions are equivalent to what `online-install.sh` would request if it were run directly on your host. The advantage of the container wrapper is that all installer side-effects are confined to the three named volumes above, so removing the stack leaves no host residue.
## Troubleshooting
- **`/dev/kvm not found`** — the host does not expose KVM to Docker. On WSL2, confirm nested virtualization is enabled and the kernel exposes `/dev/kvm`. On bare metal, ensure VT-x / AMD-V is enabled in BIOS.
- **First boot hangs at "Running CubeSandbox one-click installer"** — the installer is downloading the bundle (~hundreds of MB) and pulling several Docker images. Check progress with `docker compose logs -f cube-sandbox`.
- **`quickcheck.sh reported issues`** — open a shell in the container and inspect logs:
```bash
docker compose exec cube-sandbox bash
ls /data/log/
tail -f /data/log/CubeAPI/*.log
```
- **Re-run the installer cleanly** — set `CUBE_FORCE_REINSTALL=1` in `.env` and `docker compose up -d --force-recreate`.
## Project Information
- Upstream: https://github.com/TencentCloud/CubeSandbox
- License: upstream project is Apache-2.0; this configuration is provided as-is for the Compose Anything project.
+151
@@ -0,0 +1,151 @@
# CubeSandbox
Run [TencentCloud CubeSandbox](https://github.com/TencentCloud/CubeSandbox), a KVM-based MicroVM sandbox compatible with the E2B SDK, entirely inside a single privileged Docker container, without modifying the host system.
## Why this stack is unusual
CubeSandbox is **not** a containerized project upstream. Its core components (Cubelet, network-agent, cube-shim, cube-runtime, CubeAPI, CubeMaster) ship as host binaries; the official `install.sh` writes them to `/usr/local/services/cubetoolbox`, then starts them as native processes that integrate with the host containerd.
This stack packs the **entire installer into one privileged container** that:
1. Runs its own `dockerd` (Docker-in-Docker) for the MySQL / Redis / CubeProxy / CoreDNS dependencies.
2. Creates an XFS-formatted loop volume at `/data/cubelet` (install.sh hard-requires XFS).
3. Executes the upstream [`online-install.sh`](https://github.com/TencentCloud/CubeSandbox/blob/master/deploy/one-click/online-install.sh) on first boot.
4. Tails logs to keep the container alive.
The result is a **single-node CubeSandbox appliance container** for evaluating CubeSandbox without touching the host.
## Features
- Built on Ubuntu 22.04 (the project's primary test environment)
- Self-contained: no host packages installed, no host paths mounted
- KVM passed through via `/dev/kvm`
- Three named volumes persist the installed binaries, sandbox data, and DinD storage
- Health check covering CubeAPI, CubeMaster, and network-agent
- China-mainland mirror used by default (`CUBE_MIRROR=cn`)
- Smoke-test script included (`smoke-test.sh`)
## Requirements
- Linux host (or WSL2 with KVM passthrough) with `/dev/kvm` visible to Docker
- Nested virtualization enabled (Intel VT-x / AMD-V exposed)
- cgroup v2 (modern kernels: Debian 12+, Ubuntu 22.04+, kernel 5.10+)
- ≥ 16 GiB RAM and ≥ 8 vCPU recommended (8 GiB is the upstream minimum)
- ≥ 60 GiB free disk for the XFS loop file + Docker image layers
- Outbound internet access to download the install bundle (hundreds of MB) and Docker images
> On WSL2: confirm `/dev/kvm` exists (`ls -l /dev/kvm`) and your user is in the `kvm` group on the host distro.
## Quick Start
1. Copy the example environment file (optional; the defaults work):
```bash
cp .env.example .env
```
2. Build and start (the first run downloads the CubeSandbox bundle and several Docker images; expect 5-20 minutes):
```bash
docker compose up -d --build
```
3. Watch the bootstrap log:
```bash
docker compose logs -f cube-sandbox
```
Wait for the `==================== CubeSandbox is up ====================` banner.
4. Verify all services are healthy:
```bash
curl -fsS http://127.0.0.1:3000/health && echo        # CubeAPI
curl -fsS http://127.0.0.1:8089/notify/health && echo # CubeMaster
curl -fsS http://127.0.0.1:19090/healthz && echo      # network-agent
```
5. (Optional) Run the smoke test:
```bash
bash smoke-test.sh                       # Full run (health checks + template build + sandbox lifecycle)
SKIP_TEMPLATE_BUILD=1 bash smoke-test.sh # Health checks only (skips the slow template build)
```
## Endpoints
Because the container uses `network_mode: host`, all CubeSandbox HTTP endpoints are reachable directly on the host loopback:
| Service | URL |
| ------------- | ------------------------ |
| CubeAPI | `http://127.0.0.1:3000` |
| CubeMaster | `http://127.0.0.1:8089` |
| network-agent | `http://127.0.0.1:19090` |
CubeAPI exposes the E2B-compatible REST surface; point the [`e2b` Python SDK](https://e2b.dev) at `http://127.0.0.1:3000` to create sandboxes.
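A minimal sketch of that flow, mirroring what this stack's `e2b-test.py` does (`debug=True` points the SDK at `http://localhost:3000` instead of the E2B cloud; the `api_key` value is an arbitrary placeholder, since the local API does not validate it):
```python
from e2b_code_interpreter import Sandbox

# debug=True targets the local CubeAPI at http://localhost:3000
# rather than the E2B cloud.
sb = Sandbox(debug=True, api_key="local-test", timeout=120)
result = sb.run_code('print("Hello from CubeSandbox!")')
print(result.text)  # -> Hello from CubeSandbox!
sb.kill()
```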
## Configuration
Key environment variables (see `.env.example` for the full list):
| Variable | Description | Default |
| -------------------------- | -------------------------------------------------------- | --------------- |
| `GLOBAL_REGISTRY` | Image registry prefix when pushing to a private registry | _(empty)_ |
| `CUBE_SANDBOX_VERSION` | Tag of the locally built wrapper image | `0.1.7` |
| `UBUNTU_IMAGE` | Base image for the wrapper container | `ubuntu:22.04` |
| `TZ` | Container timezone | `Asia/Shanghai` |
| `CUBE_MIRROR` | Installer mirror: `cn` (China CDN) or `gh` (GitHub) | `cn` |
| `CUBE_XFS_SIZE` | Size of the XFS loop file backing `/data/cubelet` | `50G` |
| `CUBE_FORCE_REINSTALL` | Set to `1` to re-run `install.sh` on next start | `0` |
| `CUBE_CPU_LIMIT` | CPU limit | `8` |
| `CUBE_MEMORY_LIMIT` | Memory limit | `16G` |
| `CUBE_CPU_RESERVATION` | CPU reservation | `2` |
| `CUBE_MEMORY_RESERVATION` | Memory reservation | `8G` |
## Storage
Three named volumes hold all persistent state; the installed CubeSandbox survives `docker compose down && up`:
| Volume | Path inside container | Purpose |
| ---------------- | ----------------------------------- | ---------------------------------------------------- |
| `cube_dind_data` | `/var/lib/docker` | DinD daemon images / containers / volumes |
| `cube_data` | `/data` | XFS loop file, `/data/cubelet`, sandbox disks, logs |
| `cube_toolbox` | `/usr/local/services/cubetoolbox` | Installed CubeSandbox binaries and scripts |
To wipe everything and reinstall from scratch:
```bash
docker compose down -v
docker compose up -d --build
```
## Security Considerations
⚠️ This stack is **highly privileged by design**; only run it in trusted environments.
- `privileged: true`: required to mount the XFS loop volume, manage TAP interfaces, and run KVM
- `network_mode: host`: required so Cubelet can register the node IP and manage host TAP interfaces
- `cgroup: host`: required for the in-container `dockerd` to share the host's cgroup v2 hierarchy
- `/dev/kvm` and `/dev/net/tun` are passed through
These permissions are equivalent to running `online-install.sh` directly on your host. The benefit of the container wrapper is that all installer side effects stay confined to the three named volumes above, so removing the stack leaves no residue on the host.
## Troubleshooting
- **`/dev/kvm not found`**: the host does not expose KVM to Docker. On WSL2, confirm nested virtualization is enabled and the kernel exposes `/dev/kvm`; on bare metal, enable VT-x / AMD-V in the BIOS.
- **First boot hangs at "Running CubeSandbox one-click installer"**: the installer is downloading the bundle (hundreds of MB) and pulling several Docker images. Follow progress with `docker compose logs -f cube-sandbox`.
- **`quickcheck.sh reported issues`**: open a shell in the container and inspect the logs:
```bash
docker compose exec cube-sandbox bash
ls /data/log/
tail -f /data/log/CubeAPI/*.log
```
- **Re-run the installer cleanly**: set `CUBE_FORCE_REINSTALL=1` in `.env`, then `docker compose up -d --force-recreate`.
## Project Information
- Upstream: https://github.com/TencentCloud/CubeSandbox
- License: the upstream project is Apache-2.0; this configuration is provided as-is for the Compose Anything project.
+43
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
# Thin PID-1 wrapper: capture container runtime env vars into a file that
# systemd EnvironmentFile= can read, then exec systemd as PID 1.
#
# This script runs BEFORE systemd, so it must be kept minimal and must not
# depend on any CubeSandbox service being available.
set -euo pipefail
# Write CUBE_* and TZ vars to /etc/cube-sandbox.env so that
# cube-xfs-mount.service and cube-install.service can pick them up via
# EnvironmentFile=/etc/cube-sandbox.env.
install -m 0644 /dev/null /etc/cube-sandbox.env
printenv | grep -E '^(CUBE_|TZ=)' >> /etc/cube-sandbox.env 2>/dev/null || true
# Mount BPF filesystem required by network-agent eBPF map pinning.
# /sys/fs/bpf is not auto-mounted in Docker containers even when the kernel
# supports BPF; without it network-agent crashes on startup with
# "not on a bpf filesystem" and then a nil-pointer panic.
if ! mountpoint -q /sys/fs/bpf 2>/dev/null; then
mkdir -p /sys/fs/bpf
mount -t bpf none /sys/fs/bpf 2>/dev/null \
|| echo "[cube-init] WARNING: could not mount BPF filesystem; network-agent may fail" >&2
fi
# Redirect CubeMaster's rootfs artifact workspace to the persistent data volume.
# Template builds export the sandbox image into a tar (often > 2 GB) before
# converting it to an ext4 disk image. /tmp is only a 2 GB tmpfs and is wiped on
# every container restart; /data (a named Docker volume) has 50+ GB and is
# persistent.
#
# We use a bind mount instead of a symlink: CubeMaster's Go startup code calls
# os.RemoveAll + os.MkdirAll on this path, which would silently replace a
# symlink with a real tmpfs directory. A bind-mount point returns EBUSY on
# removal, keeping the mount intact so all writes land on /data.
mkdir -p /data/cubemaster-rootfs-artifacts
mkdir -p /tmp/cubemaster-rootfs-artifacts
if ! mountpoint -q /tmp/cubemaster-rootfs-artifacts 2>/dev/null; then
mount --bind /data/cubemaster-rootfs-artifacts /tmp/cubemaster-rootfs-artifacts \
|| echo "[cube-init] WARNING: bind mount for cubemaster-rootfs-artifacts failed; writes may fill tmpfs" >&2
fi
# Hand off to systemd (or whatever CMD was passed to the container).
exec "$@"
+24
@@ -0,0 +1,24 @@
[Unit]
Description=CubeSandbox one-click installer
# Requires both the XFS volume and dockerd to be ready before running.
# install.sh will pull Docker images (MySQL, Redis, CubeProxy, CoreDNS)
# and then register Cubelet / CubeAPI / CubeMaster / network-agent as
# systemd units via `systemctl enable --now`.
After=docker.service cube-xfs-mount.service
Requires=docker.service cube-xfs-mount.service
[Service]
Type=oneshot
RemainAfterExit=yes
EnvironmentFile=-/etc/cube-sandbox.env
ExecStart=/usr/local/bin/cube-install.sh
# First boot downloads ~400 MB + pulls several Docker images; allow 30 min.
TimeoutStartSec=1800
# Retry on transient network failures (e.g. download interrupted).
Restart=on-failure
RestartSec=30s
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
+160
@@ -0,0 +1,160 @@
#!/usr/bin/env bash
# Run the CubeSandbox one-click installer, then run quickcheck.sh.
# Called by cube-install.service (Type=oneshot) after docker.service and
# cube-xfs-mount.service are both active.
set -euo pipefail
log() { printf '[cube-install] %s\n' "$*"; }
err() { printf '[cube-install] ERROR: %s\n' "$*" >&2; }
INSTALL_PREFIX="/usr/local/services/cubetoolbox"
QUICKCHECK="${INSTALL_PREFIX}/scripts/one-click/quickcheck.sh"
UP_SCRIPT="${INSTALL_PREFIX}/scripts/one-click/up-with-deps.sh"
MIRROR="${CUBE_MIRROR:-cn}"
INSTALLER_URL_CN="https://cnb.cool/CubeSandbox/CubeSandbox/-/git/raw/master/deploy/one-click/online-install.sh"
INSTALLER_URL_GH="https://github.com/tencentcloud/CubeSandbox/raw/master/deploy/one-click/online-install.sh"
# /dev/kvm sanity — required by the MicroVM hypervisor.
if [ ! -c /dev/kvm ]; then
err "/dev/kvm is not available inside the container."
err "Ensure the compose stack passes --device /dev/kvm and nested virt is enabled on the host."
exit 1
fi
log "KVM device present: $(ls -l /dev/kvm)"
# Wait for dockerd (started by docker.service) to be ready before install.sh
# tries to pull MySQL / Redis / CubeProxy images.
log "Waiting for docker daemon ..."
for i in $(seq 1 60); do
if docker info >/dev/null 2>&1; then
log "docker ready."
break
fi
sleep 2
done
if ! docker info >/dev/null 2>&1; then
err "docker daemon not ready after 120 s"
exit 1
fi
# Redirect TMPDIR to the roomy, persistent /data volume.
# /tmp is a size-limited tmpfs (2 GB in this stack) and tmpfs mounts are
# often noexec; either one breaks the install:
#   - curl: (23) Failure writing output to destination (out of space)
#   - extracted scripts fail to execute (noexec mount flag)
mkdir -p /data/tmp
export TMPDIR=/data/tmp
log "TMPDIR set to $TMPDIR ($(df -h /data/tmp | awk 'NR==2{print $4}') free)"
# Set CAROOT so mkcert can find / create the local CA directory on every boot.
# Without this, up-cube-proxy.sh calls `mkcert -install` which exits with:
# "ERROR: failed to find the default CA location"
# Because up-with-deps.sh runs under set -euo pipefail, that failure aborts
# the entire script before any compute services (network-agent, CubeAPI, etc.)
# are started. Persisting the CA on /data (named volume) means the cert is
# re-used across container restarts rather than regenerated each time.
export CAROOT=/data/mkcert-ca
mkdir -p "$CAROOT"
log "CAROOT set to $CAROOT"
# Run the upstream one-click installer on first boot; on subsequent boots
# just re-launch all services via up-with-deps.sh.
if [ -x "$QUICKCHECK" ] && [ "${CUBE_FORCE_REINSTALL:-0}" != "1" ]; then
log "CubeSandbox already installed at $INSTALL_PREFIX — starting services."
if [ ! -x "$UP_SCRIPT" ]; then
err "up-with-deps.sh not found at $UP_SCRIPT — reinstall required"
exit 1
fi
ONE_CLICK_TOOLBOX_ROOT="$INSTALL_PREFIX" \
ONE_CLICK_RUNTIME_ENV_FILE="${INSTALL_PREFIX}/.one-click.env" \
bash "$UP_SCRIPT" \
|| log "WARNING: up-with-deps.sh exited non-zero; services may still be starting"
else
log "Running CubeSandbox one-click installer (mirror=$MIRROR) ..."
if [ "$MIRROR" = "cn" ]; then
curl -fsSL "$INSTALLER_URL_CN" | MIRROR=cn bash
else
curl -fsSL "$INSTALLER_URL_GH" | bash
fi
fi
# Run quickcheck.sh with retries — network-agent initialises 500 tap interfaces
# which takes ~2 minutes; we retry every 30 s for up to 10 minutes.
QUICKCHECK_PASSED=0
if [ -x "$QUICKCHECK" ]; then
log "Running quickcheck.sh (retrying up to 10 min for network-agent tap init) ..."
for i in $(seq 1 20); do
if ONE_CLICK_TOOLBOX_ROOT="$INSTALL_PREFIX" \
ONE_CLICK_RUNTIME_ENV_FILE="${INSTALL_PREFIX}/.one-click.env" \
"$QUICKCHECK" 2>&1; then
QUICKCHECK_PASSED=1
break
fi
log "quickcheck attempt $i/20 failed — retrying in 30 s ..."
sleep 30
done
else
err "quickcheck.sh not found at $QUICKCHECK — install may have failed."
exit 1
fi
if [ "$QUICKCHECK_PASSED" != "1" ]; then
err "quickcheck.sh never passed after 20 attempts — CubeSandbox is unhealthy."
exit 1
fi
# Ensure containerd-shim-cube-rs is on Cubelet's clean PATH.
# up.sh/up-with-deps.sh launch Cubelet with:
# PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Cubelet resolves runtime shims from that PATH, so it cannot find
# containerd-shim-cube-rs unless it is symlinked into one of those dirs.
# We create the symlink unconditionally on every boot (both after fresh
# install and after the restart path) so Cubelet can start sandboxes.
SHIM_SRC="${INSTALL_PREFIX}/cube-shim/bin/containerd-shim-cube-rs"
SHIM_DST="/usr/local/bin/containerd-shim-cube-rs"
if [ -x "$SHIM_SRC" ]; then
ln -sf "$SHIM_SRC" "$SHIM_DST"
log "containerd-shim-cube-rs linked: $SHIM_DST -> $SHIM_SRC"
else
log "WARNING: $SHIM_SRC not found — Cubelet will not be able to start MicroVMs"
fi
# Restart Cubelet now that network-agent is confirmed ready.
# On first startup the Cubelet process begins before network-agent has finished
# initialising its 500 TAP interfaces (~2 min). This causes the
# io.cubelet.images-service.v1 plugin to fail with:
# "network-agent health check failed ... context deadline exceeded"
# leaving the gRPC cubelet.services.images.v1.Images service unregistered.
# When CubeMaster later tries to distribute a template artifact to the node it
# gets back gRPC Unimplemented and the build fails.
# Restarting Cubelet here — after quickcheck has confirmed network-agent is up —
# allows the images-service plugin to load successfully on the second boot.
CUBELET_BIN="${INSTALL_PREFIX}/Cubelet/bin/cubelet"
CUBELET_CFG="${INSTALL_PREFIX}/Cubelet/config/config.toml"
CUBELET_DYN="${INSTALL_PREFIX}/Cubelet/dynamicconf/conf.yaml"
CUBELET_LOG="/data/log/Cubelet/Cubelet-req.log"
if [ -x "$CUBELET_BIN" ]; then
log "Restarting Cubelet so images-service plugin loads against ready network-agent ..."
pkill -f "${CUBELET_BIN}" 2>/dev/null || true
sleep 2
mkdir -p "$(dirname "$CUBELET_LOG")"
nohup "$CUBELET_BIN" \
--config "$CUBELET_CFG" \
--dynamic-conf-path "$CUBELET_DYN" \
>>"$CUBELET_LOG" 2>&1 &
CUBELET_PID=$!
log "Cubelet restarted (PID ${CUBELET_PID}) — waiting 10 s for boot ..."
sleep 10
if kill -0 "$CUBELET_PID" 2>/dev/null; then
log "Cubelet is running."
else
log "WARNING: Cubelet PID ${CUBELET_PID} exited — check ${CUBELET_LOG}."
fi
fi
log "==================== CubeSandbox is up ===================="
log " CubeAPI: http://127.0.0.1:3000/health"
log " CubeMaster: http://127.0.0.1:8089/notify/health"
log " network-agent http://127.0.0.1:19090/healthz"
log " Logs: /data/log/{CubeAPI,CubeMaster,Cubelet}/"
log "==========================================================="
@@ -0,0 +1,18 @@
[Unit]
Description=CubeSandbox XFS loop volume mount
# Must run before dockerd and the installer because install.sh validates that
# /data/cubelet is an XFS filesystem before proceeding.
DefaultDependencies=no
Before=cube-install.service docker.service
After=local-fs.target
[Service]
Type=oneshot
RemainAfterExit=yes
EnvironmentFile=-/etc/cube-sandbox.env
ExecStart=/usr/local/bin/cube-xfs-setup.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
+31
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Create and mount the XFS-formatted loop volume at /data/cubelet.
# Called by cube-xfs-mount.service (Type=oneshot) before docker.service starts.
#
# install.sh hard-requires that /data/cubelet is on an XFS filesystem;
# it validates this with `df -T /data/cubelet | grep -q xfs`.
set -euo pipefail
log() { printf '[cube-xfs] %s\n' "$*"; }
CUBE_DATA_DIR="${CUBE_DATA_DIR:-/data/cubelet}"
CUBE_XFS_IMG="${CUBE_XFS_IMG:-/data/cubelet.img}"
CUBE_XFS_SIZE="${CUBE_XFS_SIZE:-50G}"
mkdir -p /data "$CUBE_DATA_DIR"
current_fs="$(stat -fc %T "$CUBE_DATA_DIR" 2>/dev/null || echo unknown)"
if [ "$current_fs" = "xfs" ]; then
log "Already mounted: $CUBE_DATA_DIR ($current_fs) — nothing to do."
exit 0
fi
log "Preparing XFS loop volume at $CUBE_XFS_IMG (size=$CUBE_XFS_SIZE) ..."
if [ ! -f "$CUBE_XFS_IMG" ]; then
fallocate -l "$CUBE_XFS_SIZE" "$CUBE_XFS_IMG"
mkfs.xfs -q -f "$CUBE_XFS_IMG"
log "Formatted $CUBE_XFS_IMG as XFS."
fi
mount -o loop "$CUBE_XFS_IMG" "$CUBE_DATA_DIR"
log "Mounted $CUBE_DATA_DIR ($(stat -fc %T "$CUBE_DATA_DIR"))."
+110
@@ -0,0 +1,110 @@
# CubeSandbox running inside a privileged systemd+DinD container.
#
# WHY THIS LOOKS UNUSUAL
# ----------------------
# CubeSandbox is NOT a containerized project upstream. Its core components
# (Cubelet, network-agent, cube-shim, CubeAPI, CubeMaster) ship as host
# binaries, and the official install.sh registers them as systemd units and
# manages them with systemctl.
#
# To run it purely with Docker without modifying the WSL2 host, this stack:
# 1. Runs systemd as PID 1 inside a privileged container so that
# install.sh can call systemctl enable / start / status normally.
# 2. Runs its own dockerd (DinD) for MySQL / Redis / CoreDNS / CubeProxy.
# 3. Mounts an XFS loop volume at /data/cubelet (install.sh hard-requires XFS).
# 4. Executes the upstream online-install.sh via cube-install.service.
#
# The /run and /run/lock paths are tmpfs so systemd can write its runtime
# state (PID files, socket files, etc.) during the container lifetime.
# stop_signal RTMIN+3 is the standard graceful-shutdown signal for systemd.
x-defaults: &defaults
restart: unless-stopped
logging:
driver: json-file
options:
max-size: 100m
max-file: '3'
services:
cube-sandbox:
<<: *defaults
image: ${GLOBAL_REGISTRY:-}compose-anything/cube-sandbox:${CUBE_SANDBOX_VERSION:-0.1.7}
build:
context: .
dockerfile: Dockerfile
args:
- UBUNTU_IMAGE=${UBUNTU_IMAGE:-ubuntu:22.04}
# CubeSandbox needs:
# - /dev/kvm for the MicroVM hypervisor
# - /dev/net/tun for cube TAP interfaces
# - SYS_ADMIN/NET_ADMIN to mount the XFS loop volume and create TAPs
# - Its own dockerd for MySQL / Redis / CubeProxy / CoreDNS
# - systemd as PID 1 so install.sh can register and start services
# The simplest correct configuration is privileged + host network.
privileged: true
network_mode: host
devices:
- /dev/kvm:/dev/kvm
- /dev/net/tun:/dev/net/tun
# cgroupns:host lets the in-container systemd + dockerd share the host's
# (i.e. WSL2's) cgroup v2 hierarchy directly — more reliable than private.
cgroup: host
# systemd needs to write its runtime state to /run; use tmpfs so it does
# not leak across container restarts and does not consume the named volumes.
tmpfs:
- /run:size=100m
- /run/lock:size=10m
- /tmp:size=2g,exec
# SIGRTMIN+3 is the proper graceful-shutdown signal for systemd.
stop_signal: RTMIN+3
environment:
- TZ=${TZ:-Asia/Shanghai}
# cn = pull installer + images via the cnb.cool / Tencent Cloud mirror
# gh = pull from raw.githubusercontent.com (slower in mainland China)
- CUBE_MIRROR=${CUBE_MIRROR:-cn}
# Size of the XFS loop file that backs /data/cubelet
- CUBE_XFS_SIZE=${CUBE_XFS_SIZE:-50G}
# Set to 1 to re-run install.sh even if a previous install is detected
- CUBE_FORCE_REINSTALL=${CUBE_FORCE_REINSTALL:-0}
volumes:
# DinD docker daemon storage (images for MySQL, Redis, CoreDNS, CubeProxy)
- cube_dind_data:/var/lib/docker
# XFS loop image + mounted /data/cubelet + cube-shim disks + logs
- cube_data:/data
# Installed CubeSandbox binaries & scripts
- cube_toolbox:/usr/local/services/cubetoolbox
# No `ports:` block — we use network_mode: host so the CubeAPI on
# 127.0.0.1:3000 inside the container is the same socket as
# 127.0.0.1:3000 on the WSL2 host.
healthcheck:
test:
- CMD-SHELL
- "curl -fsS http://127.0.0.1:3000/health && curl -fsS http://127.0.0.1:8089/notify/health && curl -fsS http://127.0.0.1:19090/healthz"
interval: 30s
timeout: 15s
retries: 5
start_period: 600s # First boot downloads ~400 MB + Docker images; be generous.
deploy:
resources:
limits:
cpus: '${CUBE_CPU_LIMIT:-8}'
memory: ${CUBE_MEMORY_LIMIT:-16G}
reservations:
cpus: '${CUBE_CPU_RESERVATION:-2}'
memory: ${CUBE_MEMORY_RESERVATION:-8G}
volumes:
cube_dind_data:
cube_data:
cube_toolbox:
+112
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Basic E2B SDK integration test against a local CubeSandbox instance.
Runs three checks:
1. Sandbox creation (debug=True → API at http://localhost:3000)
2. Code execution and output validation
3. Sandbox teardown
Usage (inside the cube-sandbox container):
    python3 /root/e2b-test.py
Exit codes:
    0  all tests passed
    1  any test failed
"""
import sys

PASS = "\033[1;32m[ OK ]\033[0m"
FAIL = "\033[1;31m[FAIL]\033[0m"
INFO = "\033[1;36m[INFO]\033[0m"


def check(label: str, cond: bool, detail: str = "") -> bool:
    if cond:
        print(f"{PASS} {label}")
    else:
        print(f"{FAIL} {label}{': ' + detail if detail else ''}")
    return cond


def main() -> int:
    ok = True

    # ------------------------------------------------------------------ #
    # 1. Import                                                          #
    # ------------------------------------------------------------------ #
    print(f"{INFO} Importing e2b_code_interpreter …")
    try:
        from e2b_code_interpreter import Sandbox  # type: ignore
    except ImportError as exc:
        print(f"{FAIL} import failed: {exc}")
        return 1
    ok &= check("e2b_code_interpreter imported", True)

    # ------------------------------------------------------------------ #
    # 2. Create sandbox                                                  #
    # ------------------------------------------------------------------ #
    print(f"\n{INFO} Creating sandbox (debug=True → http://localhost:3000) …")
    sb = None
    try:
        # debug=True makes the SDK target http://localhost:3000 instead of
        # the E2B cloud and http://localhost:<port> for the envd connection.
        sb = Sandbox(debug=True, api_key="local-test", timeout=120)
        ok &= check("Sandbox created", sb is not None, f"id={sb.sandbox_id if sb else '?'}")
        print(f"       sandbox_id = {sb.sandbox_id}")
    except Exception as exc:
        ok &= check("Sandbox created", False, str(exc))
        print(f"\n{INFO} Skipping remaining tests (sandbox creation failed)")
        return 0 if ok else 1

    # ------------------------------------------------------------------ #
    # 3. Execute code                                                    #
    # ------------------------------------------------------------------ #
    print(f"\n{INFO} Running code inside sandbox …")
    try:
        result = sb.run_code('print("Hello from CubeSandbox!")')
        expected = "Hello from CubeSandbox!"
        output = (result.text or "").strip()
        ok &= check("Code executed without error", not result.error,
                    str(result.error) if result.error else "")
        ok &= check("Output matches expected", output == expected,
                    f"got {output!r}")
    except Exception as exc:
        ok &= check("Code execution", False, str(exc))

    # ------------------------------------------------------------------ #
    # 4. Multi-line / stateful execution                                 #
    # ------------------------------------------------------------------ #
    print(f"\n{INFO} Running stateful multi-cell execution …")
    try:
        sb.run_code("x = 40 + 2")
        result2 = sb.run_code("print(x)")
        output2 = (result2.text or "").strip()
        ok &= check("Stateful multi-cell execution", output2 == "42",
                    f"got {output2!r}")
    except Exception as exc:
        ok &= check("Stateful multi-cell execution", False, str(exc))

    # ------------------------------------------------------------------ #
    # 5. Kill sandbox                                                    #
    # ------------------------------------------------------------------ #
    print(f"\n{INFO} Killing sandbox …")
    try:
        sb.kill()
        ok &= check("Sandbox killed", True)
    except Exception as exc:
        ok &= check("Sandbox killed", False, str(exc))

    # ------------------------------------------------------------------ #
    # Summary                                                            #
    # ------------------------------------------------------------------ #
    print()
    if ok:
        print(f"{PASS} All E2B SDK tests passed")
    else:
        print(f"{FAIL} Some E2B SDK tests FAILED")
    return 0 if ok else 1


if __name__ == "__main__":
    sys.exit(main())
+104
@@ -0,0 +1,104 @@
#!/usr/bin/env bash
# Smoke test for a running CubeSandbox stack.
#
# Run from the WSL2 host or from inside the cube-sandbox container - both work
# because the container uses network_mode: host.
#
# Steps:
# 1. Health-check all CubeSandbox services
# 2. (Optional, slow) Build a code-interpreter template from a public image
# 3. Create a sandbox via the E2B-compatible REST API, run a tiny payload,
# then destroy it
#
# Skip the slow template-build step with: SKIP_TEMPLATE_BUILD=1 ./smoke-test.sh
set -euo pipefail
# cubemastercli is installed to a non-standard prefix; add it to PATH so this
# script works both when run inside the container and from the WSL2 host.
export PATH="/usr/local/services/cubetoolbox/CubeMaster/bin:${PATH:-}"
CUBE_API="${CUBE_API:-http://127.0.0.1:3000}"
CUBE_MASTER="${CUBE_MASTER:-http://127.0.0.1:8089}"
CUBE_NETAGENT="${CUBE_NETAGENT:-http://127.0.0.1:19090}"
ok() { printf '\033[1;32m[ OK ]\033[0m %s\n' "$*"; }
fail() { printf '\033[1;31m[FAIL]\033[0m %s\n' "$*" >&2; exit 1; }
info() { printf '\033[1;36m[INFO]\033[0m %s\n' "$*"; }
#-------------------------------------------------------------------
# 1. Health checks (matches what install.sh's quickcheck.sh verifies)
#-------------------------------------------------------------------
info "Health: CubeAPI"
curl -fsS "${CUBE_API}/health" >/dev/null && ok "CubeAPI /health" || fail "CubeAPI /health"
echo
info "Health: CubeMaster"
curl -fsS "${CUBE_MASTER}/notify/health" >/dev/null && ok "CubeMaster /notify/health" || fail "CubeMaster /notify/health"
info "Health: network-agent"
curl -fsS "${CUBE_NETAGENT}/healthz" >/dev/null && ok "network-agent /healthz" || fail "network-agent /healthz"
curl -fsS "${CUBE_NETAGENT}/readyz" >/dev/null && ok "network-agent /readyz" || fail "network-agent /readyz"
#-------------------------------------------------------------------
# 2. Optional: build a sandbox template
#-------------------------------------------------------------------
TEMPLATE_ID="${CUBE_TEMPLATE_ID:-}"
if [ -z "$TEMPLATE_ID" ] && [ "${SKIP_TEMPLATE_BUILD:-0}" != "1" ]; then
info "No CUBE_TEMPLATE_ID provided; building one from ccr.ccs.tencentyun.com/ags-image/sandbox-code:latest"
info "(this can take 5-15 minutes; set SKIP_TEMPLATE_BUILD=1 to skip and only run health checks)"
if ! command -v cubemastercli >/dev/null 2>&1; then
# cubemastercli lives inside the container; exec into it
CUBE_CTR="$(docker compose ps -q cube-sandbox 2>/dev/null || true)"
[ -z "$CUBE_CTR" ] && fail "cube-sandbox container not running and cubemastercli not on PATH"
CMC="docker exec -i $CUBE_CTR cubemastercli"
else
CMC="cubemastercli"
fi
JOB_OUT="$($CMC tpl create-from-image \
--image ccr.ccs.tencentyun.com/ags-image/sandbox-code:latest \
--writable-layer-size 1G \
--expose-port 49999 \
--expose-port 49983 \
--probe 49999 2>&1)"
echo "$JOB_OUT"
JOB_ID="$(echo "$JOB_OUT" | grep -oE 'job_id[=: ]+[A-Za-z0-9_-]+' | head -1 | awk '{print $NF}')"
[ -z "$JOB_ID" ] && fail "could not parse job_id from output"
info "Watching job $JOB_ID ..."
$CMC tpl watch --job-id "$JOB_ID"
# Extract template_id from the create-from-image output (it's on the first few
# lines) rather than re-querying the list — list ordering is not guaranteed and
# could return a FAILED entry as the last line.
TEMPLATE_ID="$(echo "$JOB_OUT" | grep -E '\btemplate_id\b' | head -1 | awk '{print $NF}')"
[ -z "$TEMPLATE_ID" ] && fail "could not determine template id after build"
ok "Template built: $TEMPLATE_ID"
elif [ -z "$TEMPLATE_ID" ]; then
info "Skipping sandbox lifecycle test (no CUBE_TEMPLATE_ID and SKIP_TEMPLATE_BUILD=1)"
ok "Health checks passed - CubeSandbox stack is up"
exit 0
fi
#-------------------------------------------------------------------
# 3. Create -> inspect -> destroy a sandbox via REST
#-------------------------------------------------------------------
info "Creating sandbox from template $TEMPLATE_ID ..."
RESP="$(curl -fsS -X POST "${CUBE_API}/sandboxes" \
-H 'Authorization: Bearer dummy' \
-H 'Content-Type: application/json' \
-d "{\"templateID\":\"${TEMPLATE_ID}\"}")"
SANDBOX_ID="$(echo "$RESP" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("sandboxID",""))')"
[ -z "$SANDBOX_ID" ] && fail "no sandboxID in response: $RESP"
ok "Created sandbox $SANDBOX_ID"
info "Inspecting sandbox ..."
curl -fsS "${CUBE_API}/sandboxes/${SANDBOX_ID}" -H 'Authorization: Bearer dummy' \
| python3 -m json.tool
ok "Sandbox is queryable"
info "Destroying sandbox ..."
curl -fsS -X DELETE "${CUBE_API}/sandboxes/${SANDBOX_ID}" -H 'Authorization: Bearer dummy' >/dev/null
ok "Sandbox destroyed"
ok "All smoke tests passed"
+1 -1
@@ -28,7 +28,7 @@ services:
       - pingap
       - --autoreload
     healthcheck:
-      test: [CMD-SHELL, "bash -c 'echo > /dev/tcp/localhost/80'"]
+      test: ["CMD-SHELL", "bash -c 'echo >/dev/tcp/localhost/80' || exit 1"]
       interval: 30s
       timeout: 10s
       retries: 3
+59
@@ -0,0 +1,59 @@
# TurboOCR image version
# See https://github.com/aiptimizer/TurboOCR/releases for available tags
TURBOOCR_VERSION="v2.1.1"
# Language bundle (leave empty for latin / English-default)
# Supported: latin, chinese, greek, eslav, arabic, korean, thai
TURBOOCR_LANG=""
# When TURBOOCR_LANG=chinese, set to 1 to use the 84MB PP-OCRv5 server rec
# instead of the 16MB mobile rec (higher accuracy, more VRAM)
TURBOOCR_SERVER=""
# Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto-detect
TURBOOCR_PIPELINE_POOL_SIZE=""
# Disable PP-DocLayoutV3 layout detection model (1 = disable, saves ~300-500 MB VRAM)
TURBOOCR_DISABLE_LAYOUT=0
# Default PDF extraction mode
# ocr - render + full OCR (safest, immune to text-layer attacks)
# geometric - PDFium text layer only (~10x faster, but trusts PDF content)
# auto - per-page text layer if available, else OCR
# auto_verified - OCR + cross-check against text layer
TURBOOCR_PDF_MODE="ocr"
# Skip angle classifier (1 = skip, ~0.4ms latency savings)
TURBOOCR_DISABLE_ANGLE_CLS=0
# Max detection input size in pixels
TURBOOCR_DET_MAX_SIDE=960
# PDF render parallelism
TURBOOCR_PDF_DAEMONS=16
TURBOOCR_PDF_WORKERS=4
# Maximum pages allowed per PDF request
TURBOOCR_MAX_PDF_PAGES=2000
# Log level: debug / info / warn / error
TURBOOCR_LOG_LEVEL="info"
# Log format: json (structured) / text (human-readable)
TURBOOCR_LOG_FORMAT="json"
# Host port mappings
TURBOOCR_HTTP_PORT_OVERRIDE=8000
TURBOOCR_GRPC_PORT_OVERRIDE=50051
# Resource limits
TURBOOCR_CPU_LIMIT=8.0
TURBOOCR_MEMORY_LIMIT=12G
TURBOOCR_CPU_RESERVATION=2.0
TURBOOCR_MEMORY_RESERVATION=4G
# Number of NVIDIA GPUs to reserve
TURBOOCR_GPU_COUNT=1
# Shared memory size for the container
TURBOOCR_SHM_SIZE=2g
+119
@@ -0,0 +1,119 @@
# TurboOCR
[English](./README.md) | [中文](./README.zh.md)
This service deploys [TurboOCR](https://github.com/aiptimizer/TurboOCR), a GPU-accelerated OCR server built on C++ / CUDA / TensorRT / PP-OCRv5. It exposes both an HTTP API and a gRPC API from a single binary that share the same GPU pipeline pool, with Prometheus metrics built in.
## Services
- `turboocr`: TurboOCR HTTP (port 8000) + gRPC (port 50051) inference server
## Requirements
- Linux host with NVIDIA driver 595 or newer
- Turing or newer GPU (RTX 20-series / GTX 16-series and up)
- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed and configured for Docker
## Environment Variables
| Variable Name | Description | Default Value |
| ----------------------------- | --------------------------------------------------------------------------------- | ------------- |
| `TURBOOCR_VERSION` | TurboOCR image version | `v2.1.1` |
| `TURBOOCR_LANG` | Language bundle: `latin`, `chinese`, `greek`, `eslav`, `arabic`, `korean`, `thai` | `""` (latin) |
| `TURBOOCR_SERVER` | With `chinese`, set to `1` for the 84 MB server rec | `""` |
| `TURBOOCR_PIPELINE_POOL_SIZE` | Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto | `""` |
| `TURBOOCR_DISABLE_LAYOUT` | Disable layout detection model (saves ~300-500 MB VRAM) | `0` |
| `TURBOOCR_PDF_MODE` | Default PDF mode: `ocr` / `geometric` / `auto` / `auto_verified` | `ocr` |
| `TURBOOCR_DISABLE_ANGLE_CLS` | Skip angle classifier (~0.4 ms savings) | `0` |
| `TURBOOCR_DET_MAX_SIDE` | Max detection input size in pixels | `960` |
| `TURBOOCR_PDF_DAEMONS` | PDF render daemons | `16` |
| `TURBOOCR_PDF_WORKERS` | PDF worker threads | `4` |
| `TURBOOCR_MAX_PDF_PAGES` | Maximum pages per PDF request | `2000` |
| `TURBOOCR_LOG_LEVEL` | Log level: `debug` / `info` / `warn` / `error` | `info` |
| `TURBOOCR_LOG_FORMAT` | Log format: `json` / `text` | `json` |
| `TURBOOCR_HTTP_PORT_OVERRIDE` | Host port for HTTP API | `8000` |
| `TURBOOCR_GRPC_PORT_OVERRIDE` | Host port for gRPC API | `50051` |
| `TURBOOCR_CPU_LIMIT` | CPU limit | `8.0` |
| `TURBOOCR_MEMORY_LIMIT` | Memory limit | `12G` |
| `TURBOOCR_GPU_COUNT` | Number of NVIDIA GPUs to reserve | `1` |
| `TURBOOCR_SHM_SIZE` | Shared memory size | `2g` |
Copy `.env.example` to `.env` and override only the variables you need to change.
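For example, a minimal setup that only remaps the HTTP port (variable names are from the table above; everything else keeps its default):
```bash
cp .env.example .env
# expose the HTTP API on host port 18000 instead of 8000
echo 'TURBOOCR_HTTP_PORT_OVERRIDE=18000' >> .env
docker compose up -d
```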
## Volumes
- `turboocr_trt_cache`: Caches TensorRT engines built from ONNX on first start. Must be a **named** volume — a bind-mount of an empty host directory would shadow the baked-in language bundles and the server would fail to load models.
## Usage
### Start TurboOCR
```bash
docker compose up -d
```
The first start builds TensorRT engines from ONNX. Build time depends on your GPU: roughly 5 minutes on high-end desktop GPUs and 20-30 minutes on laptop GPUs. The container may report `unhealthy` while compilation is in progress — this is normal. Once the build finishes the server starts and the container transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds.
### Endpoints
- HTTP API: <http://localhost:8000>
- gRPC API: `localhost:50051`
- Health: <http://localhost:8000/health>
- Readiness: <http://localhost:8000/health/ready>
- Metrics (Prometheus): <http://localhost:8000/metrics>
### Test the API
```bash
# Image — raw bytes (fastest path)
curl -X POST http://localhost:8000/ocr/raw \
--data-binary @document.png \
-H "Content-Type: image/png"
# Image — base64 JSON
curl -X POST http://localhost:8000/ocr \
-H "Content-Type: application/json" \
-d '{"image":"'$(base64 -w0 document.png)'"}'
# PDF — raw bytes
curl -X POST http://localhost:8000/ocr/pdf \
--data-binary @document.pdf
# PDF with layout detection enabled
curl -X POST "http://localhost:8000/ocr/pdf?layout=1&mode=auto" \
--data-binary @document.pdf
```
> **Important:** Use HTTP keep-alive. Sending many short-lived connections (e.g. one `curl` per request in a loop) can overwhelm the server. Standard HTTP client libraries (`requests.Session`, `aiohttp`, Go `http.Client`, etc.) reuse connections by default.
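A sketch of a client that reuses one connection across many requests (the `/ocr/raw` endpoint is from the examples above; the JSON response shape is an assumption to verify against your server):
```python
import requests

session = requests.Session()  # keep-alive: one TCP connection reused across requests

with open("document.png", "rb") as f:
    resp = session.post(
        "http://localhost:8000/ocr/raw",
        data=f.read(),
        headers={"Content-Type": "image/png"},
    )
resp.raise_for_status()
print(resp.json())  # assumed JSON body; inspect resp.text for the actual shape
```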
### Switching Languages
Edit `.env` and restart:
```bash
TURBOOCR_LANG=chinese
TURBOOCR_SERVER=1 # optional: use the 84 MB Chinese server rec
```
```bash
docker compose up -d
```
All language bundles are baked into the image at build time (SHA256-verified from the pinned PP-OCRv5 release). No runtime downloads.
## Performance Tuning
- **GPU pipelines** — set `TURBOOCR_PIPELINE_POOL_SIZE` based on available VRAM (~1.4 GB each)
- **Layout overhead** — `?layout=1` reduces throughput by ~20%; set `TURBOOCR_DISABLE_LAYOUT=1` to skip loading the model entirely
- **Shared memory** — increase `TURBOOCR_SHM_SIZE` if you process very large PDFs
## Security Notes
- The API has no authentication by default. Put a reverse proxy (nginx, Caddy) in front for production.
- The default PDF mode is `ocr`, which only trusts pixel data and is safe for untrusted PDF uploads.
- Do **not** set `TURBOOCR_PDF_MODE` to `geometric` or `auto` globally if you accept PDFs from untrusted sources — a malicious PDF can embed invisible text or remap glyphs to inject arbitrary strings into the text layer.
- Use `auto_verified` for higher accuracy on trusted documents; it cross-checks the native text layer against OCR results.
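A sketch of a per-request override for a trusted document, reusing the query-parameter form from the examples above (treating `mode` as the per-request counterpart of `TURBOOCR_PDF_MODE`):
```bash
# Cross-check the native text layer against OCR results on a trusted PDF
curl -X POST "http://localhost:8000/ocr/pdf?mode=auto_verified" \
  --data-binary @trusted-report.pdf
```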
## License
TurboOCR is licensed under the MIT License. See the [TurboOCR GitHub repository](https://github.com/aiptimizer/TurboOCR) for details.
+119
@@ -0,0 +1,119 @@
# TurboOCR
[English](./README.md) | [中文](./README.zh.md)
This service deploys [TurboOCR](https://github.com/aiptimizer/TurboOCR), a GPU-accelerated OCR server built on C++ / CUDA / TensorRT / PP-OCRv5. A single binary serves both an HTTP API and a gRPC API that share one GPU pipeline pool, with Prometheus metrics built in.
## Services
- `turboocr`: TurboOCR HTTP (port 8000) + gRPC (port 50051) inference server
## Requirements
- Linux host with NVIDIA driver 595 or newer
- Turing or newer GPU (RTX 20-series / GTX 16-series and up)
- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed and configured for Docker
## Environment Variables
| Variable Name | Description | Default Value |
| ----------------------------- | --------------------------------------------------------------------------------- | ------------- |
| `TURBOOCR_VERSION` | TurboOCR image version | `v2.1.1` |
| `TURBOOCR_LANG` | Language bundle: `latin`, `chinese`, `greek`, `eslav`, `arabic`, `korean`, `thai` | `""` (latin) |
| `TURBOOCR_SERVER` | With `chinese`, set to `1` for the 84 MB server rec model | `""` |
| `TURBOOCR_PIPELINE_POOL_SIZE` | Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto | `""` |
| `TURBOOCR_DISABLE_LAYOUT` | Disable layout detection model (saves ~300-500 MB VRAM) | `0` |
| `TURBOOCR_PDF_MODE` | Default PDF mode: `ocr` / `geometric` / `auto` / `auto_verified` | `ocr` |
| `TURBOOCR_DISABLE_ANGLE_CLS` | Skip angle classifier (saves ~0.4 ms) | `0` |
| `TURBOOCR_DET_MAX_SIDE` | Max detection input size in pixels | `960` |
| `TURBOOCR_PDF_DAEMONS` | PDF render daemons | `16` |
| `TURBOOCR_PDF_WORKERS` | PDF worker threads | `4` |
| `TURBOOCR_MAX_PDF_PAGES` | Maximum pages per PDF request | `2000` |
| `TURBOOCR_LOG_LEVEL` | Log level: `debug` / `info` / `warn` / `error` | `info` |
| `TURBOOCR_LOG_FORMAT` | Log format: `json` / `text` | `json` |
| `TURBOOCR_HTTP_PORT_OVERRIDE` | Host port for HTTP API | `8000` |
| `TURBOOCR_GRPC_PORT_OVERRIDE` | Host port for gRPC API | `50051` |
| `TURBOOCR_CPU_LIMIT` | CPU limit | `8.0` |
| `TURBOOCR_MEMORY_LIMIT` | Memory limit | `12G` |
| `TURBOOCR_GPU_COUNT` | Number of NVIDIA GPUs to reserve | `1` |
| `TURBOOCR_SHM_SIZE` | Shared memory size | `2g` |
Copy `.env.example` to `.env` and override only the variables you need to change.
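For example, a minimal setup that only remaps the HTTP port (variable names are from the table above; everything else keeps its default):
```bash
cp .env.example .env
# expose the HTTP API on host port 18000 instead of 8000
echo 'TURBOOCR_HTTP_PORT_OVERRIDE=18000' >> .env
docker compose up -d
```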
## Volumes
- `turboocr_trt_cache`: Caches the TensorRT engines built from ONNX on first start. Must be a **named** volume; bind-mounting an empty host directory would shadow the baked-in language bundles and the server would fail to load models.
## Usage
### Start TurboOCR
```bash
docker compose up -d
```
The first start builds TensorRT engines from ONNX. Build time depends on your GPU: roughly 5 minutes on high-end desktop GPUs and 20-30 minutes on laptop GPUs. The container may report `unhealthy` while compilation is in progress; this is normal. Once the build finishes the server starts and the container transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds.
### Endpoints
- HTTP API: <http://localhost:8000>
- gRPC API: `localhost:50051`
- Health: <http://localhost:8000/health>
- Readiness: <http://localhost:8000/health/ready>
- Metrics (Prometheus): <http://localhost:8000/metrics>
### Test the API
```bash
# Image: raw bytes (fastest path)
curl -X POST http://localhost:8000/ocr/raw \
  --data-binary @document.png \
  -H "Content-Type: image/png"
# Image: base64 JSON
curl -X POST http://localhost:8000/ocr \
  -H "Content-Type: application/json" \
  -d '{"image":"'$(base64 -w0 document.png)'"}'
# PDF: raw bytes
curl -X POST http://localhost:8000/ocr/pdf \
  --data-binary @document.pdf
# PDF with layout detection enabled
curl -X POST "http://localhost:8000/ocr/pdf?layout=1&mode=auto" \
  --data-binary @document.pdf
```
> **Important:** Use HTTP keep-alive. Opening many short-lived connections (e.g. one `curl` per request in a loop) can overwhelm the server. Standard HTTP client libraries (`requests.Session`, `aiohttp`, Go `http.Client`, etc.) reuse connections by default.
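A sketch of a client that reuses one connection across many requests (the `/ocr/raw` endpoint is from the examples above; the JSON response shape is an assumption to verify against your server):
```python
import requests

session = requests.Session()  # keep-alive: one TCP connection reused across requests

with open("document.png", "rb") as f:
    resp = session.post(
        "http://localhost:8000/ocr/raw",
        data=f.read(),
        headers={"Content-Type": "image/png"},
    )
resp.raise_for_status()
print(resp.json())  # assumed JSON body; inspect resp.text for the actual shape
```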
### Switching Languages
Edit `.env` and restart:
```bash
TURBOOCR_LANG=chinese
TURBOOCR_SERVER=1  # optional: use the 84 MB Chinese server rec model
```
```bash
docker compose up -d
```
All language bundles are baked into the image at build time (SHA256-verified from the pinned PP-OCRv5 release), so nothing is downloaded at runtime.
## Performance Tuning
- **GPU pipelines**: set `TURBOOCR_PIPELINE_POOL_SIZE` based on available VRAM (~1.4 GB per pipeline)
- **Layout overhead**: `?layout=1` reduces throughput by ~20%; set `TURBOOCR_DISABLE_LAYOUT=1` to skip loading the model entirely
- **Shared memory**: increase `TURBOOCR_SHM_SIZE` if you process very large PDFs
## Security Notes
- The API has no authentication by default. Put a reverse proxy (nginx, Caddy) in front for production.
- The default PDF mode is `ocr`, which only trusts pixel data and is safe for untrusted PDF uploads.
- Do **not** set `TURBOOCR_PDF_MODE` to `geometric` or `auto` globally if you accept PDFs from untrusted sources: a malicious PDF can embed invisible text, remap ToUnicode entries, or inject arbitrary strings into the text layer.
- Use `auto_verified` for higher accuracy on trusted documents; it runs OCR and cross-checks the result against the native text layer.
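A sketch of a per-request override for a trusted document, reusing the query-parameter form from the examples above (treating `mode` as the per-request counterpart of `TURBOOCR_PDF_MODE`):
```bash
# Cross-check the native text layer against OCR results on a trusted PDF
curl -X POST "http://localhost:8000/ocr/pdf?mode=auto_verified" \
  --data-binary @trusted-report.pdf
```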
## License
TurboOCR is licensed under the MIT License. See the [TurboOCR GitHub repository](https://github.com/aiptimizer/TurboOCR) for details.
+71
@@ -0,0 +1,71 @@
x-defaults: &defaults
restart: unless-stopped
logging:
driver: json-file
options:
max-size: 100m
max-file: '3'
services:
turboocr:
<<: *defaults
image: ${GLOBAL_REGISTRY:-ghcr.io/}aiptimizer/turboocr:${TURBOOCR_VERSION:-v2.1.1}
ports:
- '${TURBOOCR_HTTP_PORT_OVERRIDE:-8000}:8000'
- '${TURBOOCR_GRPC_PORT_OVERRIDE:-50051}:50051'
volumes:
# Named volume caches TensorRT engines built from ONNX on first start (~90s).
# Must be a named volume - bind-mounting an empty host dir would shadow the
# baked-in language bundles and prevent the server from loading models.
- turboocr_trt_cache:/home/ocr/.cache/turbo-ocr
environment:
- TZ=${TZ:-UTC}
# Language bundle: latin (default), chinese, greek, eslav, arabic, korean, thai
- OCR_LANG=${TURBOOCR_LANG:-}
# Set to 1 with OCR_LANG=chinese to use the 84MB server rec instead of 16MB mobile
- OCR_SERVER=${TURBOOCR_SERVER:-}
# Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto
- PIPELINE_POOL_SIZE=${TURBOOCR_PIPELINE_POOL_SIZE:-}
# Set to 1 to disable PP-DocLayoutV3 layout detection (saves ~300-500 MB VRAM)
- DISABLE_LAYOUT=${TURBOOCR_DISABLE_LAYOUT:-0}
# Default PDF mode: ocr (safest) / geometric / auto / auto_verified
- ENABLE_PDF_MODE=${TURBOOCR_PDF_MODE:-ocr}
# Skip angle classifier (~0.4ms savings)
- DISABLE_ANGLE_CLS=${TURBOOCR_DISABLE_ANGLE_CLS:-0}
# Max detection input size
- DET_MAX_SIDE=${TURBOOCR_DET_MAX_SIDE:-960}
# PDF render parallelism
- PDF_DAEMONS=${TURBOOCR_PDF_DAEMONS:-16}
- PDF_WORKERS=${TURBOOCR_PDF_WORKERS:-4}
# Maximum pages per PDF request
- MAX_PDF_PAGES=${TURBOOCR_MAX_PDF_PAGES:-2000}
# Log level: debug / info / warn / error
- LOG_LEVEL=${TURBOOCR_LOG_LEVEL:-info}
# Log format: json (structured) / text (human-readable)
- LOG_FORMAT=${TURBOOCR_LOG_FORMAT:-json}
healthcheck:
test: [CMD, curl, -fsS, 'http://localhost:8000/health']
interval: 30s
timeout: 10s
retries: 5
# First start builds TensorRT engines from ONNX. Build time varies by GPU:
# ~5 min on high-end desktop GPUs, 20-30 min on laptop GPUs. The container
# may show "unhealthy" during compilation but will become healthy once done.
# Subsequent restarts reuse the cached engines and start in seconds.
start_period: 30m
deploy:
resources:
limits:
cpus: ${TURBOOCR_CPU_LIMIT:-8.0}
memory: ${TURBOOCR_MEMORY_LIMIT:-12G}
reservations:
cpus: ${TURBOOCR_CPU_RESERVATION:-2.0}
memory: ${TURBOOCR_MEMORY_RESERVATION:-4G}
devices:
- driver: nvidia
count: ${TURBOOCR_GPU_COUNT:-1}
capabilities: [gpu]
shm_size: ${TURBOOCR_SHM_SIZE:-2g}
volumes:
turboocr_trt_cache: