From 5f8503df42ddd4ce68381a83caac3fad12f3c9f4 Mon Sep 17 00:00:00 2001 From: Sun-ZhenXing <1006925066@qq.com> Date: Wed, 29 Apr 2026 11:54:59 +0800 Subject: [PATCH] feat: add build turboocr --- README.md | 1 + README.zh.md | 1 + builds/turboocr/.env.example | 73 ++++++++++++++++ builds/turboocr/Dockerfile.cpu | 104 +++++++++++++++++++++++ builds/turboocr/Dockerfile.cuda12 | 118 ++++++++++++++++++++++++++ builds/turboocr/README.md | 127 ++++++++++++++++++++++++++++ builds/turboocr/README.zh.md | 127 ++++++++++++++++++++++++++++ builds/turboocr/docker-compose.yaml | 110 ++++++++++++++++++++++++ src/turboocr/README.md | 4 +- src/turboocr/README.zh.md | 4 +- src/turboocr/docker-compose.yaml | 14 +-- 11 files changed, 675 insertions(+), 8 deletions(-) create mode 100644 builds/turboocr/.env.example create mode 100644 builds/turboocr/Dockerfile.cpu create mode 100644 builds/turboocr/Dockerfile.cuda12 create mode 100644 builds/turboocr/README.md create mode 100644 builds/turboocr/README.zh.md create mode 100644 builds/turboocr/docker-compose.yaml diff --git a/README.md b/README.md index b457ceb..79d906b 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ These services require building custom Docker images from source. 
| [Multica](./builds/multica) | v0.1.32 | | [OpenFang](./builds/openfang) | 0.1.0 | | [Paperclip](./builds/paperclip) | main | +| [TurboOCR](./builds/turboocr) | v2.1.1 | ## Supported Services diff --git a/README.zh.md b/README.zh.md index fbf1f2c..990bfac 100644 --- a/README.zh.md +++ b/README.zh.md @@ -42,6 +42,7 @@ docker compose exec redis redis-cli ping | [Multica](./builds/multica) | v0.1.32 | | [OpenFang](./builds/openfang) | 0.1.0 | | [Paperclip](./builds/paperclip) | main | +| [TurboOCR](./builds/turboocr) | v2.1.1 | ## 已经支持的服务 diff --git a/builds/turboocr/.env.example b/builds/turboocr/.env.example new file mode 100644 index 0000000..d42f753 --- /dev/null +++ b/builds/turboocr/.env.example @@ -0,0 +1,73 @@ +# Source build configuration +TURBOOCR_VERSION=v2.1.1 + +# Registry mirror prefix for docker build — leave empty for direct pull. +# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud. +# Example: TURBOOCR_DOCKER_MIRROR=docker.m.daocloud.io/ +TURBOOCR_DOCKER_MIRROR= + +# NGC (nvcr.io) mirror prefix for the CUDA 12.x GPU build — leave empty for direct pull. +# Standard Docker Hub mirrors (e.g. DaoCloud) do NOT proxy nvcr.io. +# Set this only if you have a dedicated NGC pull-through proxy. +TURBOOCR_NGC_MIRROR= + +# Network configuration +TURBOOCR_HTTP_PORT_OVERRIDE=8000 +TURBOOCR_GRPC_PORT_OVERRIDE=50051 + +# Language bundle: latin (default), chinese, greek, eslav, arabic, korean, thai +TURBOOCR_LANG= +# Set to 1 with TURBOOCR_LANG=chinese to use the 84 MB server rec model +TURBOOCR_SERVER= + +# GPU pipeline pool — number of concurrent inference pipelines (~1.4 GB VRAM each). +# Leave empty to let the server choose automatically based on available VRAM. +# Ignored in CPU mode. +TURBOOCR_PIPELINE_POOL_SIZE= + +# Set to 1 to skip loading the PP-DocLayoutV3 layout detection model. +# Saves ~300-500 MB VRAM and cuts first-start compilation time by ~28 min on laptop GPUs. 
+# Only do this if you do not need the ?layout=1 PDF endpoint. +TURBOOCR_DISABLE_LAYOUT=0 + +# Default PDF parsing mode: ocr (safest) / geometric / auto / auto_verified +TURBOOCR_PDF_MODE=ocr + +# Set to 1 to skip the angle classifier (~0.4 ms savings per image) +TURBOOCR_DISABLE_ANGLE_CLS=0 + +# Maximum detection input dimension in pixels +TURBOOCR_DET_MAX_SIDE=960 + +# PDF render parallelism +TURBOOCR_PDF_DAEMONS=16 +TURBOOCR_PDF_WORKERS=4 + +# Maximum pages accepted in a single PDF request +TURBOOCR_MAX_PDF_PAGES=2000 + +# Log level: debug / info / warn / error +TURBOOCR_LOG_LEVEL=info + +# Log format: json (structured) / text (human-readable) +TURBOOCR_LOG_FORMAT=json + +# Resources — commented values fall back to the per-profile defaults in docker-compose.yaml +# (gpu: 12G limit / 4G reservation for TRT engine compilation headroom; cpu: 4G / 1G). +TURBOOCR_CPU_LIMIT=8.0 +# TURBOOCR_MEMORY_LIMIT=12G +TURBOOCR_CPU_RESERVATION=2.0 +# TURBOOCR_MEMORY_RESERVATION=4G + +# Number of NVIDIA GPUs to reserve (GPU variant only) +TURBOOCR_GPU_COUNT=1 + +# Shared memory — fastpdf2png uses /dev/shm for PDF page transfers (default: gpu 2g, cpu 512m) +# TURBOOCR_SHM_SIZE=2g + +# Logging +TURBOOCR_LOG_MAX_SIZE=100m +TURBOOCR_LOG_MAX_FILE=3 + +# Timezone +TZ=UTC diff --git a/builds/turboocr/Dockerfile.cpu b/builds/turboocr/Dockerfile.cpu new file mode 100644 index 0000000..1d11f2d --- /dev/null +++ b/builds/turboocr/Dockerfile.cpu @@ -0,0 +1,104 @@ +# ============================================================ +# TurboOCR — CPU-only build (ONNX Runtime backend, no GPU required) +# Base image: ubuntu:24.04 +# +# Produces: /app/build_cpu/paddle_cpu_server (HTTP + gRPC server) +# +# Image size: ~500 MB (vs ~10 GB for the GPU image). +# No TRT compilation on first start — ONNX Runtime is used directly. +# Startup is fast (~30 s) and requires no NVIDIA driver. +# +# Build: docker build -f Dockerfile.cpu -t turboocr-cpu . 
+# ============================================================ + +ARG TURBOOCR_VERSION=v2.1.1 +ARG ORT_VERSION=1.22.0 +# Registry mirror prefix — leave empty for direct pull. +# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud. +ARG DOCKER_MIRROR= + +FROM ${DOCKER_MIRROR}ubuntu:24.04 + +# Re-declare ARGs after FROM so they remain in scope +ARG TURBOOCR_VERSION +ARG ORT_VERSION + +ARG DEBIAN_FRONTEND=noninteractive + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + cmake \ + g++ \ + make \ + pkg-config \ + libopencv-dev \ + nginx \ + gosu \ + libgrpc++-dev \ + libc-ares-dev \ + libprotobuf-dev \ + protobuf-compiler \ + protobuf-compiler-grpc \ + libjsoncpp-dev \ + uuid-dev \ + zlib1g-dev \ + libssl-dev \ + git \ + wget \ + curl \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Install Drogon HTTP framework (async, epoll-based) +RUN cd /tmp && \ + git clone --depth 1 --branch v1.9.12 https://github.com/drogonframework/drogon.git && \ + cd drogon && git submodule update --init && \ + mkdir build && cd build && \ + cmake .. 
-DBUILD_EXAMPLES=OFF -DBUILD_CTL=OFF -DBUILD_ORM=OFF \ + -DBUILD_POSTGRESQL=OFF -DBUILD_MYSQL=OFF -DBUILD_SQLITE=OFF \ + -DBUILD_REDIS=OFF -DBUILD_TESTING=OFF && \ + make -j$(nproc) && make install && \ + rm -rf /tmp/drogon + +# Install ONNX Runtime C++ SDK +RUN cd /tmp && \ + wget -q "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \ + tar xzf "onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \ + cp -r "onnxruntime-linux-x64-${ORT_VERSION}/include/"* /usr/local/include/ && \ + cp "onnxruntime-linux-x64-${ORT_VERSION}/lib/libonnxruntime.so"* /usr/local/lib/ && \ + ldconfig && rm -rf /tmp/onnxruntime* + +# Clone TurboOCR at the pinned release tag +RUN git clone --depth 1 --branch "${TURBOOCR_VERSION}" \ + https://github.com/aiptimizer/TurboOCR.git /app + +WORKDIR /app + +# Install fastpdf2png (PDF renderer — PDFium vendored in third_party/). +# Copy vendored libpdfium first so the installer does not need network access. +RUN cp third_party/pdfium/lib/libpdfium.so /usr/lib/ && ldconfig && \ + bash scripts/install_fastpdf2png.sh && \ + { cp bin/libpdfium.so /usr/lib/ 2>/dev/null || true; } && \ + ldconfig + +# Build CPU-only mode with ONNX Runtime backend +RUN mkdir -p build_cpu && cd build_cpu && \ + cmake .. -DUSE_CPU_ONLY=ON -DFETCH_MODELS=OFF && \ + make -j$(nproc) + +# Create non-root user and redirect /app/models/rec into the named cache volume. 
+RUN useradd -m -s /bin/bash ocr \ + && chmod +x /app/scripts/entrypoint.sh \ + && mkdir -p /home/ocr/.cache/turbo-ocr/models/rec /app/models \ + && ln -s /home/ocr/.cache/turbo-ocr/models/rec /app/models/rec + +# Fetch all PP-OCRv5 language bundles (SHA256-verified from pinned GitHub Release) +ARG OCR_INCLUDE_SERVER=1 +ENV OCR_INCLUDE_SERVER=${OCR_INCLUDE_SERVER} +RUN bash scripts/fetch_release_models.sh \ + && chown -R ocr:ocr /app /home/ocr/.cache + +EXPOSE 8000 50051 + +ENTRYPOINT ["/app/scripts/entrypoint.sh"] +CMD ["./build_cpu/paddle_cpu_server"] diff --git a/builds/turboocr/Dockerfile.cuda12 b/builds/turboocr/Dockerfile.cuda12 new file mode 100644 index 0000000..a4fa92c --- /dev/null +++ b/builds/turboocr/Dockerfile.cuda12 @@ -0,0 +1,118 @@ +# ============================================================ +# TurboOCR — CUDA 12.x build (TensorRT 10.8 / CUDA 12.7) +# Base image: nvcr.io/nvidia/tensorrt:24.12-py3 +# +# Supported compute capabilities (NVIDIA GPU reference): +# https://developer.nvidia.com/cuda-gpus +# 7.5 Turing — GTX 16xx / RTX 20xx +# 8.0 Ampere — A100, RTX 30xx server-class +# 8.6 Ampere — RTX 30xx desktop / laptop +# 8.9 Ada — RTX 40xx +# +# Blackwell (CC 12.0) requires CUDA 13.x. +# For that, use the upstream docker/Dockerfile.gpu (tensorrt:26.03-py3). +# +# Build: docker build -f Dockerfile.cuda12 -t turboocr-cuda12 . +# ============================================================ + +ARG TURBOOCR_VERSION=v2.1.1 +ARG CMAKE_VERSION=3.31.6 +ARG ORT_VERSION=1.22.0 +# NGC registry mirror prefix — leave empty for direct pull from nvcr.io. +# Note: standard Docker Hub mirrors (e.g. DaoCloud) do NOT proxy nvcr.io. +# Set this only if you have a dedicated NGC mirror or a pull-through proxy. 
+ARG NGC_MIRROR= + +FROM ${NGC_MIRROR}nvcr.io/nvidia/tensorrt:24.12-py3 + +# Re-declare ARGs after FROM so they remain in scope +ARG TURBOOCR_VERSION +ARG CMAKE_VERSION +ARG ORT_VERSION + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + pkg-config \ + libopencv-dev \ + nginx \ + gosu \ + libgrpc++-dev \ + libprotobuf-dev \ + protobuf-compiler-grpc \ + libjsoncpp-dev \ + uuid-dev \ + zlib1g-dev \ + libssl-dev \ + libc-ares-dev \ + git \ + wget \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Install Drogon HTTP framework (async, epoll-based) +RUN cd /tmp && \ + git clone --depth 1 --branch v1.9.12 https://github.com/drogonframework/drogon.git && \ + cd drogon && git submodule update --init && \ + mkdir build && cd build && \ + cmake .. -DBUILD_EXAMPLES=OFF -DBUILD_CTL=OFF -DBUILD_ORM=OFF \ + -DBUILD_POSTGRESQL=OFF -DBUILD_MYSQL=OFF -DBUILD_SQLITE=OFF \ + -DBUILD_REDIS=OFF -DBUILD_TESTING=OFF && \ + make -j$(nproc) && make install && \ + rm -rf /tmp/drogon + +# Upgrade CMake (the base image may ship an older version) +RUN cd /tmp && \ + wget -q "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz" && \ + tar xzf "cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz" && \ + cp -r "cmake-${CMAKE_VERSION}-linux-x86_64/bin/"* /usr/local/bin/ && \ + cp -r "cmake-${CMAKE_VERSION}-linux-x86_64/share/"* /usr/local/share/ && \ + rm -rf /tmp/cmake* + +# Install ONNX Runtime C++ SDK (used by the CPU inference fallback path) +RUN cd /tmp && \ + wget -q "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \ + tar xzf "onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \ + cp -r "onnxruntime-linux-x64-${ORT_VERSION}/include/"* /usr/local/include/ && \ + cp "onnxruntime-linux-x64-${ORT_VERSION}/lib/libonnxruntime.so"* /usr/local/lib/ && \ + ldconfig && rm -rf /tmp/onnxruntime* + +# Clone TurboOCR at 
the pinned release tag +RUN git clone --depth 1 --branch "${TURBOOCR_VERSION}" \ + https://github.com/aiptimizer/TurboOCR.git /app + +WORKDIR /app + +# Install fastpdf2png (PDF renderer — PDFium vendored in third_party/) +RUN bash scripts/install_fastpdf2png.sh && \ + cp bin/libpdfium.so /usr/lib/ && ldconfig + +# Build GPU mode. +# - CUDA_ARCHITECTURES: 7.5-8.9 covers Turing through Ada Lovelace under CUDA 12.x. +# CC 12.0 (Blackwell) is excluded — it requires CUDA 13.x. +# - TENSORRT_DIR: /usr/local/tensorrt is the cmake default and matches the 24.12-py3 +# base image layout. No override needed (upstream 26.03 uses /usr/lib/x86_64-linux-gnu). +# - FETCH_MODELS=OFF: models are fetched in a separate layer below for better caching. +RUN mkdir -p build && cd build && \ + cmake .. \ + -DFETCH_MODELS=OFF \ + -DCMAKE_CUDA_ARCHITECTURES="75;80;86;89" \ + && make -j$(nproc) + +# Create non-root user and redirect /app/models/rec into the named cache volume. +# TRT engines built at first start are persisted via: -v turboocr_cache:/home/ocr/.cache/turbo-ocr +RUN useradd -m -s /bin/bash ocr \ + && chmod +x /app/scripts/entrypoint.sh \ + && mkdir -p /home/ocr/.cache/turbo-ocr/models/rec /app/models \ + && ln -s /home/ocr/.cache/turbo-ocr/models/rec /app/models/rec + +# Fetch all PP-OCRv5 language bundles (SHA256-verified from pinned GitHub Release) +ARG OCR_INCLUDE_SERVER=1 +ENV OCR_INCLUDE_SERVER=${OCR_INCLUDE_SERVER} +RUN bash scripts/fetch_release_models.sh \ + && chown -R ocr:ocr /app /home/ocr/.cache + +EXPOSE 8000 50051 + +ENTRYPOINT ["/app/scripts/entrypoint.sh"] +CMD ["./build/paddle_highspeed_cpp"] diff --git a/builds/turboocr/README.md b/builds/turboocr/README.md new file mode 100644 index 0000000..dfe4697 --- /dev/null +++ b/builds/turboocr/README.md @@ -0,0 +1,127 @@ +# TurboOCR — Custom Builds + +[中文文档](README.zh.md) + +This directory builds [TurboOCR](https://github.com/aiptimizer/TurboOCR) from source for two targets that are not covered by the upstream 
pre-built images: + +| Variant | Dockerfile | Profile | Base image | +| ------- | ---------- | ------- | ---------- | +| **CUDA 12.x** | `Dockerfile.cuda12` | `gpu` | `nvcr.io/nvidia/tensorrt:24.12-py3` (TRT 10.8 / CUDA 12.7) | +| **CPU-only** | `Dockerfile.cpu` | `cpu` | `ubuntu:24.04` (ONNX Runtime) | + +The upstream pre-built image targets CUDA 13.x (Blackwell / CC 12.0). Use this directory if your GPU is on CUDA 12.x (Turing through Ada Lovelace, CC 7.5–8.9) or if you have no GPU at all. + +## Quick Start + +1. Copy the example environment file: + + ```bash + cp .env.example .env + ``` + +2. Build and start the variant you need: + + **CUDA 12.x (GPU — Turing through Ada Lovelace):** + + ```bash + docker compose --profile gpu up -d --build + ``` + + **CPU-only (no GPU required):** + + ```bash + docker compose --profile cpu up -d --build + ``` + +3. Access the API at <http://localhost:8000>. + +> **Note:** The first build compiles Drogon and TurboOCR from source, which takes 10–30 minutes depending on your CPU core count. Subsequent builds use the Docker layer cache and are fast. + +## First-Start Behavior + +### GPU variant + +On the very first container start, TensorRT compiles 4 ONNX models into engine files. Measured times on an RTX 3070 Laptop: + +| Engine | Time | +| ------ | ---- | +| det | ~5 min | +| rec | ~30 min | +| cls | ~4 min | +| layout | ~28 min | +| **Total** | **~67–90 min** | + +High-end desktop GPUs finish in ~15 minutes. The container shows `unhealthy` during compilation — this is expected. Once all engines are ready the server starts and the status transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds. + +> **Tip:** Set `TURBOOCR_DISABLE_LAYOUT=1` to skip the layout detection engine (~28 min savings on laptop GPUs). Use this only if you do not need the `?layout=1` PDF endpoint. + +### CPU variant + +No TRT compilation occurs. ONNX Runtime loads the models directly at startup. 
The container is typically `healthy` within 60 seconds. + +## Default Ports + +| Port | Protocol | Description | +| ---- | -------- | ----------- | +| 8000 | HTTP | OCR REST API + health/metrics | +| 50051 | gRPC | OCR gRPC API | + +## Important Environment Variables + +| Variable | Description | Default | +| -------- | ----------- | ------- | +| `TURBOOCR_VERSION` | Git tag used for the source build | `v2.1.1` | +| `TURBOOCR_HTTP_PORT_OVERRIDE` | Host port for the HTTP API | `8000` | +| `TURBOOCR_GRPC_PORT_OVERRIDE` | Host port for the gRPC API | `50051` | +| `TURBOOCR_LANG` | Language bundle: `latin`, `chinese`, `greek`, `eslav`, `arabic`, `korean`, `thai` | `""` (latin) | +| `TURBOOCR_SERVER` | With `chinese`, set to `1` for the 84 MB server rec model | `""` | +| `TURBOOCR_PIPELINE_POOL_SIZE` | Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto | `""` | +| `TURBOOCR_DISABLE_LAYOUT` | Disable layout detection model (saves ~300–500 MB VRAM) | `0` | +| `TURBOOCR_PDF_MODE` | PDF parsing mode: `ocr` / `geometric` / `auto` / `auto_verified` | `ocr` | +| `TURBOOCR_CPU_LIMIT` | CPU core limit (both variants) | `8.0` | +| `TURBOOCR_MEMORY_LIMIT` | Memory limit — `12G` for GPU, `4G` for CPU | variant default | +| `TURBOOCR_GPU_COUNT` | NVIDIA GPUs to reserve (GPU variant only) | `1` | +| `TURBOOCR_SHM_SIZE` | Shared memory for fastpdf2png — `2g` for GPU, `512m` for CPU | variant default | +| `TZ` | Container timezone | `UTC` | + +## Storage + +- `turboocr_build_cache` — named volume at `/home/ocr/.cache/turbo-ocr`. Stores TRT engine files (GPU) or the model cache directory (CPU). Must be a named volume — a bind-mount of an empty host directory would shadow the baked-in language bundles and the server would fail to load models. 
+ +## Supported GPU Architectures (CUDA 12.x variant) + +| Compute Capability | Architecture | GPUs | +| ------------------ | ------------ | ---- | +| 7.5 | Turing | GTX 16xx, RTX 20xx | +| 8.0 | Ampere | A100, A30 | +| 8.6 | Ampere | RTX 30xx (desktop / laptop) | +| 8.9 | Ada Lovelace | RTX 40xx | + +Blackwell (CC 12.0, RTX 50xx) requires CUDA 13.x — use the upstream pre-built image from `src/turboocr` instead. + +## Notes + +- Both Dockerfiles build TurboOCR from source via `git clone` inside the image. A working internet connection is required at build time. +- The CUDA 12.x Dockerfile overrides `CMAKE_CUDA_ARCHITECTURES` to `75;80;86;89`, removing CC 12.0 which is not supported by CUDA 12.x. +- TensorRT 10.8 is located at `/usr/local/tensorrt` in the `24.12-py3` base image, which matches the CMake default. No `-DTENSORRT_DIR` override is needed. +- The CPU variant uses ONNX Runtime 1.22.0 and produces a `paddle_cpu_server` binary with both HTTP and gRPC interfaces. + +## Endpoints + +- HTTP API: <http://localhost:8000> +- gRPC API: `localhost:50051` +- Health: <http://localhost:8000/health> +- Readiness: +- Metrics (Prometheus): + +## Security Notes + +- The API has no authentication by default. Put a reverse proxy (nginx, Caddy) in front for production. +- The default PDF mode is `ocr`, which only trusts pixel data and is safe for untrusted PDF uploads. +- Do **not** set `TURBOOCR_PDF_MODE` to `geometric` or `auto` globally if you accept PDFs from untrusted sources. 
+ +## References + +- [TurboOCR Repository](https://github.com/aiptimizer/TurboOCR) +- [NVIDIA TensorRT Container Releases](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/) +- [NVIDIA CUDA GPU Compute Capability Table](https://developer.nvidia.com/cuda-gpus) diff --git a/builds/turboocr/README.zh.md b/builds/turboocr/README.zh.md new file mode 100644 index 0000000..4dd48a3 --- /dev/null +++ b/builds/turboocr/README.zh.md @@ -0,0 +1,127 @@ +# TurboOCR — 自定义构建 + +[English](README.md) + +此目录从源码构建 [TurboOCR](https://github.com/aiptimizer/TurboOCR),覆盖上游预构建镜像未提供的两个目标: + +| 变体 | Dockerfile | Profile | 基础镜像 | +| ---- | ---------- | ------- | -------- | +| **CUDA 12.x** | `Dockerfile.cuda12` | `gpu` | `nvcr.io/nvidia/tensorrt:24.12-py3`(TRT 10.8 / CUDA 12.7) | +| **纯 CPU** | `Dockerfile.cpu` | `cpu` | `ubuntu:24.04`(ONNX Runtime) | + +上游预构建镜像针对 CUDA 13.x(Blackwell / CC 12.0)。如果你的 GPU 属于 CUDA 12.x 范围(Turing 到 Ada Lovelace,CC 7.5–8.9),或者没有 GPU,请使用本目录。 + +## 快速开始 + +1. 复制示例环境文件: + + ```bash + cp .env.example .env + ``` + +2. 按需构建并启动对应变体: + + **CUDA 12.x(GPU — Turing 到 Ada Lovelace):** + + ```bash + docker compose --profile gpu up -d --build + ``` + + **纯 CPU(无需 GPU):** + + ```bash + docker compose --profile cpu up -d --build + ``` + +3. 
访问 API:<http://localhost:8000>。 + +> **说明:** 首次构建需要从源码编译 Drogon 和 TurboOCR,耗时约 10–30 分钟,具体取决于 CPU 核心数。后续构建会复用 Docker 层缓存,速度很快。 + +## 首次启动说明 + +### GPU 变体 + +容器首次启动时,TensorRT 会将 4 个 ONNX 模型编译为引擎文件。在 RTX 3070 Laptop 上的实测耗时: + +| 引擎 | 耗时 | +| ---- | ---- | +| det | 约 5 分钟 | +| rec | 约 30 分钟 | +| cls | 约 4 分钟 | +| layout | 约 28 分钟 | +| **合计** | **约 67–90 分钟** | + +高端桌面 GPU 约 15 分钟完成。编译期间容器显示 `unhealthy` 属于正常现象——所有引擎构建完成后服务启动,状态切换为 `healthy`。后续重启会复用缓存引擎,几乎瞬间完成。 + +> **提示:** 设置 `TURBOOCR_DISABLE_LAYOUT=1` 可跳过版面检测引擎的编译(笔记本 GPU 约节省 28 分钟)。仅在不需要 `?layout=1` PDF 端点时使用此选项。 + +### CPU 变体 + +无 TRT 编译过程。ONNX Runtime 在启动时直接加载模型,通常在 60 秒内变为 `healthy`。 + +## 默认端口 + +| 端口 | 协议 | 说明 | +| ---- | ---- | ---- | +| 8000 | HTTP | OCR REST API + 健康检查/指标 | +| 50051 | gRPC | OCR gRPC API | + +## 主要环境变量 + +| 变量名 | 说明 | 默认值 | +| ------ | ---- | ------ | +| `TURBOOCR_VERSION` | 构建所用的 Git 标签 | `v2.1.1` | +| `TURBOOCR_HTTP_PORT_OVERRIDE` | HTTP API 主机端口 | `8000` | +| `TURBOOCR_GRPC_PORT_OVERRIDE` | gRPC API 主机端口 | `50051` | +| `TURBOOCR_LANG` | 语言包:`latin`、`chinese`、`greek`、`eslav`、`arabic`、`korean`、`thai` | `""`(latin) | +| `TURBOOCR_SERVER` | 当使用 `chinese` 时,设为 `1` 启用 84 MB 服务端识别模型 | `""` | +| `TURBOOCR_PIPELINE_POOL_SIZE` | 并发 GPU 流水线数(每条约 1.4 GB 显存),留空则自动 | `""` | +| `TURBOOCR_DISABLE_LAYOUT` | 禁用版面检测模型(节省约 300–500 MB 显存) | `0` | +| `TURBOOCR_PDF_MODE` | PDF 解析模式:`ocr` / `geometric` / `auto` / `auto_verified` | `ocr` | +| `TURBOOCR_CPU_LIMIT` | CPU 核心限制(两个变体通用) | `8.0` | +| `TURBOOCR_MEMORY_LIMIT` | 内存限制——GPU 变体 `12G`,CPU 变体 `4G` | 变体默认值 | +| `TURBOOCR_GPU_COUNT` | 预留的 NVIDIA GPU 数量(仅 GPU 变体) | `1` | +| `TURBOOCR_SHM_SIZE` | fastpdf2png 共享内存——GPU 变体 `2g`,CPU 变体 `512m` | 变体默认值 | +| `TZ` | 容器时区 | `UTC` | + +## 存储 + +- `turboocr_build_cache`——命名卷,挂载于 `/home/ocr/.cache/turbo-ocr`。用于存储 TRT 引擎文件(GPU 变体)或模型缓存目录(CPU 变体)。必须使用**命名卷**——绑定挂载空主机目录会遮蔽镜像内置语言包,导致服务无法加载模型。 + +## 支持的 GPU 架构(CUDA 12.x 变体) + +| 算力版本 | 架构 | GPU 型号 | +| -------- | ---- | -------- | +| 7.5 | Turing | GTX 16xx、RTX 20xx | +| 8.0 | Ampere | A100、A30 | 
+| 8.6 | Ampere | RTX 30xx(桌面/笔记本) | +| 8.9 | Ada Lovelace | RTX 40xx | + +Blackwell(CC 12.0,RTX 50xx)需要 CUDA 13.x——请改用 `src/turboocr` 中的上游预构建镜像。 + +## 说明 + +- 两个 Dockerfile 均在镜像内通过 `git clone` 从源码构建 TurboOCR,构建时需要可访问互联网。 +- CUDA 12.x Dockerfile 将 `CMAKE_CUDA_ARCHITECTURES` 设置为 `75;80;86;89`,去除了 CUDA 12.x 不支持的 CC 12.0。 +- TensorRT 10.8 在 `24.12-py3` 基础镜像中位于 `/usr/local/tensorrt`,与 CMake 默认值一致,无需额外的 `-DTENSORRT_DIR` 参数。 +- CPU 变体使用 ONNX Runtime 1.22.0,生成同时支持 HTTP 和 gRPC 接口的 `paddle_cpu_server` 二进制文件。 + +## 访问端点 + +- HTTP API:<http://localhost:8000> +- gRPC API:`localhost:50051` +- 健康检查:<http://localhost:8000/health> +- 就绪检查: +- Prometheus 指标: + +## 安全说明 + +- API 默认无身份认证。生产环境请在前面套一层反向代理(nginx、Caddy 等)。 +- PDF 默认模式为 `ocr`,只信任像素数据,可安全处理不可信来源的 PDF 上传。 +- 如果你的服务接收不可信来源的 PDF,**不要**将 `TURBOOCR_PDF_MODE` 全局设为 `geometric` 或 `auto`。 + +## 参考链接 + +- [TurboOCR 仓库](https://github.com/aiptimizer/TurboOCR) +- [NVIDIA TensorRT 容器发布说明](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/) +- [NVIDIA CUDA GPU 算力版本对照表](https://developer.nvidia.com/cuda-gpus) diff --git a/builds/turboocr/docker-compose.yaml b/builds/turboocr/docker-compose.yaml new file mode 100644 index 0000000..6f32be5 --- /dev/null +++ b/builds/turboocr/docker-compose.yaml @@ -0,0 +1,110 @@ +x-defaults: &defaults + restart: unless-stopped + logging: + driver: json-file + options: + max-size: ${TURBOOCR_LOG_MAX_SIZE:-100m} + max-file: '${TURBOOCR_LOG_MAX_FILE:-3}' + +x-turboocr-common: &turboocr-common + <<: *defaults + ports: + - '${TURBOOCR_HTTP_PORT_OVERRIDE:-8000}:8000' + - '${TURBOOCR_GRPC_PORT_OVERRIDE:-50051}:50051' + volumes: + # Named volume persists TRT engines (GPU) or ONNX model cache (CPU). + # Must be a named volume — bind-mounting an empty host dir shadows the + # baked-in language bundles and prevents the server from loading models. 
+ - turboocr_build_cache:/home/ocr/.cache/turbo-ocr + environment: + - TZ=${TZ:-UTC} + # Language bundle: latin (default), chinese, greek, eslav, arabic, korean, thai + - OCR_LANG=${TURBOOCR_LANG:-} + # Set to 1 with OCR_LANG=chinese to use the 84 MB server rec model + - OCR_SERVER=${TURBOOCR_SERVER:-} + # Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto; ignored in CPU mode + - PIPELINE_POOL_SIZE=${TURBOOCR_PIPELINE_POOL_SIZE:-} + # Set to 1 to disable PP-DocLayoutV3 layout detection (saves ~300-500 MB VRAM) + - DISABLE_LAYOUT=${TURBOOCR_DISABLE_LAYOUT:-0} + # Default PDF mode: ocr (safest) / geometric / auto / auto_verified + - ENABLE_PDF_MODE=${TURBOOCR_PDF_MODE:-ocr} + # Skip angle classifier (~0.4 ms savings) + - DISABLE_ANGLE_CLS=${TURBOOCR_DISABLE_ANGLE_CLS:-0} + # Max detection input size in pixels + - DET_MAX_SIDE=${TURBOOCR_DET_MAX_SIDE:-960} + # PDF render parallelism + - PDF_DAEMONS=${TURBOOCR_PDF_DAEMONS:-16} + - PDF_WORKERS=${TURBOOCR_PDF_WORKERS:-4} + # Maximum pages per PDF request + - MAX_PDF_PAGES=${TURBOOCR_MAX_PDF_PAGES:-2000} + # Log level: debug / info / warn / error + - LOG_LEVEL=${TURBOOCR_LOG_LEVEL:-info} + # Log format: json (structured) / text (human-readable) + - LOG_FORMAT=${TURBOOCR_LOG_FORMAT:-json} + +services: + turboocr-cuda12: + <<: *turboocr-common + profiles: [gpu] + build: + context: . + dockerfile: Dockerfile.cuda12 + args: + TURBOOCR_VERSION: ${TURBOOCR_VERSION:-v2.1.1} + NGC_MIRROR: ${TURBOOCR_NGC_MIRROR:-} + image: ${GLOBAL_REGISTRY:-}alexsuntop/turboocr-cuda12:${TURBOOCR_VERSION:-v2.1.1} + healthcheck: + test: [CMD, curl, -fsS, 'http://localhost:8000/health'] + interval: 30s + timeout: 10s + retries: 5 + # First start builds 4 TensorRT engines from ONNX. Measured times on an + # RTX 3070 Laptop: det (~5 min) + rec (~30 min) + cls (~4 min) + + # layout (~28 min) = ~67-90 min. High-end desktop GPUs finish in ~15 min. + # Set TURBOOCR_DISABLE_LAYOUT=1 to skip layout and save ~28 min. 
+ # Subsequent restarts reuse the cached engines and start in seconds. + start_period: 120m + deploy: + resources: + limits: + cpus: ${TURBOOCR_CPU_LIMIT:-8.0} + memory: ${TURBOOCR_MEMORY_LIMIT:-12G} + reservations: + cpus: ${TURBOOCR_CPU_RESERVATION:-2.0} + memory: ${TURBOOCR_MEMORY_RESERVATION:-4G} + devices: + - driver: nvidia + count: ${TURBOOCR_GPU_COUNT:-1} + capabilities: [gpu] + shm_size: ${TURBOOCR_SHM_SIZE:-2g} + + turboocr-cpu: + <<: *turboocr-common + profiles: [cpu] + build: + context: . + dockerfile: Dockerfile.cpu + args: + TURBOOCR_VERSION: ${TURBOOCR_VERSION:-v2.1.1} + DOCKER_MIRROR: ${TURBOOCR_DOCKER_MIRROR:-} + image: ${GLOBAL_REGISTRY:-}alexsuntop/turboocr-cpu:${TURBOOCR_VERSION:-v2.1.1} + healthcheck: + test: [CMD, curl, -fsS, 'http://localhost:8000/health'] + interval: 30s + timeout: 10s + retries: 5 + # CPU mode uses ONNX Runtime directly — no TRT compilation on first start. + # Expect startup in under 60 s on most hardware. + start_period: 2m + deploy: + resources: + limits: + cpus: ${TURBOOCR_CPU_LIMIT:-8.0} + memory: ${TURBOOCR_MEMORY_LIMIT:-4G} + reservations: + cpus: ${TURBOOCR_CPU_RESERVATION:-2.0} + memory: ${TURBOOCR_MEMORY_RESERVATION:-1G} + shm_size: ${TURBOOCR_SHM_SIZE:-512m} + +volumes: + turboocr_build_cache: diff --git a/src/turboocr/README.md b/src/turboocr/README.md index 36fb2db..ffa00fe 100644 --- a/src/turboocr/README.md +++ b/src/turboocr/README.md @@ -52,7 +52,9 @@ Copy `.env.example` to `.env` and override only the variables you need to change docker compose up -d ``` -The first start builds TensorRT engines from ONNX. Build time depends on your GPU: roughly 5 minutes on high-end desktop GPUs and 20–30 minutes on laptop GPUs. The container may report `unhealthy` while compilation is in progress — this is normal. Once the build finishes the server starts and the container transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds. +The first start builds 4 TensorRT engines from ONNX. 
Measured build times on an RTX 3070 Laptop: det (~5 min) + rec (~30 min) + cls (~4 min) + layout (~28 min) = **~67–90 minutes total**. High-end desktop GPUs finish in ~15 minutes. The container reports `unhealthy` while compilation is in progress — this is expected. Once all engines are built the server starts and the container transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds. + +> **Tip — faster first boot:** Set `TURBOOCR_DISABLE_LAYOUT=1` to skip the layout detection engine (~28 min on laptop GPUs). Only do this if you don't need the `?layout=1` PDF endpoint. ### Endpoints diff --git a/src/turboocr/README.zh.md b/src/turboocr/README.zh.md index 0f7eed9..ee0cff4 100644 --- a/src/turboocr/README.zh.md +++ b/src/turboocr/README.zh.md @@ -52,7 +52,9 @@ docker compose up -d ``` -首次启动需要从 ONNX 构建 TensorRT 引擎,耗时因 GPU 而异:高端桌面 GPU 约 5 分钟,笔记本 GPU 约 20–30 分钟。编译期间容器可能显示 `unhealthy`,这属于正常现象——构建完成后服务会自动启动并切换为 `healthy`。后续重启会复用缓存的引擎,几乎瞬间完成。 +首次启动需要编译 4 个 TensorRT 引擎。在 RTX 3070 Laptop 上的实测耗时:det(约 5 分钟)+ rec(约 30 分钟)+ cls(约 4 分钟)+ layout(约 28 分钟)= **总计约 67–90 分钟**。高端桌面 GPU 约 15 分钟完成。编译期间容器显示 `unhealthy` 属于正常现象——所有引擎构建完成后服务会自动启动并切换为 `healthy`。后续重启会复用缓存的引擎,几乎瞬间完成。 + +> **提示——加快首次启动**:设置 `TURBOOCR_DISABLE_LAYOUT=1` 可跳过版面检测引擎的编译(笔记本 GPU 约节省 28 分钟)。仅在不需要 `?layout=1` PDF 端点时使用此选项。 ### 访问端点 diff --git a/src/turboocr/docker-compose.yaml b/src/turboocr/docker-compose.yaml index aa34680..18a9d7b 100644 --- a/src/turboocr/docker-compose.yaml +++ b/src/turboocr/docker-compose.yaml @@ -14,7 +14,7 @@ services: - '${TURBOOCR_HTTP_PORT_OVERRIDE:-8000}:8000' - '${TURBOOCR_GRPC_PORT_OVERRIDE:-50051}:50051' volumes: - # Named volume caches TensorRT engines built from ONNX on first start (~90s). + # Named volume caches TensorRT engines built from ONNX on first start. # Must be a named volume - bind-mounting an empty host dir would shadow the # baked-in language bundles and prevent the server from loading models. 
- turboocr_trt_cache:/home/ocr/.cache/turbo-ocr @@ -48,11 +48,13 @@ services: interval: 30s timeout: 10s retries: 5 - # First start builds TensorRT engines from ONNX. Build time varies by GPU: - # ~5 min on high-end desktop GPUs, 20-30 min on laptop GPUs. The container - # may show "unhealthy" during compilation but will become healthy once done. - # Subsequent restarts reuse the cached engines and start in seconds. - start_period: 30m + # First start builds 4 TensorRT engines from ONNX. Measured build times: + # det (~5 min) + rec (~30 min) + cls (~4 min) + layout (~28 min) ≈ 67-90 min + # on an RTX 3070 Laptop. High-end desktop GPUs finish in ~15 min. + # Set TURBOOCR_DISABLE_LAYOUT=1 to skip the layout engine and cut ~28 min. + # The container shows "unhealthy" while building but recovers once done. + # Subsequent restarts reuse cached engines and start in seconds. + start_period: 120m deploy: resources: limits: