feat: add build turboocr

This commit is contained in:
Sun-ZhenXing
2026-04-29 11:54:59 +08:00
parent ce16588916
commit 5f8503df42
11 changed files with 675 additions and 8 deletions
+1
View File
@@ -42,6 +42,7 @@ These services require building custom Docker images from source.
| [Multica](./builds/multica) | v0.1.32 | | [Multica](./builds/multica) | v0.1.32 |
| [OpenFang](./builds/openfang) | 0.1.0 | | [OpenFang](./builds/openfang) | 0.1.0 |
| [Paperclip](./builds/paperclip) | main | | [Paperclip](./builds/paperclip) | main |
| [TurboOCR](./builds/turboocr) | v2.1.1 |
## Supported Services ## Supported Services
+1
View File
@@ -42,6 +42,7 @@ docker compose exec redis redis-cli ping
| [Multica](./builds/multica) | v0.1.32 | | [Multica](./builds/multica) | v0.1.32 |
| [OpenFang](./builds/openfang) | 0.1.0 | | [OpenFang](./builds/openfang) | 0.1.0 |
| [Paperclip](./builds/paperclip) | main | | [Paperclip](./builds/paperclip) | main |
| [TurboOCR](./builds/turboocr) | v2.1.1 |
## 已经支持的服务 ## 已经支持的服务
+73
View File
@@ -0,0 +1,73 @@
# ------------------------------------------------------------
# TurboOCR source-build configuration (.env.example)
# Copy this file to `.env` and override only what you need.
# ------------------------------------------------------------
# Source build configuration
TURBOOCR_VERSION=v2.1.1
# Registry mirror prefix for docker build — leave empty for direct pull.
# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud.
# Example: TURBOOCR_DOCKER_MIRROR=docker.m.daocloud.io/
TURBOOCR_DOCKER_MIRROR=
# NGC (nvcr.io) mirror prefix for the CUDA 12.x GPU build — leave empty for direct pull.
# Standard Docker Hub mirrors (e.g. DaoCloud) do NOT proxy nvcr.io.
# Set this only if you have a dedicated NGC pull-through proxy.
TURBOOCR_NGC_MIRROR=
# Network configuration (host-side ports published by docker compose)
TURBOOCR_HTTP_PORT_OVERRIDE=8000
TURBOOCR_GRPC_PORT_OVERRIDE=50051
# Language bundle: latin (default), chinese, greek, eslav, arabic, korean, thai
TURBOOCR_LANG=
# Set to 1 with TURBOOCR_LANG=chinese to use the 84 MB server rec model
TURBOOCR_SERVER=
# GPU pipeline pool — number of concurrent inference pipelines (~1.4 GB VRAM each).
# Leave empty to let the server choose automatically based on available VRAM.
# Ignored in CPU mode.
TURBOOCR_PIPELINE_POOL_SIZE=
# Set to 1 to skip loading the PP-DocLayoutV3 layout detection model.
# Saves ~300-500 MB VRAM and cuts first-start compilation time by ~28 min on laptop GPUs.
# Only do this if you do not need the ?layout=1 PDF endpoint.
TURBOOCR_DISABLE_LAYOUT=0
# Default PDF parsing mode: ocr (safest) / geometric / auto / auto_verified
TURBOOCR_PDF_MODE=ocr
# Set to 1 to skip the angle classifier (~0.4 ms savings per image)
TURBOOCR_DISABLE_ANGLE_CLS=0
# Maximum detection input dimension in pixels
TURBOOCR_DET_MAX_SIDE=960
# PDF render parallelism
TURBOOCR_PDF_DAEMONS=16
TURBOOCR_PDF_WORKERS=4
# Maximum pages accepted in a single PDF request
TURBOOCR_MAX_PDF_PAGES=2000
# Log level: debug / info / warn / error
TURBOOCR_LOG_LEVEL=info
# Log format: json (structured) / text (human-readable)
TURBOOCR_LOG_FORMAT=json
# Resources — GPU variant (profile: gpu)
# First-start builds TRT engines; 12 G covers the GPU + engine compilation headroom.
TURBOOCR_CPU_LIMIT=8.0
TURBOOCR_MEMORY_LIMIT=12G
TURBOOCR_CPU_RESERVATION=2.0
TURBOOCR_MEMORY_RESERVATION=4G
# Number of NVIDIA GPUs to reserve (GPU variant only)
TURBOOCR_GPU_COUNT=1
# Shared memory — fastpdf2png uses /dev/shm for inter-process PDF page transfers
TURBOOCR_SHM_SIZE=2g
# Logging (json-file driver rotation)
TURBOOCR_LOG_MAX_SIZE=100m
TURBOOCR_LOG_MAX_FILE=3
# Timezone
TZ=UTC
+104
View File
@@ -0,0 +1,104 @@
# ============================================================
# TurboOCR — CPU-only build (ONNX Runtime backend, no GPU required)
# Base image: ubuntu:24.04
#
# Produces: /app/build_cpu/paddle_cpu_server (HTTP + gRPC server)
#
# Image size: ~500 MB (vs ~10 GB for the GPU image).
# No TRT compilation on first start — ONNX Runtime is used directly.
# Startup is fast (~30 s) and requires no NVIDIA driver.
#
# Build: docker build -f Dockerfile.cpu -t turboocr-cpu .
# ============================================================
ARG TURBOOCR_VERSION=v2.1.1
ARG ORT_VERSION=1.22.0

# Registry mirror prefix — leave empty for direct pull.
# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud.
ARG DOCKER_MIRROR=

FROM ${DOCKER_MIRROR}ubuntu:24.04

# Re-declare ARGs after FROM so they remain in scope
ARG TURBOOCR_VERSION
ARG ORT_VERSION

# Build-time only (ARG, not ENV): suppresses debconf prompts during apt-get
# without leaking DEBIAN_FRONTEND into the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive

# Install build dependencies (one package per line, sorted for diffability)
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        cmake \
        curl \
        g++ \
        git \
        gosu \
        libc-ares-dev \
        libgrpc++-dev \
        libjsoncpp-dev \
        libopencv-dev \
        libprotobuf-dev \
        libssl-dev \
        make \
        nginx \
        pkg-config \
        protobuf-compiler \
        protobuf-compiler-grpc \
        uuid-dev \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install the Drogon HTTP framework (async, epoll-based); DB/ORM backends
# are disabled — the server only needs the HTTP/gRPC-facing parts.
RUN cd /tmp && \
    git clone --depth 1 --branch v1.9.12 https://github.com/drogonframework/drogon.git && \
    cd drogon && git submodule update --init && \
    mkdir build && cd build && \
    cmake .. -DBUILD_EXAMPLES=OFF -DBUILD_CTL=OFF -DBUILD_ORM=OFF \
        -DBUILD_POSTGRESQL=OFF -DBUILD_MYSQL=OFF -DBUILD_SQLITE=OFF \
        -DBUILD_REDIS=OFF -DBUILD_TESTING=OFF && \
    make -j$(nproc) && make install && \
    rm -rf /tmp/drogon

# Install the ONNX Runtime C++ SDK.
# NOTE(review): the release tarball is fetched without checksum verification;
# the pinned ORT_VERSION mitigates drift, but consider ADD --checksum=sha256:…
RUN cd /tmp && \
    wget -q "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    tar xzf "onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    cp -r "onnxruntime-linux-x64-${ORT_VERSION}/include/"* /usr/local/include/ && \
    cp "onnxruntime-linux-x64-${ORT_VERSION}/lib/libonnxruntime.so"* /usr/local/lib/ && \
    ldconfig && rm -rf /tmp/onnxruntime*

# Clone TurboOCR at the pinned release tag
RUN git clone --depth 1 --branch "${TURBOOCR_VERSION}" \
    https://github.com/aiptimizer/TurboOCR.git /app

WORKDIR /app

# Install fastpdf2png (PDF renderer — PDFium vendored in third_party/).
# Copy vendored libpdfium first so the installer does not need network access;
# the post-install copy is best-effort in case the script relocates the lib.
RUN cp third_party/pdfium/lib/libpdfium.so /usr/lib/ && ldconfig && \
    bash scripts/install_fastpdf2png.sh && \
    { cp bin/libpdfium.so /usr/lib/ 2>/dev/null || true; } && \
    ldconfig

# Build CPU-only mode with the ONNX Runtime backend.
# FETCH_MODELS=OFF: models are fetched in a separate layer below for caching.
RUN mkdir -p build_cpu && cd build_cpu && \
    cmake .. -DUSE_CPU_ONLY=ON -DFETCH_MODELS=OFF && \
    make -j$(nproc)

# Create non-root user and redirect /app/models/rec into the named cache volume.
RUN useradd -m -s /bin/bash ocr \
    && chmod +x /app/scripts/entrypoint.sh \
    && mkdir -p /home/ocr/.cache/turbo-ocr/models/rec /app/models \
    && ln -s /home/ocr/.cache/turbo-ocr/models/rec /app/models/rec

# Fetch all PP-OCRv5 language bundles (SHA256-verified from pinned GitHub Release)
ARG OCR_INCLUDE_SERVER=1
ENV OCR_INCLUDE_SERVER=${OCR_INCLUDE_SERVER}
RUN bash scripts/fetch_release_models.sh \
    && chown -R ocr:ocr /app /home/ocr/.cache

EXPOSE 8000 50051

# No USER directive: the entrypoint starts as root and (given gosu is
# installed) presumably drops privileges to `ocr` itself — TODO confirm
# against scripts/entrypoint.sh upstream.
ENTRYPOINT ["/app/scripts/entrypoint.sh"]
CMD ["./build_cpu/paddle_cpu_server"]
+118
View File
@@ -0,0 +1,118 @@
# ============================================================
# TurboOCR — CUDA 12.x build (TensorRT 10.8 / CUDA 12.7)
# Base image: nvcr.io/nvidia/tensorrt:24.12-py3
#
# Supported compute capabilities (NVIDIA GPU reference):
# https://developer.nvidia.com/cuda-gpus
#   7.5 Turing — GTX 16xx / RTX 20xx
#   8.0 Ampere — A100, RTX 30xx server-class
#   8.6 Ampere — RTX 30xx desktop / laptop
#   8.9 Ada    — RTX 40xx
#
# Blackwell (CC 12.0) requires CUDA 13.x.
# For that, use the upstream docker/Dockerfile.gpu (tensorrt:26.03-py3).
#
# Build: docker build -f Dockerfile.cuda12 -t turboocr-cuda12 .
# ============================================================
ARG TURBOOCR_VERSION=v2.1.1
ARG CMAKE_VERSION=3.31.6
ARG ORT_VERSION=1.22.0

# NGC registry mirror prefix — leave empty for direct pull from nvcr.io.
# Note: standard Docker Hub mirrors (e.g. DaoCloud) do NOT proxy nvcr.io.
# Set this only if you have a dedicated NGC mirror or a pull-through proxy.
ARG NGC_MIRROR=

FROM ${NGC_MIRROR}nvcr.io/nvidia/tensorrt:24.12-py3

# Re-declare ARGs after FROM so they remain in scope
ARG TURBOOCR_VERSION
ARG CMAKE_VERSION
ARG ORT_VERSION

# Build-time only (ARG, not ENV): without this, packages such as tzdata can
# block the build on an interactive debconf prompt. Using ARG keeps
# DEBIAN_FRONTEND out of the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive

# Install build dependencies (one package per line, sorted for diffability)
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        gosu \
        libc-ares-dev \
        libgrpc++-dev \
        libjsoncpp-dev \
        libopencv-dev \
        libprotobuf-dev \
        libssl-dev \
        nginx \
        pkg-config \
        protobuf-compiler-grpc \
        uuid-dev \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install the Drogon HTTP framework (async, epoll-based); DB/ORM backends
# are disabled — the server only needs the HTTP/gRPC-facing parts.
RUN cd /tmp && \
    git clone --depth 1 --branch v1.9.12 https://github.com/drogonframework/drogon.git && \
    cd drogon && git submodule update --init && \
    mkdir build && cd build && \
    cmake .. -DBUILD_EXAMPLES=OFF -DBUILD_CTL=OFF -DBUILD_ORM=OFF \
        -DBUILD_POSTGRESQL=OFF -DBUILD_MYSQL=OFF -DBUILD_SQLITE=OFF \
        -DBUILD_REDIS=OFF -DBUILD_TESTING=OFF && \
    make -j$(nproc) && make install && \
    rm -rf /tmp/drogon

# Upgrade CMake (the base image may ship an older version).
# Drogon above builds fine with the base CMake; TurboOCR below needs the
# newer one, so the upgrade sits between the two builds.
RUN cd /tmp && \
    wget -q "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz" && \
    tar xzf "cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz" && \
    cp -r "cmake-${CMAKE_VERSION}-linux-x86_64/bin/"* /usr/local/bin/ && \
    cp -r "cmake-${CMAKE_VERSION}-linux-x86_64/share/"* /usr/local/share/ && \
    rm -rf /tmp/cmake*

# Install the ONNX Runtime C++ SDK (used by the CPU inference fallback path).
# NOTE(review): the release tarball is fetched without checksum verification;
# the pinned ORT_VERSION mitigates drift, but consider ADD --checksum=sha256:…
RUN cd /tmp && \
    wget -q "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    tar xzf "onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    cp -r "onnxruntime-linux-x64-${ORT_VERSION}/include/"* /usr/local/include/ && \
    cp "onnxruntime-linux-x64-${ORT_VERSION}/lib/libonnxruntime.so"* /usr/local/lib/ && \
    ldconfig && rm -rf /tmp/onnxruntime*

# Clone TurboOCR at the pinned release tag
RUN git clone --depth 1 --branch "${TURBOOCR_VERSION}" \
    https://github.com/aiptimizer/TurboOCR.git /app

WORKDIR /app

# Install fastpdf2png (PDF renderer — PDFium vendored in third_party/)
RUN bash scripts/install_fastpdf2png.sh && \
    cp bin/libpdfium.so /usr/lib/ && ldconfig

# Build GPU mode.
# - CUDA_ARCHITECTURES: 7.5-8.9 covers Turing through Ada Lovelace under CUDA 12.x.
#   CC 12.0 (Blackwell) is excluded — it requires CUDA 13.x.
# - TENSORRT_DIR: /usr/local/tensorrt is the cmake default and matches the 24.12-py3
#   base image layout. No override needed (upstream 26.03 uses /usr/lib/x86_64-linux-gnu).
# - FETCH_MODELS=OFF: models are fetched in a separate layer below for better caching.
RUN mkdir -p build && cd build && \
    cmake .. \
        -DFETCH_MODELS=OFF \
        -DCMAKE_CUDA_ARCHITECTURES="75;80;86;89" \
    && make -j$(nproc)

# Create non-root user and redirect /app/models/rec into the named cache volume.
# TRT engines built at first start are persisted via: -v turboocr_cache:/home/ocr/.cache/turbo-ocr
RUN useradd -m -s /bin/bash ocr \
    && chmod +x /app/scripts/entrypoint.sh \
    && mkdir -p /home/ocr/.cache/turbo-ocr/models/rec /app/models \
    && ln -s /home/ocr/.cache/turbo-ocr/models/rec /app/models/rec

# Fetch all PP-OCRv5 language bundles (SHA256-verified from pinned GitHub Release)
ARG OCR_INCLUDE_SERVER=1
ENV OCR_INCLUDE_SERVER=${OCR_INCLUDE_SERVER}
RUN bash scripts/fetch_release_models.sh \
    && chown -R ocr:ocr /app /home/ocr/.cache

EXPOSE 8000 50051

# No USER directive: the entrypoint starts as root and (given gosu is
# installed) presumably drops privileges to `ocr` itself — TODO confirm
# against scripts/entrypoint.sh upstream.
ENTRYPOINT ["/app/scripts/entrypoint.sh"]
CMD ["./build/paddle_highspeed_cpp"]
+127
View File
@@ -0,0 +1,127 @@
# TurboOCR — Custom Builds
[中文文档](README.zh.md)
This directory builds [TurboOCR](https://github.com/aiptimizer/TurboOCR) from source for two targets that are not covered by the upstream pre-built images:
| Variant | Dockerfile | Profile | Base image |
| ------- | ---------- | ------- | ---------- |
| **CUDA 12.x** | `Dockerfile.cuda12` | `gpu` | `nvcr.io/nvidia/tensorrt:24.12-py3` (TRT 10.8 / CUDA 12.7) |
| **CPU-only** | `Dockerfile.cpu` | `cpu` | `ubuntu:24.04` (ONNX Runtime) |
The upstream pre-built image targets CUDA 13.x (Blackwell / CC 12.0). Use this directory if your GPU is on CUDA 12.x (Turing through Ada Lovelace, CC 7.5–8.9) or if you have no GPU at all.
## Quick Start
1. Copy the example environment file:
```bash
cp .env.example .env
```
2. Build and start the variant you need:
**CUDA 12.x (GPU — Turing through Ada Lovelace):**
```bash
docker compose --profile gpu up -d --build
```
**CPU-only (no GPU required):**
```bash
docker compose --profile cpu up -d --build
```
3. Access the API at <http://localhost:8000>.
> **Note:** The first build compiles Drogon and TurboOCR from source, which takes 10–30 minutes depending on your CPU core count. Subsequent builds use the Docker layer cache and are fast.
## First-Start Behavior
### GPU variant
On the very first container start, TensorRT compiles 4 ONNX models into engine files. Measured times on an RTX 3070 Laptop:
| Engine | Time |
| ------ | ---- |
| det | ~5 min |
| rec | ~30 min |
| cls | ~4 min |
| layout | ~28 min |
| **Total** | **~67–90 min** |
High-end desktop GPUs finish in ~15 minutes. The container shows `unhealthy` during compilation — this is expected. Once all engines are ready the server starts and the status transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds.
> **Tip:** Set `TURBOOCR_DISABLE_LAYOUT=1` to skip the layout detection engine (~28 min savings on laptop GPUs). Use this only if you do not need the `?layout=1` PDF endpoint.
### CPU variant
No TRT compilation occurs. ONNX Runtime loads the models directly at startup. The container is typically `healthy` within 60 seconds.
## Default Ports
| Port | Protocol | Description |
| ---- | -------- | ----------- |
| 8000 | HTTP | OCR REST API + health/metrics |
| 50051 | gRPC | OCR gRPC API |
## Important Environment Variables
| Variable | Description | Default |
| -------- | ----------- | ------- |
| `TURBOOCR_VERSION` | Git tag used for the source build | `v2.1.1` |
| `TURBOOCR_HTTP_PORT_OVERRIDE` | Host port for the HTTP API | `8000` |
| `TURBOOCR_GRPC_PORT_OVERRIDE` | Host port for the gRPC API | `50051` |
| `TURBOOCR_LANG` | Language bundle: `latin`, `chinese`, `greek`, `eslav`, `arabic`, `korean`, `thai` | `""` (latin) |
| `TURBOOCR_SERVER` | With `chinese`, set to `1` for the 84 MB server rec model | `""` |
| `TURBOOCR_PIPELINE_POOL_SIZE` | Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto | `""` |
| `TURBOOCR_DISABLE_LAYOUT` | Disable layout detection model (saves ~300–500 MB VRAM) | `0` |
| `TURBOOCR_PDF_MODE` | PDF parsing mode: `ocr` / `geometric` / `auto` / `auto_verified` | `ocr` |
| `TURBOOCR_CPU_LIMIT` | CPU core limit (both variants) | `8.0` |
| `TURBOOCR_MEMORY_LIMIT` | Memory limit — `12G` for GPU, `4G` for CPU | variant default |
| `TURBOOCR_GPU_COUNT` | NVIDIA GPUs to reserve (GPU variant only) | `1` |
| `TURBOOCR_SHM_SIZE` | Shared memory for fastpdf2png — `2g` for GPU, `512m` for CPU | variant default |
| `TZ` | Container timezone | `UTC` |
## Storage
- `turboocr_build_cache` — named volume at `/home/ocr/.cache/turbo-ocr`. Stores TRT engine files (GPU) or the model cache directory (CPU). Must be a named volume — a bind-mount of an empty host directory would shadow the baked-in language bundles and the server would fail to load models.
## Supported GPU Architectures (CUDA 12.x variant)
| Compute Capability | Architecture | GPUs |
| ------------------ | ------------ | ---- |
| 7.5 | Turing | GTX 16xx, RTX 20xx |
| 8.0 | Ampere | A100, RTX 30xx (server) |
| 8.6 | Ampere | RTX 30xx (desktop / laptop) |
| 8.9 | Ada Lovelace | RTX 40xx |
Blackwell (CC 12.0, RTX 50xx) requires CUDA 13.x — use the upstream pre-built image from `src/turboocr` instead.
## Notes
- Both Dockerfiles build TurboOCR from source via `git clone` inside the image. A working internet connection is required at build time.
- The CUDA 12.x Dockerfile overrides `CMAKE_CUDA_ARCHITECTURES` to `75;80;86;89`, removing CC 12.0 which is not supported by CUDA 12.x.
- TensorRT 10.8 is located at `/usr/local/tensorrt` in the `24.12-py3` base image, which matches the CMake default. No `-DTENSORRT_DIR` override is needed.
- The CPU variant uses ONNX Runtime 1.22.0 and produces a `paddle_cpu_server` binary with both HTTP and gRPC interfaces.
## Endpoints
- HTTP API: <http://localhost:8000>
- gRPC API: `localhost:50051`
- Health: <http://localhost:8000/health>
- Readiness: <http://localhost:8000/health/ready>
- Metrics (Prometheus): <http://localhost:8000/metrics>
## Security Notes
- The API has no authentication by default. Put a reverse proxy (nginx, Caddy) in front for production.
- The default PDF mode is `ocr`, which only trusts pixel data and is safe for untrusted PDF uploads.
- Do **not** set `TURBOOCR_PDF_MODE` to `geometric` or `auto` globally if you accept PDFs from untrusted sources.
## References
- [TurboOCR Repository](https://github.com/aiptimizer/TurboOCR)
- [NVIDIA TensorRT Container Releases](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/)
- [NVIDIA CUDA GPU Compute Capability Table](https://developer.nvidia.com/cuda-gpus)
+127
View File
@@ -0,0 +1,127 @@
# TurboOCR — 自定义构建
[English](README.md)
此目录从源码构建 [TurboOCR](https://github.com/aiptimizer/TurboOCR),覆盖上游预构建镜像未提供的两个目标:
| 变体 | Dockerfile | Profile | 基础镜像 |
| ---- | ---------- | ------- | -------- |
| **CUDA 12.x** | `Dockerfile.cuda12` | `gpu` | `nvcr.io/nvidia/tensorrt:24.12-py3`(TRT 10.8 / CUDA 12.7) |
| **纯 CPU** | `Dockerfile.cpu` | `cpu` | `ubuntu:24.04`(ONNX Runtime) |
上游预构建镜像针对 CUDA 13.x(Blackwell / CC 12.0)。如果你的 GPU 属于 CUDA 12.x 范围(Turing 到 Ada Lovelace,CC 7.5–8.9),或者没有 GPU,请使用本目录。
## 快速开始
1. 复制示例环境文件:
```bash
cp .env.example .env
```
2. 按需构建并启动对应变体:
**CUDA 12.x(GPU — Turing 到 Ada Lovelace):**
```bash
docker compose --profile gpu up -d --build
```
**纯 CPU(无需 GPU):**
```bash
docker compose --profile cpu up -d --build
```
3. 访问 API:<http://localhost:8000>。
> **说明:** 首次构建需要从源码编译 Drogon 和 TurboOCR,耗时约 10–30 分钟,具体取决于 CPU 核心数。后续构建会复用 Docker 层缓存,速度很快。
## 首次启动说明
### GPU 变体
容器首次启动时,TensorRT 会将 4 个 ONNX 模型编译为引擎文件。在 RTX 3070 Laptop 上的实测耗时:
| 引擎 | 耗时 |
| ---- | ---- |
| det | 约 5 分钟 |
| rec | 约 30 分钟 |
| cls | 约 4 分钟 |
| layout | 约 28 分钟 |
| **合计** | **约 67–90 分钟** |
高端桌面 GPU 约 15 分钟完成。编译期间容器显示 `unhealthy` 属于正常现象——所有引擎构建完成后服务启动,状态切换为 `healthy`。后续重启会复用缓存引擎,几乎瞬间完成。
> **提示:** 设置 `TURBOOCR_DISABLE_LAYOUT=1` 可跳过版面检测引擎的编译(笔记本 GPU 约节省 28 分钟)。仅在不需要 `?layout=1` PDF 端点时使用此选项。
### CPU 变体
无 TRT 编译过程。ONNX Runtime 在启动时直接加载模型,通常在 60 秒内变为 `healthy`。
## 默认端口
| 端口 | 协议 | 说明 |
| ---- | ---- | ---- |
| 8000 | HTTP | OCR REST API + 健康检查/指标 |
| 50051 | gRPC | OCR gRPC API |
## 主要环境变量
| 变量名 | 说明 | 默认值 |
| ------ | ---- | ------ |
| `TURBOOCR_VERSION` | 构建所用的 Git 标签 | `v2.1.1` |
| `TURBOOCR_HTTP_PORT_OVERRIDE` | HTTP API 主机端口 | `8000` |
| `TURBOOCR_GRPC_PORT_OVERRIDE` | gRPC API 主机端口 | `50051` |
| `TURBOOCR_LANG` | 语言包:`latin`、`chinese`、`greek`、`eslav`、`arabic`、`korean`、`thai` | `""`(latin) |
| `TURBOOCR_SERVER` | 当使用 `chinese` 时,设为 `1` 启用 84 MB 服务端识别模型 | `""` |
| `TURBOOCR_PIPELINE_POOL_SIZE` | 并发 GPU 流水线数(每条约 1.4 GB 显存),留空则自动 | `""` |
| `TURBOOCR_DISABLE_LAYOUT` | 禁用版面检测模型(节省约 300–500 MB 显存) | `0` |
| `TURBOOCR_PDF_MODE` | PDF 解析模式:`ocr` / `geometric` / `auto` / `auto_verified` | `ocr` |
| `TURBOOCR_CPU_LIMIT` | CPU 核心限制(两个变体通用) | `8.0` |
| `TURBOOCR_MEMORY_LIMIT` | 内存限制——GPU 变体 `12G`CPU 变体 `4G` | 变体默认值 |
| `TURBOOCR_GPU_COUNT` | 预留的 NVIDIA GPU 数量(仅 GPU 变体) | `1` |
| `TURBOOCR_SHM_SIZE` | fastpdf2png 共享内存——GPU 变体 `2g`CPU 变体 `512m` | 变体默认值 |
| `TZ` | 容器时区 | `UTC` |
## 存储
- `turboocr_build_cache`——命名卷,挂载于 `/home/ocr/.cache/turbo-ocr`。用于存储 TRT 引擎文件(GPU 变体)或模型缓存目录(CPU 变体)。必须使用**命名卷**——绑定挂载空主机目录会遮蔽镜像内置语言包,导致服务无法加载模型。
## 支持的 GPU 架构(CUDA 12.x 变体)
| 算力版本 | 架构 | GPU 型号 |
| -------- | ---- | -------- |
| 7.5 | Turing | GTX 16xx、RTX 20xx |
| 8.0 | Ampere | A100、RTX 30xx(服务器) |
| 8.6 | Ampere | RTX 30xx(桌面/笔记本) |
| 8.9 | Ada Lovelace | RTX 40xx |
Blackwell(CC 12.0,RTX 50xx)需要 CUDA 13.x——请改用 `src/turboocr` 中的上游预构建镜像。
## 说明
- 两个 Dockerfile 均在镜像内通过 `git clone` 从源码构建 TurboOCR,构建时需要可访问互联网。
- CUDA 12.x Dockerfile 将 `CMAKE_CUDA_ARCHITECTURES` 设置为 `75;80;86;89`,去除了 CUDA 12.x 不支持的 CC 12.0。
- TensorRT 10.8 在 `24.12-py3` 基础镜像中位于 `/usr/local/tensorrt`,与 CMake 默认值一致,无需额外的 `-DTENSORRT_DIR` 参数。
- CPU 变体使用 ONNX Runtime 1.22.0,生成同时支持 HTTP 和 gRPC 接口的 `paddle_cpu_server` 二进制文件。
## 访问端点
- HTTP API<http://localhost:8000>
- gRPC API`localhost:50051`
- 健康检查:<http://localhost:8000/health>
- 就绪检查:<http://localhost:8000/health/ready>
- Prometheus 指标:<http://localhost:8000/metrics>
## 安全说明
- API 默认无身份认证。生产环境请在前面套一层反向代理(nginx、Caddy 等)。
- PDF 默认模式为 `ocr`,只信任像素数据,可安全处理不可信来源的 PDF 上传。
- 如果你的服务接收不可信来源的 PDF,**不要**将 `TURBOOCR_PDF_MODE` 全局设为 `geometric` 或 `auto`。
## 参考链接
- [TurboOCR 仓库](https://github.com/aiptimizer/TurboOCR)
- [NVIDIA TensorRT 容器发布说明](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/)
- [NVIDIA CUDA GPU 算力版本对照表](https://developer.nvidia.com/cuda-gpus)
+110
View File
@@ -0,0 +1,110 @@
# Compose file for the TurboOCR source builds. Exactly one of the two
# services runs at a time, selected by profile:
#   docker compose --profile gpu up -d --build   (CUDA 12.x variant)
#   docker compose --profile cpu up -d --build   (CPU-only variant)
# Shared settings live in the x-* anchors and are merged into both services.
x-defaults: &defaults
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      max-size: ${TURBOOCR_LOG_MAX_SIZE:-100m}
      max-file: '${TURBOOCR_LOG_MAX_FILE:-3}'

# Settings common to both TurboOCR variants: published ports, the model/engine
# cache volume, and the runtime environment passed through from .env.
x-turboocr-common: &turboocr-common
  <<: *defaults
  ports:
    - '${TURBOOCR_HTTP_PORT_OVERRIDE:-8000}:8000'
    - '${TURBOOCR_GRPC_PORT_OVERRIDE:-50051}:50051'
  volumes:
    # Named volume persists TRT engines (GPU) or ONNX model cache (CPU).
    # Must be a named volume — bind-mounting an empty host dir shadows the
    # baked-in language bundles and prevents the server from loading models.
    - turboocr_build_cache:/home/ocr/.cache/turbo-ocr
  environment:
    - TZ=${TZ:-UTC}
    # Language bundle: latin (default), chinese, greek, eslav, arabic, korean, thai
    - OCR_LANG=${TURBOOCR_LANG:-}
    # Set to 1 with OCR_LANG=chinese to use the 84 MB server rec model
    - OCR_SERVER=${TURBOOCR_SERVER:-}
    # Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto; ignored in CPU mode
    - PIPELINE_POOL_SIZE=${TURBOOCR_PIPELINE_POOL_SIZE:-}
    # Set to 1 to disable PP-DocLayoutV3 layout detection (saves ~300-500 MB VRAM)
    - DISABLE_LAYOUT=${TURBOOCR_DISABLE_LAYOUT:-0}
    # Default PDF mode: ocr (safest) / geometric / auto / auto_verified
    - ENABLE_PDF_MODE=${TURBOOCR_PDF_MODE:-ocr}
    # Skip angle classifier (~0.4 ms savings)
    - DISABLE_ANGLE_CLS=${TURBOOCR_DISABLE_ANGLE_CLS:-0}
    # Max detection input size in pixels
    - DET_MAX_SIDE=${TURBOOCR_DET_MAX_SIDE:-960}
    # PDF render parallelism
    - PDF_DAEMONS=${TURBOOCR_PDF_DAEMONS:-16}
    - PDF_WORKERS=${TURBOOCR_PDF_WORKERS:-4}
    # Maximum pages per PDF request
    - MAX_PDF_PAGES=${TURBOOCR_MAX_PDF_PAGES:-2000}
    # Log level: debug / info / warn / error
    - LOG_LEVEL=${TURBOOCR_LOG_LEVEL:-info}
    # Log format: json (structured) / text (human-readable)
    - LOG_FORMAT=${TURBOOCR_LOG_FORMAT:-json}

services:
  turboocr-cuda12:
    <<: *turboocr-common
    profiles: [gpu]
    build:
      context: .
      dockerfile: Dockerfile.cuda12
      args:
        TURBOOCR_VERSION: ${TURBOOCR_VERSION:-v2.1.1}
        NGC_MIRROR: ${TURBOOCR_NGC_MIRROR:-}
    image: ${GLOBAL_REGISTRY:-}alexsuntop/turboocr-cuda12:${TURBOOCR_VERSION:-v2.1.1}
    healthcheck:
      test: [CMD, curl, -fsS, 'http://localhost:8000/health']
      interval: 30s
      timeout: 10s
      retries: 5
      # First start builds 4 TensorRT engines from ONNX. Measured times on an
      # RTX 3070 Laptop: det (~5 min) + rec (~30 min) + cls (~4 min) +
      # layout (~28 min) = ~67-90 min. High-end desktop GPUs finish in ~15 min.
      # Set TURBOOCR_DISABLE_LAYOUT=1 to skip layout and save ~28 min.
      # Subsequent restarts reuse the cached engines and start in seconds.
      start_period: 120m
    deploy:
      resources:
        limits:
          cpus: ${TURBOOCR_CPU_LIMIT:-8.0}
          memory: ${TURBOOCR_MEMORY_LIMIT:-12G}
        reservations:
          cpus: ${TURBOOCR_CPU_RESERVATION:-2.0}
          memory: ${TURBOOCR_MEMORY_RESERVATION:-4G}
          devices:
            - driver: nvidia
              count: ${TURBOOCR_GPU_COUNT:-1}
              capabilities: [gpu]
    # fastpdf2png uses /dev/shm for inter-process PDF page transfers
    shm_size: ${TURBOOCR_SHM_SIZE:-2g}

  turboocr-cpu:
    <<: *turboocr-common
    profiles: [cpu]
    build:
      context: .
      dockerfile: Dockerfile.cpu
      args:
        TURBOOCR_VERSION: ${TURBOOCR_VERSION:-v2.1.1}
        DOCKER_MIRROR: ${TURBOOCR_DOCKER_MIRROR:-}
    image: ${GLOBAL_REGISTRY:-}alexsuntop/turboocr-cpu:${TURBOOCR_VERSION:-v2.1.1}
    healthcheck:
      test: [CMD, curl, -fsS, 'http://localhost:8000/health']
      interval: 30s
      timeout: 10s
      retries: 5
      # CPU mode uses ONNX Runtime directly — no TRT compilation on first start.
      # Expect startup in under 60 s on most hardware.
      start_period: 2m
    deploy:
      resources:
        limits:
          cpus: ${TURBOOCR_CPU_LIMIT:-8.0}
          memory: ${TURBOOCR_MEMORY_LIMIT:-4G}
        reservations:
          cpus: ${TURBOOCR_CPU_RESERVATION:-2.0}
          memory: ${TURBOOCR_MEMORY_RESERVATION:-1G}
    shm_size: ${TURBOOCR_SHM_SIZE:-512m}

volumes:
  turboocr_build_cache:
+3 -1
View File
@@ -52,7 +52,9 @@ Copy `.env.example` to `.env` and override only the variables you need to change
docker compose up -d docker compose up -d
``` ```
The first start builds TensorRT engines from ONNX. Build time depends on your GPU: roughly 5 minutes on high-end desktop GPUs and 20–30 minutes on laptop GPUs. The container may report `unhealthy` while compilation is in progress — this is normal. Once the build finishes the server starts and the container transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds. The first start builds 4 TensorRT engines from ONNX. Measured build times on an RTX 3070 Laptop: det (~5 min) + rec (~30 min) + cls (~4 min) + layout (~28 min) = **~67–90 minutes total**. High-end desktop GPUs finish in ~15 minutes. The container reports `unhealthy` while compilation is in progress — this is expected. Once all engines are built the server starts and the container transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds.
> **Tip — faster first boot:** Set `TURBOOCR_DISABLE_LAYOUT=1` to skip the layout detection engine (~28 min on laptop GPUs). Only do this if you don't need the `?layout=1` PDF endpoint.
### Endpoints ### Endpoints
+3 -1
View File
@@ -52,7 +52,9 @@
docker compose up -d docker compose up -d
``` ```
首次启动需要从 ONNX 构建 TensorRT 引擎,耗时因 GPU 而异:高端桌面 GPU 约 5 分钟,笔记本 GPU 约 20–30 分钟。编译期间容器可能显示 `unhealthy`,这属于正常现象——构建完成后服务会自动启动并切换为 `healthy`。后续重启会复用缓存的引擎,几乎瞬间完成。 首次启动需要编译 4 个 TensorRT 引擎。在 RTX 3070 Laptop 上的实测耗时:det(约 5 分钟)+ rec(约 30 分钟)+ cls(约 4 分钟)+ layout(约 28 分钟)= **总计约 67–90 分钟**。高端桌面 GPU 约 15 分钟完成。编译期间容器显示 `unhealthy` 属于正常现象——所有引擎构建完成后服务会自动启动并切换为 `healthy`。后续重启会复用缓存的引擎,几乎瞬间完成。
> **提示——加快首次启动**:设置 `TURBOOCR_DISABLE_LAYOUT=1` 可跳过版面检测引擎的编译(笔记本 GPU 约节省 28 分钟)。仅在不需要 `?layout=1` PDF 端点时使用此选项。
### 访问端点 ### 访问端点
+8 -6
View File
@@ -14,7 +14,7 @@ services:
- '${TURBOOCR_HTTP_PORT_OVERRIDE:-8000}:8000' - '${TURBOOCR_HTTP_PORT_OVERRIDE:-8000}:8000'
- '${TURBOOCR_GRPC_PORT_OVERRIDE:-50051}:50051' - '${TURBOOCR_GRPC_PORT_OVERRIDE:-50051}:50051'
volumes: volumes:
# Named volume caches TensorRT engines built from ONNX on first start (~90s). # Named volume caches TensorRT engines built from ONNX on first start.
# Must be a named volume - bind-mounting an empty host dir would shadow the # Must be a named volume - bind-mounting an empty host dir would shadow the
# baked-in language bundles and prevent the server from loading models. # baked-in language bundles and prevent the server from loading models.
- turboocr_trt_cache:/home/ocr/.cache/turbo-ocr - turboocr_trt_cache:/home/ocr/.cache/turbo-ocr
@@ -48,11 +48,13 @@ services:
interval: 30s interval: 30s
timeout: 10s timeout: 10s
retries: 5 retries: 5
# First start builds TensorRT engines from ONNX. Build time varies by GPU: # First start builds 4 TensorRT engines from ONNX. Measured build times:
# ~5 min on high-end desktop GPUs, 20-30 min on laptop GPUs. The container # det (~5 min) + rec (~30 min) + cls (~4 min) + layout (~28 min) ≈ 67-90 min
# may show "unhealthy" during compilation but will become healthy once done. # on an RTX 3070 Laptop. High-end desktop GPUs finish in ~15 min.
# Subsequent restarts reuse the cached engines and start in seconds. # Set TURBOOCR_DISABLE_LAYOUT=1 to skip the layout engine and cut ~28 min.
start_period: 30m # The container shows "unhealthy" while building but recovers once done.
# Subsequent restarts reuse cached engines and start in seconds.
start_period: 120m
deploy: deploy:
resources: resources:
limits: limits: