feat: add build turboocr
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
# ------------------------------------------------------------
# TurboOCR source-build configuration
# ------------------------------------------------------------

# Git tag checked out for the source build.
TURBOOCR_VERSION=v2.1.1

# Registry mirror prefix for docker build — leave empty for direct pull.
# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud.
# Example: TURBOOCR_DOCKER_MIRROR=docker.m.daocloud.io/
TURBOOCR_DOCKER_MIRROR=

# NGC (nvcr.io) mirror prefix for the CUDA 12.x GPU build — leave empty for direct pull.
# Standard Docker Hub mirrors (e.g. DaoCloud) do NOT proxy nvcr.io.
# Set this only if you have a dedicated NGC pull-through proxy.
TURBOOCR_NGC_MIRROR=

# ---- Network ----
TURBOOCR_HTTP_PORT_OVERRIDE=8000
TURBOOCR_GRPC_PORT_OVERRIDE=50051

# Language bundle: latin (default), chinese, greek, eslav, arabic, korean, thai
TURBOOCR_LANG=
# Set to 1 together with TURBOOCR_LANG=chinese to use the 84 MB server rec model
TURBOOCR_SERVER=

# GPU pipeline pool — number of concurrent inference pipelines (~1.4 GB VRAM each).
# Leave empty to let the server choose automatically based on available VRAM.
# Ignored in CPU mode.
TURBOOCR_PIPELINE_POOL_SIZE=

# Set to 1 to skip loading the PP-DocLayoutV3 layout detection model.
# Saves ~300-500 MB VRAM and cuts first-start compilation time by ~28 min on laptop GPUs.
# Only do this if you do not need the ?layout=1 PDF endpoint.
TURBOOCR_DISABLE_LAYOUT=0

# Default PDF parsing mode: ocr (safest) / geometric / auto / auto_verified
TURBOOCR_PDF_MODE=ocr

# Set to 1 to skip the angle classifier (~0.4 ms savings per image)
TURBOOCR_DISABLE_ANGLE_CLS=0

# Maximum detection input dimension in pixels
TURBOOCR_DET_MAX_SIDE=960

# PDF render parallelism
TURBOOCR_PDF_DAEMONS=16
TURBOOCR_PDF_WORKERS=4

# Maximum pages accepted in a single PDF request
TURBOOCR_MAX_PDF_PAGES=2000

# Log level: debug / info / warn / error
TURBOOCR_LOG_LEVEL=info
# Log format: json (structured) / text (human-readable)
TURBOOCR_LOG_FORMAT=json

# ---- Resources (defaults sized for the GPU variant; the CPU variant has
# ---- smaller fallback defaults in docker-compose.yml) ----
# First-start builds TRT engines; 12 G covers the GPU + engine compilation headroom.
TURBOOCR_CPU_LIMIT=8.0
TURBOOCR_MEMORY_LIMIT=12G
TURBOOCR_CPU_RESERVATION=2.0
TURBOOCR_MEMORY_RESERVATION=4G

# Number of NVIDIA GPUs to reserve (GPU variant only)
TURBOOCR_GPU_COUNT=1

# Shared memory — fastpdf2png uses /dev/shm for inter-process PDF page transfers
TURBOOCR_SHM_SIZE=2g

# ---- Container log rotation ----
TURBOOCR_LOG_MAX_SIZE=100m
TURBOOCR_LOG_MAX_FILE=3

# Timezone
TZ=UTC
|
||||
@@ -0,0 +1,104 @@
|
||||
# ============================================================
# TurboOCR — CPU-only build (ONNX Runtime backend, no GPU required)
# Base image: ubuntu:24.04
#
# Produces: /app/build_cpu/paddle_cpu_server (HTTP + gRPC server)
#
# Image size: ~500 MB (vs ~10 GB for the GPU image).
# No TRT compilation on first start — ONNX Runtime is used directly.
# Startup is fast (~30 s) and requires no NVIDIA driver.
#
# Build: docker build -f Dockerfile.cpu -t turboocr-cpu .
# ============================================================

ARG TURBOOCR_VERSION=v2.1.1
ARG ORT_VERSION=1.22.0
# Registry mirror prefix — leave empty for direct pull.
# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud.
ARG DOCKER_MIRROR=

FROM ${DOCKER_MIRROR}ubuntu:24.04

# Re-declare ARGs after FROM so they remain in scope
ARG TURBOOCR_VERSION
ARG ORT_VERSION

# Install build dependencies.
# DEBIAN_FRONTEND is scoped to this RUN only: a persistent
# `ENV DEBIAN_FRONTEND=noninteractive` would leak into the runtime image
# and affect interactive `docker exec` apt sessions.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    cmake \
    g++ \
    make \
    pkg-config \
    libopencv-dev \
    nginx \
    gosu \
    libgrpc++-dev \
    libc-ares-dev \
    libprotobuf-dev \
    protobuf-compiler \
    protobuf-compiler-grpc \
    libjsoncpp-dev \
    uuid-dev \
    zlib1g-dev \
    libssl-dev \
    git \
    wget \
    curl \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install Drogon HTTP framework (async, epoll-based)
RUN cd /tmp && \
    git clone --depth 1 --branch v1.9.12 https://github.com/drogonframework/drogon.git && \
    cd drogon && git submodule update --init && \
    mkdir build && cd build && \
    cmake .. -DBUILD_EXAMPLES=OFF -DBUILD_CTL=OFF -DBUILD_ORM=OFF \
        -DBUILD_POSTGRESQL=OFF -DBUILD_MYSQL=OFF -DBUILD_SQLITE=OFF \
        -DBUILD_REDIS=OFF -DBUILD_TESTING=OFF && \
    make -j$(nproc) && make install && \
    rm -rf /tmp/drogon

# Install ONNX Runtime C++ SDK
RUN cd /tmp && \
    wget -q "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    tar xzf "onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    cp -r "onnxruntime-linux-x64-${ORT_VERSION}/include/"* /usr/local/include/ && \
    cp "onnxruntime-linux-x64-${ORT_VERSION}/lib/libonnxruntime.so"* /usr/local/lib/ && \
    ldconfig && rm -rf /tmp/onnxruntime*

# Clone TurboOCR at the pinned release tag
RUN git clone --depth 1 --branch "${TURBOOCR_VERSION}" \
    https://github.com/aiptimizer/TurboOCR.git /app

WORKDIR /app

# Install fastpdf2png (PDF renderer — PDFium vendored in third_party/).
# Copy vendored libpdfium first so the installer does not need network access.
# The second copy from bin/ is best-effort (the installer may or may not
# produce its own libpdfium there), hence the `|| true` guard.
RUN cp third_party/pdfium/lib/libpdfium.so /usr/lib/ && ldconfig && \
    bash scripts/install_fastpdf2png.sh && \
    { cp bin/libpdfium.so /usr/lib/ 2>/dev/null || true; } && \
    ldconfig

# Build CPU-only mode with ONNX Runtime backend.
# FETCH_MODELS=OFF: models are fetched in a separate layer below for better caching.
RUN mkdir -p build_cpu && cd build_cpu && \
    cmake .. -DUSE_CPU_ONLY=ON -DFETCH_MODELS=OFF && \
    make -j$(nproc)

# Create non-root user and redirect /app/models/rec into the named cache volume.
RUN useradd -m -s /bin/bash ocr \
    && chmod +x /app/scripts/entrypoint.sh \
    && mkdir -p /home/ocr/.cache/turbo-ocr/models/rec /app/models \
    && ln -s /home/ocr/.cache/turbo-ocr/models/rec /app/models/rec

# Fetch all PP-OCRv5 language bundles (SHA256-verified from pinned GitHub Release)
ARG OCR_INCLUDE_SERVER=1
ENV OCR_INCLUDE_SERVER=${OCR_INCLUDE_SERVER}
RUN bash scripts/fetch_release_models.sh \
    && chown -R ocr:ocr /app /home/ocr/.cache

EXPOSE 8000 50051

ENTRYPOINT ["/app/scripts/entrypoint.sh"]
CMD ["./build_cpu/paddle_cpu_server"]
|
||||
@@ -0,0 +1,118 @@
|
||||
# ============================================================
# TurboOCR — CUDA 12.x build (TensorRT 10.8 / CUDA 12.7)
# Base image: nvcr.io/nvidia/tensorrt:24.12-py3
#
# Supported compute capabilities (NVIDIA GPU reference):
# https://developer.nvidia.com/cuda-gpus
#   7.5  Turing — GTX 16xx / RTX 20xx
#   8.0  Ampere — A100, RTX 30xx server-class
#   8.6  Ampere — RTX 30xx desktop / laptop
#   8.9  Ada    — RTX 40xx
#
# Blackwell (CC 12.0) requires CUDA 13.x.
# For that, use the upstream docker/Dockerfile.gpu (tensorrt:26.03-py3).
#
# Build: docker build -f Dockerfile.cuda12 -t turboocr-cuda12 .
# ============================================================

ARG TURBOOCR_VERSION=v2.1.1
ARG CMAKE_VERSION=3.31.6
ARG ORT_VERSION=1.22.0
# NGC registry mirror prefix — leave empty for direct pull from nvcr.io.
# Note: standard Docker Hub mirrors (e.g. DaoCloud) do NOT proxy nvcr.io.
# Set this only if you have a dedicated NGC mirror or a pull-through proxy.
ARG NGC_MIRROR=

FROM ${NGC_MIRROR}nvcr.io/nvidia/tensorrt:24.12-py3

# Re-declare ARGs after FROM so they remain in scope
ARG TURBOOCR_VERSION
ARG CMAKE_VERSION
ARG ORT_VERSION

# Install build dependencies.
# - DEBIAN_FRONTEND is scoped to this RUN so it does not leak into the runtime image.
# - protobuf-compiler and ca-certificates are listed explicitly for parity with
#   Dockerfile.cpu: the gRPC codegen plugin (protobuf-compiler-grpc) needs protoc,
#   and --no-install-recommends suppresses the Recommends that would otherwise
#   pull it in. Both are harmless if the base image already ships them.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential \
    pkg-config \
    libopencv-dev \
    nginx \
    gosu \
    libgrpc++-dev \
    libprotobuf-dev \
    protobuf-compiler \
    protobuf-compiler-grpc \
    libjsoncpp-dev \
    uuid-dev \
    zlib1g-dev \
    libssl-dev \
    libc-ares-dev \
    git \
    wget \
    curl \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install Drogon HTTP framework (async, epoll-based)
RUN cd /tmp && \
    git clone --depth 1 --branch v1.9.12 https://github.com/drogonframework/drogon.git && \
    cd drogon && git submodule update --init && \
    mkdir build && cd build && \
    cmake .. -DBUILD_EXAMPLES=OFF -DBUILD_CTL=OFF -DBUILD_ORM=OFF \
        -DBUILD_POSTGRESQL=OFF -DBUILD_MYSQL=OFF -DBUILD_SQLITE=OFF \
        -DBUILD_REDIS=OFF -DBUILD_TESTING=OFF && \
    make -j$(nproc) && make install && \
    rm -rf /tmp/drogon

# Upgrade CMake (the base image may ship an older version)
RUN cd /tmp && \
    wget -q "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz" && \
    tar xzf "cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz" && \
    cp -r "cmake-${CMAKE_VERSION}-linux-x86_64/bin/"* /usr/local/bin/ && \
    cp -r "cmake-${CMAKE_VERSION}-linux-x86_64/share/"* /usr/local/share/ && \
    rm -rf /tmp/cmake*

# Install ONNX Runtime C++ SDK (used by the CPU inference fallback path)
RUN cd /tmp && \
    wget -q "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    tar xzf "onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    cp -r "onnxruntime-linux-x64-${ORT_VERSION}/include/"* /usr/local/include/ && \
    cp "onnxruntime-linux-x64-${ORT_VERSION}/lib/libonnxruntime.so"* /usr/local/lib/ && \
    ldconfig && rm -rf /tmp/onnxruntime*

# Clone TurboOCR at the pinned release tag
RUN git clone --depth 1 --branch "${TURBOOCR_VERSION}" \
    https://github.com/aiptimizer/TurboOCR.git /app

WORKDIR /app

# Install fastpdf2png (PDF renderer — PDFium vendored in third_party/)
RUN bash scripts/install_fastpdf2png.sh && \
    cp bin/libpdfium.so /usr/lib/ && ldconfig

# Build GPU mode.
# - CUDA_ARCHITECTURES: 7.5-8.9 covers Turing through Ada Lovelace under CUDA 12.x.
#   CC 12.0 (Blackwell) is excluded — it requires CUDA 13.x.
# - TENSORRT_DIR: /usr/local/tensorrt is the cmake default and matches the 24.12-py3
#   base image layout. No override needed (upstream 26.03 uses /usr/lib/x86_64-linux-gnu).
# - FETCH_MODELS=OFF: models are fetched in a separate layer below for better caching.
RUN mkdir -p build && cd build && \
    cmake .. \
        -DFETCH_MODELS=OFF \
        -DCMAKE_CUDA_ARCHITECTURES="75;80;86;89" \
    && make -j$(nproc)

# Create non-root user and redirect /app/models/rec into the named cache volume.
# TRT engines built at first start are persisted via: -v turboocr_cache:/home/ocr/.cache/turbo-ocr
RUN useradd -m -s /bin/bash ocr \
    && chmod +x /app/scripts/entrypoint.sh \
    && mkdir -p /home/ocr/.cache/turbo-ocr/models/rec /app/models \
    && ln -s /home/ocr/.cache/turbo-ocr/models/rec /app/models/rec

# Fetch all PP-OCRv5 language bundles (SHA256-verified from pinned GitHub Release)
ARG OCR_INCLUDE_SERVER=1
ENV OCR_INCLUDE_SERVER=${OCR_INCLUDE_SERVER}
RUN bash scripts/fetch_release_models.sh \
    && chown -R ocr:ocr /app /home/ocr/.cache

EXPOSE 8000 50051

ENTRYPOINT ["/app/scripts/entrypoint.sh"]
CMD ["./build/paddle_highspeed_cpp"]
|
||||
@@ -0,0 +1,127 @@
|
||||
# TurboOCR — Custom Builds
|
||||
|
||||
[中文文档](README.zh.md)
|
||||
|
||||
This directory builds [TurboOCR](https://github.com/aiptimizer/TurboOCR) from source for two targets that are not covered by the upstream pre-built images:
|
||||
|
||||
| Variant | Dockerfile | Profile | Base image |
|
||||
| ------- | ---------- | ------- | ---------- |
|
||||
| **CUDA 12.x** | `Dockerfile.cuda12` | `gpu` | `nvcr.io/nvidia/tensorrt:24.12-py3` (TRT 10.8 / CUDA 12.7) |
|
||||
| **CPU-only** | `Dockerfile.cpu` | `cpu` | `ubuntu:24.04` (ONNX Runtime) |
|
||||
|
||||
The upstream pre-built image targets CUDA 13.x (Blackwell / CC 12.0). Use this directory if your GPU is on CUDA 12.x (Turing through Ada Lovelace, CC 7.5–8.9) or if you have no GPU at all.
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. Copy the example environment file:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
2. Build and start the variant you need:
|
||||
|
||||
**CUDA 12.x (GPU — Turing through Ada Lovelace):**
|
||||
|
||||
```bash
|
||||
docker compose --profile gpu up -d --build
|
||||
```
|
||||
|
||||
**CPU-only (no GPU required):**
|
||||
|
||||
```bash
|
||||
docker compose --profile cpu up -d --build
|
||||
```
|
||||
|
||||
3. Access the API at <http://localhost:8000>.
|
||||
|
||||
> **Note:** The first build compiles Drogon and TurboOCR from source, which takes 10–30 minutes depending on your CPU core count. Subsequent builds use the Docker layer cache and are fast.
|
||||
|
||||
## First-Start Behavior
|
||||
|
||||
### GPU variant
|
||||
|
||||
On the very first container start, TensorRT compiles 4 ONNX models into engine files. Measured times on an RTX 3070 Laptop:
|
||||
|
||||
| Engine | Time |
|
||||
| ------ | ---- |
|
||||
| det | ~5 min |
|
||||
| rec | ~30 min |
|
||||
| cls | ~4 min |
|
||||
| layout | ~28 min |
|
||||
| **Total** | **~67–90 min** |
|
||||
|
||||
High-end desktop GPUs finish in ~15 minutes. The container shows `unhealthy` during compilation — this is expected. Once all engines are ready the server starts and the status transitions to `healthy`. Subsequent restarts reuse the cached engines and start in seconds.
|
||||
|
||||
> **Tip:** Set `TURBOOCR_DISABLE_LAYOUT=1` to skip the layout detection engine (~28 min savings on laptop GPUs). Use this only if you do not need the `?layout=1` PDF endpoint.
|
||||
|
||||
### CPU variant
|
||||
|
||||
No TRT compilation occurs. ONNX Runtime loads the models directly at startup. The container is typically `healthy` within 60 seconds.
|
||||
|
||||
## Default Ports
|
||||
|
||||
| Port | Protocol | Description |
|
||||
| ---- | -------- | ----------- |
|
||||
| 8000 | HTTP | OCR REST API + health/metrics |
|
||||
| 50051 | gRPC | OCR gRPC API |
|
||||
|
||||
## Important Environment Variables
|
||||
|
||||
| Variable | Description | Default |
|
||||
| -------- | ----------- | ------- |
|
||||
| `TURBOOCR_VERSION` | Git tag used for the source build | `v2.1.1` |
|
||||
| `TURBOOCR_HTTP_PORT_OVERRIDE` | Host port for the HTTP API | `8000` |
|
||||
| `TURBOOCR_GRPC_PORT_OVERRIDE` | Host port for the gRPC API | `50051` |
|
||||
| `TURBOOCR_LANG` | Language bundle: `latin`, `chinese`, `greek`, `eslav`, `arabic`, `korean`, `thai` | `""` (latin) |
|
||||
| `TURBOOCR_SERVER` | With `chinese`, set to `1` for the 84 MB server rec model | `""` |
|
||||
| `TURBOOCR_PIPELINE_POOL_SIZE` | Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto | `""` |
|
||||
| `TURBOOCR_DISABLE_LAYOUT` | Disable layout detection model (saves ~300–500 MB VRAM) | `0` |
|
||||
| `TURBOOCR_PDF_MODE` | PDF parsing mode: `ocr` / `geometric` / `auto` / `auto_verified` | `ocr` |
|
||||
| `TURBOOCR_CPU_LIMIT` | CPU core limit (both variants) | `8.0` |
|
||||
| `TURBOOCR_MEMORY_LIMIT` | Memory limit — `12G` for GPU, `4G` for CPU | variant default |
|
||||
| `TURBOOCR_GPU_COUNT` | NVIDIA GPUs to reserve (GPU variant only) | `1` |
|
||||
| `TURBOOCR_SHM_SIZE` | Shared memory for fastpdf2png — `2g` for GPU, `512m` for CPU | variant default |
|
||||
| `TZ` | Container timezone | `UTC` |
|
||||
|
||||
## Storage
|
||||
|
||||
- `turboocr_build_cache` — named volume at `/home/ocr/.cache/turbo-ocr`. Stores TRT engine files (GPU) or the model cache directory (CPU). Must be a named volume — a bind-mount of an empty host directory would shadow the baked-in language bundles and the server would fail to load models.
|
||||
|
||||
## Supported GPU Architectures (CUDA 12.x variant)
|
||||
|
||||
| Compute Capability | Architecture | GPUs |
|
||||
| ------------------ | ------------ | ---- |
|
||||
| 7.5 | Turing | GTX 16xx, RTX 20xx |
|
||||
| 8.0 | Ampere | A100, RTX 30xx (server) |
|
||||
| 8.6 | Ampere | RTX 30xx (desktop / laptop) |
|
||||
| 8.9 | Ada Lovelace | RTX 40xx |
|
||||
|
||||
Blackwell (CC 12.0, RTX 50xx) requires CUDA 13.x — use the upstream pre-built image from `src/turboocr` instead.
|
||||
|
||||
## Notes
|
||||
|
||||
- Both Dockerfiles build TurboOCR from source via `git clone` inside the image. A working internet connection is required at build time.
|
||||
- The CUDA 12.x Dockerfile overrides `CMAKE_CUDA_ARCHITECTURES` to `75;80;86;89`, removing CC 12.0 which is not supported by CUDA 12.x.
|
||||
- TensorRT 10.8 is located at `/usr/local/tensorrt` in the `24.12-py3` base image, which matches the CMake default. No `-DTENSORRT_DIR` override is needed.
|
||||
- The CPU variant uses ONNX Runtime 1.22.0 and produces a `paddle_cpu_server` binary with both HTTP and gRPC interfaces.
|
||||
|
||||
## Endpoints
|
||||
|
||||
- HTTP API: <http://localhost:8000>
|
||||
- gRPC API: `localhost:50051`
|
||||
- Health: <http://localhost:8000/health>
|
||||
- Readiness: <http://localhost:8000/health/ready>
|
||||
- Metrics (Prometheus): <http://localhost:8000/metrics>
|
||||
|
||||
## Security Notes
|
||||
|
||||
- The API has no authentication by default. Put a reverse proxy (nginx, Caddy) in front for production.
|
||||
- The default PDF mode is `ocr`, which only trusts pixel data and is safe for untrusted PDF uploads.
|
||||
- Do **not** set `TURBOOCR_PDF_MODE` to `geometric` or `auto` globally if you accept PDFs from untrusted sources.
|
||||
|
||||
## References
|
||||
|
||||
- [TurboOCR Repository](https://github.com/aiptimizer/TurboOCR)
|
||||
- [NVIDIA TensorRT Container Releases](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/)
|
||||
- [NVIDIA CUDA GPU Compute Capability Table](https://developer.nvidia.com/cuda-gpus)
|
||||
@@ -0,0 +1,127 @@
|
||||
# TurboOCR — 自定义构建
|
||||
|
||||
[English](README.md)
|
||||
|
||||
此目录从源码构建 [TurboOCR](https://github.com/aiptimizer/TurboOCR),覆盖上游预构建镜像未提供的两个目标:
|
||||
|
||||
| 变体 | Dockerfile | Profile | 基础镜像 |
|
||||
| ---- | ---------- | ------- | -------- |
|
||||
| **CUDA 12.x** | `Dockerfile.cuda12` | `gpu` | `nvcr.io/nvidia/tensorrt:24.12-py3`(TRT 10.8 / CUDA 12.7) |
|
||||
| **纯 CPU** | `Dockerfile.cpu` | `cpu` | `ubuntu:24.04`(ONNX Runtime) |
|
||||
|
||||
上游预构建镜像针对 CUDA 13.x(Blackwell / CC 12.0)。如果你的 GPU 属于 CUDA 12.x 范围(Turing 到 Ada Lovelace,CC 7.5–8.9),或者没有 GPU,请使用本目录。
|
||||
|
||||
## 快速开始
|
||||
|
||||
1. 复制示例环境文件:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
2. 按需构建并启动对应变体:
|
||||
|
||||
**CUDA 12.x(GPU — Turing 到 Ada Lovelace):**
|
||||
|
||||
```bash
|
||||
docker compose --profile gpu up -d --build
|
||||
```
|
||||
|
||||
**纯 CPU(无需 GPU):**
|
||||
|
||||
```bash
|
||||
docker compose --profile cpu up -d --build
|
||||
```
|
||||
|
||||
3. 访问 API:<http://localhost:8000>。
|
||||
|
||||
> **说明:** 首次构建需要从源码编译 Drogon 和 TurboOCR,耗时约 10–30 分钟,具体取决于 CPU 核心数。后续构建会复用 Docker 层缓存,速度很快。
|
||||
|
||||
## 首次启动说明
|
||||
|
||||
### GPU 变体
|
||||
|
||||
容器首次启动时,TensorRT 会将 4 个 ONNX 模型编译为引擎文件。在 RTX 3070 Laptop 上的实测耗时:
|
||||
|
||||
| 引擎 | 耗时 |
|
||||
| ---- | ---- |
|
||||
| det | 约 5 分钟 |
|
||||
| rec | 约 30 分钟 |
|
||||
| cls | 约 4 分钟 |
|
||||
| layout | 约 28 分钟 |
|
||||
| **合计** | **约 67–90 分钟** |
|
||||
|
||||
高端桌面 GPU 约 15 分钟完成。编译期间容器显示 `unhealthy` 属于正常现象——所有引擎构建完成后服务启动,状态切换为 `healthy`。后续重启会复用缓存引擎,几乎瞬间完成。
|
||||
|
||||
> **提示:** 设置 `TURBOOCR_DISABLE_LAYOUT=1` 可跳过版面检测引擎的编译(笔记本 GPU 约节省 28 分钟)。仅在不需要 `?layout=1` PDF 端点时使用此选项。
|
||||
|
||||
### CPU 变体
|
||||
|
||||
无 TRT 编译过程。ONNX Runtime 在启动时直接加载模型,通常在 60 秒内变为 `healthy`。
|
||||
|
||||
## 默认端口
|
||||
|
||||
| 端口 | 协议 | 说明 |
|
||||
| ---- | ---- | ---- |
|
||||
| 8000 | HTTP | OCR REST API + 健康检查/指标 |
|
||||
| 50051 | gRPC | OCR gRPC API |
|
||||
|
||||
## 主要环境变量
|
||||
|
||||
| 变量名 | 说明 | 默认值 |
|
||||
| ------ | ---- | ------ |
|
||||
| `TURBOOCR_VERSION` | 构建所用的 Git 标签 | `v2.1.1` |
|
||||
| `TURBOOCR_HTTP_PORT_OVERRIDE` | HTTP API 主机端口 | `8000` |
|
||||
| `TURBOOCR_GRPC_PORT_OVERRIDE` | gRPC API 主机端口 | `50051` |
|
||||
| `TURBOOCR_LANG` | 语言包:`latin`、`chinese`、`greek`、`eslav`、`arabic`、`korean`、`thai` | `""`(latin) |
|
||||
| `TURBOOCR_SERVER` | 当使用 `chinese` 时,设为 `1` 启用 84 MB 服务端识别模型 | `""` |
|
||||
| `TURBOOCR_PIPELINE_POOL_SIZE` | 并发 GPU 流水线数(每条约 1.4 GB 显存),留空则自动 | `""` |
|
||||
| `TURBOOCR_DISABLE_LAYOUT` | 禁用版面检测模型(节省约 300–500 MB 显存) | `0` |
|
||||
| `TURBOOCR_PDF_MODE` | PDF 解析模式:`ocr` / `geometric` / `auto` / `auto_verified` | `ocr` |
|
||||
| `TURBOOCR_CPU_LIMIT` | CPU 核心限制(两个变体通用) | `8.0` |
|
||||
| `TURBOOCR_MEMORY_LIMIT` | 内存限制——GPU 变体 `12G`,CPU 变体 `4G` | 变体默认值 |
|
||||
| `TURBOOCR_GPU_COUNT` | 预留的 NVIDIA GPU 数量(仅 GPU 变体) | `1` |
|
||||
| `TURBOOCR_SHM_SIZE` | fastpdf2png 共享内存——GPU 变体 `2g`,CPU 变体 `512m` | 变体默认值 |
|
||||
| `TZ` | 容器时区 | `UTC` |
|
||||
|
||||
## 存储
|
||||
|
||||
- `turboocr_build_cache`——命名卷,挂载于 `/home/ocr/.cache/turbo-ocr`。用于存储 TRT 引擎文件(GPU 变体)或模型缓存目录(CPU 变体)。必须使用**命名卷**——绑定挂载空主机目录会遮蔽镜像内置语言包,导致服务无法加载模型。
|
||||
|
||||
## 支持的 GPU 架构(CUDA 12.x 变体)
|
||||
|
||||
| 算力版本 | 架构 | GPU 型号 |
|
||||
| -------- | ---- | -------- |
|
||||
| 7.5 | Turing | GTX 16xx、RTX 20xx |
|
||||
| 8.0 | Ampere | A100、RTX 30xx(服务器) |
|
||||
| 8.6 | Ampere | RTX 30xx(桌面/笔记本) |
|
||||
| 8.9 | Ada Lovelace | RTX 40xx |
|
||||
|
||||
Blackwell(CC 12.0,RTX 50xx)需要 CUDA 13.x——请改用 `src/turboocr` 中的上游预构建镜像。
|
||||
|
||||
## 说明
|
||||
|
||||
- 两个 Dockerfile 均在镜像内通过 `git clone` 从源码构建 TurboOCR,构建时需要可访问互联网。
|
||||
- CUDA 12.x Dockerfile 将 `CMAKE_CUDA_ARCHITECTURES` 设置为 `75;80;86;89`,去除了 CUDA 12.x 不支持的 CC 12.0。
|
||||
- TensorRT 10.8 在 `24.12-py3` 基础镜像中位于 `/usr/local/tensorrt`,与 CMake 默认值一致,无需额外的 `-DTENSORRT_DIR` 参数。
|
||||
- CPU 变体使用 ONNX Runtime 1.22.0,生成同时支持 HTTP 和 gRPC 接口的 `paddle_cpu_server` 二进制文件。
|
||||
|
||||
## 访问端点
|
||||
|
||||
- HTTP API:<http://localhost:8000>
|
||||
- gRPC API:`localhost:50051`
|
||||
- 健康检查:<http://localhost:8000/health>
|
||||
- 就绪检查:<http://localhost:8000/health/ready>
|
||||
- Prometheus 指标:<http://localhost:8000/metrics>
|
||||
|
||||
## 安全说明
|
||||
|
||||
- API 默认无身份认证。生产环境请在前面套一层反向代理(nginx、Caddy 等)。
|
||||
- PDF 默认模式为 `ocr`,只信任像素数据,可安全处理不可信来源的 PDF 上传。
|
||||
- 如果你的服务接收不可信来源的 PDF,**不要**将 `TURBOOCR_PDF_MODE` 全局设为 `geometric` 或 `auto`。
|
||||
|
||||
## 参考链接
|
||||
|
||||
- [TurboOCR 仓库](https://github.com/aiptimizer/TurboOCR)
|
||||
- [NVIDIA TensorRT 容器发布说明](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/)
|
||||
- [NVIDIA CUDA GPU 算力版本对照表](https://developer.nvidia.com/cuda-gpus)
|
||||
@@ -0,0 +1,110 @@
|
||||
# Defaults shared by every service in this file.
x-defaults: &defaults
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      max-size: ${TURBOOCR_LOG_MAX_SIZE:-100m}
      max-file: '${TURBOOCR_LOG_MAX_FILE:-3}'

# Settings common to both TurboOCR variants: ports, model cache, environment.
x-turboocr-common: &turboocr-common
  <<: *defaults
  ports:
    - '${TURBOOCR_HTTP_PORT_OVERRIDE:-8000}:8000'
    - '${TURBOOCR_GRPC_PORT_OVERRIDE:-50051}:50051'
  volumes:
    # Named volume persists TRT engines (GPU) or ONNX model cache (CPU).
    # Must be a named volume — bind-mounting an empty host dir shadows the
    # baked-in language bundles and prevents the server from loading models.
    - turboocr_build_cache:/home/ocr/.cache/turbo-ocr
  environment:
    - TZ=${TZ:-UTC}
    # Language bundle: latin (default), chinese, greek, eslav, arabic, korean, thai
    - OCR_LANG=${TURBOOCR_LANG:-}
    # Set to 1 with OCR_LANG=chinese to use the 84 MB server rec model
    - OCR_SERVER=${TURBOOCR_SERVER:-}
    # Concurrent GPU pipelines (~1.4 GB VRAM each); empty = auto; ignored in CPU mode
    - PIPELINE_POOL_SIZE=${TURBOOCR_PIPELINE_POOL_SIZE:-}
    # Set to 1 to disable PP-DocLayoutV3 layout detection (saves ~300-500 MB VRAM)
    - DISABLE_LAYOUT=${TURBOOCR_DISABLE_LAYOUT:-0}
    # Default PDF mode: ocr (safest) / geometric / auto / auto_verified
    - ENABLE_PDF_MODE=${TURBOOCR_PDF_MODE:-ocr}
    # Skip angle classifier (~0.4 ms savings)
    - DISABLE_ANGLE_CLS=${TURBOOCR_DISABLE_ANGLE_CLS:-0}
    # Max detection input size in pixels
    - DET_MAX_SIDE=${TURBOOCR_DET_MAX_SIDE:-960}
    # PDF render parallelism
    - PDF_DAEMONS=${TURBOOCR_PDF_DAEMONS:-16}
    - PDF_WORKERS=${TURBOOCR_PDF_WORKERS:-4}
    # Maximum pages per PDF request
    - MAX_PDF_PAGES=${TURBOOCR_MAX_PDF_PAGES:-2000}
    # Log level: debug / info / warn / error
    - LOG_LEVEL=${TURBOOCR_LOG_LEVEL:-info}
    # Log format: json (structured) / text (human-readable)
    - LOG_FORMAT=${TURBOOCR_LOG_FORMAT:-json}

services:
  turboocr-cuda12:
    <<: *turboocr-common
    profiles:
      - gpu
    build:
      context: .
      dockerfile: Dockerfile.cuda12
      args:
        TURBOOCR_VERSION: ${TURBOOCR_VERSION:-v2.1.1}
        NGC_MIRROR: ${TURBOOCR_NGC_MIRROR:-}
    image: ${GLOBAL_REGISTRY:-}alexsuntop/turboocr-cuda12:${TURBOOCR_VERSION:-v2.1.1}
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      # First start builds 4 TensorRT engines from ONNX. Measured times on an
      # RTX 3070 Laptop: det (~5 min) + rec (~30 min) + cls (~4 min) +
      # layout (~28 min) = ~67-90 min. High-end desktop GPUs finish in ~15 min.
      # Set TURBOOCR_DISABLE_LAYOUT=1 to skip layout and save ~28 min.
      # Subsequent restarts reuse the cached engines and start in seconds.
      start_period: 120m
    deploy:
      resources:
        limits:
          cpus: ${TURBOOCR_CPU_LIMIT:-8.0}
          memory: ${TURBOOCR_MEMORY_LIMIT:-12G}
        reservations:
          cpus: ${TURBOOCR_CPU_RESERVATION:-2.0}
          memory: ${TURBOOCR_MEMORY_RESERVATION:-4G}
          devices:
            - driver: nvidia
              count: ${TURBOOCR_GPU_COUNT:-1}
              capabilities: ["gpu"]
    shm_size: ${TURBOOCR_SHM_SIZE:-2g}

  turboocr-cpu:
    <<: *turboocr-common
    profiles:
      - cpu
    build:
      context: .
      dockerfile: Dockerfile.cpu
      args:
        TURBOOCR_VERSION: ${TURBOOCR_VERSION:-v2.1.1}
        DOCKER_MIRROR: ${TURBOOCR_DOCKER_MIRROR:-}
    image: ${GLOBAL_REGISTRY:-}alexsuntop/turboocr-cpu:${TURBOOCR_VERSION:-v2.1.1}
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      # CPU mode uses ONNX Runtime directly — no TRT compilation on first start.
      # Expect startup in under 60 s on most hardware.
      start_period: 2m
    deploy:
      resources:
        limits:
          cpus: ${TURBOOCR_CPU_LIMIT:-8.0}
          memory: ${TURBOOCR_MEMORY_LIMIT:-4G}
        reservations:
          cpus: ${TURBOOCR_CPU_RESERVATION:-2.0}
          memory: ${TURBOOCR_MEMORY_RESERVATION:-1G}
    shm_size: ${TURBOOCR_SHM_SIZE:-512m}

volumes:
  turboocr_build_cache:
|
||||
Reference in New Issue
Block a user