# ============================================================
# TurboOCR — CPU-only build (ONNX Runtime backend, no GPU required)
# Base image: ubuntu:24.04
#
# Produces: /app/build_cpu/paddle_cpu_server (HTTP + gRPC server)
#
# Image size: ~500 MB (vs ~10 GB for the GPU image).
# No TRT compilation on first start — ONNX Runtime is used directly.
# Startup is fast (~30 s) and requires no NVIDIA driver.
#
# Build: docker build -f Dockerfile.cpu -t turboocr-cpu .
# ============================================================

ARG TURBOOCR_VERSION=v2.1.1
ARG ORT_VERSION=1.22.0

# Registry mirror prefix — leave empty for direct pull.
# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud.
ARG DOCKER_MIRROR=

FROM ${DOCKER_MIRROR}ubuntu:24.04

# Re-declare ARGs after FROM so they remain in scope
ARG TURBOOCR_VERSION
ARG ORT_VERSION

# Build-scoped ARG (not ENV): keeps apt non-interactive during the build
# without persisting DEBIAN_FRONTEND into the final image, where it would
# leak into derived images and runtime apt invocations.
ARG DEBIAN_FRONTEND=noninteractive

# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    cmake \
    g++ \
    make \
    pkg-config \
    libopencv-dev \
    nginx \
    gosu \
    libgrpc++-dev \
    libc-ares-dev \
    libprotobuf-dev \
    protobuf-compiler \
    protobuf-compiler-grpc \
    libjsoncpp-dev \
    uuid-dev \
    zlib1g-dev \
    libssl-dev \
    git \
    wget \
    curl \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install Drogon HTTP framework (async, epoll-based)
RUN cd /tmp && \
    git clone --depth 1 --branch v1.9.12 https://github.com/drogonframework/drogon.git && \
    cd drogon && git submodule update --init && \
    mkdir build && cd build && \
    cmake .. \
        -DBUILD_EXAMPLES=OFF -DBUILD_CTL=OFF -DBUILD_ORM=OFF \
        -DBUILD_POSTGRESQL=OFF -DBUILD_MYSQL=OFF -DBUILD_SQLITE=OFF \
        -DBUILD_REDIS=OFF -DBUILD_TESTING=OFF && \
    make -j$(nproc) && make install && \
    rm -rf /tmp/drogon

# Install ONNX Runtime C++ SDK
RUN cd /tmp && \
    wget -q "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    tar xzf "onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    cp -r "onnxruntime-linux-x64-${ORT_VERSION}/include/"* /usr/local/include/ && \
    cp "onnxruntime-linux-x64-${ORT_VERSION}/lib/libonnxruntime.so"* /usr/local/lib/ && \
    ldconfig && rm -rf /tmp/onnxruntime*

# Clone TurboOCR at the pinned release tag
RUN git clone --depth 1 --branch "${TURBOOCR_VERSION}" \
    https://github.com/aiptimizer/TurboOCR.git /app

WORKDIR /app

# Install fastpdf2png (PDF renderer — PDFium vendored in third_party/).
# Copy vendored libpdfium first so the installer does not need network access.
RUN cp third_party/pdfium/lib/libpdfium.so /usr/lib/ && ldconfig && \
    bash scripts/install_fastpdf2png.sh && \
    { cp bin/libpdfium.so /usr/lib/ 2>/dev/null || true; } && \
    ldconfig

# Build CPU-only mode with ONNX Runtime backend
RUN mkdir -p build_cpu && cd build_cpu && \
    cmake .. -DUSE_CPU_ONLY=ON -DFETCH_MODELS=OFF && \
    make -j$(nproc)

# Create non-root user and redirect /app/models/rec into the named cache volume.
RUN useradd -m -s /bin/bash ocr \
    && chmod +x /app/scripts/entrypoint.sh \
    && mkdir -p /home/ocr/.cache/turbo-ocr/models/rec /app/models \
    && ln -s /home/ocr/.cache/turbo-ocr/models/rec /app/models/rec

# Fetch all PP-OCRv5 language bundles (SHA256-verified from pinned GitHub Release)
ARG OCR_INCLUDE_SERVER=1
ENV OCR_INCLUDE_SERVER=${OCR_INCLUDE_SERVER}
RUN bash scripts/fetch_release_models.sh \
    && chown -R ocr:ocr /app /home/ocr/.cache

EXPOSE 8000 50051

# Entrypoint handles runtime setup (gosu is installed, presumably to drop
# to the 'ocr' user — confirm in scripts/entrypoint.sh); CMD is the CPU server
# binary, resolved relative to WORKDIR /app.
ENTRYPOINT ["/app/scripts/entrypoint.sh"]
CMD ["./build_cpu/paddle_cpu_server"]