feat: add build turboocr
This commit is contained in:
@@ -0,0 +1,104 @@
|
||||
# ============================================================
|
||||
# TurboOCR — CPU-only build (ONNX Runtime backend, no GPU required)
|
||||
# Base image: ubuntu:24.04
|
||||
#
|
||||
# Produces: /app/build_cpu/paddle_cpu_server (HTTP + gRPC server)
|
||||
#
|
||||
# Image size: ~500 MB (vs ~10 GB for the GPU image).
|
||||
# No TRT compilation on first start — ONNX Runtime is used directly.
|
||||
# Startup is fast (~30 s) and requires no NVIDIA driver.
|
||||
#
|
||||
# Build: docker build -f Dockerfile.cpu -t turboocr-cpu .
|
||||
# ============================================================
|
||||
|
||||
# Optional registry prefix for the base image; empty means pull directly.
# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud.
ARG DOCKER_MIRROR=""

# Pinned upstream versions. Declared before FROM, so they act as global
# defaults and must be re-declared inside the stage before they can be used.
ARG ORT_VERSION=1.22.0
ARG TURBOOCR_VERSION=v2.1.1

FROM ${DOCKER_MIRROR}ubuntu:24.04
|
||||
|
||||
# ARGs declared before FROM are only visible to FROM itself. Re-declaring them
# (without a value) inside the stage brings the global defaults into scope for
# the RUN steps that reference ${TURBOOCR_VERSION} and ${ORT_VERSION}.
ARG TURBOOCR_VERSION
ARG ORT_VERSION

# Silence interactive apt/debconf prompts for the RUN steps of this build.
# Declared as ARG rather than ENV so the setting is build-time only and is not
# baked into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# OS packages, installed in a single layer: compiler toolchain and development
# libraries for the builds below, download tools, plus nginx and gosu.
# The list is kept alphabetical so additions and removals diff cleanly; the apt
# index is deleted in the same layer so it never reaches the image.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        ca-certificates \
        cmake \
        curl \
        g++ \
        git \
        gosu \
        libc-ares-dev \
        libgrpc++-dev \
        libjsoncpp-dev \
        libopencv-dev \
        libprotobuf-dev \
        libssl-dev \
        make \
        nginx \
        pkg-config \
        protobuf-compiler \
        protobuf-compiler-grpc \
        uuid-dev \
        wget \
        zlib1g-dev \
 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Build and install the Drogon HTTP framework (async, epoll-based) from the
# pinned v1.9.12 tag. Every optional component exposed through the BUILD_*
# switches below (examples, ctl, ORM, database/Redis backends, tests) is
# turned off. The checkout lives under /tmp/drogon and is deleted in the same
# layer once `make install` has finished.
RUN git clone --depth 1 --branch v1.9.12 \
        https://github.com/drogonframework/drogon.git /tmp/drogon \
 && git -C /tmp/drogon submodule update --init \
 && cmake -S /tmp/drogon -B /tmp/drogon/build \
        -DBUILD_EXAMPLES=OFF -DBUILD_CTL=OFF -DBUILD_ORM=OFF \
        -DBUILD_POSTGRESQL=OFF -DBUILD_MYSQL=OFF -DBUILD_SQLITE=OFF \
        -DBUILD_REDIS=OFF -DBUILD_TESTING=OFF \
 && make -C /tmp/drogon/build -j"$(nproc)" \
 && make -C /tmp/drogon/build install \
 && rm -rf /tmp/drogon
|
||||
|
||||
# Install the ONNX Runtime C++ SDK from the prebuilt linux-x64 release archive
# selected by ${ORT_VERSION}: headers go to /usr/local/include, the shared
# library to /usr/local/lib, then the linker cache is refreshed.
#
# The library is copied with `cp -P` so that any libonnxruntime.so* entries
# that are symlinks in the archive stay symlinks. A plain `cp` follows them and
# writes a full copy of the library under every name, which inflates the layer
# and leaves ldconfig with regular files where it expects links.
# The download and the unpacked tree are removed in the same layer.
RUN cd /tmp && \
    wget -q "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    tar xzf "onnxruntime-linux-x64-${ORT_VERSION}.tgz" && \
    cp -r "onnxruntime-linux-x64-${ORT_VERSION}/include/"* /usr/local/include/ && \
    cp -P "onnxruntime-linux-x64-${ORT_VERSION}/lib/libonnxruntime.so"* /usr/local/lib/ && \
    ldconfig && rm -rf /tmp/onnxruntime*
|
||||
|
||||
# Shallow-clone the TurboOCR sources into /app at the release tag selected by
# ${TURBOOCR_VERSION}, then make /app the working directory for the remaining
# steps (their relative paths resolve against it).
RUN git clone --branch "${TURBOOCR_VERSION}" --depth 1 \
        https://github.com/aiptimizer/TurboOCR.git /app

WORKDIR /app
|
||||
|
||||
# Install fastpdf2png, the PDF renderer (PDFium is vendored in third_party/).
# The vendored libpdfium.so is put in /usr/lib and registered with ldconfig
# *before* the installer runs, so the installer does not need network access.
# Afterwards, if bin/libpdfium.so exists it is copied over as well; that copy
# is best-effort (errors are silenced and ignored) and is followed by a final
# ldconfig. `set -e` aborts the step on any other failing command.
RUN set -e; \
    cp third_party/pdfium/lib/libpdfium.so /usr/lib/; \
    ldconfig; \
    bash scripts/install_fastpdf2png.sh; \
    cp bin/libpdfium.so /usr/lib/ 2>/dev/null || true; \
    ldconfig
|
||||
|
||||
# Configure and compile the CPU-only server (ONNX Runtime backend) out of tree
# in /app/build_cpu. FETCH_MODELS is switched off at configure time; a separate
# step in this file runs scripts/fetch_release_models.sh.
# WORKDIR creates the build directory and is reset to /app afterwards.
WORKDIR /app/build_cpu
RUN cmake -DUSE_CPU_ONLY=ON -DFETCH_MODELS=OFF .. \
 && make -j"$(nproc)"
WORKDIR /app
|
||||
|
||||
# Create the unprivileged `ocr` account (with a home directory and bash as its
# login shell), make the entrypoint script executable, and point
# /app/models/rec at /home/ocr/.cache/turbo-ocr/models/rec via a symlink.
# NOTE(review): the cache path is presumably backed by a named volume supplied
# at run time — no VOLUME is declared in this file; confirm against the
# compose/run configuration.
RUN useradd --create-home --shell /bin/bash ocr \
 && chmod +x /app/scripts/entrypoint.sh \
 && mkdir -p /app/models /home/ocr/.cache/turbo-ocr/models/rec \
 && ln -s /home/ocr/.cache/turbo-ocr/models/rec /app/models/rec
|
||||
|
||||
# Download the PP-OCRv5 language bundles at build time (SHA256-verified from a
# pinned GitHub Release, per the fetch script's contract).
# OCR_INCLUDE_SERVER is accepted as a build argument (default 1) and exported
# as an environment variable, so it is visible both to the fetch script here
# and to processes in the running container.
# NOTE(review): its exact meaning is defined by the scripts, not by this file.
ARG OCR_INCLUDE_SERVER=1
ENV OCR_INCLUDE_SERVER="${OCR_INCLUDE_SERVER}"

# After the download, hand the application tree and the model cache to the
# unprivileged user.
RUN bash scripts/fetch_release_models.sh && \
    chown -R ocr:ocr /app /home/ocr/.cache
|
||||
|
||||
# Ports the container listens on (documentation only; publish with -p).
# NOTE(review): presumably 8000 is HTTP and 50051 is gRPC — confirm against the
# server configuration.
EXPOSE 8000
EXPOSE 50051

# The entrypoint script is the container's main process; the server binary
# (relative to WORKDIR /app) is passed to it as the default, overridable argument.
ENTRYPOINT ["/app/scripts/entrypoint.sh"]
CMD ["./build_cpu/paddle_cpu_server"]
|
||||
Reference in New Issue
Block a user