feat: add build turboocr

2026-04-29 11:54:59 +08:00
parent ce16588916
commit 5f8503df42
11 changed files with 675 additions and 8 deletions
@@ -0,0 +1,73 @@
+# Source build configuration
+TURBOOCR_VERSION=v2.1.1
+
+# Registry mirror prefix for docker build — leave empty for direct pull.
+# China users: set to "docker.m.daocloud.io/" to proxy Docker Hub via DaoCloud.
+# Example: TURBOOCR_DOCKER_MIRROR=docker.m.daocloud.io/
+TURBOOCR_DOCKER_MIRROR=
+
+# NGC (nvcr.io) mirror prefix for the CUDA 12.x GPU build — leave empty for direct pull.
+# Standard Docker Hub mirrors (e.g. DaoCloud) do NOT proxy nvcr.io.
+# Set this only if you have a dedicated NGC pull-through proxy.
+TURBOOCR_NGC_MIRROR=
+
+# Network configuration
+TURBOOCR_HTTP_PORT_OVERRIDE=8000
+TURBOOCR_GRPC_PORT_OVERRIDE=50051
+
+# Language bundle: latin (default when left empty), chinese, greek, eslav, arabic, korean, thai
+TURBOOCR_LANG=
+# Set to 1 together with TURBOOCR_LANG=chinese to use the 84 MB server recognition model
+TURBOOCR_SERVER=
+
+# GPU pipeline pool — number of concurrent inference pipelines (~1.4 GB VRAM each).
+# Leave empty to let the server choose automatically based on available VRAM.
+# Ignored in CPU mode.
+TURBOOCR_PIPELINE_POOL_SIZE=
+
+# Set to 1 to skip loading the PP-DocLayoutV3 layout detection model.
+# Saves ~300-500 MB VRAM and cuts first-start compilation time by ~28 min on laptop GPUs.
+# Only do this if you do not need the ?layout=1 PDF endpoint.
+TURBOOCR_DISABLE_LAYOUT=0
+
+# Default PDF parsing mode: ocr (safest) / geometric / auto / auto_verified
+TURBOOCR_PDF_MODE=ocr
+
+# Set to 1 to skip the angle classifier (~0.4 ms savings per image)
+TURBOOCR_DISABLE_ANGLE_CLS=0
+
+# Maximum detection input dimension in pixels
+TURBOOCR_DET_MAX_SIDE=960
+
+# PDF render parallelism
+TURBOOCR_PDF_DAEMONS=16
+TURBOOCR_PDF_WORKERS=4
+
+# Maximum pages accepted in a single PDF request
+TURBOOCR_MAX_PDF_PAGES=2000
+
+# Log level: debug / info / warn / error
+TURBOOCR_LOG_LEVEL=info
+
+# Log format: json (structured) / text (human-readable)
+TURBOOCR_LOG_FORMAT=json
+
+# Resources — GPU variant (profile: gpu)
+# The first start builds TensorRT (TRT) engines; the 12G memory limit covers GPU operation plus headroom for engine compilation.
+TURBOOCR_CPU_LIMIT=8.0
+TURBOOCR_MEMORY_LIMIT=12G
+TURBOOCR_CPU_RESERVATION=2.0
+TURBOOCR_MEMORY_RESERVATION=4G
+
+# Number of NVIDIA GPUs to reserve (GPU variant only)
+TURBOOCR_GPU_COUNT=1
+
+# Shared memory — fastpdf2png uses /dev/shm for inter-process PDF page transfers
+TURBOOCR_SHM_SIZE=2g
+
+# Log file rotation: maximum size per log file and maximum number of log files
+TURBOOCR_LOG_MAX_SIZE=100m
+TURBOOCR_LOG_MAX_FILE=3
+
+# Timezone
+TZ=UTC