From ced072de66f6f305d2892922081780db1fc140bf Mon Sep 17 00:00:00 2001 From: Sun-ZhenXing <1006925066@qq.com> Date: Tue, 10 Feb 2026 17:45:39 +0800 Subject: [PATCH] feat: add mineru more images --- .vscode/settings.json | 3 +++ builds/mineru/.env.example | 2 +- builds/mineru/Dockerfile | 8 +++--- builds/mineru/corex.Dockerfile | 27 ++++++++++++++++++++ builds/mineru/dcu.Dockerfile | 34 +++++++++++++++++++++++++ builds/mineru/docker-compose.yaml | 2 +- builds/mineru/gcu.Dockerfile | 30 ++++++++++++++++++++++ builds/mineru/kxpu.Dockerfile | 33 ++++++++++++++++++++++++ builds/mineru/maca.Dockerfile | 34 +++++++++++++++++++++++++ builds/mineru/mlu.Dockerfile | 42 +++++++++++++++++++++++++++++++ builds/mineru/musa.Dockerfile | 38 ++++++++++++++++++++++++++++ builds/mineru/npu.Dockerfile | 32 +++++++++++++++++++++++ builds/mineru/ppu.Dockerfile | 30 ++++++++++++++++++++++ 13 files changed, 309 insertions(+), 6 deletions(-) create mode 100644 builds/mineru/corex.Dockerfile create mode 100644 builds/mineru/dcu.Dockerfile create mode 100644 builds/mineru/gcu.Dockerfile create mode 100644 builds/mineru/kxpu.Dockerfile create mode 100644 builds/mineru/maca.Dockerfile create mode 100644 builds/mineru/mlu.Dockerfile create mode 100644 builds/mineru/musa.Dockerfile create mode 100644 builds/mineru/npu.Dockerfile create mode 100644 builds/mineru/ppu.Dockerfile diff --git a/.vscode/settings.json b/.vscode/settings.json index 06e7340..5b84764 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -16,6 +16,9 @@ "[yaml]": { "editor.formatOnSave": true }, + "[dockerfile]": { + "editor.formatOnSave": true + }, "[dockercompose]": { "editor.formatOnSave": true }, diff --git a/builds/mineru/.env.example b/builds/mineru/.env.example index 4b2351e..35a0fed 100644 --- a/builds/mineru/.env.example +++ b/builds/mineru/.env.example @@ -1,5 +1,5 @@ # MinerU Docker image -MINERU_VERSION=2.7.3 +MINERU_VERSION=2.7.6 # Port configurations MINERU_PORT_OVERRIDE_VLLM=30000 diff --git 
a/builds/mineru/Dockerfile b/builds/mineru/Dockerfile index b929fe4..560aa73 100644 --- a/builds/mineru/Dockerfile +++ b/builds/mineru/Dockerfile @@ -10,10 +10,10 @@ FROM vllm/vllm-openai:v0.10.1.1 # Install libgl for opencv support & Noto fonts for Chinese characters RUN apt-get update && \ apt-get install -y \ - fonts-noto-core \ - fonts-noto-cjk \ - fontconfig \ - libgl1 && \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig \ + libgl1 && \ fc-cache -fv && \ apt-get clean && \ rm -rf /var/lib/apt/lists/*
diff --git a/builds/mineru/corex.Dockerfile b/builds/mineru/corex.Dockerfile
new file mode 100644
index 0000000..8b88fe5
--- /dev/null
+++ b/builds/mineru/corex.Dockerfile
@@ -0,0 +1,27 @@
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + iluvatar GPU.
+FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10
+
+
+# Install Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install mineru latest
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install 'mineru[core]>=2.7.4' \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/builds/mineru/dcu.Dockerfile b/builds/mineru/dcu.Dockerfile
new file mode 100644
index 0000000..85fe692
--- /dev/null
+++ b/builds/mineru/dcu.Dockerfile
@@ -0,0 +1,34 @@
+# Base image containing the vLLM inference environment, requiring
amd64(x86-64) CPU + Hygon DCU.
+FROM harbor.sourcefind.cn:5443/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-1226-das1.7-py3.10-20251226
+
+
+# Install Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install mineru latest (requirement quoted so the shell never treats [api,gradio] as a glob)
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install "mineru[api,gradio]" \
+        "matplotlib>=3.10,<4" \
+        "ultralytics>=8.3.48,<9" \
+        "doclayout_yolo==0.0.4" \
+        "ftfy>=6.3.1,<7" \
+        "shapely>=2.0.7,<3" \
+        "pyclipper>=1.3.0,<2" \
+        "omegaconf>=2.3.0,<3" \
+        numpy==1.25.0 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/builds/mineru/docker-compose.yaml b/builds/mineru/docker-compose.yaml index 417b088..75b5f74 100644 --- a/builds/mineru/docker-compose.yaml +++ b/builds/mineru/docker-compose.yaml @@ -11,7 +11,7 @@ x-mineru-vllm: &mineru-vllm image: ${GLOBAL_REGISTRY:-}alexsuntop/mineru:${MINERU_VERSION:-2.7.6} build: context: . - dockerfile: Dockerfile + dockerfile: ${MINERU_DOCKERFILE_PATH:-Dockerfile} platforms: - linux/amd64 - linux/arm64
diff --git a/builds/mineru/gcu.Dockerfile b/builds/mineru/gcu.Dockerfile
new file mode 100644
index 0000000..f2216bc
--- /dev/null
+++ b/builds/mineru/gcu.Dockerfile
@@ -0,0 +1,30 @@
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + Enflame GCU.
+FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/gcu:docker_images_topsrider_i3x_3.6.20260106_vllm0.11_pytorch2.8.0
+
+
+# Install Noto fonts for Chinese characters from the Aliyun mirror (printf, not echo, so the \n escapes expand in any shell)
+RUN printf 'deb http://mirrors.aliyun.com/ubuntu/ noble main restricted universe multiverse\n\
+    deb http://mirrors.aliyun.com/ubuntu/ noble-updates main restricted universe multiverse\n\
+    deb http://mirrors.aliyun.com/ubuntu/ noble-backports main restricted universe multiverse\n\
+    deb http://mirrors.aliyun.com/ubuntu/ noble-security main restricted universe multiverse\n' > /tmp/aliyun-sources.list && \
+    apt-get -o Dir::Etc::SourceList=/tmp/aliyun-sources.list update && \
+    apt-get -o Dir::Etc::SourceList=/tmp/aliyun-sources.list install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* /tmp/aliyun-sources.list
+
+# Install mineru latest
+RUN python3 -m pip install "mineru[core]>=2.7.2" \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/builds/mineru/kxpu.Dockerfile b/builds/mineru/kxpu.Dockerfile
new file mode 100644
index 0000000..abbccee
--- /dev/null
+++ b/builds/mineru/kxpu.Dockerfile
@@ -0,0 +1,33 @@
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + Kunlun XPU.
+FROM docker.1ms.run/wjie520/vllm_kunlun:v0.10.1.1rc1
+
+
+# Install Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install mineru latest, then patch vllm_kunlun's qwen2_vl.py (first 200 lines) to use nn.GELU() instead of act_layer()
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install "mineru[api,gradio]>=2.7.6" \
+        "matplotlib>=3.10,<4" \
+        "ultralytics>=8.3.48,<9" \
+        "doclayout_yolo==0.0.4" \
+        "ftfy>=6.3.1,<7" \
+        "shapely>=2.0.7,<3" \
+        "pyclipper>=1.3.0,<2" \
+        "omegaconf>=2.3.0,<3" \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    sed -i '1,200{s/self\.act = act_layer()/self.act = nn.GELU()/;t;b};' /root/miniconda/envs/vllm_kunlun_0.10.1.1/lib/python3.10/site-packages/vllm_kunlun/models/qwen2_vl.py && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/builds/mineru/maca.Dockerfile b/builds/mineru/maca.Dockerfile
new file mode 100644
index 0000000..0bbd81a
--- /dev/null
+++ b/builds/mineru/maca.Dockerfile
@@ -0,0 +1,34 @@
+# The base image provides either the vLLM or the LMDeploy inference environment; choose one as needed. Requires amd64(x86-64) CPU + metax GPU.
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + metax GPU.
+FROM cr.metax-tech.com/public-ai-release/maca/vllm:maca.ai3.1.0.7-torch2.6-py310-ubuntu22.04-amd64
+# Base image containing the LMDeploy inference environment, requiring amd64(x86-64) CPU + metax GPU.
+# FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/maca:maca.ai3.1.0.7-torch2.6-py310-ubuntu22.04-lmdeploy0.10.2-amd64
+
+# Install libgl for opencv support & Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Rewrite the torchvision dist-info version (0.15.1 -> 0.21.0) to be compatible with torch 2.6
+RUN sed -i '3s/^Version: 0.15.1+metax3\.1\.0\.4$/Version: 0.21.0+metax3.1.0.4/' /opt/conda/lib/python3.10/site-packages/torchvision-0.15.1+metax3.1.0.4.dist-info/METADATA && \
+    mv /opt/conda/lib/python3.10/site-packages/torchvision-0.15.1+metax3.1.0.4.dist-info /opt/conda/lib/python3.10/site-packages/torchvision-0.21.0+metax3.1.0.4.dist-info
+
+# Install mineru latest
+RUN /opt/conda/bin/python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    /opt/conda/bin/python3 -m pip install 'mineru[core]>=2.6.5' \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    /opt/conda/bin/python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "/opt/conda/bin/mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/builds/mineru/mlu.Dockerfile b/builds/mineru/mlu.Dockerfile
new file mode 100644
index 0000000..51c3f59
--- /dev/null
+++ b/builds/mineru/mlu.Dockerfile
@@ -0,0 +1,42 @@
+# The base image provides either vLLM or LMDeploy; choose one as needed. Requires amd64(x86-64) CPU + Cambricon MLU.
+# Base image containing the LMDEPLOY inference environment, requiring amd64(x86-64) CPU + Cambricon MLU.
+FROM crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/camb:qwen2.5_vl
+ARG BACKEND=lmdeploy
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + Cambricon MLU.
+# FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/mlu:vllm0.8.3-torch2.6.0-torchmlu1.26.1-ubuntu22.04-py310
+# ARG BACKEND=vllm
+
+# Install Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install mineru latest (the vllm backend first activates the /torch/venv3/pytorch_infer venv; one extra pin depends on BACKEND)
+RUN /bin/bash -c '\
+    if [ "$BACKEND" = "vllm" ]; then \
+        source /torch/venv3/pytorch_infer/bin/activate; \
+    fi && \
+    python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install "mineru[core]>=2.7.4" \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install $(if [ "$BACKEND" = "lmdeploy" ]; then echo "accelerate==1.2.0"; else echo "transformers==4.50.3"; fi) && \
+    python3 -m pip cache purge'
+
+# Download models and update the configuration file
+RUN /bin/bash -c '\
+    if [ "$BACKEND" = "vllm" ]; then \
+        source /torch/venv3/pytorch_infer/bin/activate; \
+    fi && \
+    mineru-models-download -s modelscope -m all'
+
+WORKDIR /workspace
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/builds/mineru/musa.Dockerfile b/builds/mineru/musa.Dockerfile
new file mode 100644
index 0000000..aad10be
--- /dev/null
+++ b/builds/mineru/musa.Dockerfile
@@ -0,0 +1,38 @@
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + MooreThreads GPU.
+FROM registry.mthreads.com/mcconline/vllm-musa-qy2-py310:v0.8.4-release
+
+
+# Install libgl for opencv support & Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Build torchvision v0.20.0 from source (in a subshell; the clone is removed in the same layer), then install mineru latest
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    git clone https://gitcode.com/gh_mirrors/vi/vision.git -b v0.20.0 --depth 1 && \
+    (cd vision && python3 setup.py install) && \
+    rm -rf vision && \
+    python3 -m pip install "mineru[api,gradio]>=2.7.2" \
+        "matplotlib>=3.10,<4" \
+        "ultralytics>=8.3.48,<9" \
+        "doclayout_yolo==0.0.4" \
+        "ftfy>=6.3.1,<7" \
+        "shapely>=2.0.7,<3" \
+        "pyclipper>=1.3.0,<2" \
+        "omegaconf>=2.3.0,<3" \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/builds/mineru/npu.Dockerfile b/builds/mineru/npu.Dockerfile
new file mode 100644
index 0000000..5759331
--- /dev/null
+++ b/builds/mineru/npu.Dockerfile
@@ -0,0 +1,32 @@
+# The base image provides either vLLM or LMDeploy; choose one as needed. Requires ARM(AArch64) CPU + Ascend NPU.
+# Base image containing the vLLM inference environment, requiring ARM(AArch64) CPU + Ascend NPU.
+FROM quay.m.daocloud.io/ascend/vllm-ascend:v0.11.0
+# Base image containing the LMDeploy inference environment, requiring ARM(AArch64) CPU + Ascend NPU.
+# FROM crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ascend:mineru-a2
+
+
+# Install libgl for opencv support & Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 \
+        libglib2.0-0 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install mineru latest
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install 'mineru[core]>=2.6.5' \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN TORCH_DEVICE_BACKEND_AUTOLOAD=0 /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/builds/mineru/ppu.Dockerfile b/builds/mineru/ppu.Dockerfile
new file mode 100644
index 0000000..d344d59
--- /dev/null
+++ b/builds/mineru/ppu.Dockerfile
@@ -0,0 +1,30 @@
+# The base image provides either the vLLM or the LMDeploy inference environment; choose one as needed. Requires amd64(x86-64) CPU + t-head PPU.
+# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + t-head PPU.
+FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/ppu:ppu-pytorch2.6.0-ubuntu24.04-cuda12.6-vllm0.8.5-py312
+# Base image containing the LMDeploy inference environment, requiring amd64(x86-64) CPU + t-head PPU.
+# FROM crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ppu:mineru-ppu
+
+# Install libgl for opencv support & Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install mineru latest
+RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip install 'mineru[core]>=2.6.5' \
+        numpy==1.26.4 \
+        opencv-python==4.11.0.86 \
+        -i https://mirrors.aliyun.com/pypi/simple && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]