feat: add mineru-v2

2025-09-21 16:57:19 +08:00
parent 92fd82cbb3
commit 166f0f2a50
8 changed files with 165 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -4,9 +4,10 @@ Compose Anything helps users quickly deploy various services by providing a set

 ## Supported Services

-| Service              | Version |
-| -------------------- | ------- |
-| [Redis](./src/redis) | 8.2.1   |
+| Service                      | Version |
+| ---------------------------- | ------- |
+| [Redis](./src/redis)         | 8.2.1   |
+| [MinerU v2](./src/mineru-v2) | 2.5.3   |

 ## Guidelines

--- a/README.zh.md
+++ b/README.zh.md
@@ -4,9 +4,10 @@ Compose Anything 通过提供一组高质量的 Docker Compose 配置文件，

 ## 已经支持的服务

-| 服务                 | 版本  |
-| -------------------- | ----- |
-| [Redis](./src/redis) | 8.2.1 |
+| 服务                         | 版本  |
+| ---------------------------- | ----- |
+| [Redis](./src/redis)         | 8.2.1 |
+| [MinerU v2](./src/mineru-v2) | 2.5.3 |

 ## 规范

--- a/src/mineru-v2/Dockerfile
+++ b/src/mineru-v2/Dockerfile
@@ -0,0 +1,27 @@
+# Use the official vllm image for gpu with Ampere architecture and above (Compute Capability>=8.0)
+# Compute Capability version query (https://developer.nvidia.com/cuda-gpus)
+FROM vllm/vllm-openai:v0.10.1.1
+
+# Use the official vllm image for gpu with Turing architecture and below (Compute Capability<8.0)
+# FROM vllm/vllm-openai:v0.10.2
+
+# Install libgl for opencv support & Noto fonts for Chinese characters
+RUN apt-get update && \
+    apt-get install -y \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install MinerU pinned to 2.5.3 (the version in the README and the default compose image tag) so rebuilds are reproducible
+RUN python3 -m pip install -U 'mineru[core]==2.5.3' --break-system-packages && \
+    python3 -m pip cache purge
+
+# Download models and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s huggingface -m all"
+
+# Entry point: export MINERU_MODEL_SOURCE=local, then exec the supplied command (no virtual environment is activated)
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
--- a/src/mineru-v2/README.md
+++ b/src/mineru-v2/README.md
@@ -0,0 +1,28 @@
+# MinerU v2
+
+[Reference Documentation](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/).
+
+VLM backend server:
+
+```bash
+docker compose --profile vllm-server up -d
+```
+
+Document parse API:
+
+```bash
+docker compose --profile api up -d
+```
+
+Gradio WebUI:
+
+```bash
+docker compose --profile gradio up -d
+```
+
+Test the vLLM backend (start the `vllm-server` profile first; the client below connects to it on port 30000):
+
+```bash
+pip install mineru
+mineru -p demo.pdf -o ./output -b vlm-http-client -u http://localhost:30000
+```
--- a/src/mineru-v2/docker-compose.yaml
+++ b/src/mineru-v2/docker-compose.yaml
@@ -0,0 +1,94 @@
+x-default: &default  # Baseline settings pulled into other fragments with `<<: *default`.
+  logging:
+    driver: 'json-file'
+    options:
+      max-size: '100m'  # Passed to the json-file logging driver.
+  restart: 'unless-stopped'
+  volumes:  # Host time files mounted read-only; each entry has its own anchor.
+    - &localtime '/etc/localtime:/etc/localtime:ro'
+    - &timezone '/etc/timezone:/etc/timezone:ro'
+
+x-mineru-vllm: &mineru-vllm  # Shared service template for the MinerU services.
+  <<: *default
+  image: '${MINERU_DOCKER_IMAGE:-alexsuntop/mineru:2.5.3}'  # MINERU_DOCKER_IMAGE overrides the default tag.
+  ipc: 'host'
+  environment:
+    MINERU_MODEL_SOURCE: 'local'
+  ulimits:
+    stack: 67108864  # 64 * 1024 * 1024
+    memlock: -1
+  deploy:
+    resources:
+      reservations:
+        cpus: '2.0'
+        memory: '2G'
+        devices:
+          - driver: 'nvidia'
+            capabilities: ['gpu']
+            device_ids: ['0']  # Change to target a different GPU.
+      limits:
+        cpus: '8.0'
+        memory: '4G'  # NOTE(review): confirm this is enough host RAM for the vLLM engine.
+
+services:
+  mineru-vllm-server:
+    <<: *mineru-vllm
+    container_name: mineru-vllm-server
+    profiles: ["vllm-server"]
+    ports:
+      - "${MINERU_PORT_OVERRIDE_30000:-30000}:30000"
+    entrypoint: mineru-vllm-server
+    # List-form command: every item becomes exactly one argument, so each flag and its value are separate items.
+    command:
+      - --host
+      - "0.0.0.0"
+      - --port
+      - "30000"
+      # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode:
+      # append the two items `--data-parallel-size` and "2".
+      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by appending the two
+      # items `--gpu-memory-utilization` and "0.5"; if VRAM issues persist, try lowering it to "0.4" or below.
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
+
+
+  mineru-api:
+    <<: *mineru-vllm
+    container_name: mineru-api
+    profiles: ["api"]
+    ports:
+      - "${MINERU_PORT_OVERRIDE_API:-8000}:8000"
+    entrypoint: mineru-api
+    # List-form command: every item becomes exactly one argument, so each flag and its value are separate items.
+    command:
+      - --host
+      - "0.0.0.0"
+      - --port
+      - "8000"
+      # If using multiple GPUs, increase throughput by appending the two items `--data-parallel-size` and "2".
+      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by appending the two
+      # items `--gpu-memory-utilization` and "0.5"; if VRAM issues persist, try lowering it to "0.4" or below.
+
+  mineru-gradio:
+    <<: *mineru-vllm
+    container_name: mineru-gradio
+    profiles: ["gradio"]
+    ports:
+      - "${MINERU_PORT_OVERRIDE_GRADIO:-7860}:7860"
+    entrypoint: mineru-gradio
+    # List-form command: every item becomes exactly one argument, so each flag and its value are separate items.
+    command:
+      - --server-name
+      - "0.0.0.0"
+      - --server-port
+      - "7860"
+      # Enable the vllm engine for Gradio
+      - --enable-vllm-engine
+      - "true"
+      # If you want to disable the API, append the two items `--enable-api` and "false".
+      # If you want to limit the number of pages for conversion, append `--max-convert-pages` and e.g. "20".
+      # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode:
+      # append the two items `--data-parallel-size` and "2".
+      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by
+      # appending the two items `--gpu-memory-utilization` and "0.5";
+      # if VRAM issues persist, try lowering it further to "0.4" or below.
--- a/src/redis/.env.example
+++ b/src/redis/.env.example
@@ -8,4 +8,4 @@ SKIP_FIX_PERMS=1
 REDIS_PASSWORD="passw0rd"

 # Port to bind to on the host machine
-REDIS_PORT_OVERRIDE_6379=16379
+REDIS_PORT_OVERRIDE=16379
--- a/src/redis/README.md
+++ b/src/redis/README.md
@@ -2,11 +2,11 @@

 ## Environment Variables

-| Variable Name            | Description                                              | Default Value        |
-| ------------------------ | -------------------------------------------------------- | -------------------- |
-| REDIS_VERSION            | Redis image version                                      | `"8.2.1-alpine3.22"` |
-| SKIP_FIX_PERMS           | Skip permission fixing, set to 1 to skip                 | `""`                 |
-| REDIS_PASSWORD           | Password for the default "default" user                  | `""`                 |
-| REDIS_PORT_OVERRIDE_6379 | Host port mapping (maps to Redis port 6379 in container) | 6379                 |
+| Variable Name       | Description                                              | Default Value        |
+| ------------------- | -------------------------------------------------------- | -------------------- |
+| REDIS_VERSION       | Redis image version                                      | `"8.2.1-alpine3.22"` |
+| SKIP_FIX_PERMS      | Skip permission fixing, set to 1 to skip                 | `""`                 |
+| REDIS_PASSWORD      | Password for the default "default" user                  | `""`                 |
+| REDIS_PORT_OVERRIDE | Host port mapping (maps to Redis port 6379 in container) | 6379                 |

 Please modify the `.env` file as needed for your use case.
--- a/src/redis/docker-compose.yaml
+++ b/src/redis/docker-compose.yaml
@@ -14,7 +14,7 @@ services:
    image: redis:${REDIS_VERSION:-8.2.1-alpine3.22}
    container_name: redis
    ports:
-      - "${REDIS_PORT_OVERRIDE_6379:-6379}:6379"
+      - "${REDIS_PORT_OVERRIDE:-6379}:6379"
    volumes:
      - *localtime
      - *timezone