feat: add mineru-v2

2025-09-21 16:57:19 +08:00
parent 92fd82cbb3
commit 166f0f2a50
8 changed files with 165 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -4,9 +4,10 @@ Compose Anything helps users quickly deploy various services by providing a set
 ## Supported Services
-| Service              | Version |
+| Service                      | Version |
-| -------------------- | ------- |
+| ---------------------------- | ------- |
-| [Redis](./src/redis) | 8.2.1   |
+| [Redis](./src/redis)         | 8.2.1   |
 | [MinerU v2](./src/mineru-v2) | 2.5.3   |
 ## Guidelines
--- a/README.zh.md
+++ b/README.zh.md
@@ -4,9 +4,10 @@ Compose Anything 通过提供一组高质量的 Docker Compose 配置文件，
 ## 已经支持的服务
-| 服务                 | 版本  |
+| 服务                         | 版本  |
-| -------------------- | ----- |
+| ---------------------------- | ----- |
-| [Redis](./src/redis) | 8.2.1 |
+| [Redis](./src/redis)         | 8.2.1 |
 | [MinerU v2](./src/mineru-v2) | 2.5.3 |
 ## 规范
--- a/src/mineru-v2/Dockerfile
+++ b/src/mineru-v2/Dockerfile
@@ -0,0 +1,27 @@
 # Use the official vllm image for gpu with Ampere architecture and above (Compute Capability>=8.0)
 # Compute Capability version query (https://developer.nvidia.com/cuda-gpus)
 FROM vllm/vllm-openai:v0.10.1.1
 # Use the official vllm image for gpu with Turing architecture and below (Compute Capability<8.0)
 # FROM vllm/vllm-openai:v0.10.2
 # Install libgl for opencv support & Noto fonts for Chinese characters.
 # --no-install-recommends keeps optional "recommended" packages out of the image.
 # The font cache is rebuilt and the apt lists are removed in the same layer so that
 # no package-index data is left behind in the image.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        fonts-noto-core \
        fonts-noto-cjk \
        fontconfig \
        libgl1 && \
    fc-cache -fv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
 # Install mineru latest.
 # The version is not pinned, so every rebuild installs whatever is newest on PyPI.
 # The pip cache is purged in the same layer to keep the image small.
 RUN python3 -m pip install -U 'mineru[core]' --break-system-packages && \
    python3 -m pip cache purge
 # Download all models from Hugging Face at build time
 # (per the original note, this step also updates the configuration file).
 RUN /bin/bash -c "mineru-models-download -s huggingface -m all"
 # Entry point wrapper: export MINERU_MODEL_SOURCE=local, then exec the command passed to
 # the container. No virtual environment is involved. `exec` replaces the shell with the
 # command, and the trailing "--" fills $0 so that "$@" holds exactly the container arguments.
 # NOTE: overriding the entrypoint (e.g. in Compose) bypasses this export, so the variable
 # must then be set through the container environment instead.
 ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
--- a/src/mineru-v2/README.md
+++ b/src/mineru-v2/README.md
@@ -0,0 +1,28 @@
 # MinerU v2
 [Reference Documentation](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/).
 VLM backend server:
 ```bash
 docker compose --profile vllm-server up -d
 ```
 Document parse API:
 ```bash
 docker compose --profile api up -d
 ```
 Gradio WebUI:
 ```bash
 docker compose --profile gradio up -d
 ```
 Test the vLLM backend (start the `vllm-server` profile first; it is published on port 30000 by default):
 ```bash
 pip install mineru
 mineru -p demo.pdf -o ./output -b vlm-http-client -u http://localhost:30000
 ```
--- a/src/mineru-v2/docker-compose.yaml
+++ b/src/mineru-v2/docker-compose.yaml
@@ -0,0 +1,94 @@
 # Shared service defaults; other mappings pull these in with `<<: *default`.
 x-default: &default
  # Use the json-file log driver with max-size set to 100m.
  logging:
    options:
      max-size: "100m"
    driver: "json-file"
  # Read-only bind mounts of the host's localtime and timezone files.
  # Each entry is anchored individually so it can be aliased on its own.
  volumes:
    - &localtime "/etc/localtime:/etc/localtime:ro"
    - &timezone "/etc/timezone:/etc/timezone:ro"
  restart: "unless-stopped"
 # Settings shared by all MinerU services: image, model source, ulimits,
 # host IPC and CPU/memory/GPU resources. Builds on the `default` anchor.
 x-mineru-vllm: &mineru-vllm
  <<: *default
  # Image can be swapped via MINERU_DOCKER_IMAGE; falls back to the 2.5.3 tag.
  image: "${MINERU_DOCKER_IMAGE:-alexsuntop/mineru:2.5.3}"
  ipc: host
  environment:
    MINERU_MODEL_SOURCE: "local"
  ulimits:
    stack: 67108864
    memlock: -1
  deploy:
    resources:
      reservations:
        # Reserve the NVIDIA GPU with device id "0" for the container.
        devices:
          - capabilities:
              - gpu
            device_ids:
              - "0"
            driver: nvidia
        memory: 2G
        cpus: "2.0"
      limits:
        memory: 4G
        cpus: "8.0"
 services:
  # VLM backend server, enabled with `--profile vllm-server`.
  mineru-vllm-server:
    <<: *mineru-vllm
    container_name: mineru-vllm-server
    profiles: ["vllm-server"]
    ports:
      # Host port is configurable; the container side is fixed at 30000.
      - "${MINERU_PORT_OVERRIDE_30000:-30000}:30000"
    entrypoint: mineru-vllm-server
    # A list-form command is handed to the entrypoint as-is, one argv entry per item
    # (no shell word-splitting). A flag and its value must therefore be separate items,
    # otherwise the program receives a single token such as "--host 0.0.0.0".
    # Values are quoted so YAML keeps them as strings.
    command:
      - --host
      - "0.0.0.0"
      - --port
      - "30000"
      # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode.
      # - --data-parallel-size
      # - "2"
      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter,
      # if VRAM issues persist, try lowering it further to `0.4` or below.
      # - --gpu-memory-utilization
      # - "0.5"
    # Healthy when the server's /health endpoint answers successfully (requires curl in the image).
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
  # Document parse API, enabled with `--profile api`.
  mineru-api:
    <<: *mineru-vllm
    container_name: mineru-api
    profiles: ["api"]
    ports:
      # Host port is configurable; the container side is fixed at 8000.
      - "${MINERU_PORT_OVERRIDE_API:-8000}:8000"
    entrypoint: mineru-api
    # A list-form command is handed to the entrypoint as-is, one argv entry per item
    # (no shell word-splitting). A flag and its value must therefore be separate items,
    # otherwise the program receives a single token such as "--host 0.0.0.0".
    # Values are quoted so YAML keeps them as strings.
    command:
      - --host
      - "0.0.0.0"
      - --port
      - "8000"
      # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode.
      # - --data-parallel-size
      # - "2"
      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter,
      # if VRAM issues persist, try lowering it further to `0.4` or below.
      # - --gpu-memory-utilization
      # - "0.5"
  # Gradio WebUI, enabled with `--profile gradio`.
  mineru-gradio:
    <<: *mineru-vllm
    container_name: mineru-gradio
    profiles: ["gradio"]
    ports:
      # Host port is configurable; the container side is fixed at 7860.
      - "${MINERU_PORT_OVERRIDE_GRADIO:-7860}:7860"
    entrypoint: mineru-gradio
    # A list-form command is handed to the entrypoint as-is, one argv entry per item
    # (no shell word-splitting). A flag and its value must therefore be separate items,
    # otherwise the program receives a single token such as "--server-port 7860".
    # Values are quoted so YAML keeps them as strings (an unquoted `true` would become a boolean).
    command:
      - --server-name
      - "0.0.0.0"
      - --server-port
      - "7860"
      # Enable the vllm engine for Gradio
      - --enable-vllm-engine
      - "true"
      # If you want to disable the API, set this to false
      # - --enable-api
      # - "false"
      # If you want to limit the number of pages for conversion, set this to a specific number
      # - --max-convert-pages
      # - "20"
      # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode.
      # - --data-parallel-size
      # - "2"
      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter,
      # if VRAM issues persist, try lowering it further to `0.4` or below.
      # - --gpu-memory-utilization
      # - "0.5"
--- a/src/redis/.env.example
+++ b/src/redis/.env.example
@@ -8,4 +8,4 @@ SKIP_FIX_PERMS=1
 REDIS_PASSWORD="passw0rd"
 # Port to bind to on the host machine
-REDIS_PORT_OVERRIDE_6379=16379
+REDIS_PORT_OVERRIDE=16379
--- a/src/redis/README.md
+++ b/src/redis/README.md
@@ -2,11 +2,11 @@
 ## Environment Variables
-| Variable Name            | Description                                              | Default Value        |
+| Variable Name       | Description                                              | Default Value        |
-| ------------------------ | -------------------------------------------------------- | -------------------- |
+| ------------------- | -------------------------------------------------------- | -------------------- |
-| REDIS_VERSION            | Redis image version                                      | `"8.2.1-alpine3.22"` |
+| REDIS_VERSION       | Redis image version                                      | `"8.2.1-alpine3.22"` |
-| SKIP_FIX_PERMS           | Skip permission fixing, set to 1 to skip                 | `""`                 |
+| SKIP_FIX_PERMS      | Skip permission fixing, set to 1 to skip                 | `""`                 |
-| REDIS_PASSWORD           | Password for the default "default" user                  | `""`                 |
+| REDIS_PASSWORD      | Password for the default "default" user                  | `""`                 |
-| REDIS_PORT_OVERRIDE_6379 | Host port mapping (maps to Redis port 6379 in container) | 6379                 |
+| REDIS_PORT_OVERRIDE | Host port mapping (maps to Redis port 6379 in container) | 6379                 |
 Please modify the `.env` file as needed for your use case.
--- a/src/redis/docker-compose.yaml
+++ b/src/redis/docker-compose.yaml
@@ -14,7 +14,7 @@ services:
    image: redis:${REDIS_VERSION:-8.2.1-alpine3.22}
    container_name: redis
    ports:
-      - "${REDIS_PORT_OVERRIDE_6379:-6379}:6379"
+      - "${REDIS_PORT_OVERRIDE:-6379}:6379"
    volumes:
      - *localtime
      - *timezone