feat: add mineru-v2

This commit is contained in:
Sun-ZhenXing
2025-09-21 16:57:19 +08:00
parent 92fd82cbb3
commit 166f0f2a50
8 changed files with 165 additions and 14 deletions

View File

@@ -4,9 +4,10 @@ Compose Anything helps users quickly deploy various services by providing a set
## Supported Services
| Service | Version |
| -------------------- | ------- |
| [Redis](./src/redis) | 8.2.1 |
| Service | Version |
| ---------------------------- | ------- |
| [Redis](./src/redis) | 8.2.1 |
| [MinerU v2](./src/mineru-v2) | 2.5.3 |
## Guidelines

View File

@@ -4,9 +4,10 @@ Compose Anything 通过提供一组高质量的 Docker Compose 配置文件,
## 已经支持的服务
| 服务 | 版本 |
| -------------------- | ----- |
| [Redis](./src/redis) | 8.2.1 |
| 服务 | 版本 |
| ---------------------------- | ----- |
| [Redis](./src/redis) | 8.2.1 |
| [MinerU v2](./src/mineru-v2) | 2.5.3 |
## 规范

27
src/mineru-v2/Dockerfile Normal file
View File

@@ -0,0 +1,27 @@
# Base image for GPUs with Ampere architecture or newer (Compute Capability >= 8.0).
# Look up a GPU's Compute Capability at https://developer.nvidia.com/cuda-gpus
FROM vllm/vllm-openai:v0.10.1.1

# For GPUs with Turing architecture or older (Compute Capability < 8.0),
# swap the FROM line above for this one:
# FROM vllm/vllm-openai:v0.10.2

# System packages: libgl1 for OpenCV support and Noto fonts for Chinese characters.
# The font cache is rebuilt and the apt metadata removed in the same layer.
RUN apt-get update \
    && apt-get install -y \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        libgl1 \
    && fc-cache -fv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install the newest mineru release (no version pin), then empty the pip cache.
RUN python3 -m pip install -U 'mineru[core]' --break-system-packages \
    && python3 -m pip cache purge

# Download all models from Hugging Face and update the configuration file.
RUN ["/bin/bash", "-c", "mineru-models-download -s huggingface -m all"]

# Entry point: export MINERU_MODEL_SOURCE=local, then exec whatever command
# line the container was started with.
ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]

28
src/mineru-v2/README.md Normal file
View File

@@ -0,0 +1,28 @@
# MinerU v2
[Reference Documentation](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/).
VLM backend server:
```bash
docker compose --profile vllm-server up -d
```
Document parsing API:
```bash
docker compose --profile api up -d
```
Gradio WebUI:
```bash
docker compose --profile gradio up -d
```
Test the vLLM backend (the `vllm-server` profile must be running):
```bash
pip install mineru
mineru -p demo.pdf -o ./output -b vlm-http-client -u http://localhost:30000
```

View File

@@ -0,0 +1,94 @@
# Baseline settings merged into the service definitions of this file.
x-default: &default
  restart: unless-stopped
  volumes:
    # Read-only bind mounts of the host's local time and time zone files.
    - &localtime '/etc/localtime:/etc/localtime:ro'
    - &timezone '/etc/timezone:/etc/timezone:ro'
  logging:
    driver: 'json-file'
    options:
      # Size cap handed to the json-file logging driver.
      max-size: '100m'
# Shared definition for the MinerU containers: image, model source,
# process limits and the CPU / memory / GPU resources they may use.
x-mineru-vllm: &mineru-vllm
  <<: *default
  # Override the image with MINERU_DOCKER_IMAGE if required.
  image: "${MINERU_DOCKER_IMAGE:-alexsuntop/mineru:2.5.3}"
  environment:
    MINERU_MODEL_SOURCE: 'local'
  ulimits:
    # -1 removes the locked-memory limit.
    memlock: -1
    # 67108864 bytes = 64 MiB of stack.
    stack: 67108864
  ipc: host
  deploy:
    resources:
      limits:
        cpus: '8.0'
        memory: '4G'
      reservations:
        cpus: '2.0'
        memory: '2G'
        devices:
          # Reserve the NVIDIA GPU with device id 0.
          - driver: nvidia
            device_ids:
              - '0'
            capabilities:
              - gpu
# Three entry points built from the shared `mineru-vllm` definition. Each one
# is started through its own Compose profile (vllm-server, api, gradio).
#
# NOTE on `command`: in list form, Compose passes every list item to the
# entrypoint as exactly one argv entry, without shell word-splitting. A flag
# and its value therefore have to be two separate items; `- --host 0.0.0.0`
# would reach the program as the single token "--host 0.0.0.0".
services:
  # VLM backend server, published on host port MINERU_PORT_OVERRIDE_30000 (default 30000).
  mineru-vllm-server:
    <<: *mineru-vllm
    container_name: mineru-vllm-server
    profiles: ["vllm-server"]
    ports:
      - "${MINERU_PORT_OVERRIDE_30000:-30000}:30000"
    entrypoint: mineru-vllm-server
    command:
      - '--host'
      - '0.0.0.0'
      - '--port'
      - '30000'
      # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode.
      # - '--data-parallel-size'
      # - '2'
      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter,
      # if VRAM issues persist, try lowering it further to `0.4` or below.
      # - '--gpu-memory-utilization'
      # - '0.5'
    healthcheck:
      # Healthy once the server's /health endpoint answers successfully.
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]

  # Document parsing API, published on host port MINERU_PORT_OVERRIDE_API (default 8000).
  mineru-api:
    <<: *mineru-vllm
    container_name: mineru-api
    profiles: ["api"]
    ports:
      - "${MINERU_PORT_OVERRIDE_API:-8000}:8000"
    entrypoint: mineru-api
    command:
      - '--host'
      - '0.0.0.0'
      - '--port'
      - '8000'
      # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode.
      # - '--data-parallel-size'
      # - '2'
      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter,
      # if VRAM issues persist, try lowering it further to `0.4` or below.
      # - '--gpu-memory-utilization'
      # - '0.5'

  # Gradio WebUI, published on host port MINERU_PORT_OVERRIDE_GRADIO (default 7860).
  mineru-gradio:
    <<: *mineru-vllm
    container_name: mineru-gradio
    profiles: ["gradio"]
    ports:
      - "${MINERU_PORT_OVERRIDE_GRADIO:-7860}:7860"
    entrypoint: mineru-gradio
    command:
      - '--server-name'
      - '0.0.0.0'
      - '--server-port'
      - '7860'
      # Enable the vllm engine for Gradio
      - '--enable-vllm-engine'
      - 'true'
      # If you want to disable the API, set this to false
      # - '--enable-api'
      # - 'false'
      # If you want to limit the number of pages for conversion, set this to a specific number
      # - '--max-convert-pages'
      # - '20'
      # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode.
      # - '--data-parallel-size'
      # - '2'
      # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter,
      # if VRAM issues persist, try lowering it further to `0.4` or below.
      # - '--gpu-memory-utilization'
      # - '0.5'

View File

@@ -8,4 +8,4 @@ SKIP_FIX_PERMS=1
REDIS_PASSWORD="passw0rd"
# Port to bind to on the host machine
REDIS_PORT_OVERRIDE_6379=16379
REDIS_PORT_OVERRIDE=16379

View File

@@ -2,11 +2,11 @@
## Environment Variables
| Variable Name | Description | Default Value |
| ------------------------ | -------------------------------------------------------- | -------------------- |
| REDIS_VERSION | Redis image version | `"8.2.1-alpine3.22"` |
| SKIP_FIX_PERMS | Skip permission fixing, set to 1 to skip | `""` |
| REDIS_PASSWORD | Password for the default "default" user | `""` |
| REDIS_PORT_OVERRIDE_6379 | Host port mapping (maps to Redis port 6379 in container) | 6379 |
| Variable Name | Description | Default Value |
| ------------------- | -------------------------------------------------------- | -------------------- |
| REDIS_VERSION | Redis image version | `"8.2.1-alpine3.22"` |
| SKIP_FIX_PERMS | Skip permission fixing, set to 1 to skip | `""` |
| REDIS_PASSWORD | Password for the default "default" user | `""` |
| REDIS_PORT_OVERRIDE | Host port mapping (maps to Redis port 6379 in container) | `6379` |
Please modify the `.env` file as needed for your use case.

View File

@@ -14,7 +14,7 @@ services:
image: redis:${REDIS_VERSION:-8.2.1-alpine3.22}
container_name: redis
ports:
- "${REDIS_PORT_OVERRIDE_6379:-6379}:6379"
- "${REDIS_PORT_OVERRIDE:-6379}:6379"
volumes:
- *localtime
- *timezone