From 84e8b85990b51e6f4e30d62a22b19c9d349354a5 Mon Sep 17 00:00:00 2001 From: Sun-ZhenXing <1006925066@qq.com> Date: Wed, 22 Oct 2025 10:05:17 +0800 Subject: [PATCH] feat: add easy-dataset and mongodb-replicaset-single --- .../instructions/Guidelines.instructions.md | 2 + src/docker-registry/docker-compose.yaml | 10 +- src/easy-dataset/.env.example | 13 ++ src/easy-dataset/README.md | 161 ++++++++++++++++++ src/easy-dataset/README.zh.md | 145 ++++++++++++++++ src/easy-dataset/docker-compose.yaml | 40 +++++ src/gitea/docker-compose.yaml | 5 +- src/mongodb-replicaset-single/.env.example | 21 +++ src/mongodb-replicaset-single/.gitignore | 1 + src/mongodb-replicaset-single/README.md | 77 +++++++++ src/mongodb-replicaset-single/README.zh.md | 77 +++++++++ .../docker-compose.yaml | 105 ++++++++++++ src/mongodb-replicaset/README.md | 24 +-- src/mongodb-replicaset/README.zh.md | 24 +-- src/mongodb-replicaset/docker-compose.yaml | 70 ++++++++ 15 files changed, 736 insertions(+), 39 deletions(-) create mode 100644 src/easy-dataset/.env.example create mode 100644 src/easy-dataset/README.md create mode 100644 src/easy-dataset/README.zh.md create mode 100644 src/easy-dataset/docker-compose.yaml create mode 100644 src/mongodb-replicaset-single/.env.example create mode 100644 src/mongodb-replicaset-single/.gitignore create mode 100644 src/mongodb-replicaset-single/README.md create mode 100644 src/mongodb-replicaset-single/README.zh.md create mode 100644 src/mongodb-replicaset-single/docker-compose.yaml diff --git a/.github/instructions/Guidelines.instructions.md b/.github/instructions/Guidelines.instructions.md index 9c1fc17..0b59518 100644 --- a/.github/instructions/Guidelines.instructions.md +++ b/.github/instructions/Guidelines.instructions.md @@ -50,3 +50,5 @@ Compose Anything helps users quickly deploy various services by providing a set Reference template: `.compose-template.yaml` in the repo root. 
If you want to find image tags, try fetch url like `https://hub.docker.com/v2/repositories/library/nginx/tags?page_size=1&ordering=last_updated`. + +注意:所有中文的文档都使用中文的标点符号,如 “,”、“()” 等,中文和英文之间要留有空格。 diff --git a/src/docker-registry/docker-compose.yaml b/src/docker-registry/docker-compose.yaml index 75fafe2..b7bb53c 100644 --- a/src/docker-registry/docker-compose.yaml +++ b/src/docker-registry/docker-compose.yaml @@ -16,11 +16,11 @@ services: - registry:/var/lib/registry environment: TZ: ${TZ:-UTC} - REGISTRY_AUTH: ${REGISTRY_AUTH:-htpasswd} - REGISTRY_AUTH_HTPASSWD_REALM: ${REGISTRY_AUTH_HTPASSWD_REALM:-Registry Realm} - REGISTRY_AUTH_HTPASSWD_PATH: ${REGISTRY_AUTH_HTPASSWD_PATH:-/certs/passwd} - REGISTRY_HTTP_TLS_CERTIFICATE: ${REGISTRY_HTTP_TLS_CERTIFICATE} - REGISTRY_HTTP_TLS_KEY: ${REGISTRY_HTTP_TLS_KEY} + REGISTRY_AUTH: ${REGISTRY_AUTH:-} + REGISTRY_AUTH_HTPASSWD_REALM: ${REGISTRY_AUTH_HTPASSWD_REALM:-} + REGISTRY_AUTH_HTPASSWD_PATH: ${REGISTRY_AUTH_HTPASSWD_PATH:-} + REGISTRY_HTTP_TLS_CERTIFICATE: ${REGISTRY_HTTP_TLS_CERTIFICATE:-} + REGISTRY_HTTP_TLS_KEY: ${REGISTRY_HTTP_TLS_KEY:-} OTEL_TRACES_EXPORTER: ${OTEL_TRACES_EXPORTER:-none} ports: - ${REGISTRY_PORT:-5000}:5000 diff --git a/src/easy-dataset/.env.example b/src/easy-dataset/.env.example new file mode 100644 index 0000000..9503ddf --- /dev/null +++ b/src/easy-dataset/.env.example @@ -0,0 +1,13 @@ +# Easy Dataset Configuration + +# Image version +# Find latest releases at: https://github.com/ConardLi/easy-dataset/releases +EASY_DATASET_VERSION=1.5.1 + +# Port configuration +# Override the host port for the web interface +EASY_DATASET_PORT_OVERRIDE=1717 + +# Timezone configuration +# Set your timezone (e.g., UTC, Asia/Shanghai, America/New_York) +TZ=UTC diff --git a/src/easy-dataset/README.md b/src/easy-dataset/README.md new file mode 100644 index 0000000..5ea3ba6 --- /dev/null +++ b/src/easy-dataset/README.md @@ -0,0 +1,161 @@ +# Easy Dataset + +[English](./README.md) | [中文](./README.zh.md) + +This service 
deploys Easy Dataset, a powerful tool for creating fine-tuning datasets for Large Language Models (LLMs). It provides an intuitive interface for uploading domain-specific files, intelligently splitting content, generating questions, and producing high-quality training data for model fine-tuning. + +## Services + +- `easy-dataset`: The main Easy Dataset application server with a built-in SQLite database. + +## Environment Variables + +| Variable Name | Description | Default Value | +| -------------------------- | ----------------------------------- | ------------- | +| EASY_DATASET_VERSION | Easy Dataset image version | `1.5.1` | +| EASY_DATASET_PORT_OVERRIDE | Host port mapping for web interface | `1717` | +| TZ | System timezone | `UTC` | + +Please create a `.env` file and modify it as needed for your use case. + +## Volumes + +- `easy_dataset_db`: A named volume for storing the SQLite database and uploaded files. +- `easy_dataset_prisma`: (Optional) A named volume for Prisma database files if needed. + +## Getting Started + +### Quick Start (Recommended) + +1. (Optional) Create a `.env` file to customize settings: + + ```env + EASY_DATASET_VERSION=1.5.1 + EASY_DATASET_PORT_OVERRIDE=1717 + TZ=Asia/Shanghai + ``` + +2. Start the service: + + ```bash + docker compose up -d + ``` + +3. Access Easy Dataset at `http://localhost:1717` + +### With Prisma Database Mount (Advanced) + +If you need to mount the Prisma database files: + +1. Initialize the database first: + + ```bash + # Clone the repository and initialize database + git clone https://github.com/ConardLi/easy-dataset.git + cd easy-dataset + npm install + npm run db:push + ``` + +2. Uncomment the Prisma volume mount, together with the matching `easy_dataset_prisma` entry under the top-level `volumes:` key, in `docker-compose.yaml`: + + ```yaml + volumes: + - easy_dataset_db:/app/local-db + - easy_dataset_prisma:/app/prisma # Uncomment this line + ``` + +3. 
Start the service: + + ```bash + docker compose up -d + ``` + +## Features + +- **Intelligent Document Processing**: Supports PDF, Markdown, DOCX, and more +- **Smart Text Splitting**: Multiple algorithms with customizable segmentation +- **Question Generation**: Automatically extracts relevant questions from text +- **Domain Labels**: Builds global domain labels with understanding capabilities +- **Answer Generation**: Uses LLM APIs to generate comprehensive answers and Chain-of-Thought (CoT) reasoning +- **Flexible Editing**: Edit questions, answers, and datasets at any stage +- **Multiple Export Formats**: Alpaca, ShareGPT, multilingual-thinking (JSON/JSONL) +- **Wide Model Support**: Compatible with all LLM APIs that follow the OpenAI format + +## Usage Workflow + +1. **Create a Project**: Set up a new project with an LLM API configuration +2. **Upload Documents**: Add your domain-specific files (PDF, Markdown, etc.) +3. **Text Splitting**: Review and adjust the automatically split text segments +4. **Generate Questions**: Batch-generate questions from text blocks +5. **Create Datasets**: Generate answers using the configured LLM +6. **Export**: Export datasets in your preferred format + +## Default Credentials + +Easy Dataset does not require authentication by default. Access control should be implemented at the infrastructure level (e.g., reverse proxy, firewall rules). + +## Resource Limits + +The service is configured with the following resource reservations and limits: + +- **CPU**: 0.5 cores reserved, up to 2.0 cores +- **Memory**: 1 GB reserved, up to 4 GB + +These limits can be adjusted in `docker-compose.yaml` based on your workload requirements. 
+ +## Security Considerations + +- **Data Privacy**: All data processing happens locally +- **API Keys**: Store LLM API keys securely within the application +- **Access Control**: Implement network-level access restrictions as needed +- **Updates**: Regularly update to the latest version for security patches + +## Documentation + +- Official Documentation: [https://docs.easy-dataset.com/](https://docs.easy-dataset.com/) +- GitHub Repository: [https://github.com/ConardLi/easy-dataset](https://github.com/ConardLi/easy-dataset) +- Video Tutorial: [Bilibili](https://www.bilibili.com/video/BV1y8QpYGE57/) +- Research Paper: [arXiv:2507.04009](https://arxiv.org/abs/2507.04009v1) + +## Troubleshooting + +### Container Won't Start + +- Check logs: `docker compose logs easy-dataset` +- Verify port 1717 is not already in use +- Ensure sufficient system resources + +### Database Issues + +- For SQLite issues, remove and recreate the volume: + + ```bash + docker compose down -v + docker compose up -d + ``` + +### Permission Errors + +- Ensure the container has write access to mounted volumes +- Check Docker volume permissions + +## License + +Easy Dataset is licensed under AGPL 3.0. See the [LICENSE](https://github.com/ConardLi/easy-dataset/blob/main/LICENSE) file for details. 
+ +## Citation + +If this work is helpful, please cite: + +```bibtex +@misc{miao2025easydataset, + title={Easy Dataset: A Unified and Extensible Framework for Synthesizing LLM Fine-Tuning Data from Unstructured Documents}, + author={Ziyang Miao and Qiyu Sun and Jingyuan Wang and Yuchen Gong and Yaowei Zheng and Shiqi Li and Richong Zhang}, + year={2025}, + eprint={2507.04009}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2507.04009} +} +``` diff --git a/src/easy-dataset/README.zh.md b/src/easy-dataset/README.zh.md new file mode 100644 index 0000000..96b1bd2 --- /dev/null +++ b/src/easy-dataset/README.zh.md @@ -0,0 +1,145 @@ +# Easy Dataset + +[English](./README.md) | [中文](./README.zh.md) + +这个服务部署 Easy Dataset,一个用于创建大语言模型(LLM)微调数据集的强大工具。它提供了直观的界面,可以上传特定领域的文件、智能分割内容、生成问题,并产生高质量的模型微调训练数据。 + +## 服务 + +- `easy-dataset`:主应用服务器,内置 SQLite 数据库。 + +## 环境变量 + +| 变量名 | 描述 | 默认值 | +| -------------------------- | ---------------------- | ------- | +| EASY_DATASET_VERSION | Easy Dataset 镜像版本 | `1.5.1` | +| EASY_DATASET_PORT_OVERRIDE | Web 界面的主机端口映射 | `1717` | +| TZ | 系统时区 | `UTC` | + +请创建 `.env` 文件并根据您的使用场景进行修改。 + +## 数据卷 + +- `easy_dataset_db`:用于存储 SQLite 数据库和上传文件的命名卷。 +- `easy_dataset_prisma`:(可选)如需要可用于 Prisma 数据库文件的命名卷。 + +## 快速开始 + +### 快速启动(推荐) + +1. (可选)创建 `.env` 文件以自定义设置: + + ```env + EASY_DATASET_VERSION=1.5.1 + EASY_DATASET_PORT_OVERRIDE=1717 + TZ=Asia/Shanghai + ``` + +2. 启动服务: + + ```bash + docker compose up -d + ``` + +3. 访问 Easy Dataset:`http://localhost:1717` + +### 使用 Prisma 数据库挂载(高级) + +如果需要挂载 Prisma 数据库文件: + +1. 首先初始化数据库: + + ```bash + # 克隆仓库并初始化数据库 + git clone https://github.com/ConardLi/easy-dataset.git + cd easy-dataset + npm install + npm run db:push + ``` + +2. 在 `docker-compose.yaml` 中取消注释 Prisma 卷挂载,以及顶层 `volumes:` 下对应的 `easy_dataset_prisma` 条目: + + ```yaml + volumes: + - easy_dataset_db:/app/local-db + - easy_dataset_prisma:/app/prisma # 取消此行注释 + ``` + +3. 
启动服务: + + ```bash + docker compose up -d + ``` + +## 功能特性 + +- **智能文档处理**:支持 PDF、Markdown、DOCX 等多种格式 +- **智能文本分割**:多种算法,可自定义分段 +- **问题生成**:从文本中自动提取相关问题 +- **领域标签**:构建全局领域标签,具有理解能力 +- **答案生成**:使用 LLM API 生成全面的答案和思维链(COT) +- **灵活编辑**:在任何阶段编辑问题、答案和数据集 +- **多种导出格式**:Alpaca、ShareGPT、multilingual-thinking(JSON/JSONL) +- **广泛的模型支持**:兼容所有遵循 OpenAI 格式的 LLM API + +## 使用流程 + +1. **创建项目**:设置新项目并配置 LLM API +2. **上传文档**:添加您的特定领域文件(PDF、Markdown 等) +3. **文本分割**:查看并调整自动分割的文本段 +4. **生成问题**:从文本块批量构造问题 +5. **创建数据集**:使用配置的 LLM 生成答案 +6. **导出**:以您喜欢的格式导出数据集 + +## 默认凭据 + +Easy Dataset 默认不需要身份验证。应在基础设施层面实现访问控制(例如反向代理、防火墙规则)。 + +## 资源限制 + +该服务配置了以下资源限制: + +- **CPU**:0.5-2.0 核心 +- **内存**:1-4 GB + +可以根据您的工作负载需求在 `docker-compose.yaml` 中调整这些限制。 + +## 安全注意事项 + +- **数据隐私**:所有数据处理都在本地进行 +- **API 密钥**:在应用程序内安全存储 LLM API 密钥 +- **访问控制**:根据需要实施网络级访问限制 +- **更新**:定期更新到最新版本以获取安全补丁 + +## 文档 + +- 官方文档:[https://docs.easy-dataset.com/](https://docs.easy-dataset.com/) +- GitHub 仓库:[https://github.com/ConardLi/easy-dataset](https://github.com/ConardLi/easy-dataset) +- 视频教程:[Bilibili](https://www.bilibili.com/video/BV1y8QpYGE57/) +- 研究论文:[arXiv:2507.04009](https://arxiv.org/abs/2507.04009v1) + +## 故障排除 + +### 容器无法启动 + +- 查看日志:`docker compose logs easy-dataset` +- 验证端口 1717 未被占用 +- 确保系统资源充足 + +### 数据库问题 + +- 如遇到 SQLite 问题,删除并重新创建卷: + + ```bash + docker compose down -v + docker compose up -d + ``` + +### 权限错误 + +- 确保容器对挂载卷有写入权限 +- 检查 Docker 卷权限 + +## 许可证 + +Easy Dataset 采用 AGPL 3.0 许可证。详见 [LICENSE](https://github.com/ConardLi/easy-dataset/blob/main/LICENSE) 文件。 diff --git a/src/easy-dataset/docker-compose.yaml b/src/easy-dataset/docker-compose.yaml new file mode 100644 index 0000000..0c834aa --- /dev/null +++ b/src/easy-dataset/docker-compose.yaml @@ -0,0 +1,40 @@ +x-default: &default + restart: unless-stopped + logging: + driver: json-file + options: + max-size: 100m + max-file: "3" + +services: + easy-dataset: + <<: *default + image: ghcr.io/conardli/easy-dataset:${EASY_DATASET_VERSION:-1.5.1} + ports: + - 
"${EASY_DATASET_PORT_OVERRIDE:-1717}:1717" + volumes: + - easy_dataset_db:/app/local-db + # Uncomment the following line if you need to mount Prisma database files + # Note: You need to run 'npm run db:push' first to initialize the database + # - easy_dataset_prisma:/app/prisma + environment: + - TZ=${TZ:-UTC} + - NODE_ENV=production + deploy: + resources: + limits: + cpus: '2.0' + memory: 4G + reservations: + cpus: '0.5' + memory: 1G + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:1717"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + +volumes: + easy_dataset_db: + # easy_dataset_prisma: diff --git a/src/gitea/docker-compose.yaml b/src/gitea/docker-compose.yaml index bec8598..f83ca3b 100644 --- a/src/gitea/docker-compose.yaml +++ b/src/gitea/docker-compose.yaml @@ -11,6 +11,8 @@ services: <<: *default image: gitea/gitea:${GITEA_VERSION:-1.24.6-rootless} environment: + - USER_UID=1000 + - USER_GID=1000 - TZ=${TZ:-UTC} - GITEA__database__DB_TYPE=${GITEA_DB_TYPE:-postgres} - GITEA__database__HOST=${GITEA_POSTGRES_HOST:-db:5432} @@ -18,8 +20,7 @@ services: - GITEA__database__NAME=${POSTGRES_DB:-gitea} - GITEA__database__PASSWD=${POSTGRES_PASSWORD:-gitea} volumes: - - gitea_data:/var/lib/gitea - - ./config:/etc/gitea + - ./gitea:/data ports: - "${GITEA_HTTP_PORT:-3000}:3000" - "${GITEA_SSH_PORT:-3022}:22" diff --git a/src/mongodb-replicaset-single/.env.example b/src/mongodb-replicaset-single/.env.example new file mode 100644 index 0000000..123d59a --- /dev/null +++ b/src/mongodb-replicaset-single/.env.example @@ -0,0 +1,21 @@ +# Timezone +TZ=UTC + +# MongoDB Version +MONGO_VERSION=8.0.13 + +# MongoDB root credentials +MONGO_INITDB_ROOT_USERNAME=root +MONGO_INITDB_ROOT_PASSWORD=password + +# Initial database name +MONGO_INITDB_DATABASE=admin + +# Replica set name +MONGO_REPLICA_SET_NAME=rs0 + +# MongoDB port for the single replica +MONGO_PORT_OVERRIDE_1=27017 + +# MongoDB host for initialization 
+MONGO_HOST=mongo1 diff --git a/src/mongodb-replicaset-single/.gitignore b/src/mongodb-replicaset-single/.gitignore new file mode 100644 index 0000000..956d472 --- /dev/null +++ b/src/mongodb-replicaset-single/.gitignore @@ -0,0 +1 @@ +/secrets diff --git a/src/mongodb-replicaset-single/README.md b/src/mongodb-replicaset-single/README.md new file mode 100644 index 0000000..0ec1bc0 --- /dev/null +++ b/src/mongodb-replicaset-single/README.md @@ -0,0 +1,77 @@ +# MongoDB Single-Node Replica Set + +[English](./README.md) | [中文](./README.zh.md) + +This service sets up a single-node MongoDB replica set, ideal for development and testing environments. + +## Prerequisites + +1. Generate a key file for the replica set (the `secrets` directory is git-ignored, so create it first): + + ```bash + mkdir -p ./secrets && openssl rand -base64 756 > ./secrets/rs0.key + ``` + + On Windows, you can use Git Bash or WSL, or generate the key file by following the [MongoDB documentation](https://docs.mongodb.com/manual/tutorial/deploy-replica-set/). + +## Initialization + +1. Start the services: + + ```bash + docker compose up -d + ``` + + The replica set is initialized automatically by the `mongo-init` init container. This container: + - Waits for the MongoDB node to be healthy + - Connects to the node + - Initializes the single-node replica set + - Uses container-based networking for communication + +2. Verify the replica set status: + + ```bash + docker exec -it mongodb-replicaset-single-mongo1-1 mongosh -u root -p password --authenticationDatabase admin --eval "rs.status()" + ``` + +## Services + +- `mongo1`: The only member of the replica set. + +## Configuration + +- `TZ`: The timezone for the container, default is `UTC`. +- `MONGO_VERSION`: The version of the MongoDB image, default is `8.0.13`. +- `MONGO_INITDB_ROOT_USERNAME`: The root username for the database, default is `root`. +- `MONGO_INITDB_ROOT_PASSWORD`: The root password for the database, default is `password`. +- `MONGO_INITDB_DATABASE`: The initial database to create, default is `admin`. 
+- `MONGO_REPLICA_SET_NAME`: The name of the replica set, default is `rs0`. +- `MONGO_PORT_OVERRIDE_1`: The host port for the MongoDB node, default is `27017`. +- `MONGO_HOST`: The host name for the MongoDB node, default is `mongo1`. + +## Volumes + +- `mongo_data`: A named volume for MongoDB data persistence. +- `secrets/rs0.key`: The key file for authenticating members of the replica set. + +## Security + +The replica set key file is mounted read-only and copied to `/tmp` inside the container with proper permissions (400). This approach ensures cross-platform compatibility (Windows/Linux/macOS) while maintaining security requirements. The key file is never modified on the host system. + +## Using the Single-Node Replica Set + +You can connect to the MongoDB replica set using any MongoDB client: + +```bash +mongosh "mongodb://root:password@localhost:27017/admin?authSource=admin&replicaSet=rs0" +``` + +Or using Python with PyMongo: + +```python +from pymongo import MongoClient + +client = MongoClient("mongodb://root:password@localhost:27017/admin?authSource=admin&replicaSet=rs0") +db = client.admin +print(db.command("ping")) +``` diff --git a/src/mongodb-replicaset-single/README.zh.md b/src/mongodb-replicaset-single/README.zh.md new file mode 100644 index 0000000..5f242d2 --- /dev/null +++ b/src/mongodb-replicaset-single/README.zh.md @@ -0,0 +1,77 @@ +# MongoDB 单节点副本集 + +[English](./README.md) | [中文](./README.zh.md) + +此服务用于搭建一个单节点 MongoDB 副本集,特别适合开发和测试环境。 + +## 前提条件 + +1. 为副本集生成一个密钥文件: + + ```bash + openssl rand -base64 756 > ./secrets/rs0.key + ``` + + 在 Windows 上,您可以使用 Git Bash 或 WSL,或从 [MongoDB 文档](https://docs.mongodb.com/manual/tutorial/deploy-replica-set/) 下载密钥文件。 + +## 初始化 + +1. 启动服务: + + ```bash + docker compose up -d + ``` + + 这些服务将通过 `mongo-init` init 容器自动初始化副本集。该容器会: + - 等待 MongoDB 节点就绪 + - 连接到该节点 + - 初始化单节点副本集 + - 通过容器网络进行通信 + +2. 
验证副本集状态: + + ```bash + docker exec -it mongodb-replicaset-single-mongo1-1 mongosh -u root -p password --authenticationDatabase admin --eval "rs.status()" + ``` + +## 服务 + +- `mongo1`: 副本集的唯一成员。 + +## 配置 + +- `TZ`: 容器的时区,默认为 `UTC`。 +- `MONGO_VERSION`: MongoDB 镜像的版本,默认为 `8.0.13`。 +- `MONGO_INITDB_ROOT_USERNAME`: 数据库的 root 用户名,默认为 `root`。 +- `MONGO_INITDB_ROOT_PASSWORD`: 数据库的 root 密码,默认为 `password`。 +- `MONGO_INITDB_DATABASE`: 要创建的初始数据库,默认为 `admin`。 +- `MONGO_REPLICA_SET_NAME`: 副本集的名称,默认为 `rs0`。 +- `MONGO_PORT_OVERRIDE_1`: MongoDB 节点的主机端口,默认为 `27017`。 +- `MONGO_HOST`: MongoDB 节点的主机名,默认为 `mongo1`。 + +## 卷 + +- `mongo_data`: 用于 MongoDB 数据持久化的命名卷。 +- `secrets/rs0.key`: 副本集成员身份验证的密钥文件。 + +## 安全性 + +副本集密钥文件以只读方式挂载,并在容器内复制到 `/tmp`,权限为 400。这种方法确保跨平台兼容性(Windows/Linux/macOS),同时保持安全要求。密钥文件不会在主机系统上被修改。 + +## 使用单节点副本集 + +您可以使用任何 MongoDB 客户端连接到 MongoDB 副本集: + +```bash +mongosh "mongodb://root:password@localhost:27017/admin?authSource=admin&replicaSet=rs0" +``` + +或使用 Python 的 PyMongo: + +```python +from pymongo import MongoClient + +client = MongoClient("mongodb://root:password@localhost:27017/admin?authSource=admin&replicaSet=rs0") +db = client.admin +print(db.command("ping")) +``` diff --git a/src/mongodb-replicaset-single/docker-compose.yaml b/src/mongodb-replicaset-single/docker-compose.yaml new file mode 100644 index 0000000..3528eb0 --- /dev/null +++ b/src/mongodb-replicaset-single/docker-compose.yaml @@ -0,0 +1,105 @@ +x-default: &default + restart: unless-stopped + logging: + driver: json-file + options: + max-size: 100m + +x-mongo: &mongo + <<: *default + image: mongo:${MONGO_VERSION:-8.0.13} + environment: + TZ: ${TZ:-UTC} + MONGO_INITDB_ROOT_USERNAME: ${MONGO_INITDB_ROOT_USERNAME:-root} + MONGO_INITDB_ROOT_PASSWORD: ${MONGO_INITDB_ROOT_PASSWORD:-password} + MONGO_INITDB_DATABASE: ${MONGO_INITDB_DATABASE:-admin} + volumes: + - ./secrets/rs0.key:/data/rs0.key:ro + - mongo_data:/data/db + entrypoint: + - bash + - -c + - | + cp /data/rs0.key /tmp/rs0.key + chmod 400 
/tmp/rs0.key + chown 999:999 /tmp/rs0.key + export MONGO_INITDB_ROOT_USERNAME MONGO_INITDB_ROOT_PASSWORD MONGO_INITDB_DATABASE + exec docker-entrypoint.sh mongod --replSet ${MONGO_REPLICA_SET_NAME:-rs0} --keyFile /tmp/rs0.key + healthcheck: + test: mongosh --eval "db.adminCommand('ping')" + interval: 5s + timeout: 3s + retries: 10 + start_period: 30s + deploy: + resources: + limits: + cpus: '0.50' + memory: 1G + reservations: + cpus: '0.25' + memory: 256M + +services: + mongo1: + <<: *mongo + ports: + - "${MONGO_PORT_OVERRIDE_1:-27017}:27017" + + mongo-init: + <<: *default + image: mongo:${MONGO_VERSION:-8.0.13} + depends_on: + mongo1: + condition: service_healthy + environment: + TZ: ${TZ:-UTC} + MONGO_INITDB_ROOT_USERNAME: ${MONGO_INITDB_ROOT_USERNAME:-root} + MONGO_INITDB_ROOT_PASSWORD: ${MONGO_INITDB_ROOT_PASSWORD:-password} + MONGO_REPLICA_SET_NAME: ${MONGO_REPLICA_SET_NAME:-rs0} + MONGO_PORT_1: ${MONGO_PORT_OVERRIDE_1:-27017} + MONGO_HOST: ${MONGO_HOST:-mongo1} + volumes: + - ./secrets/rs0.key:/data/rs0.key:ro + entrypoint: + - bash + - -c + - | + set -e + echo "Waiting for MongoDB node to be ready..." 
+ sleep 5 + + mongosh \ + --host "mongodb://$${MONGO_INITDB_ROOT_USERNAME}:$${MONGO_INITDB_ROOT_PASSWORD}@$${MONGO_HOST}:$${MONGO_PORT_1}" \ + --authenticationDatabase admin \ + --eval " + const config = { + _id: '$${MONGO_REPLICA_SET_NAME}', + members: [ + { _id: 0, host: 'mongo1:27017' } + ] + }; + + try { + const result = rs.status(); + print('Replica set already initialized'); + } catch (e) { + print('Initializing replica set...'); + rs.initiate(config); + print('Replica set initialized successfully'); + } + " + + echo "Init container completed successfully" + deploy: + resources: + limits: + cpus: '0.25' + memory: 256M + reservations: + cpus: '0.10' + memory: 128M + +volumes: + mongo_data: + driver: local diff --git a/src/mongodb-replicaset/README.md b/src/mongodb-replicaset/README.md index a2d170c..8196b70 100644 --- a/src/mongodb-replicaset/README.md +++ b/src/mongodb-replicaset/README.md @@ -20,24 +20,16 @@ This service sets up a MongoDB replica set with three members. docker compose up -d ``` -2. Connect to the primary node: + The replica set is initialized automatically by the `mongo-init` init container. This container: + - Waits for all MongoDB nodes to be healthy + - Connects to the first node (`mongo1` by default) + - Initializes the replica set with internal container names + - Uses container-based networking for communication + +2. Verify the replica set status: ```bash - docker exec -it mongodb-replicaset-mongo1-1 mongosh - ``` - -3. Initialize the replica set. 
**Remember to replace the host IP with your actual host IP.** - - ```js - config = { - _id: "rs0", - members: [ - {_id: 0, host: "192.168.31.38:27017"}, - {_id: 1, host: "192.168.31.38:27018"}, - {_id: 2, host: "192.168.31.38:27019"}, - ] - } - rs.initiate(config) + docker exec -it mongodb-replicaset-mongo1-1 mongosh -u root -p password --authenticationDatabase admin --eval "rs.status()" ``` ## Services diff --git a/src/mongodb-replicaset/README.zh.md b/src/mongodb-replicaset/README.zh.md index a205a53..90e5cef 100644 --- a/src/mongodb-replicaset/README.zh.md +++ b/src/mongodb-replicaset/README.zh.md @@ -20,24 +20,16 @@ docker compose up -d ``` -2. 连接到主节点: + 这些服务将通过 `mongo-init` init 容器自动初始化副本集。该容器会: + - 等待所有 MongoDB 节点就绪 + - 连接到主节点 + - 使用容器名初始化副本集 + - 通过容器网络进行通信 + +2. 验证副本集状态: ```bash - docker exec -it mongodb-replicaset-mongo1-1 mongosh - ``` - -3. 初始化副本集。**请记得将 host IP 替换为你的实际主机 IP。** - - ```js - config = { - _id: "rs0", - members: [ - {_id: 0, host: "192.168.31.38:27017"}, - {_id: 1, host: "192.168.31.38:27018"}, - {_id: 2, host: "192.168.31.38:27019"}, - ] - } - rs.initiate(config) + docker exec -it mongodb-replicaset-mongo1-1 mongosh -u root -p password --authenticationDatabase admin --eval "rs.status()" ``` ## 服务 diff --git a/src/mongodb-replicaset/docker-compose.yaml b/src/mongodb-replicaset/docker-compose.yaml index 4902617..31076d6 100644 --- a/src/mongodb-replicaset/docker-compose.yaml +++ b/src/mongodb-replicaset/docker-compose.yaml @@ -24,6 +24,12 @@ x-mongo: &mongo chown 999:999 /tmp/rs0.key export MONGO_INITDB_ROOT_USERNAME MONGO_INITDB_ROOT_PASSWORD MONGO_INITDB_DATABASE exec docker-entrypoint.sh mongod --replSet ${MONGO_REPLICA_SET_NAME:-rs0} --keyFile /tmp/rs0.key + healthcheck: + test: mongosh --eval "db.adminCommand('ping')" + interval: 5s + timeout: 3s + retries: 10 + start_period: 30s deploy: resources: limits: @@ -38,11 +44,75 @@ services: <<: *mongo ports: - "${MONGO_PORT_OVERRIDE_1:-27017}:27017" + mongo2: <<: *mongo ports: - 
"${MONGO_PORT_OVERRIDE_2:-27018}:27017" + mongo3: <<: *mongo ports: - "${MONGO_PORT_OVERRIDE_3:-27019}:27017" + + mongo-init: + <<: *default + image: mongo:${MONGO_VERSION:-8.0.13} + depends_on: + mongo1: + condition: service_healthy + mongo2: + condition: service_healthy + mongo3: + condition: service_healthy + environment: + TZ: ${TZ:-UTC} + MONGO_INITDB_ROOT_USERNAME: ${MONGO_INITDB_ROOT_USERNAME:-root} + MONGO_INITDB_ROOT_PASSWORD: ${MONGO_INITDB_ROOT_PASSWORD:-password} + MONGO_REPLICA_SET_NAME: ${MONGO_REPLICA_SET_NAME:-rs0} + MONGO_PORT_1: ${MONGO_PORT_OVERRIDE_1:-27017} + MONGO_PORT_2: ${MONGO_PORT_OVERRIDE_2:-27018} + MONGO_PORT_3: ${MONGO_PORT_OVERRIDE_3:-27019} + MONGO_HOST: ${MONGO_HOST:-mongo1} + volumes: + - ./secrets/rs0.key:/data/rs0.key:ro + entrypoint: + - bash + - -c + - | + set -e + echo "Waiting for MongoDB nodes to be ready..." + sleep 5 + + mongosh \ + --host "mongodb://$${MONGO_INITDB_ROOT_USERNAME}:$${MONGO_INITDB_ROOT_PASSWORD}@$${MONGO_HOST}:$${MONGO_PORT_1}" \ + --authenticationDatabase admin \ + --eval " + const config = { + _id: '$${MONGO_REPLICA_SET_NAME}', + members: [ + { _id: 0, host: 'mongo1:27017' }, + { _id: 1, host: 'mongo2:27017' }, + { _id: 2, host: 'mongo3:27017' } + ] + }; + + try { + const result = rs.status(); + print('Replica set already initialized'); + } catch (e) { + print('Initializing replica set...'); + rs.initiate(config); + print('Replica set initialized successfully'); + } + " + + echo "Init container completed successfully" + deploy: + resources: + limits: + cpus: '0.25' + memory: 256M + reservations: + cpus: '0.10' + memory: 128M