feat: add restate
49
src/restate-cluster/.env.example
Normal file
@@ -0,0 +1,49 @@
# Restate version
RESTATE_VERSION=1.5.3

# Cluster configuration
RESTATE_CLUSTER_NAME=restate-cluster
RESTATE_DEFAULT_REPLICATION=2

# Node 1 ports (primary node)
NODE1_INGRESS_PORT_OVERRIDE=8080
NODE1_ADMIN_PORT_OVERRIDE=9070
NODE1_NODE_PORT_OVERRIDE=5122

# Node 2 ports
NODE2_INGRESS_PORT_OVERRIDE=28080
NODE2_ADMIN_PORT_OVERRIDE=29070
NODE2_NODE_PORT_OVERRIDE=25122

# Node 3 ports
NODE3_INGRESS_PORT_OVERRIDE=38080
NODE3_ADMIN_PORT_OVERRIDE=39070
NODE3_NODE_PORT_OVERRIDE=35122

# Node advertised addresses (internal network)
RESTATE_NODE1_ADVERTISED_ADDRESS=http://restate-1:5122
RESTATE_NODE2_ADVERTISED_ADDRESS=http://restate-2:5122
RESTATE_NODE3_ADVERTISED_ADDRESS=http://restate-3:5122

# Metadata client addresses
RESTATE_METADATA_CLIENT__ADDRESSES=["http://restate-1:5122","http://restate-2:5122","http://restate-3:5122"]

# Snapshot configuration
RESTATE_WORKER__SNAPSHOTS__DESTINATION=s3://restate/snapshots
RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS=1000
RESTATE_WORKER__SNAPSHOTS__AWS_REGION=local
RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL=http://minio:9000
RESTATE_WORKER__SNAPSHOTS__AWS_ALLOW_HTTP=true

# MinIO configuration
MINIO_VERSION=latest
MINIO_API_PORT_OVERRIDE=9000
MINIO_CONSOLE_PORT_OVERRIDE=9001
MINIO_ROOT_USER=minioadmin
MINIO_ROOT_PASSWORD=minioadmin

# Timezone
TZ=UTC

# Log level: trace, debug, info, warn, error
RESTATE_LOG_FILTER=restate=info
280
src/restate-cluster/README.md
Normal file
@@ -0,0 +1,280 @@
# Restate Multi-Node Cluster

A highly-available 3-node Restate cluster for production workloads. This configuration provides fault tolerance and automatic failover capabilities.

## Features

- **High Availability**: 3-node cluster can tolerate 1 node failure
- **Data Replication**: Configurable replication factor (default: 2 of 3 nodes)
- **Automatic Snapshots**: Periodic snapshots stored in MinIO (S3-compatible)
- **Load Distribution**: Multiple ingress endpoints for load balancing
- **Metadata Quorum**: Replicated metadata cluster for consistency

## Quick Start

1. Copy the environment file:

```bash
cp .env.example .env
```

2. Start the cluster:

```bash
docker compose up -d
```

3. Verify cluster health:

```bash
# Check node 1
curl http://localhost:9070/health

# Check node 2
curl http://localhost:29070/health

# Check node 3
curl http://localhost:39070/health
```
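
If the `restatectl` administration tool is bundled in the Restate image you are running (an assumption worth verifying for your image version), you can also inspect cluster membership and partition state from inside a node container:

```bash
# Assumes the restate image ships the restatectl CLI
docker compose exec restate-1 restatectl status
```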

## Architecture

The cluster consists of:

- **3 Restate Nodes**: Distributed workflow engines with replicated state
- **MinIO**: S3-compatible storage for partition snapshots
- **Replicated Bifrost Provider**: Log replication across nodes
- **Metadata Cluster**: Distributed metadata server on all nodes

## Service Endpoints

### Node 1 (Primary)

- Ingress API: `http://localhost:8080`
- Admin API: `http://localhost:9070`
- Node Communication: Port 5122

### Node 2

- Ingress API: `http://localhost:28080`
- Admin API: `http://localhost:29070`
- Node Communication: Port 25122

### Node 3

- Ingress API: `http://localhost:38080`
- Admin API: `http://localhost:39070`
- Node Communication: Port 35122

### MinIO

- API: `http://localhost:9000`
- Console: `http://localhost:9001` (admin UI)
- Username: `minioadmin`
- Password: `minioadmin`

## Environment Variables

### Cluster Configuration

| Variable                      | Default           | Description                 |
| ----------------------------- | ----------------- | --------------------------- |
| `RESTATE_VERSION`             | `1.5.3`           | Restate server version      |
| `RESTATE_CLUSTER_NAME`        | `restate-cluster` | Cluster name                |
| `RESTATE_DEFAULT_REPLICATION` | `2`               | Minimum replicas for writes |

### Port Configuration

Each node has three ports:

- `NODEx_INGRESS_PORT_OVERRIDE`: Ingress API port
- `NODEx_ADMIN_PORT_OVERRIDE`: Admin API port
- `NODEx_NODE_PORT_OVERRIDE`: Node-to-node communication port

### Snapshot Configuration

| Variable                                                    | Default                  | Description          |
| ----------------------------------------------------------- | ------------------------ | -------------------- |
| `RESTATE_WORKER__SNAPSHOTS__DESTINATION`                    | `s3://restate/snapshots` | S3 bucket path       |
| `RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS`  | `1000`                   | Records per snapshot |
| `RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL`               | `http://minio:9000`      | S3 endpoint          |
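
In production you would typically point snapshots at durable object storage rather than the bundled MinIO. A sketch of the relevant `.env` overrides, using a hypothetical bucket name and region of your own:

```bash
# Hypothetical overrides for an external S3 bucket (values are placeholders)
RESTATE_WORKER__SNAPSHOTS__DESTINATION=s3://my-restate-snapshots/cluster-1
RESTATE_WORKER__SNAPSHOTS__AWS_REGION=eu-central-1
RESTATE_WORKER__SNAPSHOTS__AWS_ALLOW_HTTP=false
# Remove the MinIO endpoint override so the default AWS endpoint is used
```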

### MinIO Configuration

| Variable                      | Default      | Description          |
| ----------------------------- | ------------ | -------------------- |
| `MINIO_VERSION`               | `latest`     | MinIO version        |
| `MINIO_ROOT_USER`             | `minioadmin` | MinIO admin username |
| `MINIO_ROOT_PASSWORD`         | `minioadmin` | MinIO admin password |
| `MINIO_API_PORT_OVERRIDE`     | `9000`       | MinIO API port       |
| `MINIO_CONSOLE_PORT_OVERRIDE` | `9001`       | MinIO console port   |

## Usage Examples

### Deploy a Service (to any node)

```bash
# Deploy to node 1
curl -X POST http://localhost:9070/deployments \
  -H 'Content-Type: application/json' \
  -d '{"uri": "http://my-service:9080"}'
```
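
After registration, you can list the deployments the cluster knows about through the same admin API:

```bash
# List registered deployments via the admin API on node 1
curl http://localhost:9070/deployments
```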

### Invoke Service with Load Balancing

Use a load balancer or DNS round-robin across ingress endpoints:

```bash
# Node 1
curl -X POST http://localhost:8080/MyService/myMethod \
  -H 'Content-Type: application/json' \
  -d '{"key": "value"}'

# Node 2
curl -X POST http://localhost:28080/MyService/myMethod \
  -H 'Content-Type: application/json' \
  -d '{"key": "value"}'
```
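
As a quick stand-in for a real load balancer, a shell loop can spread the same call (using the placeholder `MyService/myMethod` from above) across all three ingress ports:

```bash
# Round-robin the same invocation across the three ingress endpoints
for port in 8080 28080 38080; do
  curl -s -X POST "http://localhost:${port}/MyService/myMethod" \
    -H 'Content-Type: application/json' \
    -d '{"key": "value"}'
  echo
done
```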

### Check Cluster Status

```bash
# From any admin API
curl http://localhost:9070/cluster
```

## Fault Tolerance

The cluster is configured with:

- **Replication Factor**: 2 (data written to 2 out of 3 nodes)
- **Quorum**: Requires majority (2/3 nodes) for metadata operations
- **Tolerance**: Can survive 1 node failure without downtime

### Testing Failover

Stop one node:

```bash
docker compose stop restate-2
```

The cluster continues to operate. Services remain available on nodes 1 and 3.
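
A minimal end-to-end check of this behaviour, assuming the stack is running with the default ports:

```bash
# Stop node 2, confirm the surviving nodes still report healthy, then bring it back
docker compose stop restate-2
curl http://localhost:9070/health    # node 1
curl http://localhost:39070/health   # node 3
docker compose start restate-2
```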

## Snapshots and Backups

Partition snapshots are automatically saved to MinIO every 1000 records. This enables:

- Fast recovery after failures
- Backup and restore capabilities
- Reduced replay time on node restart

### Viewing Snapshots

Access the MinIO console at `http://localhost:9001`:

1. Log in with `minioadmin` / `minioadmin`
2. Navigate to the `restate` bucket
3. Browse the `snapshots/` directory
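
The same data can be browsed from the command line with the MinIO client, assuming `mc` is installed on the host:

```bash
# Point mc at the local MinIO instance and list stored snapshots
mc alias set restate-local http://localhost:9000 minioadmin minioadmin
mc ls --recursive restate-local/restate/snapshots/
```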

### Backup Strategy

To back up cluster data:

1. Stop the cluster:

```bash
docker compose down
```

2. Back up the volumes:

```bash
docker run --rm -v restate-cluster_restate_data:/data -v "$(pwd)/backup:/backup" alpine tar czf /backup/restate-data.tar.gz -C /data .
docker run --rm -v restate-cluster_minio_data:/data -v "$(pwd)/backup:/backup" alpine tar czf /backup/minio-data.tar.gz -C /data .
```
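
To restore, repopulate the same volumes from the archives before starting the cluster again (a sketch; the volume names assume the default `restate-cluster` compose project name and that you intend to overwrite the current volume contents):

```bash
# Restore volume contents from the backup archives, then start the cluster
docker run --rm -v restate-cluster_restate_data:/data -v "$(pwd)/backup:/backup" alpine sh -c "cd /data && tar xzf /backup/restate-data.tar.gz"
docker run --rm -v restate-cluster_minio_data:/data -v "$(pwd)/backup:/backup" alpine sh -c "cd /data && tar xzf /backup/minio-data.tar.gz"
docker compose up -d
```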

## Resource Limits

### Per Restate Node

- CPU: 0.5-2.0 cores
- Memory: 512MB-2GB

### MinIO Instance

- CPU: 0.25-1.0 cores
- Memory: 128MB-512MB

Adjust these limits in `docker-compose.yaml` based on your workload.
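
To see how close the containers come to these limits, watch live usage with `docker stats`:

```bash
# Live CPU/memory usage for all containers in this compose project
docker stats $(docker compose ps -q)
```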

## Scaling

To add more nodes:

1. Add a new service in `docker-compose.yaml`
2. Set a unique `RESTATE_NODE_NAME` and `RESTATE_FORCE_NODE_ID`
3. Add the node's address to `RESTATE_METADATA_CLIENT__ADDRESSES`
4. Expose unique ports
5. Set `RESTATE_AUTO_PROVISION=false`

## Production Considerations

- **Storage**: Use durable storage for volumes (EBS, persistent disks)
- **Network**: Ensure low latency between nodes (<10ms)
- **Monitoring**: Set up Prometheus scraping on port 9070
- **Security**: Change MinIO credentials in production
- **Replication**: Adjust `RESTATE_DEFAULT_REPLICATION` based on cluster size
- **Snapshots**: Consider external S3 for snapshot storage

## Monitoring

Each node exposes metrics:

```bash
curl http://localhost:9070/metrics  # Node 1
curl http://localhost:29070/metrics # Node 2
curl http://localhost:39070/metrics # Node 3
```
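
A quick way to confirm metrics are actually being emitted is to print the first few lines of the output (the exact metric names depend on the Restate version):

```bash
# Print a few metric lines from node 1 to confirm the endpoint responds
curl -s http://localhost:9070/metrics | head -n 20
```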

## Troubleshooting

### Node Won't Start

Check the logs:

```bash
docker compose logs restate-1
```

Ensure all nodes can reach each other on port 5122.
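
Connectivity problems usually show up in the logs of the nodes that join later; a quick scan across all three nodes often points at the culprit:

```bash
# Look for connection or provisioning errors across all Restate nodes
docker compose logs --tail=100 restate-1 restate-2 restate-3 | grep -iE "error|warn"
```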

### Split Brain Prevention

The cluster uses Raft consensus with a majority quorum. If nodes become partitioned, only the partition holding a majority (2+ nodes) remains active.

### Data Recovery

If data is corrupted:

1. Stop the cluster
2. Restore from volume backups
3. Restart the cluster

## Documentation

- [Official Documentation](https://docs.restate.dev/)
- [Cluster Deployment Guide](https://docs.restate.dev/server/clusters)
- [Snapshots Documentation](https://docs.restate.dev/server/snapshots)
- [Configuration Reference](https://docs.restate.dev/references/server-config)

## License

This configuration is provided under the project's license. Restate is licensed under the [Business Source License 1.1](https://github.com/restatedev/restate/blob/main/LICENSE).

## Notes

- For single-node deployments, see [restate](../restate/)
- MinIO is used for demo purposes; use AWS S3 or compatible storage in production
- The cluster automatically provisions itself on first start
- Node IDs are pinned to ensure consistent identity across restarts
280
src/restate-cluster/README.zh.md
Normal file
@@ -0,0 +1,280 @@
# Restate 多节点集群

用于生产工作负载的高可用 3 节点 Restate 集群。此配置提供容错和自动故障转移功能。

## 特性

- **高可用性**:3 节点集群可以容忍 1 个节点故障
- **数据复制**:可配置的复制因子(默认:3 个节点中的 2 个)
- **自动快照**:定期快照存储在 MinIO(S3 兼容)中
- **负载分配**:用于负载均衡的多个入口端点
- **元数据仲裁**:用于一致性的复制元数据集群

## 快速开始

1. 复制环境变量文件:

```bash
cp .env.example .env
```

2. 启动集群:

```bash
docker compose up -d
```

3. 验证集群健康状态:

```bash
# 检查节点 1
curl http://localhost:9070/health

# 检查节点 2
curl http://localhost:29070/health

# 检查节点 3
curl http://localhost:39070/health
```

## 架构

集群由以下部分组成:

- **3 个 Restate 节点**:具有复制状态的分布式工作流引擎
- **MinIO**:用于分区快照的 S3 兼容存储
- **Replicated Bifrost Provider**:跨节点的日志复制
- **元数据集群**:所有节点上的分布式元数据服务器

## 服务端点

### 节点 1(主节点)

- Ingress API:`http://localhost:8080`
- Admin API:`http://localhost:9070`
- 节点通信:端口 5122

### 节点 2

- Ingress API:`http://localhost:28080`
- Admin API:`http://localhost:29070`
- 节点通信:端口 25122

### 节点 3

- Ingress API:`http://localhost:38080`
- Admin API:`http://localhost:39070`
- 节点通信:端口 35122

### MinIO

- API:`http://localhost:9000`
- 控制台:`http://localhost:9001`(管理 UI)
- 用户名:`minioadmin`
- 密码:`minioadmin`

## 环境变量

### 集群配置

| 变量                          | 默认值            | 说明                 |
| ----------------------------- | ----------------- | -------------------- |
| `RESTATE_VERSION`             | `1.5.3`           | Restate 服务器版本   |
| `RESTATE_CLUSTER_NAME`        | `restate-cluster` | 集群名称             |
| `RESTATE_DEFAULT_REPLICATION` | `2`               | 写入所需的最小副本数 |

### 端口配置

每个节点有三个端口:

- `NODEx_INGRESS_PORT_OVERRIDE`:Ingress API 端口
- `NODEx_ADMIN_PORT_OVERRIDE`:Admin API 端口
- `NODEx_NODE_PORT_OVERRIDE`:节点间通信端口

### 快照配置

| 变量                                                        | 默认值                   | 说明             |
| ----------------------------------------------------------- | ------------------------ | ---------------- |
| `RESTATE_WORKER__SNAPSHOTS__DESTINATION`                    | `s3://restate/snapshots` | S3 存储桶路径    |
| `RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS`  | `1000`                   | 每个快照的记录数 |
| `RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL`               | `http://minio:9000`      | S3 端点          |

### MinIO 配置

| 变量                          | 默认值       | 说明               |
| ----------------------------- | ------------ | ------------------ |
| `MINIO_VERSION`               | `latest`     | MinIO 版本         |
| `MINIO_ROOT_USER`             | `minioadmin` | MinIO 管理员用户名 |
| `MINIO_ROOT_PASSWORD`         | `minioadmin` | MinIO 管理员密码   |
| `MINIO_API_PORT_OVERRIDE`     | `9000`       | MinIO API 端口     |
| `MINIO_CONSOLE_PORT_OVERRIDE` | `9001`       | MinIO 控制台端口   |

## 使用示例

### 部署服务(到任何节点)

```bash
# 部署到节点 1
curl -X POST http://localhost:9070/deployments \
  -H 'Content-Type: application/json' \
  -d '{"uri": "http://my-service:9080"}'
```

### 使用负载均衡调用服务

使用负载均衡器或 DNS 轮询跨入口端点:

```bash
# 节点 1
curl -X POST http://localhost:8080/MyService/myMethod \
  -H 'Content-Type: application/json' \
  -d '{"key": "value"}'

# 节点 2
curl -X POST http://localhost:28080/MyService/myMethod \
  -H 'Content-Type: application/json' \
  -d '{"key": "value"}'
```

### 检查集群状态

```bash
# 从任何管理 API
curl http://localhost:9070/cluster
```

## 容错能力

集群配置为:

- **复制因子**:2(数据写入 3 个节点中的 2 个)
- **仲裁**:元数据操作需要多数(2/3 节点)
- **容错能力**:可以在不停机的情况下容忍 1 个节点故障

### 测试故障转移

停止一个节点:

```bash
docker compose stop restate-2
```

集群继续运行。服务在节点 1 和 3 上仍然可用。

## 快照和备份

分区快照每 1000 条记录自动保存到 MinIO。这可以实现:

- 故障后快速恢复
- 备份和恢复功能
- 减少节点重启时的重放时间

### 查看快照

在 `http://localhost:9001` 访问 MinIO 控制台:

1. 使用 `minioadmin` / `minioadmin` 登录
2. 导航到 `restate` 存储桶
3. 浏览 `snapshots/` 目录

### 备份策略

要备份集群数据:

1. 停止集群:

```bash
docker compose down
```

2. 备份卷:

```bash
docker run --rm -v restate-cluster_restate_data:/data -v "$(pwd)/backup:/backup" alpine tar czf /backup/restate-data.tar.gz -C /data .
docker run --rm -v restate-cluster_minio_data:/data -v "$(pwd)/backup:/backup" alpine tar czf /backup/minio-data.tar.gz -C /data .
```

## 资源限制

### 每个 Restate 节点

- CPU:0.5-2.0 核心
- 内存:512MB-2GB

### MinIO 实例

- CPU:0.25-1.0 核心
- 内存:128MB-512MB

根据工作负载在 `docker-compose.yaml` 中调整这些限制。

## 扩展

要添加更多节点:

1. 在 `docker-compose.yaml` 中添加新服务
2. 设置唯一的 `RESTATE_NODE_NAME` 和 `RESTATE_FORCE_NODE_ID`
3. 将节点地址添加到 `RESTATE_METADATA_CLIENT__ADDRESSES`
4. 暴露唯一端口
5. 设置 `RESTATE_AUTO_PROVISION=false`

## 生产注意事项

- **存储**:使用持久存储卷(EBS、持久磁盘)
- **网络**:确保节点之间的低延迟(<10ms)
- **监控**:在端口 9070 上设置 Prometheus 抓取
- **安全**:在生产环境中更改 MinIO 凭据
- **复制**:根据集群大小调整 `RESTATE_DEFAULT_REPLICATION`
- **快照**:考虑使用外部 S3 进行快照存储

## 监控

每个节点都公开指标:

```bash
curl http://localhost:9070/metrics  # 节点 1
curl http://localhost:29070/metrics # 节点 2
curl http://localhost:39070/metrics # 节点 3
```

## 故障排除

### 节点无法启动

检查日志:

```bash
docker compose logs restate-1
```

确保所有节点可以在端口 5122 上相互访问。

### 防止脑裂

集群使用 Raft 共识和多数仲裁。如果节点被分区,只有具有多数(2+ 节点)的分区保持活动状态。

### 数据恢复

如果数据损坏:

1. 停止集群
2. 从卷备份恢复
3. 重启集群

## 文档

- [官方文档](https://docs.restate.dev/)
- [集群部署指南](https://docs.restate.dev/server/clusters)
- [快照文档](https://docs.restate.dev/server/snapshots)
- [配置参考](https://docs.restate.dev/references/server-config)

## 许可证

此配置在项目许可证下提供。Restate 采用 [Business Source License 1.1](https://github.com/restatedev/restate/blob/main/LICENSE) 许可。

## 注意事项

- 对于单节点部署,请参见 [restate](../restate/)
- MinIO 用于演示目的;在生产环境中使用 AWS S3 或兼容存储
- 集群在首次启动时自动配置
- 节点 ID 被固定以确保重启时的一致标识
144
src/restate-cluster/docker-compose.yaml
Normal file
@@ -0,0 +1,144 @@
# Restate Multi-Node Cluster
# A highly-available distributed workflow engine with 3-node cluster
# Documentation: https://docs.restate.dev/server/clusters

x-defaults: &defaults
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      max-size: 100m
      max-file: "3"

x-environment: &common-env
  TZ: ${TZ:-UTC}
  RESTATE_CLUSTER_NAME: ${RESTATE_CLUSTER_NAME:-restate-cluster}
  RESTATE_LOG_FILTER: ${RESTATE_LOG_FILTER:-restate=info}
  # Minimum replicas required to accept writes (2 out of 3 nodes)
  RESTATE_DEFAULT_REPLICATION: ${RESTATE_DEFAULT_REPLICATION:-2}
  # Addresses where nodes can reach each other
  RESTATE_METADATA_CLIENT__ADDRESSES: ${RESTATE_METADATA_CLIENT__ADDRESSES:-["http://restate-1:5122","http://restate-2:5122","http://restate-3:5122"]}
  # Snapshot configuration
  RESTATE_WORKER__SNAPSHOTS__DESTINATION: ${RESTATE_WORKER__SNAPSHOTS__DESTINATION:-s3://restate/snapshots}
  RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS: ${RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS:-1000}
  RESTATE_WORKER__SNAPSHOTS__AWS_REGION: ${RESTATE_WORKER__SNAPSHOTS__AWS_REGION:-local}
  RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL: ${RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL:-http://minio:9000}
  RESTATE_WORKER__SNAPSHOTS__AWS_ALLOW_HTTP: ${RESTATE_WORKER__SNAPSHOTS__AWS_ALLOW_HTTP:-true}
  RESTATE_WORKER__SNAPSHOTS__AWS_ACCESS_KEY_ID: ${MINIO_ROOT_USER:-minioadmin}
  RESTATE_WORKER__SNAPSHOTS__AWS_SECRET_ACCESS_KEY: ${MINIO_ROOT_PASSWORD:-minioadmin}

x-restate: &restate-defaults
  <<: *defaults
  image: restatedev/restate:${RESTATE_VERSION:-1.5.3}
  volumes:
    - restate_data:/restate-data
  deploy:
    resources:
      limits:
        cpus: '2.00'
        memory: 2G
      reservations:
        cpus: '0.50'
        memory: 512M

services:
  restate-1:
    <<: *restate-defaults
    ports:
      - "${NODE1_INGRESS_PORT_OVERRIDE:-8080}:8080" # Ingress API
      - "${NODE1_ADMIN_PORT_OVERRIDE:-9070}:9070" # Admin API
      - "${NODE1_NODE_PORT_OVERRIDE:-5122}:5122" # Node-to-node
    environment:
      <<: *common-env
      RESTATE_NODE_NAME: restate-1
      RESTATE_FORCE_NODE_ID: 1
      RESTATE_ADVERTISED_ADDRESS: ${RESTATE_NODE1_ADVERTISED_ADDRESS:-http://restate-1:5122}
      RESTATE_AUTO_PROVISION: "true" # Only first node provisions cluster
    healthcheck:
      test: ["CMD", "sh", "-c", "wget --no-verbose --tries=1 --spider http://localhost:9070/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    depends_on:
      minio:
        condition: service_healthy

  restate-2:
    <<: *restate-defaults
    ports:
      - "${NODE2_INGRESS_PORT_OVERRIDE:-28080}:8080"
      - "${NODE2_ADMIN_PORT_OVERRIDE:-29070}:9070"
      - "${NODE2_NODE_PORT_OVERRIDE:-25122}:5122"
    environment:
      <<: *common-env
      RESTATE_NODE_NAME: restate-2
      RESTATE_FORCE_NODE_ID: 2
      RESTATE_ADVERTISED_ADDRESS: ${RESTATE_NODE2_ADVERTISED_ADDRESS:-http://restate-2:5122}
      RESTATE_AUTO_PROVISION: "false"
    healthcheck:
      test: ["CMD", "sh", "-c", "wget --no-verbose --tries=1 --spider http://localhost:9070/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    depends_on:
      minio:
        condition: service_healthy
      restate-1:
        condition: service_healthy

  restate-3:
    <<: *restate-defaults
    ports:
      - "${NODE3_INGRESS_PORT_OVERRIDE:-38080}:8080"
      - "${NODE3_ADMIN_PORT_OVERRIDE:-39070}:9070"
      - "${NODE3_NODE_PORT_OVERRIDE:-35122}:5122"
    environment:
      <<: *common-env
      RESTATE_NODE_NAME: restate-3
      RESTATE_FORCE_NODE_ID: 3
      RESTATE_ADVERTISED_ADDRESS: ${RESTATE_NODE3_ADVERTISED_ADDRESS:-http://restate-3:5122}
      RESTATE_AUTO_PROVISION: "false"
    healthcheck:
      test: ["CMD", "sh", "-c", "wget --no-verbose --tries=1 --spider http://localhost:9070/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    depends_on:
      minio:
        condition: service_healthy
      restate-1:
        condition: service_healthy

  minio:
    <<: *defaults
    image: quay.io/minio/minio:${MINIO_VERSION:-latest}
    command: ["sh", "-c", "mkdir -p /data/restate && /usr/bin/minio server --quiet /data --console-address :9001"]
    ports:
      - "${MINIO_API_PORT_OVERRIDE:-9000}:9000"
      - "${MINIO_CONSOLE_PORT_OVERRIDE:-9001}:9001"
    environment:
      - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin}
    volumes:
      - minio_data:/data
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    deploy:
      resources:
        limits:
          cpus: '1.00'
          memory: 512M
        reservations:
          cpus: '0.25'
          memory: 128M

volumes:
  restate_data:
  minio_data: