feat: add restate

Author: Sun-ZhenXing
Date: 2025-11-03 20:16:08 +08:00
parent 2a18f63c88
commit eff0807a92
142 changed files with 1395 additions and 338 deletions


@@ -0,0 +1,49 @@
# Restate version
RESTATE_VERSION=1.5.3
# Cluster configuration
RESTATE_CLUSTER_NAME=restate-cluster
RESTATE_DEFAULT_REPLICATION=2
# Node 1 ports (primary node)
NODE1_INGRESS_PORT_OVERRIDE=8080
NODE1_ADMIN_PORT_OVERRIDE=9070
NODE1_NODE_PORT_OVERRIDE=5122
# Node 2 ports
NODE2_INGRESS_PORT_OVERRIDE=28080
NODE2_ADMIN_PORT_OVERRIDE=29070
NODE2_NODE_PORT_OVERRIDE=25122
# Node 3 ports
NODE3_INGRESS_PORT_OVERRIDE=38080
NODE3_ADMIN_PORT_OVERRIDE=39070
NODE3_NODE_PORT_OVERRIDE=35122
# Node advertised addresses (internal network)
RESTATE_NODE1_ADVERTISED_ADDRESS=http://restate-1:5122
RESTATE_NODE2_ADVERTISED_ADDRESS=http://restate-2:5122
RESTATE_NODE3_ADVERTISED_ADDRESS=http://restate-3:5122
# Metadata client addresses
RESTATE_METADATA_CLIENT__ADDRESSES=["http://restate-1:5122","http://restate-2:5122","http://restate-3:5122"]
# Snapshot configuration
RESTATE_WORKER__SNAPSHOTS__DESTINATION=s3://restate/snapshots
RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS=1000
RESTATE_WORKER__SNAPSHOTS__AWS_REGION=local
RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL=http://minio:9000
RESTATE_WORKER__SNAPSHOTS__AWS_ALLOW_HTTP=true
# MinIO configuration
MINIO_VERSION=latest
MINIO_API_PORT_OVERRIDE=9000
MINIO_CONSOLE_PORT_OVERRIDE=9001
MINIO_ROOT_USER=minioadmin
MINIO_ROOT_PASSWORD=minioadmin
# Timezone
TZ=UTC
# Log level: trace, debug, info, warn, error
RESTATE_LOG_FILTER=restate=info


@@ -0,0 +1,280 @@
# Restate Multi-Node Cluster
A highly available 3-node Restate cluster for production workloads. This configuration provides fault tolerance and automatic failover.
## Features
- **High Availability**: 3-node cluster can tolerate 1 node failure
- **Data Replication**: Configurable replication factor (default: 2 of 3 nodes)
- **Automatic Snapshots**: Periodic snapshots stored in MinIO (S3-compatible)
- **Load Distribution**: Multiple ingress endpoints for load balancing
- **Metadata Quorum**: Replicated metadata cluster for consistency
## Quick Start
1. Copy environment file:
```bash
cp .env.example .env
```
2. Start the cluster:
```bash
docker compose up -d
```
3. Verify cluster health:
```bash
# Check node 1
curl http://localhost:9070/health
# Check node 2
curl http://localhost:29070/health
# Check node 3
curl http://localhost:39070/health
```
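To sweep all three nodes at once, a small shell loop works (a convenience sketch):
```bash
for port in 9070 29070 39070; do
  printf 'admin on :%s -> ' "$port"
  curl -sf "http://localhost:${port}/health" > /dev/null && echo OK || echo DOWN
done
```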
## Architecture
The cluster consists of:
- **3 Restate Nodes**: Distributed workflow engines with replicated state
- **MinIO**: S3-compatible storage for partition snapshots
- **Replicated Bifrost Provider**: Log replication across nodes
- **Metadata Cluster**: Distributed metadata server on all nodes
## Service Endpoints
### Node 1 (Primary)
- Ingress API: `http://localhost:8080`
- Admin API: `http://localhost:9070`
- Node Communication: Port 5122
### Node 2
- Ingress API: `http://localhost:28080`
- Admin API: `http://localhost:29070`
- Node Communication: Port 25122
### Node 3
- Ingress API: `http://localhost:38080`
- Admin API: `http://localhost:39070`
- Node Communication: Port 35122
### MinIO
- API: `http://localhost:9000`
- Console: `http://localhost:9001` (admin UI)
- Username: `minioadmin`
- Password: `minioadmin`
## Environment Variables
### Cluster Configuration
| Variable | Default | Description |
| ----------------------------- | ----------------- | --------------------------- |
| `RESTATE_VERSION` | `1.5.3` | Restate server version |
| `RESTATE_CLUSTER_NAME` | `restate-cluster` | Cluster name |
| `RESTATE_DEFAULT_REPLICATION` | `2` | Minimum replicas for writes |
### Port Configuration
Each node has three ports:
- `NODEx_INGRESS_PORT_OVERRIDE`: Ingress API port
- `NODEx_ADMIN_PORT_OVERRIDE`: Admin API port
- `NODEx_NODE_PORT_OVERRIDE`: Node-to-node communication port
### Snapshot Configuration
| Variable | Default | Description |
| ---------------------------------------------------------- | ------------------------ | -------------------- |
| `RESTATE_WORKER__SNAPSHOTS__DESTINATION` | `s3://restate/snapshots` | S3 bucket path |
| `RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS` | `1000`                   | Records between snapshots |
| `RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL` | `http://minio:9000` | S3 endpoint |
### MinIO Configuration
| Variable | Default | Description |
| ----------------------------- | ------------ | -------------------- |
| `MINIO_VERSION` | `latest` | MinIO version |
| `MINIO_ROOT_USER` | `minioadmin` | MinIO admin username |
| `MINIO_ROOT_PASSWORD` | `minioadmin` | MinIO admin password |
| `MINIO_API_PORT_OVERRIDE` | `9000` | MinIO API port |
| `MINIO_CONSOLE_PORT_OVERRIDE` | `9001` | MinIO console port |
## Usage Examples
### Deploy a Service (to any node)
```bash
# Deploy to node 1
curl -X POST http://localhost:9070/deployments \
-H 'Content-Type: application/json' \
-d '{"uri": "http://my-service:9080"}'
```
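To confirm the registration, the admin API also exposes read endpoints for deployments and the services they contain:
```bash
# List registered deployments and discovered services
curl http://localhost:9070/deployments
curl http://localhost:9070/services
```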
### Invoke Service with Load Balancing
Use a load balancer or DNS round-robin across ingress endpoints:
```bash
# Node 1
curl -X POST http://localhost:8080/MyService/myMethod \
-H 'Content-Type: application/json' \
-d '{"key": "value"}'
# Node 2
curl -X POST http://localhost:28080/MyService/myMethod \
-H 'Content-Type: application/json' \
-d '{"key": "value"}'
```
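Without a dedicated load balancer, client-side round-robin over the ingress ports demonstrates the same idea (a sketch reusing the hypothetical `MyService` from above):
```bash
ports=(8080 28080 38080)
for i in 0 1 2 3 4 5; do
  # Rotate through the three ingress endpoints
  curl -s -X POST "http://localhost:${ports[i % 3]}/MyService/myMethod" \
    -H 'Content-Type: application/json' \
    -d '{"key": "value"}'
  echo
done
```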
### Check Cluster Status
```bash
# From any admin API
curl http://localhost:9070/cluster
```
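For a more detailed view than the raw HTTP endpoint, the `restatectl` tool bundled in the server image can be run in-container (available subcommands vary by version):
```bash
docker compose exec restate-1 restatectl status
```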
## Fault Tolerance
The cluster is configured with:
- **Replication Factor**: 2 (data written to 2 out of 3 nodes)
- **Quorum**: Requires majority (2/3 nodes) for metadata operations
- **Tolerance**: Can survive 1 node failure without downtime
### Testing Failover
Stop one node:
```bash
docker compose stop restate-2
```
The cluster continues to operate. Services remain available on nodes 1 and 3.
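You can verify this and then bring the node back; it rejoins and catches up from its peers and from the snapshots in MinIO:
```bash
# Surviving nodes keep answering
curl http://localhost:9070/health
curl http://localhost:39070/health
# Restart the stopped node; it rejoins the cluster automatically
docker compose start restate-2
```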
## Snapshots and Backups
Partition snapshots are automatically saved to MinIO every 1000 records. This enables:
- Fast recovery after failures
- Backup and restore capabilities
- Reduced replay time on node restart
### Viewing Snapshots
Access MinIO console at `http://localhost:9001`:
1. Login with `minioadmin` / `minioadmin`
2. Navigate to `restate` bucket
3. Browse `snapshots/` directory
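The same objects can be listed from a terminal with the MinIO client, assuming `mc` is installed locally:
```bash
mc alias set restate-local http://localhost:9000 minioadmin minioadmin
mc ls --recursive restate-local/restate/snapshots/
```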
### Backup Strategy
To backup cluster data:
1. Stop the cluster:
```bash
docker compose down
```
2. Backup volumes:
```bash
docker run --rm -v restate-cluster_restate_data:/data -v $(pwd)/backup:/backup alpine tar czf /backup/restate-data.tar.gz -C /data .
docker run --rm -v restate-cluster_minio_data:/data -v $(pwd)/backup:/backup alpine tar czf /backup/minio-data.tar.gz -C /data .
```
## Resource Limits
### Per Restate Node
- CPU: 0.5-2.0 cores
- Memory: 512MB-2GB
### MinIO Instance
- CPU: 0.25-1.0 cores
- Memory: 128MB-512MB
Adjust these limits in `docker-compose.yaml` to match your workload.
## Scaling
To add more nodes:
1. Add new service in `docker-compose.yaml`
2. Set unique `RESTATE_NODE_NAME` and `RESTATE_FORCE_NODE_ID`
3. Add node address to `RESTATE_METADATA_CLIENT__ADDRESSES`
4. Expose unique ports
5. Set `RESTATE_AUTO_PROVISION=false`
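For example, with a hypothetical `restate-4` service added along these lines (unique ports, `RESTATE_FORCE_NODE_ID: 4`, auto-provision disabled), only the new container needs to be brought up:
```bash
docker compose up -d restate-4
# Confirm the node joined (restatectl ships in the Restate image; output varies by version)
docker compose exec restate-1 restatectl status
```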
## Production Considerations
- **Storage**: Use durable storage for volumes (EBS, persistent disks)
- **Network**: Ensure low latency between nodes (<10ms)
- **Monitoring**: Set up Prometheus scraping on port 9070
- **Security**: Change MinIO credentials in production
- **Replication**: Adjust `RESTATE_DEFAULT_REPLICATION` based on cluster size
- **Snapshots**: Consider external S3 for snapshot storage
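As an illustration of the last point, switching snapshots to external S3 usually amounts to replacing these values in `.env` (bucket name and region are placeholders):
```bash
RESTATE_WORKER__SNAPSHOTS__DESTINATION=s3://my-company-restate/snapshots
RESTATE_WORKER__SNAPSHOTS__AWS_REGION=us-east-1
# Drop AWS_ENDPOINT_URL and AWS_ALLOW_HTTP so the standard AWS endpoints and TLS apply
```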
## Monitoring
Each node exposes metrics:
```bash
curl http://localhost:9070/metrics # Node 1
curl http://localhost:29070/metrics # Node 2
curl http://localhost:39070/metrics # Node 3
```
## Troubleshooting
### Node Won't Start
Check logs:
```bash
docker compose logs restate-1
```
Ensure all nodes can reach each other on port 5122.
### Split-Brain Prevention
The metadata cluster uses Raft consensus with a majority quorum. If the network partitions, only the side holding a majority (2 of 3 nodes) stays active.
### Data Recovery
If data is corrupted:
1. Stop cluster
2. Restore from volume backups (see the sketch below)
3. Restart cluster
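A restore sketch that mirrors the backup commands above (destructive: it replaces the current volume contents):
```bash
docker compose down
docker run --rm -v restate-cluster_restate_data:/data -v $(pwd)/backup:/backup alpine \
  sh -c "rm -rf /data/* && tar xzf /backup/restate-data.tar.gz -C /data"
docker run --rm -v restate-cluster_minio_data:/data -v $(pwd)/backup:/backup alpine \
  sh -c "rm -rf /data/* && tar xzf /backup/minio-data.tar.gz -C /data"
docker compose up -d
```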
## Documentation
- [Official Documentation](https://docs.restate.dev/)
- [Cluster Deployment Guide](https://docs.restate.dev/server/clusters)
- [Snapshots Documentation](https://docs.restate.dev/server/snapshots)
- [Configuration Reference](https://docs.restate.dev/references/server-config)
## License
This configuration is provided under the project's license. Restate is licensed under the [Business Source License 1.1](https://github.com/restatedev/restate/blob/main/LICENSE).
## Notes
- For single-node deployments, see [restate](../restate/)
- MinIO is used for demo purposes; use AWS S3 or another S3-compatible service in production
- The cluster automatically provisions on first start
- Node IDs are pinned to ensure consistent identity across restarts


@@ -0,0 +1,280 @@
# Restate Multi-Node Cluster
A highly available 3-node Restate cluster for production workloads. This configuration provides fault tolerance and automatic failover.
## Features
- **High Availability**: 3-node cluster can tolerate 1 node failure
- **Data Replication**: Configurable replication factor (default: 2 of 3 nodes)
- **Automatic Snapshots**: Periodic snapshots stored in MinIO (S3-compatible)
- **Load Distribution**: Multiple ingress endpoints for load balancing
- **Metadata Quorum**: Replicated metadata cluster for consistency
## Quick Start
1. Copy the environment file:
```bash
cp .env.example .env
```
2. Start the cluster:
```bash
docker compose up -d
```
3. Verify cluster health:
```bash
# Check node 1
curl http://localhost:9070/health
# Check node 2
curl http://localhost:29070/health
# Check node 3
curl http://localhost:39070/health
```
## Architecture
The cluster consists of:
- **3 Restate Nodes**: Distributed workflow engines with replicated state
- **MinIO**: S3-compatible storage for partition snapshots
- **Replicated Bifrost Provider**: Log replication across nodes
- **Metadata Cluster**: Distributed metadata server on all nodes
## Service Endpoints
### Node 1 (Primary)
- Ingress API: `http://localhost:8080`
- Admin API: `http://localhost:9070`
- Node communication: port 5122
### Node 2
- Ingress API: `http://localhost:28080`
- Admin API: `http://localhost:29070`
- Node communication: port 25122
### Node 3
- Ingress API: `http://localhost:38080`
- Admin API: `http://localhost:39070`
- Node communication: port 35122
### MinIO
- API: `http://localhost:9000`
- Console: `http://localhost:9001` (admin UI)
- Username: `minioadmin`
- Password: `minioadmin`
## Environment Variables
### Cluster Configuration
| Variable | Default | Description |
| ----------------------------- | ----------------- | --------------------------- |
| `RESTATE_VERSION` | `1.5.3` | Restate server version |
| `RESTATE_CLUSTER_NAME` | `restate-cluster` | Cluster name |
| `RESTATE_DEFAULT_REPLICATION` | `2` | Minimum replicas for writes |
### Port Configuration
Each node has three ports:
- `NODEx_INGRESS_PORT_OVERRIDE`: Ingress API port
- `NODEx_ADMIN_PORT_OVERRIDE`: Admin API port
- `NODEx_NODE_PORT_OVERRIDE`: Node-to-node communication port
### Snapshot Configuration
| Variable | Default | Description |
| ---------------------------------------------------------- | ------------------------ | ------------------------- |
| `RESTATE_WORKER__SNAPSHOTS__DESTINATION` | `s3://restate/snapshots` | S3 bucket path |
| `RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS` | `1000` | Records between snapshots |
| `RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL` | `http://minio:9000` | S3 endpoint |
### MinIO Configuration
| Variable | Default | Description |
| ----------------------------- | ------------ | -------------------- |
| `MINIO_VERSION` | `latest` | MinIO version |
| `MINIO_ROOT_USER` | `minioadmin` | MinIO admin username |
| `MINIO_ROOT_PASSWORD` | `minioadmin` | MinIO admin password |
| `MINIO_API_PORT_OVERRIDE` | `9000` | MinIO API port |
| `MINIO_CONSOLE_PORT_OVERRIDE` | `9001` | MinIO console port |
## Usage Examples
### Deploy a Service (to any node)
```bash
# Deploy to node 1
curl -X POST http://localhost:9070/deployments \
-H 'Content-Type: application/json' \
-d '{"uri": "http://my-service:9080"}'
```
### Invoke a Service with Load Balancing
Use a load balancer or DNS round-robin across the ingress endpoints:
```bash
# Node 1
curl -X POST http://localhost:8080/MyService/myMethod \
-H 'Content-Type: application/json' \
-d '{"key": "value"}'
# Node 2
curl -X POST http://localhost:28080/MyService/myMethod \
-H 'Content-Type: application/json' \
-d '{"key": "value"}'
```
### Check Cluster Status
```bash
# From any admin API
curl http://localhost:9070/cluster
```
## Fault Tolerance
The cluster is configured with:
- **Replication Factor**: 2 (data written to 2 out of 3 nodes)
- **Quorum**: Requires majority (2/3 nodes) for metadata operations
- **Tolerance**: Can survive 1 node failure without downtime
### Testing Failover
Stop one node:
```bash
docker compose stop restate-2
```
The cluster continues to operate. Services remain available on nodes 1 and 3.
## Snapshots and Backups
Partition snapshots are automatically saved to MinIO every 1000 records. This enables:
- Fast recovery after failures
- Backup and restore capabilities
- Reduced replay time on node restart
### Viewing Snapshots
Access the MinIO console at `http://localhost:9001`:
1. Log in with `minioadmin` / `minioadmin`
2. Navigate to the `restate` bucket
3. Browse the `snapshots/` directory
### Backup Strategy
To back up cluster data:
1. Stop the cluster:
```bash
docker compose down
```
2. Back up the volumes:
```bash
docker run --rm -v restate-cluster_restate_data:/data -v $(pwd)/backup:/backup alpine tar czf /backup/restate-data.tar.gz -C /data .
docker run --rm -v restate-cluster_minio_data:/data -v $(pwd)/backup:/backup alpine tar czf /backup/minio-data.tar.gz -C /data .
```
## Resource Limits
### Per Restate Node
- CPU: 0.5-2.0 cores
- Memory: 512MB-2GB
### MinIO Instance
- CPU: 0.25-1.0 cores
- Memory: 128MB-512MB
Adjust these limits in `docker-compose.yaml` to match your workload.
## Scaling
To add more nodes:
1. Add a new service in `docker-compose.yaml`
2. Set a unique `RESTATE_NODE_NAME` and `RESTATE_FORCE_NODE_ID`
3. Add the node address to `RESTATE_METADATA_CLIENT__ADDRESSES`
4. Expose unique ports
5. Set `RESTATE_AUTO_PROVISION=false`
## Production Considerations
- **Storage**: Use durable storage for volumes (EBS, persistent disks)
- **Network**: Ensure low latency between nodes (<10ms)
- **Monitoring**: Set up Prometheus scraping on port 9070
- **Security**: Change the MinIO credentials in production
- **Replication**: Adjust `RESTATE_DEFAULT_REPLICATION` based on cluster size
- **Snapshots**: Consider external S3 for snapshot storage
## Monitoring
Each node exposes metrics:
```bash
curl http://localhost:9070/metrics # Node 1
curl http://localhost:29070/metrics # Node 2
curl http://localhost:39070/metrics # Node 3
```
## Troubleshooting
### Node Won't Start
Check the logs:
```bash
docker compose logs restate-1
```
Ensure all nodes can reach each other on port 5122.
### Split-Brain Prevention
The metadata cluster uses Raft consensus with a majority quorum. If the network partitions, only the side holding a majority (2 of 3 nodes) stays active.
### Data Recovery
If data is corrupted:
1. Stop the cluster
2. Restore from the volume backups
3. Restart the cluster
## Documentation
- [Official Documentation](https://docs.restate.dev/)
- [Cluster Deployment Guide](https://docs.restate.dev/server/clusters)
- [Snapshots Documentation](https://docs.restate.dev/server/snapshots)
- [Configuration Reference](https://docs.restate.dev/references/server-config)
## License
This configuration is provided under the project's license. Restate is licensed under the [Business Source License 1.1](https://github.com/restatedev/restate/blob/main/LICENSE).
## Notes
- For single-node deployments, see [restate](../restate/)
- MinIO is used for demo purposes; use AWS S3 or another S3-compatible service in production
- The cluster provisions itself automatically on first start
- Node IDs are pinned to ensure consistent identity across restarts


@@ -0,0 +1,144 @@
# Restate Multi-Node Cluster
# A highly available 3-node Restate cluster (distributed workflow engine)
# Documentation: https://docs.restate.dev/server/clusters
x-defaults: &defaults
restart: unless-stopped
logging:
driver: json-file
options:
max-size: 100m
max-file: "3"
x-environment: &common-env
TZ: ${TZ:-UTC}
RESTATE_CLUSTER_NAME: ${RESTATE_CLUSTER_NAME:-restate-cluster}
RESTATE_LOG_FILTER: ${RESTATE_LOG_FILTER:-restate=info}
# Minimum replicas required to accept writes (2 out of 3 nodes)
RESTATE_DEFAULT_REPLICATION: ${RESTATE_DEFAULT_REPLICATION:-2}
# Addresses where nodes can reach each other
RESTATE_METADATA_CLIENT__ADDRESSES: ${RESTATE_METADATA_CLIENT__ADDRESSES:-["http://restate-1:5122","http://restate-2:5122","http://restate-3:5122"]}
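  # Double underscores in RESTATE_* env vars map to nested config keys (e.g. worker.snapshots.destination)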
# Snapshot configuration
RESTATE_WORKER__SNAPSHOTS__DESTINATION: ${RESTATE_WORKER__SNAPSHOTS__DESTINATION:-s3://restate/snapshots}
RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS: ${RESTATE_WORKER__SNAPSHOTS__SNAPSHOT_INTERVAL_NUM_RECORDS:-1000}
RESTATE_WORKER__SNAPSHOTS__AWS_REGION: ${RESTATE_WORKER__SNAPSHOTS__AWS_REGION:-local}
RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL: ${RESTATE_WORKER__SNAPSHOTS__AWS_ENDPOINT_URL:-http://minio:9000}
RESTATE_WORKER__SNAPSHOTS__AWS_ALLOW_HTTP: ${RESTATE_WORKER__SNAPSHOTS__AWS_ALLOW_HTTP:-true}
RESTATE_WORKER__SNAPSHOTS__AWS_ACCESS_KEY_ID: ${MINIO_ROOT_USER:-minioadmin}
RESTATE_WORKER__SNAPSHOTS__AWS_SECRET_ACCESS_KEY: ${MINIO_ROOT_PASSWORD:-minioadmin}
x-restate: &restate-defaults
<<: *defaults
image: restatedev/restate:${RESTATE_VERSION:-1.5.3}
volumes:
- restate_data:/restate-data
deploy:
resources:
limits:
cpus: '2.00'
memory: 2G
reservations:
cpus: '0.50'
memory: 512M
services:
restate-1:
<<: *restate-defaults
ports:
- "${NODE1_INGRESS_PORT_OVERRIDE:-8080}:8080" # Ingress API
- "${NODE1_ADMIN_PORT_OVERRIDE:-9070}:9070" # Admin API
- "${NODE1_NODE_PORT_OVERRIDE:-5122}:5122" # Node-to-node
environment:
<<: *common-env
RESTATE_NODE_NAME: restate-1
RESTATE_FORCE_NODE_ID: 1
RESTATE_ADVERTISED_ADDRESS: ${RESTATE_NODE1_ADVERTISED_ADDRESS:-http://restate-1:5122}
RESTATE_AUTO_PROVISION: "true" # Only first node provisions cluster
healthcheck:
test: ["CMD", "sh", "-c", "wget --no-verbose --tries=1 --spider http://localhost:9070/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
depends_on:
minio:
condition: service_healthy
restate-2:
<<: *restate-defaults
ports:
- "${NODE2_INGRESS_PORT_OVERRIDE:-28080}:8080"
- "${NODE2_ADMIN_PORT_OVERRIDE:-29070}:9070"
- "${NODE2_NODE_PORT_OVERRIDE:-25122}:5122"
environment:
<<: *common-env
RESTATE_NODE_NAME: restate-2
RESTATE_FORCE_NODE_ID: 2
RESTATE_ADVERTISED_ADDRESS: ${RESTATE_NODE2_ADVERTISED_ADDRESS:-http://restate-2:5122}
RESTATE_AUTO_PROVISION: "false"
healthcheck:
test: ["CMD", "sh", "-c", "wget --no-verbose --tries=1 --spider http://localhost:9070/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
depends_on:
minio:
condition: service_healthy
restate-1:
condition: service_healthy
restate-3:
<<: *restate-defaults
ports:
- "${NODE3_INGRESS_PORT_OVERRIDE:-38080}:8080"
- "${NODE3_ADMIN_PORT_OVERRIDE:-39070}:9070"
- "${NODE3_NODE_PORT_OVERRIDE:-35122}:5122"
environment:
<<: *common-env
RESTATE_NODE_NAME: restate-3
RESTATE_FORCE_NODE_ID: 3
RESTATE_ADVERTISED_ADDRESS: ${RESTATE_NODE3_ADVERTISED_ADDRESS:-http://restate-3:5122}
RESTATE_AUTO_PROVISION: "false"
healthcheck:
test: ["CMD", "sh", "-c", "wget --no-verbose --tries=1 --spider http://localhost:9070/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
depends_on:
minio:
condition: service_healthy
restate-1:
condition: service_healthy
minio:
<<: *defaults
image: quay.io/minio/minio:${MINIO_VERSION:-latest}
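    # The mkdir pre-creates the 'restate' bucket: in single-node mode a top-level directory in /data is served as a bucket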
command: ["sh", "-c", "mkdir -p /data/restate && /usr/bin/minio server --quiet /data --console-address :9001"]
ports:
- "${MINIO_API_PORT_OVERRIDE:-9000}:9000"
- "${MINIO_CONSOLE_PORT_OVERRIDE:-9001}:9001"
environment:
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin}
volumes:
- minio_data:/data
healthcheck:
test: ["CMD", "mc", "ready", "local"]
interval: 10s
timeout: 5s
retries: 5
start_period: 10s
deploy:
resources:
limits:
cpus: '1.00'
memory: 512M
reservations:
cpus: '0.25'
memory: 128M
volumes:
restate_data:
minio_data: