refactor: signoz
This commit is contained in:
@@ -1,5 +1,3 @@
|
|||||||
file_format: '1.0'
|
|
||||||
|
|
||||||
receivers:
|
receivers:
|
||||||
otlp:
|
otlp:
|
||||||
protocols:
|
protocols:
|
||||||
|
|||||||
+38
-73
@@ -9,27 +9,31 @@ TZ=UTC
|
|||||||
# SigNoz Version Configuration
|
# SigNoz Version Configuration
|
||||||
# ============================================
|
# ============================================
|
||||||
|
|
||||||
# SigNoz ClickHouse version
|
# SigNoz all-in-one backend + frontend + alertmanager version
|
||||||
SIGNOZ_CLICKHOUSE_VERSION=24.11.1-alpine
|
SIGNOZ_VERSION=v0.118.0
|
||||||
|
|
||||||
# SigNoz OTel Collector version
|
# SigNoz OTel Collector version (also used for migration jobs)
|
||||||
SIGNOZ_OTEL_COLLECTOR_VERSION=0.102.8
|
SIGNOZ_OTEL_COLLECTOR_VERSION=v0.144.2
|
||||||
|
|
||||||
# SigNoz Query Service version
|
# ClickHouse version
|
||||||
SIGNOZ_QUERY_SERVICE_VERSION=0.55.0
|
SIGNOZ_CLICKHOUSE_VERSION=25.5.6
|
||||||
|
|
||||||
# SigNoz Frontend version
|
# ZooKeeper version
|
||||||
SIGNOZ_FRONTEND_VERSION=0.55.0
|
SIGNOZ_ZOOKEEPER_VERSION=3.7.1
|
||||||
|
|
||||||
# SigNoz Alert Manager version
|
# ============================================
|
||||||
SIGNOZ_ALERTMANAGER_VERSION=0.23.5
|
# Security Configuration
|
||||||
|
# ============================================
|
||||||
|
|
||||||
|
# JWT secret for SigNoz token signing - MUST be changed in production
|
||||||
|
SIGNOZ_JWT_SECRET=please-change-this-secret
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
# Port Configuration
|
# Port Configuration
|
||||||
# ============================================
|
# ============================================
|
||||||
|
|
||||||
# SigNoz Frontend UI port (default: 3301)
|
# SigNoz UI port (default: 8080)
|
||||||
SIGNOZ_PORT_OVERRIDE=3301
|
SIGNOZ_PORT_OVERRIDE=8080
|
||||||
|
|
||||||
# OTel Collector OTLP gRPC port (default: 4317)
|
# OTel Collector OTLP gRPC port (default: 4317)
|
||||||
SIGNOZ_OTEL_GRPC_PORT_OVERRIDE=4317
|
SIGNOZ_OTEL_GRPC_PORT_OVERRIDE=4317
|
||||||
@@ -37,38 +41,15 @@ SIGNOZ_OTEL_GRPC_PORT_OVERRIDE=4317
|
|||||||
# OTel Collector OTLP HTTP port (default: 4318)
|
# OTel Collector OTLP HTTP port (default: 4318)
|
||||||
SIGNOZ_OTEL_HTTP_PORT_OVERRIDE=4318
|
SIGNOZ_OTEL_HTTP_PORT_OVERRIDE=4318
|
||||||
|
|
||||||
# ============================================
|
|
||||||
# ClickHouse Configuration
|
|
||||||
# ============================================
|
|
||||||
|
|
||||||
# ClickHouse database name
|
|
||||||
SIGNOZ_CLICKHOUSE_DB=signoz
|
|
||||||
|
|
||||||
# ClickHouse connection URL
|
|
||||||
SIGNOZ_CLICKHOUSE_URL=tcp://clickhouse:9000/?database=signoz
|
|
||||||
|
|
||||||
# ============================================
|
|
||||||
# Query Service Configuration
|
|
||||||
# ============================================
|
|
||||||
|
|
||||||
# Storage type (clickhouse)
|
|
||||||
SIGNOZ_STORAGE=clickhouse
|
|
||||||
|
|
||||||
# Go debug settings
|
|
||||||
SIGNOZ_GODEBUG=netdns=go
|
|
||||||
|
|
||||||
# Telemetry enabled (true/false)
|
|
||||||
SIGNOZ_TELEMETRY_ENABLED=true
|
|
||||||
|
|
||||||
# Deployment type
|
|
||||||
SIGNOZ_DEPLOYMENT_TYPE=docker-standalone-amd
|
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
# OTel Collector Configuration
|
# OTel Collector Configuration
|
||||||
# ============================================
|
# ============================================
|
||||||
|
|
||||||
# OTel resource attributes
|
# OTel resource attributes
|
||||||
SIGNOZ_OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host
|
SIGNOZ_OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux
|
||||||
|
|
||||||
|
# Exception grouping for low-cardinality exceptions (true/false)
|
||||||
|
SIGNOZ_LOW_CARDINAL_EXCEPTION_GROUPING=false
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
# Resource Limits - ClickHouse
|
# Resource Limits - ClickHouse
|
||||||
@@ -103,49 +84,33 @@ SIGNOZ_OTEL_COLLECTOR_CPU_RESERVATION=0.25
|
|||||||
SIGNOZ_OTEL_COLLECTOR_MEMORY_RESERVATION=512M
|
SIGNOZ_OTEL_COLLECTOR_MEMORY_RESERVATION=512M
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
# Resource Limits - Query Service
|
# Resource Limits - SigNoz
|
||||||
# ============================================
|
# ============================================
|
||||||
|
|
||||||
# CPU limit for Query Service
|
# CPU limit for SigNoz
|
||||||
SIGNOZ_QUERY_SERVICE_CPU_LIMIT=1.0
|
SIGNOZ_CPU_LIMIT=1.0
|
||||||
|
|
||||||
# Memory limit for Query Service
|
# Memory limit for SigNoz
|
||||||
SIGNOZ_QUERY_SERVICE_MEMORY_LIMIT=1G
|
SIGNOZ_MEMORY_LIMIT=2G
|
||||||
|
|
||||||
# CPU reservation for Query Service
|
# CPU reservation for SigNoz
|
||||||
SIGNOZ_QUERY_SERVICE_CPU_RESERVATION=0.25
|
SIGNOZ_CPU_RESERVATION=0.25
|
||||||
|
|
||||||
# Memory reservation for Query Service
|
# Memory reservation for SigNoz
|
||||||
SIGNOZ_QUERY_SERVICE_MEMORY_RESERVATION=256M
|
SIGNOZ_MEMORY_RESERVATION=256M
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
# Resource Limits - Frontend
|
# Resource Limits - ZooKeeper
|
||||||
# ============================================
|
# ============================================
|
||||||
|
|
||||||
# CPU limit for Frontend
|
# CPU limit for ZooKeeper
|
||||||
SIGNOZ_FRONTEND_CPU_LIMIT=0.5
|
SIGNOZ_ZOOKEEPER_CPU_LIMIT=0.5
|
||||||
|
|
||||||
# Memory limit for Frontend
|
# Memory limit for ZooKeeper
|
||||||
SIGNOZ_FRONTEND_MEMORY_LIMIT=512M
|
SIGNOZ_ZOOKEEPER_MEMORY_LIMIT=512M
|
||||||
|
|
||||||
# CPU reservation for Frontend
|
# CPU reservation for ZooKeeper
|
||||||
SIGNOZ_FRONTEND_CPU_RESERVATION=0.1
|
SIGNOZ_ZOOKEEPER_CPU_RESERVATION=0.1
|
||||||
|
|
||||||
# Memory reservation for Frontend
|
# Memory reservation for ZooKeeper
|
||||||
SIGNOZ_FRONTEND_MEMORY_RESERVATION=128M
|
SIGNOZ_ZOOKEEPER_MEMORY_RESERVATION=128M
|
||||||
|
|
||||||
# ============================================
|
|
||||||
# Resource Limits - Alert Manager
|
|
||||||
# ============================================
|
|
||||||
|
|
||||||
# CPU limit for Alert Manager
|
|
||||||
SIGNOZ_ALERTMANAGER_CPU_LIMIT=0.5
|
|
||||||
|
|
||||||
# Memory limit for Alert Manager
|
|
||||||
SIGNOZ_ALERTMANAGER_MEMORY_LIMIT=512M
|
|
||||||
|
|
||||||
# CPU reservation for Alert Manager
|
|
||||||
SIGNOZ_ALERTMANAGER_CPU_RESERVATION=0.1
|
|
||||||
|
|
||||||
# Memory reservation for Alert Manager
|
|
||||||
SIGNOZ_ALERTMANAGER_MEMORY_RESERVATION=128M
|
|
||||||
|
|||||||
+59
-83
@@ -13,133 +13,109 @@ SigNoz is an open-source observability platform that provides monitoring and tro
|
|||||||
- **Alerts**: Configure alerts based on metrics and traces
|
- **Alerts**: Configure alerts based on metrics and traces
|
||||||
- **OpenTelemetry Native**: Built on top of OpenTelemetry standards
|
- **OpenTelemetry Native**: Built on top of OpenTelemetry standards
|
||||||
|
|
||||||
|
## Services
|
||||||
|
|
||||||
|
| Service | Image | Description |
|
||||||
|
| -------------------------------- | ------------------------------------- | ------------------------------------------------------ |
|
||||||
|
| `signoz` | signoz/signoz:v0.118.0 | All-in-one backend, frontend UI, and alert manager |
|
||||||
|
| `otel-collector` | signoz/signoz-otel-collector:v0.144.2 | Receives, processes, and exports telemetry data |
|
||||||
|
| `clickhouse` | clickhouse/clickhouse-server:25.5.6 | Time-series database for traces, metrics, and logs |
|
||||||
|
| `zookeeper-1` | signoz/zookeeper:3.7.1 | ZooKeeper for ClickHouse replication metadata |
|
||||||
|
| `init-clickhouse` | clickhouse/clickhouse-server:25.5.6 | One-shot init that downloads the histogramQuantile UDF |
|
||||||
|
| `signoz-telemetrystore-migrator` | signoz/signoz-otel-collector:v0.144.2 | One-shot schema migration for ClickHouse |
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
1. Copy the environment file and adjust if needed:
|
1. Copy the environment file and set the JWT secret:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cp .env.example .env
|
cp .env.example .env
|
||||||
|
# Edit .env and set SIGNOZ_JWT_SECRET to a random string
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Create required configuration files:
|
2. Start the services:
|
||||||
|
|
||||||
```bash
|
|
||||||
mkdir -p query-service frontend
|
|
||||||
# Download or create configuration files as needed
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Start the services:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose up -d
|
docker compose up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Access SigNoz UI at `http://localhost:3301`
|
3. Access SigNoz UI at `http://localhost:8080`
|
||||||
|
|
||||||
|
> **Note**: On first start, `init-clickhouse` must download a binary from GitHub (~10 MB). Ensure internet access is available.
|
||||||
|
|
||||||
## Default Ports
|
## Default Ports
|
||||||
|
|
||||||
| Service | Port | Description |
|
| Service | Port | Description |
|
||||||
| --------------------- | ---- | -------------------- |
|
| --------------------- | ---- | -------------------- |
|
||||||
| Frontend UI | 3301 | SigNoz web interface |
|
| SigNoz UI | 8080 | SigNoz web interface |
|
||||||
| OTel Collector (gRPC) | 4317 | OTLP gRPC receiver |
|
| OTel Collector (gRPC) | 4317 | OTLP gRPC receiver |
|
||||||
| OTel Collector (HTTP) | 4318 | OTLP HTTP receiver |
|
| OTel Collector (HTTP) | 4318 | OTLP HTTP receiver |
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
### Environment Variables
|
### Key Environment Variables
|
||||||
|
|
||||||
Key environment variables (see `.env.example` for complete list):
|
| Variable | Default | Description |
|
||||||
|
| -------------------------------- | --------------------------- | ---------------------------------------------- |
|
||||||
|
| `SIGNOZ_JWT_SECRET` | `please-change-this-secret` | JWT secret for token signing — **change this** |
|
||||||
|
| `SIGNOZ_PORT_OVERRIDE` | `8080` | SigNoz UI host port |
|
||||||
|
| `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE` | `4317` | OTLP gRPC receiver host port |
|
||||||
|
| `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE` | `4318` | OTLP HTTP receiver host port |
|
||||||
|
| `SIGNOZ_VERSION` | `v0.118.0` | SigNoz image version |
|
||||||
|
| `SIGNOZ_OTEL_COLLECTOR_VERSION` | `v0.144.2` | OTel Collector image version |
|
||||||
|
| `SIGNOZ_CLICKHOUSE_VERSION` | `25.5.6` | ClickHouse image version |
|
||||||
|
| `TZ` | `UTC` | Timezone |
|
||||||
|
|
||||||
- `SIGNOZ_PORT_OVERRIDE`: Frontend UI port (default: 3301)
|
See `.env.example` for the complete list including resource limits.
|
||||||
- `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE`: OTLP gRPC receiver port (default: 4317)
|
|
||||||
- `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE`: OTLP HTTP receiver port (default: 4318)
|
|
||||||
- `SIGNOZ_CLICKHOUSE_VERSION`: ClickHouse version
|
|
||||||
- `SIGNOZ_QUERY_SERVICE_VERSION`: Query service version
|
|
||||||
- `SIGNOZ_FRONTEND_VERSION`: Frontend version
|
|
||||||
|
|
||||||
### Required Configuration Files
|
|
||||||
|
|
||||||
This setup requires several configuration files:
|
|
||||||
|
|
||||||
1. **clickhouse-config.xml**: ClickHouse server configuration
|
|
||||||
2. **clickhouse-users.xml**: ClickHouse user configuration
|
|
||||||
3. **otel-collector-config.yaml**: OTel Collector pipeline configuration
|
|
||||||
4. **query-service/prometheus.yml**: Query service Prometheus configuration
|
|
||||||
5. **frontend/nginx-config.conf**: Nginx configuration for frontend
|
|
||||||
|
|
||||||
You can obtain these files from the [official SigNoz repository](https://github.com/SigNoz/signoz/tree/main/deploy/docker/clickhouse-setup).
|
|
||||||
|
|
||||||
### Sending Telemetry Data
|
### Sending Telemetry Data
|
||||||
|
|
||||||
To send telemetry data to SigNoz, configure your application to use OpenTelemetry with the following endpoints:
|
Configure your application's OpenTelemetry SDK to export to:
|
||||||
|
|
||||||
- **gRPC**: `localhost:4317`
|
- **gRPC**: `http://localhost:4317`
|
||||||
- **HTTP**: `localhost:4318`
|
- **HTTP**: `http://localhost:4318`
|
||||||
|
|
||||||
Example for Node.js:
|
|
||||||
|
|
||||||
```javascript
|
|
||||||
const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-grpc')
|
|
||||||
const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node')
|
|
||||||
|
|
||||||
const exporter = new OTLPTraceExporter({
|
|
||||||
url: 'http://localhost:4317',
|
|
||||||
})
|
|
||||||
```
|
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
SigNoz consists of the following components:
|
```text
|
||||||
|
User → SigNoz UI (8080) → signoz backend
|
||||||
|
↓
|
||||||
|
App → OTel Collector (4317/4318) → ClickHouse
|
||||||
|
↑
|
||||||
|
ZooKeeper (replication metadata)
|
||||||
|
```
|
||||||
|
|
||||||
1. **ClickHouse**: Time-series database for storing traces, metrics, and logs
|
Startup order:
|
||||||
2. **OTel Collector**: Receives, processes, and exports telemetry data
|
|
||||||
3. **Query Service**: Queries data from ClickHouse
|
|
||||||
4. **Frontend**: Web UI for visualization and analysis
|
|
||||||
5. **Alert Manager**: Manages and sends alerts
|
|
||||||
|
|
||||||
## Resource Requirements
|
1. `init-clickhouse` downloads the histogramQuantile binary while `zookeeper-1` starts in parallel
|
||||||
|
2. `clickhouse` starts (after init completes and zookeeper is healthy)
|
||||||
|
3. `signoz-telemetrystore-migrator` runs schema migrations
|
||||||
|
4. `signoz` and `otel-collector` start
|
||||||
|
|
||||||
Minimum recommended resources:
|
|
||||||
|
|
||||||
- **CPU**: 4 cores
|
|
||||||
- **Memory**: 8GB RAM
|
|
||||||
- **Storage**: 20GB for data
|
|
||||||
|
|
||||||
## Data Persistence
|
## Data Persistence
|
||||||
|
|
||||||
Data is persisted in Docker volumes:
|
Data is persisted in Docker named volumes:
|
||||||
|
|
||||||
- `clickhouse_data`: ClickHouse database files
|
| Volume | Contents |
|
||||||
- `signoz_data`: SigNoz application data
|
| ------------------------- | ------------------------------------- |
|
||||||
- `alertmanager_data`: Alert manager data
|
| `clickhouse_data` | ClickHouse database files |
|
||||||
|
| `clickhouse_user_scripts` | histogramQuantile UDF binary |
|
||||||
|
| `signoz_data` | SigNoz SQLite DB and application data |
|
||||||
|
| `zookeeper_data` | ZooKeeper state |
|
||||||
|
|
||||||
## Security Considerations
|
## Security Considerations
|
||||||
|
|
||||||
- Change default credentials if applicable
|
- **Change `SIGNOZ_JWT_SECRET`** to a unique random value before production use
|
||||||
- Use environment variables for sensitive configuration
|
- Restrict port exposure to trusted networks in production
|
||||||
- Consider using secrets management for production deployments
|
- Run behind a reverse proxy with TLS termination for production
|
||||||
- Restrict network access to necessary ports only
|
|
||||||
- Enable authentication for production use
|
|
||||||
|
|
||||||
## Healthchecks
|
|
||||||
|
|
||||||
All services include healthchecks to ensure proper startup and dependency management:
|
|
||||||
|
|
||||||
- ClickHouse: HTTP health endpoint
|
|
||||||
- OTel Collector: HTTP health endpoint
|
|
||||||
- Query Service: HTTP health endpoint
|
|
||||||
- Frontend: HTTP health endpoint
|
|
||||||
- Alert Manager: HTTP health endpoint
|
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
1. **Services not starting**: Check logs with `docker compose logs`
|
1. **Services not starting**: `docker compose logs` — check for connection errors
|
||||||
2. **No data visible**: Verify OTel Collector configuration and application instrumentation
|
2. **init-clickhouse fails**: No internet access — the UDF binary cannot be downloaded
|
||||||
3. **High memory usage**: Adjust ClickHouse memory limits or data retention policies
|
3. **otel-collector unhealthy**: May be waiting for migrations to settle; check with `docker compose logs signoz-telemetrystore-migrator`
|
||||||
|
4. **No data visible**: Verify OTel Collector configuration and application instrumentation
|
||||||
## License
|
|
||||||
|
|
||||||
SigNoz is licensed under the MIT License. See the [official repository](https://github.com/SigNoz/signoz) for more details.
|
|
||||||
|
|
||||||
## References
|
## References
|
||||||
|
|
||||||
|
|||||||
+59
-84
@@ -13,133 +13,108 @@ SigNoz 是一个开源的可观测性平台,为分布式应用程序提供监
|
|||||||
- **告警**:基于指标和追踪配置告警
|
- **告警**:基于指标和追踪配置告警
|
||||||
- **OpenTelemetry 原生**:构建在 OpenTelemetry 标准之上
|
- **OpenTelemetry 原生**:构建在 OpenTelemetry 标准之上
|
||||||
|
|
||||||
|
## 服务列表
|
||||||
|
|
||||||
|
| 服务 | 镜像 | 描述 |
|
||||||
|
| -------------------------------- | ------------------------------------- | ---------------------------------------- |
|
||||||
|
| `signoz` | signoz/signoz:v0.118.0 | 后端、前端 UI 和告警管理器的合体镜像 |
|
||||||
|
| `otel-collector` | signoz/signoz-otel-collector:v0.144.2 | 接收、处理和导出遥测数据 |
|
||||||
|
| `clickhouse` | clickhouse/clickhouse-server:25.5.6 | 存储追踪、指标和日志的时序数据库 |
|
||||||
|
| `zookeeper-1` | signoz/zookeeper:3.7.1 | ZooKeeper,用于 ClickHouse 副本元数据 |
|
||||||
|
| `init-clickhouse` | clickhouse/clickhouse-server:25.5.6 | 一次性初始化,下载 histogramQuantile UDF |
|
||||||
|
| `signoz-telemetrystore-migrator` | signoz/signoz-otel-collector:v0.144.2 | 一次性 ClickHouse Schema 迁移 |
|
||||||
|
|
||||||
## 快速开始
|
## 快速开始
|
||||||
|
|
||||||
1. 复制环境文件并根据需要调整:
|
1. 复制环境文件并设置 JWT 密钥:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cp .env.example .env
|
cp .env.example .env
|
||||||
|
# 编辑 .env,将 SIGNOZ_JWT_SECRET 设置为随机字符串
|
||||||
```
|
```
|
||||||
|
|
||||||
2. 创建所需的配置文件:
|
2. 启动服务:
|
||||||
|
|
||||||
```bash
|
|
||||||
mkdir -p query-service frontend
|
|
||||||
# 根据需要下载或创建配置文件
|
|
||||||
```
|
|
||||||
|
|
||||||
3. 启动服务:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose up -d
|
docker compose up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
4. 访问 SigNoz UI:`http://localhost:3301`
|
3. 访问 SigNoz UI:`http://localhost:8080`
|
||||||
|
|
||||||
|
> **注意**:首次启动时,`init-clickhouse` 需要从 GitHub 下载约 10 MB 的二进制文件,请确保网络可访问。
|
||||||
|
|
||||||
## 默认端口
|
## 默认端口
|
||||||
|
|
||||||
| 服务 | 端口 | 描述 |
|
| 服务 | 端口 | 描述 |
|
||||||
| --------------------- | ---- | ---------------- |
|
| --------------------- | ---- | ---------------- |
|
||||||
| Frontend UI | 3301 | SigNoz Web 界面 |
|
| SigNoz UI | 8080 | SigNoz Web 界面 |
|
||||||
| OTel Collector (gRPC) | 4317 | OTLP gRPC 接收器 |
|
| OTel Collector (gRPC) | 4317 | OTLP gRPC 接收器 |
|
||||||
| OTel Collector (HTTP) | 4318 | OTLP HTTP 接收器 |
|
| OTel Collector (HTTP) | 4318 | OTLP HTTP 接收器 |
|
||||||
|
|
||||||
## 配置说明
|
## 配置说明
|
||||||
|
|
||||||
### 环境变量
|
### 主要环境变量
|
||||||
|
|
||||||
主要环境变量(完整列表请查看 `.env.example`):
|
| 变量 | 默认值 | 描述 |
|
||||||
|
| -------------------------------- | --------------------------- | ----------------------------------- |
|
||||||
|
| `SIGNOZ_JWT_SECRET` | `please-change-this-secret` | JWT 签名密钥 — **生产环境必须修改** |
|
||||||
|
| `SIGNOZ_PORT_OVERRIDE` | `8080` | SigNoz UI 宿主机端口 |
|
||||||
|
| `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE` | `4317` | OTLP gRPC 接收器宿主机端口 |
|
||||||
|
| `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE` | `4318` | OTLP HTTP 接收器宿主机端口 |
|
||||||
|
| `SIGNOZ_VERSION` | `v0.118.0` | SigNoz 镜像版本 |
|
||||||
|
| `SIGNOZ_OTEL_COLLECTOR_VERSION` | `v0.144.2` | OTel Collector 镜像版本 |
|
||||||
|
| `SIGNOZ_CLICKHOUSE_VERSION` | `25.5.6` | ClickHouse 镜像版本 |
|
||||||
|
| `TZ` | `UTC` | 时区 |
|
||||||
|
|
||||||
- `SIGNOZ_PORT_OVERRIDE`:前端 UI 端口(默认:3301)
|
完整变量列表(含资源限制)请查看 `.env.example`。
|
||||||
- `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE`:OTLP gRPC 接收器端口(默认:4317)
|
|
||||||
- `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE`:OTLP HTTP 接收器端口(默认:4318)
|
|
||||||
- `SIGNOZ_CLICKHOUSE_VERSION`:ClickHouse 版本
|
|
||||||
- `SIGNOZ_QUERY_SERVICE_VERSION`:查询服务版本
|
|
||||||
- `SIGNOZ_FRONTEND_VERSION`:前端版本
|
|
||||||
|
|
||||||
### 必需的配置文件
|
|
||||||
|
|
||||||
此设置需要以下配置文件:
|
|
||||||
|
|
||||||
1. **clickhouse-config.xml**:ClickHouse 服务器配置
|
|
||||||
2. **clickhouse-users.xml**:ClickHouse 用户配置
|
|
||||||
3. **otel-collector-config.yaml**:OTel Collector 管道配置
|
|
||||||
4. **query-service/prometheus.yml**:查询服务 Prometheus 配置
|
|
||||||
5. **frontend/nginx-config.conf**:前端 Nginx 配置
|
|
||||||
|
|
||||||
您可以从 [SigNoz 官方仓库](https://github.com/SigNoz/signoz/tree/main/deploy/docker/clickhouse-setup) 获取这些文件。
|
|
||||||
|
|
||||||
### 发送遥测数据
|
### 发送遥测数据
|
||||||
|
|
||||||
要向 SigNoz 发送遥测数据,请配置您的应用程序使用 OpenTelemetry 并使用以下端点:
|
将应用程序的 OpenTelemetry SDK 配置为向以下端点导出数据:
|
||||||
|
|
||||||
- **gRPC**:`localhost:4317`
|
- **gRPC**:`http://localhost:4317`
|
||||||
- **HTTP**:`localhost:4318`
|
- **HTTP**:`http://localhost:4318`
|
||||||
|
|
||||||
Node.js 示例:
|
|
||||||
|
|
||||||
```javascript
|
|
||||||
const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-grpc')
|
|
||||||
const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node')
|
|
||||||
|
|
||||||
const exporter = new OTLPTraceExporter({
|
|
||||||
url: 'http://localhost:4317',
|
|
||||||
})
|
|
||||||
```
|
|
||||||
|
|
||||||
## 架构说明
|
## 架构说明
|
||||||
|
|
||||||
SigNoz 由以下组件组成:
|
```text
|
||||||
|
用户 → SigNoz UI(8080)→ signoz 后端
|
||||||
|
↓
|
||||||
|
应用 → OTel Collector(4317/4318)→ ClickHouse
|
||||||
|
↑
|
||||||
|
ZooKeeper(副本元数据)
|
||||||
|
```
|
||||||
|
|
||||||
1. **ClickHouse**:用于存储追踪、指标和日志的时序数据库
|
启动顺序:
|
||||||
2. **OTel Collector**:接收、处理和导出遥测数据
|
|
||||||
3. **Query Service**:从 ClickHouse 查询数据
|
|
||||||
4. **Frontend**:用于可视化和分析的 Web UI
|
|
||||||
5. **Alert Manager**:管理和发送告警
|
|
||||||
|
|
||||||
## 资源需求
|
1. `init-clickhouse` 下载 histogramQuantile 二进制文件,`zookeeper-1` 同步启动
|
||||||
|
2. `clickhouse` 在初始化完成且 ZooKeeper 健康后启动
|
||||||
最低推荐资源:
|
3. `signoz-telemetrystore-migrator` 执行 Schema 迁移
|
||||||
|
4. `signoz` 和 `otel-collector` 启动
|
||||||
- **CPU**:4 核
|
|
||||||
- **内存**:8GB RAM
|
|
||||||
- **存储**:20GB 数据存储空间
|
|
||||||
|
|
||||||
## 数据持久化
|
## 数据持久化
|
||||||
|
|
||||||
数据持久化在 Docker 卷中:
|
数据持久化在 Docker 命名卷中:
|
||||||
|
|
||||||
- `clickhouse_data`:ClickHouse 数据库文件
|
| 卷名 | 内容 |
|
||||||
- `signoz_data`:SigNoz 应用程序数据
|
| ------------------------- | ------------------------------ |
|
||||||
- `alertmanager_data`:告警管理器数据
|
| `clickhouse_data` | ClickHouse 数据库文件 |
|
||||||
|
| `clickhouse_user_scripts` | histogramQuantile UDF 二进制 |
|
||||||
|
| `signoz_data` | SigNoz SQLite 数据库及应用数据 |
|
||||||
|
| `zookeeper_data` | ZooKeeper 状态数据 |
|
||||||
|
|
||||||
## 安全注意事项
|
## 安全注意事项
|
||||||
|
|
||||||
- 如适用,请更改默认凭据
|
- **生产环境必须将 `SIGNOZ_JWT_SECRET` 修改为唯一的随机值**
|
||||||
- 使用环境变量配置敏感信息
|
- 生产环境中限制端口仅对可信网络暴露
|
||||||
- 生产环境部署时考虑使用密钥管理
|
- 生产环境建议在反向代理后面启用 TLS 终止
|
||||||
- 仅限制必要端口的网络访问
|
|
||||||
- 生产环境请启用身份验证
|
|
||||||
|
|
||||||
## 健康检查
|
|
||||||
|
|
||||||
所有服务都包含健康检查以确保正确启动和依赖管理:
|
|
||||||
|
|
||||||
- ClickHouse:HTTP 健康端点
|
|
||||||
- OTel Collector:HTTP 健康端点
|
|
||||||
- Query Service:HTTP 健康端点
|
|
||||||
- Frontend:HTTP 健康端点
|
|
||||||
- Alert Manager:HTTP 健康端点
|
|
||||||
|
|
||||||
## 故障排查
|
## 故障排查
|
||||||
|
|
||||||
1. **服务未启动**:使用 `docker compose logs` 检查日志
|
1. **服务未启动**:运行 `docker compose logs` 检查连接错误
|
||||||
2. **无数据显示**:验证 OTel Collector 配置和应用程序仪器化
|
2. **init-clickhouse 失败**:没有网络访问权限,无法下载 UDF 二进制文件
|
||||||
3. **内存使用过高**:调整 ClickHouse 内存限制或数据保留策略
|
3. **otel-collector 不健康**:可能正在等待迁移完成,通过 `docker compose logs signoz-telemetrystore-migrator` 检查
|
||||||
|
4. **无数据显示**:验证 OTel Collector 配置以及应用程序的 OpenTelemetry 埋点(instrumentation)是否正确
|
||||||
## 许可证
|
|
||||||
|
|
||||||
SigNoz 采用 MIT 许可证。详情请参阅 [官方仓库](https://github.com/SigNoz/signoz)。
|
|
||||||
|
|
||||||
## 参考资料
|
## 参考资料
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,75 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<clickhouse>
|
||||||
|
<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
|
||||||
|
Optional. If you don't use replicated tables, you could omit that.
|
||||||
|
|
||||||
|
See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
|
||||||
|
-->
|
||||||
|
<zookeeper>
|
||||||
|
<node index="1">
|
||||||
|
<host>zookeeper-1</host>
|
||||||
|
<port>2181</port>
|
||||||
|
</node>
|
||||||
|
<!-- <node index="2">
|
||||||
|
<host>zookeeper-2</host>
|
||||||
|
<port>2181</port>
|
||||||
|
</node>
|
||||||
|
<node index="3">
|
||||||
|
<host>zookeeper-3</host>
|
||||||
|
<port>2181</port>
|
||||||
|
</node> -->
|
||||||
|
</zookeeper>
|
||||||
|
|
||||||
|
<!-- Configuration of clusters that could be used in Distributed tables.
|
||||||
|
https://clickhouse.com/docs/en/operations/table_engines/distributed/
|
||||||
|
-->
|
||||||
|
<remote_servers>
|
||||||
|
<cluster>
|
||||||
|
<!-- Inter-server per-cluster secret for Distributed queries
|
||||||
|
default: no secret (no authentication will be performed)
|
||||||
|
|
||||||
|
If set, then Distributed queries will be validated on shards, so at least:
|
||||||
|
- such cluster should exist on the shard,
|
||||||
|
- such cluster should have the same secret.
|
||||||
|
|
||||||
|
And also (and which is more important), the initial_user will
|
||||||
|
be used as current user for the query.
|
||||||
|
|
||||||
|
Right now the protocol is pretty simple and it only takes into account:
|
||||||
|
- cluster name
|
||||||
|
- query
|
||||||
|
|
||||||
|
Also it will be nice if the following will be implemented:
|
||||||
|
- source hostname (see interserver_http_host), but then it will depend on DNS,
|
||||||
|
it can use an IP address instead, but then you need to get it correct on the initiator node.
|
||||||
|
- target hostname / ip address (same notes as for source hostname)
|
||||||
|
- time-based security tokens
|
||||||
|
-->
|
||||||
|
<!-- <secret></secret> -->
|
||||||
|
<shard>
|
||||||
|
<!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
|
||||||
|
<!-- <internal_replication>false</internal_replication> -->
|
||||||
|
<!-- Optional. Shard weight when writing data. Default: 1. -->
|
||||||
|
<!-- <weight>1</weight> -->
|
||||||
|
<replica>
|
||||||
|
<host>clickhouse</host>
|
||||||
|
<port>9000</port>
|
||||||
|
<!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
|
||||||
|
<!-- <priority>1</priority> -->
|
||||||
|
</replica>
|
||||||
|
</shard>
|
||||||
|
<!-- <shard>
|
||||||
|
<replica>
|
||||||
|
<host>clickhouse-2</host>
|
||||||
|
<port>9000</port>
|
||||||
|
</replica>
|
||||||
|
</shard>
|
||||||
|
<shard>
|
||||||
|
<replica>
|
||||||
|
<host>clickhouse-3</host>
|
||||||
|
<port>9000</port>
|
||||||
|
</replica>
|
||||||
|
</shard> -->
|
||||||
|
</cluster>
|
||||||
|
</remote_servers>
|
||||||
|
</clickhouse>
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,21 @@
|
|||||||
|
<functions>
|
||||||
|
<function>
|
||||||
|
<type>executable</type>
|
||||||
|
<name>histogramQuantile</name>
|
||||||
|
<return_type>Float64</return_type>
|
||||||
|
<argument>
|
||||||
|
<type>Array(Float64)</type>
|
||||||
|
<name>buckets</name>
|
||||||
|
</argument>
|
||||||
|
<argument>
|
||||||
|
<type>Array(Float64)</type>
|
||||||
|
<name>counts</name>
|
||||||
|
</argument>
|
||||||
|
<argument>
|
||||||
|
<type>Float64</type>
|
||||||
|
<name>quantile</name>
|
||||||
|
</argument>
|
||||||
|
<format>CSV</format>
|
||||||
|
<command>./histogramQuantile</command>
|
||||||
|
</function>
|
||||||
|
</functions>
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<clickhouse>
|
||||||
|
<!-- See also the files in users.d directory where the settings can be overridden. -->
|
||||||
|
|
||||||
|
<!-- Profiles of settings. -->
|
||||||
|
<profiles>
|
||||||
|
<!-- Default settings. -->
|
||||||
|
<default>
|
||||||
|
<!-- Maximum memory usage for processing single query, in bytes. -->
|
||||||
|
<max_memory_usage>10000000000</max_memory_usage>
|
||||||
|
|
||||||
|
<!-- How to choose between replicas during distributed query processing.
|
||||||
|
random - choose random replica from set of replicas with minimum number of errors
|
||||||
|
nearest_hostname - from set of replicas with minimum number of errors, choose replica
|
||||||
|
with minimum number of different symbols between replica's hostname and local hostname
|
||||||
|
(Hamming distance).
|
||||||
|
in_order - first live replica is chosen in specified order.
|
||||||
|
first_or_random - if the first replica has a higher number of errors, pick a random one from the replicas with the minimum number of errors.
|
||||||
|
-->
|
||||||
|
<load_balancing>random</load_balancing>
|
||||||
|
</default>
|
||||||
|
|
||||||
|
<!-- Profile that allows only read queries. -->
|
||||||
|
<readonly>
|
||||||
|
<readonly>1</readonly>
|
||||||
|
</readonly>
|
||||||
|
</profiles>
|
||||||
|
|
||||||
|
<!-- Users and ACL. -->
|
||||||
|
<users>
|
||||||
|
<!-- If user name was not specified, 'default' user is used. -->
|
||||||
|
<default>
|
||||||
|
<!-- See also the files in users.d directory where the password can be overridden.
|
||||||
|
|
||||||
|
Password could be specified in plaintext or in SHA256 (in hex format).
|
||||||
|
|
||||||
|
If you want to specify password in plaintext (not recommended), place it in 'password' element.
|
||||||
|
Example: <password>qwerty</password>.
|
||||||
|
Password could be empty.
|
||||||
|
|
||||||
|
If you want to specify SHA256, place it in 'password_sha256_hex' element.
|
||||||
|
Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
|
||||||
|
Limitation of SHA256: it is impossible to connect to ClickHouse using the MySQL JS client (as of July 2019).
|
||||||
|
|
||||||
|
If you want to specify double SHA1, place it in 'password_double_sha1_hex' element.
|
||||||
|
Example: <password_double_sha1_hex>e395796d6546b1b65db9d665cd43f0e858dd4303</password_double_sha1_hex>
|
||||||
|
|
||||||
|
If you want to specify a previously defined LDAP server (see 'ldap_servers' in the main config) for authentication,
|
||||||
|
place its name in 'server' element inside 'ldap' element.
|
||||||
|
Example: <ldap><server>my_ldap_server</server></ldap>
|
||||||
|
|
||||||
|
If you want to authenticate the user via Kerberos (assuming Kerberos is enabled, see 'kerberos' in the main config),
|
||||||
|
place 'kerberos' element instead of 'password' (and similar) elements.
|
||||||
|
The name part of the canonical principal name of the initiator must match the user name for authentication to succeed.
|
||||||
|
You can also place 'realm' element inside 'kerberos' element to further restrict authentication to only those requests
|
||||||
|
whose initiator's realm matches it.
|
||||||
|
Example: <kerberos />
|
||||||
|
Example: <kerberos><realm>EXAMPLE.COM</realm></kerberos>
|
||||||
|
|
||||||
|
How to generate decent password:
|
||||||
|
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
|
||||||
|
The first line will be the password and the second line the corresponding SHA256.
|
||||||
|
|
||||||
|
How to generate double SHA1:
|
||||||
|
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-'
|
||||||
|
The first line will be the password and the second line the corresponding double SHA1.
|
||||||
|
-->
|
||||||
|
<password></password>
|
||||||
|
|
||||||
|
<!-- List of networks with open access.
|
||||||
|
|
||||||
|
To open access from everywhere, specify:
|
||||||
|
<ip>::/0</ip>
|
||||||
|
|
||||||
|
To open access only from localhost, specify:
|
||||||
|
<ip>::1</ip>
|
||||||
|
<ip>127.0.0.1</ip>
|
||||||
|
|
||||||
|
Each element of list has one of the following forms:
|
||||||
|
<ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
|
||||||
|
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
|
||||||
|
<host> Hostname. Example: server01.clickhouse.com.
|
||||||
|
To check access, DNS query is performed, and all received addresses compared to peer address.
|
||||||
|
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
|
||||||
|
To check access, DNS PTR query is performed for peer address and then regexp is applied.
|
||||||
|
Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
|
||||||
|
It is strongly recommended that the regexp ends with $
|
||||||
|
All results of DNS requests are cached till server restart.
|
||||||
|
-->
|
||||||
|
<networks>
|
||||||
|
<ip>::/0</ip>
|
||||||
|
</networks>
|
||||||
|
|
||||||
|
<!-- Settings profile for user. -->
|
||||||
|
<profile>default</profile>
|
||||||
|
|
||||||
|
<!-- Quota for user. -->
|
||||||
|
<quota>default</quota>
|
||||||
|
|
||||||
|
<!-- User can create other users and grant rights to them. -->
|
||||||
|
<!-- <access_management>1</access_management> -->
|
||||||
|
</default>
|
||||||
|
</users>
|
||||||
|
|
||||||
|
<!-- Quotas. -->
|
||||||
|
<quotas>
|
||||||
|
<!-- Name of quota. -->
|
||||||
|
<default>
|
||||||
|
<!-- Limits for time interval. You could specify many intervals with different limits. -->
|
||||||
|
<interval>
|
||||||
|
<!-- Length of interval. -->
|
||||||
|
<duration>3600</duration>
|
||||||
|
|
||||||
|
<!-- No limits. Just calculate resource usage for time interval. -->
|
||||||
|
<queries>0</queries>
|
||||||
|
<errors>0</errors>
|
||||||
|
<result_rows>0</result_rows>
|
||||||
|
<read_rows>0</read_rows>
|
||||||
|
<execution_time>0</execution_time>
|
||||||
|
</interval>
|
||||||
|
</default>
|
||||||
|
</quotas>
|
||||||
|
</clickhouse>
|
||||||
+170
-156
@@ -3,61 +3,199 @@ x-defaults: &defaults
|
|||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
max-size: 100m
|
max-size: 50m
|
||||||
max-file: '3'
|
max-file: '3'
|
||||||
|
|
||||||
x-clickhouse-defaults: &clickhouse-defaults
|
x-clickhouse-defaults: &clickhouse-defaults
|
||||||
restart: on-failure
|
<<: *defaults
|
||||||
logging:
|
image: ${GLOBAL_REGISTRY:-}clickhouse/clickhouse-server:${SIGNOZ_CLICKHOUSE_VERSION:-25.5.6}
|
||||||
driver: json-file
|
depends_on:
|
||||||
options:
|
init-clickhouse:
|
||||||
max-size: 100m
|
condition: service_completed_successfully
|
||||||
max-file: '3'
|
zookeeper-1:
|
||||||
|
condition: service_healthy
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [CMD, wget, --spider, -q, 'localhost:8123/ping']
|
test: [CMD, wget, --spider, -q, '0.0.0.0:8123/ping']
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
deploy:
|
ulimits:
|
||||||
resources:
|
nproc: 65535
|
||||||
limits:
|
nofile:
|
||||||
cpus: ${SIGNOZ_CLICKHOUSE_CPU_LIMIT:-2.0}
|
soft: 262144
|
||||||
memory: ${SIGNOZ_CLICKHOUSE_MEMORY_LIMIT:-4G}
|
hard: 262144
|
||||||
reservations:
|
|
||||||
cpus: ${SIGNOZ_CLICKHOUSE_CPU_RESERVATION:-0.5}
|
x-db-depend: &db-depend
|
||||||
memory: ${SIGNOZ_CLICKHOUSE_MEMORY_RESERVATION:-1G}
|
<<: *defaults
|
||||||
|
depends_on:
|
||||||
|
clickhouse:
|
||||||
|
condition: service_healthy
|
||||||
|
|
||||||
services:
|
services:
|
||||||
|
# One-shot init: downloads histogramQuantile UDF binary into the shared volume
|
||||||
|
init-clickhouse:
|
||||||
|
image: ${GLOBAL_REGISTRY:-}clickhouse/clickhouse-server:${SIGNOZ_CLICKHOUSE_VERSION:-25.5.6}
|
||||||
|
restart: on-failure
|
||||||
|
logging:
|
||||||
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: 10m
|
||||||
|
max-file: '1'
|
||||||
|
command:
|
||||||
|
- bash
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
version="v0.0.1"
|
||||||
|
node_os=$$(uname -s | tr '[:upper:]' '[:lower:]')
|
||||||
|
node_arch=$$(uname -m | sed s/aarch64/arm64/ | sed s/x86_64/amd64/)
|
||||||
|
echo "Fetching histogram-binary for $${node_os}/$${node_arch}"
|
||||||
|
cd /tmp
|
||||||
|
wget -O histogram-quantile.tar.gz "https://github.com/SigNoz/signoz/releases/download/histogram-quantile%2F$${version}/histogram-quantile_$${node_os}_$${node_arch}.tar.gz"
|
||||||
|
tar -xvzf histogram-quantile.tar.gz
|
||||||
|
mv histogram-quantile /var/lib/clickhouse/user_scripts/histogramQuantile
|
||||||
|
echo "Done."
|
||||||
|
volumes:
|
||||||
|
- clickhouse_user_scripts:/var/lib/clickhouse/user_scripts/
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 256M
|
||||||
|
|
||||||
|
# ZooKeeper for ClickHouse replication metadata
|
||||||
|
zookeeper-1:
|
||||||
|
<<: *defaults
|
||||||
|
image: ${GLOBAL_REGISTRY:-}signoz/zookeeper:${SIGNOZ_ZOOKEEPER_VERSION:-3.7.1}
|
||||||
|
user: root
|
||||||
|
environment:
|
||||||
|
- TZ=${TZ:-UTC}
|
||||||
|
- ZOO_SERVER_ID=1
|
||||||
|
- ALLOW_ANONYMOUS_LOGIN=yes
|
||||||
|
- ZOO_AUTOPURGE_INTERVAL=1
|
||||||
|
- ZOO_ENABLE_PROMETHEUS_METRICS=yes
|
||||||
|
- ZOO_PROMETHEUS_METRICS_PORT_NUMBER=9141
|
||||||
|
volumes:
|
||||||
|
- zookeeper_data:/bitnami/zookeeper
|
||||||
|
healthcheck:
|
||||||
|
test: [CMD-SHELL, 'curl -s -m 2 http://localhost:8080/commands/ruok | grep error | grep null']
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 30s
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: ${SIGNOZ_ZOOKEEPER_CPU_LIMIT:-0.5}
|
||||||
|
memory: ${SIGNOZ_ZOOKEEPER_MEMORY_LIMIT:-512M}
|
||||||
|
reservations:
|
||||||
|
cpus: ${SIGNOZ_ZOOKEEPER_CPU_RESERVATION:-0.1}
|
||||||
|
memory: ${SIGNOZ_ZOOKEEPER_MEMORY_RESERVATION:-128M}
|
||||||
|
|
||||||
# ClickHouse for storing traces, metrics and logs
|
# ClickHouse for storing traces, metrics and logs
|
||||||
clickhouse:
|
clickhouse:
|
||||||
<<: *clickhouse-defaults
|
<<: *clickhouse-defaults
|
||||||
image: ${GLOBAL_REGISTRY:-}clickhouse/clickhouse-server:${SIGNOZ_CLICKHOUSE_VERSION:-24.11.1-alpine}
|
|
||||||
user: '101:101' # ClickHouse user
|
|
||||||
volumes:
|
|
||||||
- clickhouse_data:/var/lib/clickhouse
|
|
||||||
- ./clickhouse-config.xml:/etc/clickhouse-server/config.d/logging.xml:ro
|
|
||||||
- ./clickhouse-users.xml:/etc/clickhouse-server/users.d/logging.xml:ro
|
|
||||||
environment:
|
environment:
|
||||||
- TZ=${TZ:-UTC}
|
- TZ=${TZ:-UTC}
|
||||||
- CLICKHOUSE_DB=${SIGNOZ_CLICKHOUSE_DB:-signoz}
|
- CLICKHOUSE_SKIP_USER_SETUP=1
|
||||||
|
volumes:
|
||||||
|
- clickhouse_data:/var/lib/clickhouse/
|
||||||
|
- clickhouse_user_scripts:/var/lib/clickhouse/user_scripts/
|
||||||
|
- ./clickhouse/config.xml:/etc/clickhouse-server/config.xml:ro
|
||||||
|
- ./clickhouse/users.xml:/etc/clickhouse-server/users.xml:ro
|
||||||
|
- ./clickhouse/custom-function.xml:/etc/clickhouse-server/custom-function.xml:ro
|
||||||
|
- ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml:ro
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: ${SIGNOZ_CLICKHOUSE_CPU_LIMIT:-2.0}
|
||||||
|
memory: ${SIGNOZ_CLICKHOUSE_MEMORY_LIMIT:-4G}
|
||||||
|
reservations:
|
||||||
|
cpus: ${SIGNOZ_CLICKHOUSE_CPU_RESERVATION:-0.5}
|
||||||
|
memory: ${SIGNOZ_CLICKHOUSE_MEMORY_RESERVATION:-1G}
|
||||||
|
|
||||||
|
# One-shot migration: bootstraps and runs schema migrations
|
||||||
|
signoz-telemetrystore-migrator:
|
||||||
|
<<: *db-depend
|
||||||
|
image: ${GLOBAL_REGISTRY:-}signoz/signoz-otel-collector:${SIGNOZ_OTEL_COLLECTOR_VERSION:-v0.144.2}
|
||||||
|
restart: on-failure
|
||||||
|
environment:
|
||||||
|
- TZ=${TZ:-UTC}
|
||||||
|
- SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_DSN=tcp://clickhouse:9000
|
||||||
|
- SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_CLUSTER=cluster
|
||||||
|
- SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_REPLICATION=true
|
||||||
|
- SIGNOZ_OTEL_COLLECTOR_TIMEOUT=10m
|
||||||
|
entrypoint: [/bin/sh]
|
||||||
|
command:
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
/signoz-otel-collector migrate bootstrap &&
|
||||||
|
/signoz-otel-collector migrate sync up &&
|
||||||
|
/signoz-otel-collector migrate async up
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
|
||||||
|
# SigNoz all-in-one backend + frontend + alertmanager
|
||||||
|
signoz:
|
||||||
|
<<: *db-depend
|
||||||
|
image: ${GLOBAL_REGISTRY:-}signoz/signoz:${SIGNOZ_VERSION:-v0.118.0}
|
||||||
|
ports:
|
||||||
|
- '${SIGNOZ_PORT_OVERRIDE:-8080}:8080'
|
||||||
|
volumes:
|
||||||
|
- signoz_data:/var/lib/signoz/
|
||||||
|
environment:
|
||||||
|
- TZ=${TZ:-UTC}
|
||||||
|
- SIGNOZ_ALERTMANAGER_PROVIDER=signoz
|
||||||
|
- SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN=tcp://clickhouse:9000
|
||||||
|
- SIGNOZ_SQLSTORE_SQLITE_PATH=/var/lib/signoz/signoz.db
|
||||||
|
- SIGNOZ_TOKENIZER_JWT_SECRET=${SIGNOZ_JWT_SECRET:-please-change-this-secret}
|
||||||
|
healthcheck:
|
||||||
|
test: [CMD, wget, --spider, -q, 'localhost:8080/api/v1/health']
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: ${SIGNOZ_CPU_LIMIT:-1.0}
|
||||||
|
memory: ${SIGNOZ_MEMORY_LIMIT:-2G}
|
||||||
|
reservations:
|
||||||
|
cpus: ${SIGNOZ_CPU_RESERVATION:-0.25}
|
||||||
|
memory: ${SIGNOZ_MEMORY_RESERVATION:-256M}
|
||||||
|
|
||||||
# OTel Collector for receiving telemetry data
|
# OTel Collector for receiving telemetry data
|
||||||
otel-collector:
|
otel-collector:
|
||||||
<<: *defaults
|
<<: *db-depend
|
||||||
image: ${GLOBAL_REGISTRY:-}signoz/signoz-otel-collector:${SIGNOZ_OTEL_COLLECTOR_VERSION:-0.102.8}
|
image: ${GLOBAL_REGISTRY:-}signoz/signoz-otel-collector:${SIGNOZ_OTEL_COLLECTOR_VERSION:-v0.144.2}
|
||||||
|
entrypoint: [/bin/sh]
|
||||||
command:
|
command:
|
||||||
- --config=/etc/otel-collector-config.yaml
|
- -c
|
||||||
|
- |
|
||||||
|
/signoz-otel-collector migrate sync check &&
|
||||||
|
/signoz-otel-collector --config=/etc/otel-collector-config.yaml --manager-config=/etc/manager-config.yaml --copy-path=/var/tmp/collector-config.yaml
|
||||||
volumes:
|
volumes:
|
||||||
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
|
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
|
||||||
|
- ./signoz/otel-collector-opamp-config.yaml:/etc/manager-config.yaml:ro
|
||||||
environment:
|
environment:
|
||||||
- TZ=${TZ:-UTC}
|
- TZ=${TZ:-UTC}
|
||||||
- OTEL_RESOURCE_ATTRIBUTES=${SIGNOZ_OTEL_RESOURCE_ATTRIBUTES:-host.name=signoz-host}
|
- OTEL_RESOURCE_ATTRIBUTES=${SIGNOZ_OTEL_RESOURCE_ATTRIBUTES:-host.name=signoz-host,os.type=linux}
|
||||||
|
- LOW_CARDINAL_EXCEPTION_GROUPING=${SIGNOZ_LOW_CARDINAL_EXCEPTION_GROUPING:-false}
|
||||||
|
- SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_DSN=tcp://clickhouse:9000
|
||||||
|
- SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_CLUSTER=cluster
|
||||||
|
- SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_REPLICATION=true
|
||||||
|
- SIGNOZ_OTEL_COLLECTOR_TIMEOUT=10m
|
||||||
ports:
|
ports:
|
||||||
- '${SIGNOZ_OTEL_GRPC_PORT_OVERRIDE:-4317}:4317' # OTLP gRPC receiver
|
- '${SIGNOZ_OTEL_GRPC_PORT_OVERRIDE:-4317}:4317' # OTLP gRPC receiver
|
||||||
- '${SIGNOZ_OTEL_HTTP_PORT_OVERRIDE:-4318}:4318' # OTLP HTTP receiver
|
- '${SIGNOZ_OTEL_HTTP_PORT_OVERRIDE:-4318}:4318' # OTLP HTTP receiver
|
||||||
depends_on:
|
healthcheck:
|
||||||
clickhouse:
|
test: [CMD, bash, -c, echo > /dev/tcp/localhost/13133]
|
||||||
condition: service_healthy
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 30s
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
@@ -66,133 +204,9 @@ services:
|
|||||||
reservations:
|
reservations:
|
||||||
cpus: ${SIGNOZ_OTEL_COLLECTOR_CPU_RESERVATION:-0.25}
|
cpus: ${SIGNOZ_OTEL_COLLECTOR_CPU_RESERVATION:-0.25}
|
||||||
memory: ${SIGNOZ_OTEL_COLLECTOR_MEMORY_RESERVATION:-512M}
|
memory: ${SIGNOZ_OTEL_COLLECTOR_MEMORY_RESERVATION:-512M}
|
||||||
healthcheck:
|
|
||||||
test:
|
|
||||||
- CMD
|
|
||||||
- wget
|
|
||||||
- --no-verbose
|
|
||||||
- --tries=1
|
|
||||||
- --spider
|
|
||||||
- 'http://localhost:13133/'
|
|
||||||
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 3
|
|
||||||
start_period: 30s
|
|
||||||
|
|
||||||
# Query Service for querying data
|
|
||||||
query-service:
|
|
||||||
<<: *defaults
|
|
||||||
image: ${GLOBAL_REGISTRY:-}signoz/query-service:${SIGNOZ_QUERY_SERVICE_VERSION:-0.55.0}
|
|
||||||
command:
|
|
||||||
- -config=/root/config/prometheus.yml
|
|
||||||
volumes:
|
|
||||||
- ./query-service/prometheus.yml:/root/config/prometheus.yml:ro
|
|
||||||
- signoz_data:/var/lib/signoz
|
|
||||||
environment:
|
|
||||||
- TZ=${TZ:-UTC}
|
|
||||||
- ClickHouseUrl=${SIGNOZ_CLICKHOUSE_URL:-tcp://clickhouse:9000/?database=signoz}
|
|
||||||
- STORAGE=${SIGNOZ_STORAGE:-clickhouse}
|
|
||||||
- GODEBUG=${SIGNOZ_GODEBUG:-netdns=go}
|
|
||||||
- TELEMETRY_ENABLED=${SIGNOZ_TELEMETRY_ENABLED:-true}
|
|
||||||
- DEPLOYMENT_TYPE=${SIGNOZ_DEPLOYMENT_TYPE:-docker-standalone-amd}
|
|
||||||
depends_on:
|
|
||||||
clickhouse:
|
|
||||||
condition: service_healthy
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpus: ${SIGNOZ_QUERY_SERVICE_CPU_LIMIT:-1.0}
|
|
||||||
memory: ${SIGNOZ_QUERY_SERVICE_MEMORY_LIMIT:-1G}
|
|
||||||
reservations:
|
|
||||||
cpus: ${SIGNOZ_QUERY_SERVICE_CPU_RESERVATION:-0.25}
|
|
||||||
memory: ${SIGNOZ_QUERY_SERVICE_MEMORY_RESERVATION:-256M}
|
|
||||||
healthcheck:
|
|
||||||
test:
|
|
||||||
- CMD
|
|
||||||
- wget
|
|
||||||
- --no-verbose
|
|
||||||
- --tries=1
|
|
||||||
- --spider
|
|
||||||
- 'http://localhost:8080/api/v1/health'
|
|
||||||
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 3
|
|
||||||
start_period: 30s
|
|
||||||
|
|
||||||
# Frontend for the UI
|
|
||||||
frontend:
|
|
||||||
<<: *defaults
|
|
||||||
image: ${GLOBAL_REGISTRY:-}signoz/frontend:${SIGNOZ_FRONTEND_VERSION:-0.55.0}
|
|
||||||
ports:
|
|
||||||
- '${SIGNOZ_PORT_OVERRIDE:-3301}:3301'
|
|
||||||
volumes:
|
|
||||||
- ./frontend/nginx-config.conf:/etc/nginx/conf.d/default.conf:ro
|
|
||||||
environment:
|
|
||||||
- TZ=${TZ:-UTC}
|
|
||||||
depends_on:
|
|
||||||
query-service:
|
|
||||||
condition: service_healthy
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpus: ${SIGNOZ_FRONTEND_CPU_LIMIT:-0.5}
|
|
||||||
memory: ${SIGNOZ_FRONTEND_MEMORY_LIMIT:-512M}
|
|
||||||
reservations:
|
|
||||||
cpus: ${SIGNOZ_FRONTEND_CPU_RESERVATION:-0.1}
|
|
||||||
memory: ${SIGNOZ_FRONTEND_MEMORY_RESERVATION:-128M}
|
|
||||||
healthcheck:
|
|
||||||
test:
|
|
||||||
- CMD
|
|
||||||
- wget
|
|
||||||
- --no-verbose
|
|
||||||
- --tries=1
|
|
||||||
- --spider
|
|
||||||
- 'http://localhost:3301/api/v1/health'
|
|
||||||
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 3
|
|
||||||
start_period: 10s
|
|
||||||
|
|
||||||
# Alert Manager for managing alerts
|
|
||||||
alertmanager:
|
|
||||||
<<: *defaults
|
|
||||||
image: ${GLOBAL_REGISTRY:-}signoz/alertmanager:${SIGNOZ_ALERTMANAGER_VERSION:-0.23.5}
|
|
||||||
command:
|
|
||||||
- --queryService.url=http://query-service:8080
|
|
||||||
- --storage.path=/data
|
|
||||||
volumes:
|
|
||||||
- alertmanager_data:/data
|
|
||||||
environment:
|
|
||||||
- TZ=${TZ:-UTC}
|
|
||||||
depends_on:
|
|
||||||
query-service:
|
|
||||||
condition: service_healthy
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpus: ${SIGNOZ_ALERTMANAGER_CPU_LIMIT:-0.5}
|
|
||||||
memory: ${SIGNOZ_ALERTMANAGER_MEMORY_LIMIT:-512M}
|
|
||||||
reservations:
|
|
||||||
cpus: ${SIGNOZ_ALERTMANAGER_CPU_RESERVATION:-0.1}
|
|
||||||
memory: ${SIGNOZ_ALERTMANAGER_MEMORY_RESERVATION:-128M}
|
|
||||||
healthcheck:
|
|
||||||
test:
|
|
||||||
- CMD
|
|
||||||
- wget
|
|
||||||
- --no-verbose
|
|
||||||
- --tries=1
|
|
||||||
- --spider
|
|
||||||
- 'http://localhost:9093/-/healthy'
|
|
||||||
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 3
|
|
||||||
start_period: 10s
|
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
clickhouse_data:
|
clickhouse_data:
|
||||||
|
clickhouse_user_scripts:
|
||||||
signoz_data:
|
signoz_data:
|
||||||
alertmanager_data:
|
zookeeper_data:
|
||||||
|
|||||||
@@ -0,0 +1,114 @@
|
|||||||
|
connectors:
|
||||||
|
signozmeter:
|
||||||
|
metrics_flush_interval: 1h
|
||||||
|
dimensions:
|
||||||
|
- name: service.name
|
||||||
|
- name: deployment.environment
|
||||||
|
- name: host.name
|
||||||
|
receivers:
|
||||||
|
otlp:
|
||||||
|
protocols:
|
||||||
|
grpc:
|
||||||
|
endpoint: 0.0.0.0:4317
|
||||||
|
http:
|
||||||
|
endpoint: 0.0.0.0:4318
|
||||||
|
prometheus:
|
||||||
|
config:
|
||||||
|
global:
|
||||||
|
scrape_interval: 60s
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: otel-collector
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- localhost:8888
|
||||||
|
labels:
|
||||||
|
job_name: otel-collector
|
||||||
|
processors:
|
||||||
|
batch:
|
||||||
|
send_batch_size: 10000
|
||||||
|
send_batch_max_size: 11000
|
||||||
|
timeout: 10s
|
||||||
|
batch/meter:
|
||||||
|
send_batch_max_size: 25000
|
||||||
|
send_batch_size: 20000
|
||||||
|
timeout: 1s
|
||||||
|
resourcedetection:
|
||||||
|
detectors: [env, system]
|
||||||
|
timeout: 2s
|
||||||
|
signozspanmetrics/delta:
|
||||||
|
metrics_exporter: signozclickhousemetrics
|
||||||
|
metrics_flush_interval: 60s
|
||||||
|
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
|
||||||
|
dimensions_cache_size: 100000
|
||||||
|
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
|
||||||
|
enable_exp_histogram: true
|
||||||
|
dimensions:
|
||||||
|
- name: service.namespace
|
||||||
|
default: default
|
||||||
|
- name: deployment.environment
|
||||||
|
default: default
|
||||||
|
- name: signoz.collector.id
|
||||||
|
- name: service.version
|
||||||
|
- name: browser.platform
|
||||||
|
- name: browser.mobile
|
||||||
|
- name: k8s.cluster.name
|
||||||
|
- name: k8s.node.name
|
||||||
|
- name: k8s.namespace.name
|
||||||
|
- name: host.name
|
||||||
|
- name: host.type
|
||||||
|
- name: container.name
|
||||||
|
extensions:
|
||||||
|
health_check:
|
||||||
|
endpoint: 0.0.0.0:13133
|
||||||
|
pprof:
|
||||||
|
endpoint: 0.0.0.0:1777
|
||||||
|
exporters:
|
||||||
|
clickhousetraces:
|
||||||
|
datasource: tcp://clickhouse:9000/signoz_traces
|
||||||
|
low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING}
|
||||||
|
use_new_schema: true
|
||||||
|
signozclickhousemetrics:
|
||||||
|
dsn: tcp://clickhouse:9000/signoz_metrics
|
||||||
|
clickhouselogsexporter:
|
||||||
|
dsn: tcp://clickhouse:9000/signoz_logs
|
||||||
|
timeout: 10s
|
||||||
|
use_new_schema: true
|
||||||
|
signozclickhousemeter:
|
||||||
|
dsn: tcp://clickhouse:9000/signoz_meter
|
||||||
|
timeout: 45s
|
||||||
|
sending_queue:
|
||||||
|
enabled: false
|
||||||
|
metadataexporter:
|
||||||
|
cache:
|
||||||
|
provider: in_memory
|
||||||
|
dsn: tcp://clickhouse:9000/signoz_metadata
|
||||||
|
enabled: true
|
||||||
|
timeout: 45s
|
||||||
|
service:
|
||||||
|
telemetry:
|
||||||
|
logs:
|
||||||
|
encoding: json
|
||||||
|
extensions:
|
||||||
|
- health_check
|
||||||
|
- pprof
|
||||||
|
pipelines:
|
||||||
|
traces:
|
||||||
|
receivers: [otlp]
|
||||||
|
processors: [signozspanmetrics/delta, batch]
|
||||||
|
exporters: [clickhousetraces, metadataexporter, signozmeter]
|
||||||
|
metrics:
|
||||||
|
receivers: [otlp]
|
||||||
|
processors: [batch]
|
||||||
|
exporters: [signozclickhousemetrics, metadataexporter, signozmeter]
|
||||||
|
metrics/prometheus:
|
||||||
|
receivers: [prometheus]
|
||||||
|
processors: [batch]
|
||||||
|
exporters: [signozclickhousemetrics, metadataexporter, signozmeter]
|
||||||
|
logs:
|
||||||
|
receivers: [otlp]
|
||||||
|
processors: [batch]
|
||||||
|
exporters: [clickhouselogsexporter, metadataexporter, signozmeter]
|
||||||
|
metrics/meter:
|
||||||
|
receivers: [signozmeter]
|
||||||
|
processors: [batch/meter]
|
||||||
|
exporters: [signozclickhousemeter]
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
server_endpoint: ws://signoz:4320/v1/opamp
|
||||||
Reference in New Issue
Block a user