feat: add more otel services

This commit is contained in:
Sun-ZhenXing
2026-01-11 23:42:34 +08:00
parent ea5eadfcec
commit 51fd7ea08b
28 changed files with 2358 additions and 70 deletions
+151
View File
@@ -0,0 +1,151 @@
# Global registry prefix (optional, e.g., docker.io/, ghcr.io/)
# The Compose file prepends this value verbatim to every image name, so a
# non-empty value must end with a trailing slash.
# Leave empty to pull from Docker Hub by default
GLOBAL_REGISTRY=
# Timezone setting for all containers (passed to each service as TZ)
TZ=UTC
# ============================================
# SigNoz Version Configuration
# ============================================
# Each value is used as the image tag of the corresponding service.
# SigNoz ClickHouse version (tag of clickhouse/clickhouse-server)
SIGNOZ_CLICKHOUSE_VERSION=24.11.1-alpine
# SigNoz OTel Collector version (tag of signoz/signoz-otel-collector)
SIGNOZ_OTEL_COLLECTOR_VERSION=0.102.8
# SigNoz Query Service version (tag of signoz/query-service)
SIGNOZ_QUERY_SERVICE_VERSION=0.55.0
# SigNoz Frontend version (tag of signoz/frontend)
SIGNOZ_FRONTEND_VERSION=0.55.0
# SigNoz Alert Manager version (tag of signoz/alertmanager)
SIGNOZ_ALERTMANAGER_VERSION=0.23.5
# ============================================
# Port Configuration
# ============================================
# These values change only the host side of each port mapping; the container
# ports (3301, 4317, 4318) are fixed in the Compose file.
# SigNoz Frontend UI port (default: 3301)
SIGNOZ_PORT_OVERRIDE=3301
# OTel Collector OTLP gRPC port (default: 4317)
SIGNOZ_OTEL_GRPC_PORT_OVERRIDE=4317
# OTel Collector OTLP HTTP port (default: 4318)
SIGNOZ_OTEL_HTTP_PORT_OVERRIDE=4318
# ============================================
# ClickHouse Configuration
# ============================================
# ClickHouse database name (passed to the clickhouse container as CLICKHOUSE_DB)
SIGNOZ_CLICKHOUSE_DB=signoz
# ClickHouse connection URL (passed to the query service as ClickHouseUrl).
# The database name in the query string is written out literally and is not
# derived from SIGNOZ_CLICKHOUSE_DB; change both together.
SIGNOZ_CLICKHOUSE_URL=tcp://clickhouse:9000/?database=signoz
# ============================================
# Query Service Configuration
# ============================================
# All values in this section are passed to the query-service container.
# Storage type (clickhouse), exported as STORAGE
SIGNOZ_STORAGE=clickhouse
# Go debug settings, exported as GODEBUG
SIGNOZ_GODEBUG=netdns=go
# Telemetry enabled (true/false), exported as TELEMETRY_ENABLED
SIGNOZ_TELEMETRY_ENABLED=true
# Deployment type, exported as DEPLOYMENT_TYPE
SIGNOZ_DEPLOYMENT_TYPE=docker-standalone-amd
# ============================================
# OTel Collector Configuration
# ============================================
# OTel resource attributes (passed to the collector as OTEL_RESOURCE_ATTRIBUTES)
SIGNOZ_OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host
# ============================================
# Resource Limits - ClickHouse
# ============================================
# In every "Resource Limits" section, *_LIMIT values feed the service's
# deploy.resources.limits and *_RESERVATION values feed
# deploy.resources.reservations in the Compose file.
# CPU limit for ClickHouse
SIGNOZ_CLICKHOUSE_CPU_LIMIT=2.0
# Memory limit for ClickHouse
SIGNOZ_CLICKHOUSE_MEMORY_LIMIT=4G
# CPU reservation for ClickHouse
SIGNOZ_CLICKHOUSE_CPU_RESERVATION=0.5
# Memory reservation for ClickHouse
SIGNOZ_CLICKHOUSE_MEMORY_RESERVATION=1G
# ============================================
# Resource Limits - OTel Collector
# ============================================
# CPU limit for OTel Collector
SIGNOZ_OTEL_COLLECTOR_CPU_LIMIT=1.0
# Memory limit for OTel Collector
SIGNOZ_OTEL_COLLECTOR_MEMORY_LIMIT=2G
# CPU reservation for OTel Collector
SIGNOZ_OTEL_COLLECTOR_CPU_RESERVATION=0.25
# Memory reservation for OTel Collector
SIGNOZ_OTEL_COLLECTOR_MEMORY_RESERVATION=512M
# ============================================
# Resource Limits - Query Service
# ============================================
# CPU limit for Query Service
SIGNOZ_QUERY_SERVICE_CPU_LIMIT=1.0
# Memory limit for Query Service
SIGNOZ_QUERY_SERVICE_MEMORY_LIMIT=1G
# CPU reservation for Query Service
SIGNOZ_QUERY_SERVICE_CPU_RESERVATION=0.25
# Memory reservation for Query Service
SIGNOZ_QUERY_SERVICE_MEMORY_RESERVATION=256M
# ============================================
# Resource Limits - Frontend
# ============================================
# CPU limit for Frontend
SIGNOZ_FRONTEND_CPU_LIMIT=0.5
# Memory limit for Frontend
SIGNOZ_FRONTEND_MEMORY_LIMIT=512M
# CPU reservation for Frontend
SIGNOZ_FRONTEND_CPU_RESERVATION=0.1
# Memory reservation for Frontend
SIGNOZ_FRONTEND_MEMORY_RESERVATION=128M
# ============================================
# Resource Limits - Alert Manager
# ============================================
# CPU limit for Alert Manager
SIGNOZ_ALERTMANAGER_CPU_LIMIT=0.5
# Memory limit for Alert Manager
SIGNOZ_ALERTMANAGER_MEMORY_LIMIT=512M
# CPU reservation for Alert Manager
SIGNOZ_ALERTMANAGER_CPU_RESERVATION=0.1
# Memory reservation for Alert Manager
SIGNOZ_ALERTMANAGER_MEMORY_RESERVATION=128M
+148
View File
@@ -0,0 +1,148 @@
# SigNoz
[English](README.md) | [中文](README.zh.md)
SigNoz is an open-source observability platform that provides monitoring and troubleshooting capabilities for distributed applications. It offers traces, metrics, and logs in a single platform, similar to DataDog or New Relic.
## Features
- **Distributed Tracing**: Track requests across microservices
- **Metrics Monitoring**: Collect and visualize application and infrastructure metrics
- **Log Management**: Centralized log aggregation and analysis
- **Service Maps**: Visualize service dependencies and performance
- **Alerts**: Configure alerts based on metrics and traces
- **OpenTelemetry Native**: Built on top of OpenTelemetry standards
## Quick Start
1. Copy the environment file and adjust if needed:
```bash
cp .env.example .env
```
2. Create the directories for the configuration files, then add the files listed under "Required Configuration Files" below:
```bash
mkdir -p query-service frontend
# Download or create configuration files as needed
```
3. Start the services:
```bash
docker compose up -d
```
4. Access SigNoz UI at `http://localhost:3301`
## Default Ports
| Service | Port | Description |
| --------------------- | ---- | -------------------- |
| Frontend UI | 3301 | SigNoz web interface |
| OTel Collector (gRPC) | 4317 | OTLP gRPC receiver |
| OTel Collector (HTTP) | 4318 | OTLP HTTP receiver |
## Configuration
### Environment Variables
Key environment variables (see `.env.example` for complete list):
- `SIGNOZ_PORT_OVERRIDE`: Frontend UI port (default: 3301)
- `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE`: OTLP gRPC receiver port (default: 4317)
- `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE`: OTLP HTTP receiver port (default: 4318)
- `SIGNOZ_CLICKHOUSE_VERSION`: ClickHouse version
- `SIGNOZ_QUERY_SERVICE_VERSION`: Query service version
- `SIGNOZ_FRONTEND_VERSION`: Frontend version
### Required Configuration Files
This setup requires several configuration files:
1. **clickhouse-config.xml**: ClickHouse server configuration
2. **clickhouse-users.xml**: ClickHouse user configuration
3. **otel-collector-config.yaml**: OTel Collector pipeline configuration
4. **query-service/prometheus.yml**: Query service Prometheus configuration
5. **frontend/nginx-config.conf**: Nginx configuration for frontend
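You can obtain these files from the [official SigNoz repository](https://github.com/SigNoz/signoz/tree/v0.55.0/deploy/docker/clickhouse-setup). Use the tag that matches `SIGNOZ_QUERY_SERVICE_VERSION`, because the directory layout on `main` may differ.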
### Sending Telemetry Data
To send telemetry data to SigNoz, configure your application to use OpenTelemetry with the following endpoints:
- **gRPC**: `localhost:4317`
- **HTTP**: `localhost:4318`
Example for Node.js:
```javascript
const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node');
const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-grpc');
const exporter = new OTLPTraceExporter({
url: 'http://localhost:4317',
});
```
## Architecture
SigNoz consists of the following components:
1. **ClickHouse**: Columnar OLAP database that stores traces, metrics, and logs
2. **OTel Collector**: Receives, processes, and exports telemetry data
3. **Query Service**: Queries data from ClickHouse
4. **Frontend**: Web UI for visualization and analysis
5. **Alert Manager**: Manages and sends alerts
## Resource Requirements
Minimum recommended resources:
- **CPU**: 4 cores
- **Memory**: 8GB RAM
- **Storage**: 20GB for data
## Data Persistence
Data is persisted in Docker volumes:
- `clickhouse_data`: ClickHouse database files
- `signoz_data`: SigNoz application data
- `alertmanager_data`: Alert manager data
## Security Considerations
- Change default credentials if applicable
- Use environment variables for sensitive configuration
- Consider using secrets management for production deployments
- Restrict network access to necessary ports only
- Enable authentication for production use
## Healthchecks
All services include healthchecks to ensure proper startup and dependency management:
- ClickHouse: HTTP health endpoint
- OTel Collector: HTTP health endpoint
- Query Service: HTTP health endpoint
- Frontend: HTTP health endpoint
- Alert Manager: HTTP health endpoint
## Troubleshooting
1. **Services not starting**: Check logs with `docker compose logs`
2. **No data visible**: Verify OTel Collector configuration and application instrumentation
3. **High memory usage**: Adjust ClickHouse memory limits or data retention policies
## License
SigNoz is licensed under the MIT License. See the [official repository](https://github.com/SigNoz/signoz) for more details.
## References
- [Official Documentation](https://signoz.io/docs/)
- [GitHub Repository](https://github.com/SigNoz/signoz)
- [OpenTelemetry Documentation](https://opentelemetry.io/docs/)
+148
View File
@@ -0,0 +1,148 @@
# SigNoz
[English](README.md) | [中文](README.zh.md)
SigNoz 是一个开源的可观测性平台,为分布式应用程序提供监控和故障排查能力。它在单一平台中提供追踪、指标和日志功能,类似于 DataDog 或 New Relic。
## 功能特性
- **分布式追踪**:跨微服务追踪请求
- **指标监控**:收集和可视化应用程序及基础设施指标
- **日志管理**:集中式日志聚合和分析
- **服务地图**:可视化服务依赖关系和性能
- **告警**:基于指标和追踪配置告警
- **OpenTelemetry 原生**:构建在 OpenTelemetry 标准之上
## 快速开始
1. 复制环境文件并根据需要调整:
```bash
cp .env.example .env
```
2. 创建所需的配置文件:
```bash
mkdir -p query-service frontend
# 根据需要下载或创建配置文件
```
3. 启动服务:
```bash
docker compose up -d
```
4. 访问 SigNoz UI:`http://localhost:3301`
## 默认端口
| 服务 | 端口 | 描述 |
| --------------------- | ---- | ---------------- |
| Frontend UI | 3301 | SigNoz Web 界面 |
| OTel Collector (gRPC) | 4317 | OTLP gRPC 接收器 |
| OTel Collector (HTTP) | 4318 | OTLP HTTP 接收器 |
## 配置说明
### 环境变量
主要环境变量(完整列表请查看 `.env.example`):
- `SIGNOZ_PORT_OVERRIDE`:前端 UI 端口(默认:3301)
- `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE`:OTLP gRPC 接收器端口(默认:4317)
- `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE`:OTLP HTTP 接收器端口(默认:4318)
- `SIGNOZ_CLICKHOUSE_VERSION`:ClickHouse 版本
- `SIGNOZ_QUERY_SERVICE_VERSION`:查询服务版本
- `SIGNOZ_FRONTEND_VERSION`:前端版本
### 必需的配置文件
此设置需要以下配置文件:
1. **clickhouse-config.xml**:ClickHouse 服务器配置
2. **clickhouse-users.xml**:ClickHouse 用户配置
3. **otel-collector-config.yaml**:OTel Collector 管道配置
4. **query-service/prometheus.yml**:查询服务 Prometheus 配置
5. **frontend/nginx-config.conf**:前端 Nginx 配置
您可以从 [SigNoz 官方仓库](https://github.com/SigNoz/signoz/tree/v0.55.0/deploy/docker/clickhouse-setup) 获取这些文件。请使用与 `SIGNOZ_QUERY_SERVICE_VERSION` 对应的标签,因为 `main` 分支的目录结构可能不同。
### 发送遥测数据
要向 SigNoz 发送遥测数据,请配置您的应用程序使用 OpenTelemetry 并使用以下端点:
- **gRPC**:`localhost:4317`
- **HTTP**:`localhost:4318`
Node.js 示例:
```javascript
const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node');
const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-grpc');
const exporter = new OTLPTraceExporter({
url: 'http://localhost:4317',
});
```
## 架构说明
SigNoz 由以下组件组成:
1. **ClickHouse**:用于存储追踪、指标和日志的列式 OLAP 数据库
2. **OTel Collector**:接收、处理和导出遥测数据
3. **Query Service**:从 ClickHouse 查询数据
4. **Frontend**:用于可视化和分析的 Web UI
5. **Alert Manager**:管理和发送告警
## 资源需求
最低推荐资源:
- **CPU**:4 核
- **内存**:8GB RAM
- **存储**:20GB 数据存储空间
## 数据持久化
数据持久化在 Docker 卷中:
- `clickhouse_data`:ClickHouse 数据库文件
- `signoz_data`:SigNoz 应用程序数据
- `alertmanager_data`:告警管理器数据
## 安全注意事项
- 如适用,请更改默认凭据
- 使用环境变量配置敏感信息
- 生产环境部署时考虑使用密钥管理
- 仅开放必要的端口,并限制对这些端口的网络访问
- 生产环境请启用身份验证
## 健康检查
所有服务都包含健康检查以确保正确启动和依赖管理:
- ClickHouse:HTTP 健康端点
- OTel Collector:HTTP 健康端点
- Query Service:HTTP 健康端点
- Frontend:HTTP 健康端点
- Alert Manager:HTTP 健康端点
## 故障排查
1. **服务未启动**:使用 `docker compose logs` 检查日志
2. **无数据显示**:检查 OTel Collector 配置以及应用程序的埋点(instrumentation)是否正确
3. **内存使用过高**:调整 ClickHouse 内存限制或数据保留策略
## 许可证
SigNoz 采用 MIT 许可证。详情请参阅 [官方仓库](https://github.com/SigNoz/signoz)。
## 参考资料
- [官方文档](https://signoz.io/docs/)
- [GitHub 仓库](https://github.com/SigNoz/signoz)
- [OpenTelemetry 文档](https://opentelemetry.io/docs/)
+202
View File
@@ -0,0 +1,202 @@
# Baseline settings merged into every non-ClickHouse service via `<<: *defaults`.
x-defaults: &defaults
  # Rotate container logs: at most 3 json-file logs of 100m each.
  logging:
    driver: json-file
    options:
      max-file: "3"
      max-size: "100m"
  restart: unless-stopped
# Settings merged into the ClickHouse service via `<<: *clickhouse-defaults`:
# restart policy, log rotation, healthcheck and resource limits.
x-clickhouse-defaults: &clickhouse-defaults
  restart: on-failure
  logging:
    driver: json-file
    options:
      max-size: 100m
      max-file: "3"
  healthcheck:
    # Probes the ClickHouse HTTP interface's /ping endpoint from inside the container.
    test: ["CMD", "wget", "--spider", "-q", "localhost:8123/ping"]
    interval: 30s
    timeout: 5s
    retries: 3
    # Failed probes during this window do not count towards `retries`.
    # The other services in this file already set a start_period; ClickHouse
    # needs one too because other services wait for it to become healthy.
    start_period: 30s
  deploy:
    resources:
      limits:
        cpus: "${SIGNOZ_CLICKHOUSE_CPU_LIMIT:-2.0}"
        memory: "${SIGNOZ_CLICKHOUSE_MEMORY_LIMIT:-4G}"
      reservations:
        cpus: "${SIGNOZ_CLICKHOUSE_CPU_RESERVATION:-0.5}"
        memory: "${SIGNOZ_CLICKHOUSE_MEMORY_RESERVATION:-1G}"
# Startup order enforced by depends_on/service_healthy:
#   clickhouse -> otel-collector, query-service -> frontend, alertmanager
services:
  # ClickHouse for storing traces, metrics and logs.
  # Restart policy, logging, healthcheck and resource limits come from the
  # x-clickhouse-defaults anchor.
  clickhouse:
    <<: *clickhouse-defaults
    image: ${GLOBAL_REGISTRY:-}clickhouse/clickhouse-server:${SIGNOZ_CLICKHOUSE_VERSION:-24.11.1-alpine}
    user: "101:101"  # ClickHouse user
    volumes:
      - clickhouse_data:/var/lib/clickhouse
      # Both override files are mounted read-only under the name logging.xml,
      # one in config.d and one in users.d.
      - ./clickhouse-config.xml:/etc/clickhouse-server/config.d/logging.xml:ro
      - ./clickhouse-users.xml:/etc/clickhouse-server/users.d/logging.xml:ro
    environment:
      - TZ=${TZ:-UTC}
      - CLICKHOUSE_DB=${SIGNOZ_CLICKHOUSE_DB:-signoz}

  # OTel Collector for receiving telemetry data.
  # NOTE(review): upstream SigNoz compose files also run a schema-migrator job
  # (and ZooKeeper) before the collector starts; confirm whether the mounted
  # collector/ClickHouse configs need them.
  otel-collector:
    <<: *defaults
    image: ${GLOBAL_REGISTRY:-}signoz/signoz-otel-collector:${SIGNOZ_OTEL_COLLECTOR_VERSION:-0.102.8}
    command:
      - "--config=/etc/otel-collector-config.yaml"
    volumes:
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
    environment:
      - TZ=${TZ:-UTC}
      - OTEL_RESOURCE_ATTRIBUTES=${SIGNOZ_OTEL_RESOURCE_ATTRIBUTES:-host.name=signoz-host}
    ports:
      - "${SIGNOZ_OTEL_GRPC_PORT_OVERRIDE:-4317}:4317"  # OTLP gRPC receiver
      - "${SIGNOZ_OTEL_HTTP_PORT_OVERRIDE:-4318}:4318"  # OTLP HTTP receiver
    depends_on:
      clickhouse:
        condition: service_healthy
    deploy:
      resources:
        limits:
          cpus: "${SIGNOZ_OTEL_COLLECTOR_CPU_LIMIT:-1.0}"
          memory: "${SIGNOZ_OTEL_COLLECTOR_MEMORY_LIMIT:-2G}"
        reservations:
          cpus: "${SIGNOZ_OTEL_COLLECTOR_CPU_RESERVATION:-0.25}"
          memory: "${SIGNOZ_OTEL_COLLECTOR_MEMORY_RESERVATION:-512M}"
    healthcheck:
      # Presumably the collector's health_check extension; it must be enabled
      # in otel-collector-config.yaml for port 13133 to answer. TODO confirm.
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:13133/
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  # Query Service for querying data
  query-service:
    <<: *defaults
    image: ${GLOBAL_REGISTRY:-}signoz/query-service:${SIGNOZ_QUERY_SERVICE_VERSION:-0.55.0}
    command:
      - "-config=/root/config/prometheus.yml"
    volumes:
      - ./query-service/prometheus.yml:/root/config/prometheus.yml:ro
      - signoz_data:/var/lib/signoz
    environment:
      - TZ=${TZ:-UTC}
      - ClickHouseUrl=${SIGNOZ_CLICKHOUSE_URL:-tcp://clickhouse:9000/?database=signoz}
      - STORAGE=${SIGNOZ_STORAGE:-clickhouse}
      - GODEBUG=${SIGNOZ_GODEBUG:-netdns=go}
      - TELEMETRY_ENABLED=${SIGNOZ_TELEMETRY_ENABLED:-true}
      - DEPLOYMENT_TYPE=${SIGNOZ_DEPLOYMENT_TYPE:-docker-standalone-amd}
    depends_on:
      clickhouse:
        condition: service_healthy
    deploy:
      resources:
        limits:
          cpus: "${SIGNOZ_QUERY_SERVICE_CPU_LIMIT:-1.0}"
          memory: "${SIGNOZ_QUERY_SERVICE_MEMORY_LIMIT:-1G}"
        reservations:
          cpus: "${SIGNOZ_QUERY_SERVICE_CPU_RESERVATION:-0.25}"
          memory: "${SIGNOZ_QUERY_SERVICE_MEMORY_RESERVATION:-256M}"
    healthcheck:
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:8080/api/v1/health
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  # Frontend for the UI
  frontend:
    <<: *defaults
    image: ${GLOBAL_REGISTRY:-}signoz/frontend:${SIGNOZ_FRONTEND_VERSION:-0.55.0}
    ports:
      - "${SIGNOZ_PORT_OVERRIDE:-3301}:3301"
    volumes:
      - ./frontend/nginx-config.conf:/etc/nginx/conf.d/default.conf:ro
    environment:
      - TZ=${TZ:-UTC}
    depends_on:
      query-service:
        condition: service_healthy
    deploy:
      resources:
        limits:
          cpus: "${SIGNOZ_FRONTEND_CPU_LIMIT:-0.5}"
          memory: "${SIGNOZ_FRONTEND_MEMORY_LIMIT:-512M}"
        reservations:
          cpus: "${SIGNOZ_FRONTEND_CPU_RESERVATION:-0.1}"
          memory: "${SIGNOZ_FRONTEND_MEMORY_RESERVATION:-128M}"
    healthcheck:
      # NOTE(review): this path only answers if the mounted nginx config
      # proxies /api to the query service - confirm.
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:3301/api/v1/health
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  # Alert Manager for managing alerts
  alertmanager:
    <<: *defaults
    image: ${GLOBAL_REGISTRY:-}signoz/alertmanager:${SIGNOZ_ALERTMANAGER_VERSION:-0.23.5}
    command:
      # Upstream SigNoz compose files point Alertmanager at the query service's
      # private (internal) API port 8085, not the public API on 8080.
      - --queryService.url=http://query-service:8085
      - --storage.path=/data
    volumes:
      - alertmanager_data:/data
    environment:
      - TZ=${TZ:-UTC}
    depends_on:
      query-service:
        condition: service_healthy
    deploy:
      resources:
        limits:
          cpus: "${SIGNOZ_ALERTMANAGER_CPU_LIMIT:-0.5}"
          memory: "${SIGNOZ_ALERTMANAGER_MEMORY_LIMIT:-512M}"
        reservations:
          cpus: "${SIGNOZ_ALERTMANAGER_CPU_RESERVATION:-0.1}"
          memory: "${SIGNOZ_ALERTMANAGER_MEMORY_RESERVATION:-128M}"
    healthcheck:
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:9093/-/healthy
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
# Named volumes with default driver settings.
volumes:
  # Mounted at /var/lib/clickhouse in the clickhouse service
  clickhouse_data: {}
  # Mounted at /var/lib/signoz in the query-service
  signoz_data: {}
  # Mounted at /data in the alertmanager service
  alertmanager_data: {}