From 0e948befac754f5bcb1aab3328824548654234da Mon Sep 17 00:00:00 2001 From: Sun-ZhenXing <1006925066@qq.com> Date: Wed, 15 Apr 2026 15:05:16 +0800 Subject: [PATCH] refactor: signoz --- .../openlit/assets/otel-collector-config.yaml | 2 - src/signoz/.env.example | 111 +- src/signoz/README.md | 142 +- src/signoz/README.zh.md | 143 +-- src/signoz/clickhouse/cluster.xml | 75 ++ src/signoz/clickhouse/config.xml | 1142 +++++++++++++++++ src/signoz/clickhouse/custom-function.xml | 21 + src/signoz/clickhouse/users.xml | 123 ++ src/signoz/docker-compose.yaml | 326 ++--- src/signoz/otel-collector-config.yaml | 114 ++ .../signoz/otel-collector-opamp-config.yaml | 1 + 11 files changed, 1802 insertions(+), 398 deletions(-) create mode 100644 src/signoz/clickhouse/cluster.xml create mode 100644 src/signoz/clickhouse/config.xml create mode 100644 src/signoz/clickhouse/custom-function.xml create mode 100644 src/signoz/clickhouse/users.xml create mode 100644 src/signoz/otel-collector-config.yaml create mode 100644 src/signoz/signoz/otel-collector-opamp-config.yaml diff --git a/apps/openlit/assets/otel-collector-config.yaml b/apps/openlit/assets/otel-collector-config.yaml index 66c2676..a67475e 100644 --- a/apps/openlit/assets/otel-collector-config.yaml +++ b/apps/openlit/assets/otel-collector-config.yaml @@ -1,5 +1,3 @@ -file_format: '1.0' - receivers: otlp: protocols: diff --git a/src/signoz/.env.example b/src/signoz/.env.example index 4edd17c..d186e8d 100644 --- a/src/signoz/.env.example +++ b/src/signoz/.env.example @@ -9,27 +9,31 @@ TZ=UTC # SigNoz Version Configuration # ============================================ -# SigNoz ClickHouse version -SIGNOZ_CLICKHOUSE_VERSION=24.11.1-alpine +# SigNoz all-in-one backend + frontend + alertmanager version +SIGNOZ_VERSION=v0.118.0 -# SigNoz OTel Collector version -SIGNOZ_OTEL_COLLECTOR_VERSION=0.102.8 +# SigNoz OTel Collector version (also used for migration jobs) +SIGNOZ_OTEL_COLLECTOR_VERSION=v0.144.2 -# SigNoz Query Service version -SIGNOZ_QUERY_SERVICE_VERSION=0.55.0 +# ClickHouse version +SIGNOZ_CLICKHOUSE_VERSION=25.5.6 -# SigNoz Frontend version -SIGNOZ_FRONTEND_VERSION=0.55.0 +# ZooKeeper version +SIGNOZ_ZOOKEEPER_VERSION=3.7.1 -# SigNoz Alert Manager version -SIGNOZ_ALERTMANAGER_VERSION=0.23.5 +# ============================================ +# Security Configuration +# ============================================ + +# JWT secret for SigNoz token signing - MUST be changed in production +SIGNOZ_JWT_SECRET=please-change-this-secret # ============================================ # Port Configuration # ============================================ -# SigNoz Frontend UI port (default: 3301) -SIGNOZ_PORT_OVERRIDE=3301 +# SigNoz UI port (default: 8080) +SIGNOZ_PORT_OVERRIDE=8080 # OTel Collector OTLP gRPC port (default: 4317) SIGNOZ_OTEL_GRPC_PORT_OVERRIDE=4317 @@ -37,38 +41,15 @@ SIGNOZ_OTEL_GRPC_PORT_OVERRIDE=4317 # OTel Collector OTLP HTTP port (default: 4318) SIGNOZ_OTEL_HTTP_PORT_OVERRIDE=4318 -# ============================================ -# ClickHouse Configuration -# ============================================ - -# ClickHouse database name -SIGNOZ_CLICKHOUSE_DB=signoz - -# ClickHouse connection URL -SIGNOZ_CLICKHOUSE_URL=tcp://clickhouse:9000/?database=signoz - -# ============================================ -# Query Service Configuration -# ============================================ - -# Storage type (clickhouse) -SIGNOZ_STORAGE=clickhouse - -# Go debug settings -SIGNOZ_GODEBUG=netdns=go - -# Telemetry enabled (true/false) -SIGNOZ_TELEMETRY_ENABLED=true - -# Deployment type -SIGNOZ_DEPLOYMENT_TYPE=docker-standalone-amd - # ============================================ # OTel Collector Configuration # ============================================ # OTel resource attributes -SIGNOZ_OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host +SIGNOZ_OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux + +# Exception grouping for low-cardinality exceptions (true/false) +SIGNOZ_LOW_CARDINAL_EXCEPTION_GROUPING=false # ============================================ # Resource Limits - ClickHouse @@ -103,49 +84,33 @@ SIGNOZ_OTEL_COLLECTOR_CPU_RESERVATION=0.25 SIGNOZ_OTEL_COLLECTOR_MEMORY_RESERVATION=512M # ============================================ -# Resource Limits - Query Service +# Resource Limits - SigNoz # ============================================ -# CPU limit for Query Service -SIGNOZ_QUERY_SERVICE_CPU_LIMIT=1.0 +# CPU limit for SigNoz +SIGNOZ_CPU_LIMIT=1.0 -# Memory limit for Query Service -SIGNOZ_QUERY_SERVICE_MEMORY_LIMIT=1G +# Memory limit for SigNoz +SIGNOZ_MEMORY_LIMIT=2G -# CPU reservation for Query Service -SIGNOZ_QUERY_SERVICE_CPU_RESERVATION=0.25 +# CPU reservation for SigNoz +SIGNOZ_CPU_RESERVATION=0.25 -# Memory reservation for Query Service -SIGNOZ_QUERY_SERVICE_MEMORY_RESERVATION=256M +# Memory reservation for SigNoz +SIGNOZ_MEMORY_RESERVATION=256M # ============================================ -# Resource Limits - Frontend +# Resource Limits - ZooKeeper # ============================================ -# CPU limit for Frontend -SIGNOZ_FRONTEND_CPU_LIMIT=0.5 +# CPU limit for ZooKeeper +SIGNOZ_ZOOKEEPER_CPU_LIMIT=0.5 -# Memory limit for Frontend -SIGNOZ_FRONTEND_MEMORY_LIMIT=512M +# Memory limit for ZooKeeper +SIGNOZ_ZOOKEEPER_MEMORY_LIMIT=512M -# CPU reservation for Frontend -SIGNOZ_FRONTEND_CPU_RESERVATION=0.1 +# CPU reservation for ZooKeeper +SIGNOZ_ZOOKEEPER_CPU_RESERVATION=0.1 -# Memory reservation for Frontend -SIGNOZ_FRONTEND_MEMORY_RESERVATION=128M - -# ============================================ -# Resource Limits - Alert Manager -# ============================================ - -# CPU limit for Alert Manager -SIGNOZ_ALERTMANAGER_CPU_LIMIT=0.5 - -# Memory limit for Alert Manager -SIGNOZ_ALERTMANAGER_MEMORY_LIMIT=512M - -# CPU reservation for Alert Manager -SIGNOZ_ALERTMANAGER_CPU_RESERVATION=0.1 - -# Memory reservation for Alert Manager -SIGNOZ_ALERTMANAGER_MEMORY_RESERVATION=128M +# Memory reservation for ZooKeeper +SIGNOZ_ZOOKEEPER_MEMORY_RESERVATION=128M diff --git a/src/signoz/README.md b/src/signoz/README.md index 26d2db1..66f76d2 100644 --- a/src/signoz/README.md +++ b/src/signoz/README.md @@ -13,133 +13,109 @@ SigNoz is an open-source observability platform that provides monitoring and tro - **Alerts**: Configure alerts based on metrics and traces - **OpenTelemetry Native**: Built on top of OpenTelemetry standards +## Services + +| Service | Image | Description | +| -------------------------------- | ------------------------------------- | ------------------------------------------------------ | +| `signoz` | signoz/signoz:v0.118.0 | All-in-one backend, frontend UI, and alert manager | +| `otel-collector` | signoz/signoz-otel-collector:v0.144.2 | Receives, processes, and exports telemetry data | +| `clickhouse` | clickhouse/clickhouse-server:25.5.6 | Time-series database for traces, metrics, and logs | +| `zookeeper-1` | signoz/zookeeper:3.7.1 | ZooKeeper for ClickHouse replication metadata | +| `init-clickhouse` | clickhouse/clickhouse-server:25.5.6 | One-shot init that downloads the histogramQuantile UDF | +| `signoz-telemetrystore-migrator` | signoz/signoz-otel-collector:v0.144.2 | One-shot schema migration for ClickHouse | + ## Quick Start -1. Copy the environment file and adjust if needed: +1. Copy the environment file and set the JWT secret: ```bash cp .env.example .env + # Edit .env and set SIGNOZ_JWT_SECRET to a random string ``` -2. Create required configuration files: - - ```bash - mkdir -p query-service frontend - # Download or create configuration files as needed - ``` - -3. Start the services: +2. Start the services: ```bash docker compose up -d ``` -4. Access SigNoz UI at `http://localhost:3301` +3. Access SigNoz UI at `http://localhost:8080` + +> **Note**: On first start, `init-clickhouse` must download a binary from GitHub (~10 MB). Ensure internet access is available. ## Default Ports | Service | Port | Description | | --------------------- | ---- | -------------------- | -| Frontend UI | 3301 | SigNoz web interface | +| SigNoz UI | 8080 | SigNoz web interface | | OTel Collector (gRPC) | 4317 | OTLP gRPC receiver | | OTel Collector (HTTP) | 4318 | OTLP HTTP receiver | ## Configuration -### Environment Variables +### Key Environment Variables -Key environment variables (see `.env.example` for complete list): +| Variable | Default | Description | +| -------------------------------- | --------------------------- | ---------------------------------------------- | +| `SIGNOZ_JWT_SECRET` | `please-change-this-secret` | JWT secret for token signing — **change this** | +| `SIGNOZ_PORT_OVERRIDE` | `8080` | SigNoz UI host port | +| `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE` | `4317` | OTLP gRPC receiver host port | +| `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE` | `4318` | OTLP HTTP receiver host port | +| `SIGNOZ_VERSION` | `v0.118.0` | SigNoz image version | +| `SIGNOZ_OTEL_COLLECTOR_VERSION` | `v0.144.2` | OTel Collector image version | +| `SIGNOZ_CLICKHOUSE_VERSION` | `25.5.6` | ClickHouse image version | +| `TZ` | `UTC` | Timezone | -- `SIGNOZ_PORT_OVERRIDE`: Frontend UI port (default: 3301) -- `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE`: OTLP gRPC receiver port (default: 4317) -- `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE`: OTLP HTTP receiver port (default: 4318) -- `SIGNOZ_CLICKHOUSE_VERSION`: ClickHouse version -- `SIGNOZ_QUERY_SERVICE_VERSION`: Query service version -- `SIGNOZ_FRONTEND_VERSION`: Frontend version - -### Required Configuration Files - -This setup requires several configuration files: - -1. **clickhouse-config.xml**: ClickHouse server configuration -2. **clickhouse-users.xml**: ClickHouse user configuration -3. **otel-collector-config.yaml**: OTel Collector pipeline configuration -4. **query-service/prometheus.yml**: Query service Prometheus configuration -5. **frontend/nginx-config.conf**: Nginx configuration for frontend - -You can obtain these files from the [official SigNoz repository](https://github.com/SigNoz/signoz/tree/main/deploy/docker/clickhouse-setup). +See `.env.example` for the complete list including resource limits. ### Sending Telemetry Data -To send telemetry data to SigNoz, configure your application to use OpenTelemetry with the following endpoints: +Configure your application's OpenTelemetry SDK to export to: -- **gRPC**: `localhost:4317` -- **HTTP**: `localhost:4318` - -Example for Node.js: - -```javascript -const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-grpc') -const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node') - -const exporter = new OTLPTraceExporter({ - url: 'http://localhost:4317', -}) -``` +- **gRPC**: `http://localhost:4317` +- **HTTP**: `http://localhost:4318` ## Architecture -SigNoz consists of the following components: +```text +User → SigNoz UI (8080) → signoz backend + ↓ +App → OTel Collector (4317/4318) → ClickHouse + ↑ + Zookeeper (replication metadata) +``` -1. **ClickHouse**: Time-series database for storing traces, metrics, and logs -2. **OTel Collector**: Receives, processes, and exports telemetry data -3. **Query Service**: Queries data from ClickHouse -4. **Frontend**: Web UI for visualization and analysis -5. **Alert Manager**: Manages and sends alerts +Startup order: -## Resource Requirements +1. `init-clickhouse` downloads histogramQuantile binary → `zookeeper-1` starts +2. `clickhouse` starts (after init completes and zookeeper is healthy) +3. `signoz-telemetrystore-migrator` runs schema migrations +4. `signoz` and `otel-collector` start -Minimum recommended resources: - -- **CPU**: 4 cores -- **Memory**: 8GB RAM -- **Storage**: 20GB for data ## Data Persistence -Data is persisted in Docker volumes: +Data is persisted in Docker named volumes: -- `clickhouse_data`: ClickHouse database files -- `signoz_data`: SigNoz application data -- `alertmanager_data`: Alert manager data +| Volume | Contents | +| ------------------------- | ------------------------------------- | +| `clickhouse_data` | ClickHouse database files | +| `clickhouse_user_scripts` | histogramQuantile UDF binary | +| `signoz_data` | SigNoz SQLite DB and application data | +| `zookeeper_data` | ZooKeeper state | ## Security Considerations -- Change default credentials if applicable -- Use environment variables for sensitive configuration -- Consider using secrets management for production deployments -- Restrict network access to necessary ports only -- Enable authentication for production use - -## Healthchecks - -All services include healthchecks to ensure proper startup and dependency management: - -- ClickHouse: HTTP health endpoint -- OTel Collector: HTTP health endpoint -- Query Service: HTTP health endpoint -- Frontend: HTTP health endpoint -- Alert Manager: HTTP health endpoint +- **Change `SIGNOZ_JWT_SECRET`** to a unique random value before production use +- Restrict port exposure to trusted networks in production +- Run behind a reverse proxy with TLS termination for production ## Troubleshooting -1. **Services not starting**: Check logs with `docker compose logs` -2. **No data visible**: Verify OTel Collector configuration and application instrumentation -3. **High memory usage**: Adjust ClickHouse memory limits or data retention policies - -## License - -SigNoz is licensed under the MIT License. See the [official repository](https://github.com/SigNoz/signoz) for more details. +1. **Services not starting**: `docker compose logs` — check for connection errors +2. **init-clickhouse fails**: No internet access — the UDF binary cannot be downloaded +3. **otel-collector unhealthy**: May be waiting for migrations to settle; check with `docker compose logs signoz-telemetrystore-migrator` +4. **No data visible**: Verify OTel Collector configuration and application instrumentation ## References diff --git a/src/signoz/README.zh.md b/src/signoz/README.zh.md index c1f7aa7..60a9670 100644 --- a/src/signoz/README.zh.md +++ b/src/signoz/README.zh.md @@ -13,133 +13,108 @@ SigNoz 是一个开源的可观测性平台,为分布式应用程序提供监 - **告警**:基于指标和追踪配置告警 - **OpenTelemetry 原生**:构建在 OpenTelemetry 标准之上 +## 服务列表 + +| 服务 | 镜像 | 描述 | +| -------------------------------- | ------------------------------------- | ---------------------------------------- | +| `signoz` | signoz/signoz:v0.118.0 | 后端、前端 UI 和告警管理器的合体镜像 | +| `otel-collector` | signoz/signoz-otel-collector:v0.144.2 | 接收、处理和导出遥测数据 | +| `clickhouse` | clickhouse/clickhouse-server:25.5.6 | 存储追踪、指标和日志的时序数据库 | +| `zookeeper-1` | signoz/zookeeper:3.7.1 | ZooKeeper,用于 ClickHouse 副本元数据 | +| `init-clickhouse` | clickhouse/clickhouse-server:25.5.6 | 一次性初始化,下载 histogramQuantile UDF | +| `signoz-telemetrystore-migrator` | signoz/signoz-otel-collector:v0.144.2 | 一次性 ClickHouse Schema 迁移 | + ## 快速开始 -1. 复制环境文件并根据需要调整: +1. 复制环境文件并设置 JWT 密钥: ```bash cp .env.example .env + # 编辑 .env,将 SIGNOZ_JWT_SECRET 设置为随机字符串 ``` -2. 创建所需的配置文件: - - ```bash - mkdir -p query-service frontend - # 根据需要下载或创建配置文件 - ``` - -3. 启动服务: +2. 启动服务: ```bash docker compose up -d ``` -4. 访问 SigNoz UI:`http://localhost:3301` +3. 访问 SigNoz UI:`http://localhost:8080` + +> **注意**:首次启动时,`init-clickhouse` 需要从 GitHub 下载约 10 MB 的二进制文件,请确保网络可访问。 ## 默认端口 | 服务 | 端口 | 描述 | | --------------------- | ---- | ---------------- | -| Frontend UI | 3301 | SigNoz Web 界面 | +| SigNoz UI | 8080 | SigNoz Web 界面 | | OTel Collector (gRPC) | 4317 | OTLP gRPC 接收器 | | OTel Collector (HTTP) | 4318 | OTLP HTTP 接收器 | ## 配置说明 -### 环境变量 +### 主要环境变量 -主要环境变量(完整列表请查看 `.env.example`): +| 变量 | 默认值 | 描述 | +| -------------------------------- | --------------------------- | ----------------------------------- | +| `SIGNOZ_JWT_SECRET` | `please-change-this-secret` | JWT 签名密钥 — **生产环境必须修改** | +| `SIGNOZ_PORT_OVERRIDE` | `8080` | SigNoz UI 宿主机端口 | +| `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE` | `4317` | OTLP gRPC 接收器宿主机端口 | +| `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE` | `4318` | OTLP HTTP 接收器宿主机端口 | +| `SIGNOZ_VERSION` | `v0.118.0` | SigNoz 镜像版本 | +| `SIGNOZ_OTEL_COLLECTOR_VERSION` | `v0.144.2` | OTel Collector 镜像版本 | +| `SIGNOZ_CLICKHOUSE_VERSION` | `25.5.6` | ClickHouse 镜像版本 | +| `TZ` | `UTC` | 时区 | -- `SIGNOZ_PORT_OVERRIDE`:前端 UI 端口(默认:3301) -- `SIGNOZ_OTEL_GRPC_PORT_OVERRIDE`:OTLP gRPC 接收器端口(默认:4317) -- `SIGNOZ_OTEL_HTTP_PORT_OVERRIDE`:OTLP HTTP 接收器端口(默认:4318) -- `SIGNOZ_CLICKHOUSE_VERSION`:ClickHouse 版本 -- `SIGNOZ_QUERY_SERVICE_VERSION`:查询服务版本 -- `SIGNOZ_FRONTEND_VERSION`:前端版本 - -### 必需的配置文件 - -此设置需要以下配置文件: - -1. **clickhouse-config.xml**:ClickHouse 服务器配置 -2. **clickhouse-users.xml**:ClickHouse 用户配置 -3. **otel-collector-config.yaml**:OTel Collector 管道配置 -4. **query-service/prometheus.yml**:查询服务 Prometheus 配置 -5. **frontend/nginx-config.conf**:前端 Nginx 配置 - -您可以从 [SigNoz 官方仓库](https://github.com/SigNoz/signoz/tree/main/deploy/docker/clickhouse-setup) 获取这些文件。 +完整变量列表(含资源限制)请查看 `.env.example`。 ### 发送遥测数据 -要向 SigNoz 发送遥测数据,请配置您的应用程序使用 OpenTelemetry 并使用以下端点: +将应用程序的 OpenTelemetry SDK 配置为向以下端点导出数据: -- **gRPC**:`localhost:4317` -- **HTTP**:`localhost:4318` - -Node.js 示例: - -```javascript -const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-grpc') -const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node') - -const exporter = new OTLPTraceExporter({ - url: 'http://localhost:4317', -}) -``` +- **gRPC**:`http://localhost:4317` +- **HTTP**:`http://localhost:4318` ## 架构说明 -SigNoz 由以下组件组成: +```text +用户 → SigNoz UI(8080)→ signoz 后端 + ↓ +应用 → OTel Collector(4317/4318)→ ClickHouse + ↑ + Zookeeper(副本元数据) +``` -1. **ClickHouse**:用于存储追踪、指标和日志的时序数据库 -2. **OTel Collector**:接收、处理和导出遥测数据 -3. **Query Service**:从 ClickHouse 查询数据 -4. **Frontend**:用于可视化和分析的 Web UI -5. **Alert Manager**:管理和发送告警 +启动顺序: -## 资源需求 - -最低推荐资源: - -- **CPU**:4 核 -- **内存**:8GB RAM -- **存储**:20GB 数据存储空间 +1. `init-clickhouse` 下载 histogramQuantile 二进制文件,`zookeeper-1` 同步启动 +2. `clickhouse` 在初始化完成且 ZooKeeper 健康后启动 +3. `signoz-telemetrystore-migrator` 执行 Schema 迁移 +4. `signoz` 和 `otel-collector` 启动 ## 数据持久化 -数据持久化在 Docker 卷中: +数据持久化在 Docker 命名卷中: -- `clickhouse_data`:ClickHouse 数据库文件 -- `signoz_data`:SigNoz 应用程序数据 -- `alertmanager_data`:告警管理器数据 +| 卷名 | 内容 | +| ------------------------- | ------------------------------ | +| `clickhouse_data` | ClickHouse 数据库文件 | +| `clickhouse_user_scripts` | histogramQuantile UDF 二进制 | +| `signoz_data` | SigNoz SQLite 数据库及应用数据 | +| `zookeeper_data` | ZooKeeper 状态数据 | ## 安全注意事项 -- 如适用,请更改默认凭据 -- 使用环境变量配置敏感信息 -- 生产环境部署时考虑使用密钥管理 -- 仅限制必要端口的网络访问 -- 生产环境请启用身份验证 - -## 健康检查 - -所有服务都包含健康检查以确保正确启动和依赖管理: - -- ClickHouse:HTTP 健康端点 -- OTel Collector:HTTP 健康端点 -- Query Service:HTTP 健康端点 -- Frontend:HTTP 健康端点 -- Alert Manager:HTTP 健康端点 +- **生产环境必须将 `SIGNOZ_JWT_SECRET` 修改为唯一的随机值** +- 生产环境中限制端口仅对可信网络暴露 +- 生产环境建议在反向代理后面启用 TLS 终止 ## 故障排查 -1. **服务未启动**:使用 `docker compose logs` 检查日志 -2. **无数据显示**:验证 OTel Collector 配置和应用程序仪器化 -3. **内存使用过高**:调整 ClickHouse 内存限制或数据保留策略 - -## 许可证 - -SigNoz 采用 MIT 许可证。详情请参阅 [官方仓库](https://github.com/SigNoz/signoz)。 +1. **服务未启动**:运行 `docker compose logs` 检查连接错误 +2. **init-clickhouse 失败**:没有网络访问权限,无法下载 UDF 二进制文件 +3. **otel-collector 不健康**:可能正在等待迁移完成,通过 `docker compose logs signoz-telemetrystore-migrator` 检查 +4. **无数据显示**:验证 OTel Collector 配置和应用程序的 OpenTelemetry 仪器化 ## 参考资料 diff --git a/src/signoz/clickhouse/cluster.xml b/src/signoz/clickhouse/cluster.xml new file mode 100644 index 0000000..8b475ff --- /dev/null +++ b/src/signoz/clickhouse/cluster.xml @@ -0,0 +1,75 @@ + + + + + + zookeeper-1 + 2181 + + + + + + + + + + + + + + + + clickhouse + 9000 + + + + + + + + diff --git a/src/signoz/clickhouse/config.xml b/src/signoz/clickhouse/config.xml new file mode 100644 index 0000000..1965ac3 --- /dev/null +++ b/src/signoz/clickhouse/config.xml @@ -0,0 +1,1142 @@ + + + + + + information + + json + + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + + 1000M + 10 + + + + + + + + + + + + + + + + + + 8123 + + + 9000 + + + 9004 + + + 9005 + + + + + + + + + + + + 9009 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4096 + + + 3 + + + + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + none + + + 0 + + + -1 + -1 + + + false + + + + + + + + + + + none + true + true + sslv2,sslv3 + true + + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + + + + + 100 + + + 0 + + + + 10000 + + + + + + 0.9 + + + 4194304 + + + 0 + + + + + + 8589934592 + + + 5368709120 + + + + 1000 + + + 134217728 + + + 10000 + + + /var/lib/clickhouse/ + + + /var/lib/clickhouse/tmp/ + + + + ` + + + + + + /var/lib/clickhouse/user_files/ + + + + + + + + + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + + + + + + default + + + + + + + + + + + + default + + + + + + + + + true + + + false + + ' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + clickhouse-jdbc-bridge & + + * [CentOS/RHEL] + export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge + export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + clickhouse-jdbc-bridge & + + Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information. + ]]> + + + + + + + + + + + + + + + 01 + example01-01-1 + + + + + + 3600 + + + + 3600 + + + 60 + + + + + + + + + + /metrics + 9363 + + true + true + true + true + + + + + + system + query_log
+ + toYYYYMM(event_date) + + + + + + 7500 +
+ + + + system + trace_log
+ + toYYYYMM(event_date) + 7500 +
+ + + + system + query_thread_log
+ toYYYYMM(event_date) + 7500 +
+ + + + system + query_views_log
+ toYYYYMM(event_date) + 7500 +
+ + + + system + part_log
+ toYYYYMM(event_date) + 7500 +
+ + + + + + system + metric_log
+ 7500 + 1000 +
+ + + + system + asynchronous_metric_log
+ + 7000 +
+ + + + + + engine MergeTree + partition by toYYYYMM(finish_date) + order by (finish_date, finish_time_us, trace_id) + + system + opentelemetry_span_log
+ 7500 +
+ + + + + system + crash_log
+ + + 1000 +
+ + + + + + + system + processors_profile_log
+ + toYYYYMM(event_date) + 7500 +
+ + + + + + + + + *_dictionary.xml + + + *function.xml + /var/lib/clickhouse/user_scripts/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + /clickhouse/task_queue/ddl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + click_cost + any + + 0 + 3600 + + + 86400 + 60 + + + + max + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + + + /var/lib/clickhouse/format_schemas/ + + + + + hide encrypt/decrypt arguments + ((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\) + + \1(???) + + + + + + + + + + false + + false + + + https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 + + + + + + + + + + + 268435456 + true + +
diff --git a/src/signoz/clickhouse/custom-function.xml b/src/signoz/clickhouse/custom-function.xml new file mode 100644 index 0000000..b2b3f91 --- /dev/null +++ b/src/signoz/clickhouse/custom-function.xml @@ -0,0 +1,21 @@ + + + executable + histogramQuantile + Float64 + + Array(Float64) + buckets + + + Array(Float64) + counts + + + Float64 + quantile + + CSV + ./histogramQuantile + + diff --git a/src/signoz/clickhouse/users.xml b/src/signoz/clickhouse/users.xml new file mode 100644 index 0000000..f185620 --- /dev/null +++ b/src/signoz/clickhouse/users.xml @@ -0,0 +1,123 @@ + + + + + + + + + + 10000000000 + + + random + + + + + 1 + + + + + + + + + + + + + ::/0 + + + + default + + + default + + + + + + + + + + + + + + 3600 + + + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/src/signoz/docker-compose.yaml b/src/signoz/docker-compose.yaml index d87e390..58d89e4 100644 --- a/src/signoz/docker-compose.yaml +++ b/src/signoz/docker-compose.yaml @@ -3,61 +3,199 @@ x-defaults: &defaults logging: driver: json-file options: - max-size: 100m + max-size: 50m max-file: '3' x-clickhouse-defaults: &clickhouse-defaults - restart: on-failure - logging: - driver: json-file - options: - max-size: 100m - max-file: '3' + <<: *defaults + image: ${GLOBAL_REGISTRY:-}clickhouse/clickhouse-server:${SIGNOZ_CLICKHOUSE_VERSION:-25.5.6} + depends_on: + init-clickhouse: + condition: service_completed_successfully + zookeeper-1: + condition: service_healthy healthcheck: - test: [CMD, wget, --spider, -q, 'localhost:8123/ping'] + test: [CMD, wget, --spider, -q, '0.0.0.0:8123/ping'] interval: 30s timeout: 5s retries: 3 - deploy: - resources: - limits: - cpus: ${SIGNOZ_CLICKHOUSE_CPU_LIMIT:-2.0} - memory: ${SIGNOZ_CLICKHOUSE_MEMORY_LIMIT:-4G} - reservations: - cpus: ${SIGNOZ_CLICKHOUSE_CPU_RESERVATION:-0.5} - memory: ${SIGNOZ_CLICKHOUSE_MEMORY_RESERVATION:-1G} + ulimits: + nproc: 65535 + nofile: + soft: 262144 + hard: 262144 + +x-db-depend: &db-depend + <<: *defaults + depends_on: + clickhouse: + condition: service_healthy services: + # One-shot init: downloads histogramQuantile UDF binary into the shared volume + init-clickhouse: + image: ${GLOBAL_REGISTRY:-}clickhouse/clickhouse-server:${SIGNOZ_CLICKHOUSE_VERSION:-25.5.6} + restart: on-failure + logging: + driver: json-file + options: + max-size: 10m + max-file: '1' + command: + - bash + - -c + - | + version="v0.0.1" + node_os=$$(uname -s | tr '[:upper:]' '[:lower:]') + node_arch=$$(uname -m | sed s/aarch64/arm64/ | sed s/x86_64/amd64/) + echo "Fetching histogram-binary for $${node_os}/$${node_arch}" + cd /tmp + wget -O histogram-quantile.tar.gz "https://github.com/SigNoz/signoz/releases/download/histogram-quantile%2F$${version}/histogram-quantile_$${node_os}_$${node_arch}.tar.gz" + tar -xvzf histogram-quantile.tar.gz + mv histogram-quantile /var/lib/clickhouse/user_scripts/histogramQuantile + echo "Done." + volumes: + - clickhouse_user_scripts:/var/lib/clickhouse/user_scripts/ + deploy: + resources: + limits: + cpus: '0.5' + memory: 256M + + # ZooKeeper for ClickHouse replication metadata + zookeeper-1: + <<: *defaults + image: ${GLOBAL_REGISTRY:-}signoz/zookeeper:${SIGNOZ_ZOOKEEPER_VERSION:-3.7.1} + user: root + environment: + - TZ=${TZ:-UTC} + - ZOO_SERVER_ID=1 + - ALLOW_ANONYMOUS_LOGIN=yes + - ZOO_AUTOPURGE_INTERVAL=1 + - ZOO_ENABLE_PROMETHEUS_METRICS=yes + - ZOO_PROMETHEUS_METRICS_PORT_NUMBER=9141 + volumes: + - zookeeper_data:/bitnami/zookeeper + healthcheck: + test: [CMD-SHELL, 'curl -s -m 2 http://localhost:8080/commands/ruok | grep error | grep null'] + interval: 30s + timeout: 5s + retries: 3 + start_period: 30s + deploy: + resources: + limits: + cpus: ${SIGNOZ_ZOOKEEPER_CPU_LIMIT:-0.5} + memory: ${SIGNOZ_ZOOKEEPER_MEMORY_LIMIT:-512M} + reservations: + cpus: ${SIGNOZ_ZOOKEEPER_CPU_RESERVATION:-0.1} + memory: ${SIGNOZ_ZOOKEEPER_MEMORY_RESERVATION:-128M} + # ClickHouse for storing traces, metrics and logs clickhouse: <<: *clickhouse-defaults - image: ${GLOBAL_REGISTRY:-}clickhouse/clickhouse-server:${SIGNOZ_CLICKHOUSE_VERSION:-24.11.1-alpine} - user: '101:101' # ClickHouse user - volumes: - - clickhouse_data:/var/lib/clickhouse - - ./clickhouse-config.xml:/etc/clickhouse-server/config.d/logging.xml:ro - - ./clickhouse-users.xml:/etc/clickhouse-server/users.d/logging.xml:ro environment: - TZ=${TZ:-UTC} - - CLICKHOUSE_DB=${SIGNOZ_CLICKHOUSE_DB:-signoz} + - CLICKHOUSE_SKIP_USER_SETUP=1 + volumes: + - clickhouse_data:/var/lib/clickhouse/ + - clickhouse_user_scripts:/var/lib/clickhouse/user_scripts/ + - ./clickhouse/config.xml:/etc/clickhouse-server/config.xml:ro + - ./clickhouse/users.xml:/etc/clickhouse-server/users.xml:ro + - ./clickhouse/custom-function.xml:/etc/clickhouse-server/custom-function.xml:ro + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml:ro + deploy: + resources: + limits: + cpus: ${SIGNOZ_CLICKHOUSE_CPU_LIMIT:-2.0} + memory: ${SIGNOZ_CLICKHOUSE_MEMORY_LIMIT:-4G} + reservations: + cpus: ${SIGNOZ_CLICKHOUSE_CPU_RESERVATION:-0.5} + memory: ${SIGNOZ_CLICKHOUSE_MEMORY_RESERVATION:-1G} + + # One-shot migration: bootstraps and runs schema migrations + signoz-telemetrystore-migrator: + <<: *db-depend + image: ${GLOBAL_REGISTRY:-}signoz/signoz-otel-collector:${SIGNOZ_OTEL_COLLECTOR_VERSION:-v0.144.2} + restart: on-failure + environment: + - TZ=${TZ:-UTC} + - SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_DSN=tcp://clickhouse:9000 + - SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_CLUSTER=cluster + - SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_REPLICATION=true + - SIGNOZ_OTEL_COLLECTOR_TIMEOUT=10m + entrypoint: [/bin/sh] + command: + - -c + - | + /signoz-otel-collector migrate bootstrap && + /signoz-otel-collector migrate sync up && + /signoz-otel-collector migrate async up + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + + # SigNoz all-in-one backend + frontend + alertmanager + signoz: + <<: *db-depend + image: ${GLOBAL_REGISTRY:-}signoz/signoz:${SIGNOZ_VERSION:-v0.118.0} + ports: + - '${SIGNOZ_PORT_OVERRIDE:-8080}:8080' + volumes: + - signoz_data:/var/lib/signoz/ + environment: + - TZ=${TZ:-UTC} + - SIGNOZ_ALERTMANAGER_PROVIDER=signoz + - SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN=tcp://clickhouse:9000 + - SIGNOZ_SQLSTORE_SQLITE_PATH=/var/lib/signoz/signoz.db + - SIGNOZ_TOKENIZER_JWT_SECRET=${SIGNOZ_JWT_SECRET:-please-change-this-secret} + healthcheck: + test: [CMD, wget, --spider, -q, 'localhost:8080/api/v1/health'] + interval: 30s + timeout: 5s + retries: 3 + start_period: 60s + deploy: + resources: + limits: + cpus: ${SIGNOZ_CPU_LIMIT:-1.0} + memory: ${SIGNOZ_MEMORY_LIMIT:-2G} + reservations: + cpus: ${SIGNOZ_CPU_RESERVATION:-0.25} + memory: ${SIGNOZ_MEMORY_RESERVATION:-256M} # OTel Collector for receiving telemetry data otel-collector: - <<: *defaults - image: ${GLOBAL_REGISTRY:-}signoz/signoz-otel-collector:${SIGNOZ_OTEL_COLLECTOR_VERSION:-0.102.8} + <<: *db-depend + image: ${GLOBAL_REGISTRY:-}signoz/signoz-otel-collector:${SIGNOZ_OTEL_COLLECTOR_VERSION:-v0.144.2} + entrypoint: [/bin/sh] command: - - --config=/etc/otel-collector-config.yaml + - -c + - | + /signoz-otel-collector migrate sync check && + /signoz-otel-collector --config=/etc/otel-collector-config.yaml --manager-config=/etc/manager-config.yaml --copy-path=/var/tmp/collector-config.yaml volumes: - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro + - ./signoz/otel-collector-opamp-config.yaml:/etc/manager-config.yaml:ro environment: - TZ=${TZ:-UTC} - - OTEL_RESOURCE_ATTRIBUTES=${SIGNOZ_OTEL_RESOURCE_ATTRIBUTES:-host.name=signoz-host} + - OTEL_RESOURCE_ATTRIBUTES=${SIGNOZ_OTEL_RESOURCE_ATTRIBUTES:-host.name=signoz-host,os.type=linux} + - LOW_CARDINAL_EXCEPTION_GROUPING=${SIGNOZ_LOW_CARDINAL_EXCEPTION_GROUPING:-false} + - SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_DSN=tcp://clickhouse:9000 + - SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_CLUSTER=cluster + - SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_REPLICATION=true + - SIGNOZ_OTEL_COLLECTOR_TIMEOUT=10m ports: - '${SIGNOZ_OTEL_GRPC_PORT_OVERRIDE:-4317}:4317' # OTLP gRPC receiver - '${SIGNOZ_OTEL_HTTP_PORT_OVERRIDE:-4318}:4318' # OTLP HTTP receiver - depends_on: - clickhouse: - condition: service_healthy + healthcheck: + test: [CMD, bash, -c, echo > /dev/tcp/localhost/13133] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s deploy: resources: limits: @@ -66,133 +204,9 @@ services: reservations: cpus: ${SIGNOZ_OTEL_COLLECTOR_CPU_RESERVATION:-0.25} memory: ${SIGNOZ_OTEL_COLLECTOR_MEMORY_RESERVATION:-512M} - healthcheck: - test: - - CMD - - wget - - --no-verbose - - --tries=1 - - --spider - - 'http://localhost:13133/' - - interval: 30s - timeout: 10s - retries: 3 - start_period: 30s - - # Query Service for querying data - query-service: - <<: *defaults - image: ${GLOBAL_REGISTRY:-}signoz/query-service:${SIGNOZ_QUERY_SERVICE_VERSION:-0.55.0} - command: - - -config=/root/config/prometheus.yml - volumes: - - ./query-service/prometheus.yml:/root/config/prometheus.yml:ro - - signoz_data:/var/lib/signoz - environment: - - TZ=${TZ:-UTC} - - ClickHouseUrl=${SIGNOZ_CLICKHOUSE_URL:-tcp://clickhouse:9000/?database=signoz} - - STORAGE=${SIGNOZ_STORAGE:-clickhouse} - - GODEBUG=${SIGNOZ_GODEBUG:-netdns=go} - - TELEMETRY_ENABLED=${SIGNOZ_TELEMETRY_ENABLED:-true} - - DEPLOYMENT_TYPE=${SIGNOZ_DEPLOYMENT_TYPE:-docker-standalone-amd} - depends_on: - clickhouse: - condition: service_healthy - deploy: - resources: - limits: - cpus: ${SIGNOZ_QUERY_SERVICE_CPU_LIMIT:-1.0} - memory: ${SIGNOZ_QUERY_SERVICE_MEMORY_LIMIT:-1G} - reservations: - cpus: ${SIGNOZ_QUERY_SERVICE_CPU_RESERVATION:-0.25} - memory: ${SIGNOZ_QUERY_SERVICE_MEMORY_RESERVATION:-256M} - healthcheck: - test: - - CMD - - wget - - --no-verbose - - --tries=1 - - --spider - - 'http://localhost:8080/api/v1/health' - - interval: 30s - timeout: 10s - retries: 3 - start_period: 30s - - # Frontend for the UI - frontend: - <<: *defaults - image: ${GLOBAL_REGISTRY:-}signoz/frontend:${SIGNOZ_FRONTEND_VERSION:-0.55.0} - ports: - - '${SIGNOZ_PORT_OVERRIDE:-3301}:3301' - volumes: - - ./frontend/nginx-config.conf:/etc/nginx/conf.d/default.conf:ro - environment: - - TZ=${TZ:-UTC} - depends_on: - query-service: - condition: service_healthy - deploy: - resources: - limits: - cpus: ${SIGNOZ_FRONTEND_CPU_LIMIT:-0.5} - memory: ${SIGNOZ_FRONTEND_MEMORY_LIMIT:-512M} - reservations: - cpus: ${SIGNOZ_FRONTEND_CPU_RESERVATION:-0.1} - memory: ${SIGNOZ_FRONTEND_MEMORY_RESERVATION:-128M} - healthcheck: - test: - - CMD - - wget - - --no-verbose - - --tries=1 - - --spider - - 'http://localhost:3301/api/v1/health' - - interval: 30s - timeout: 10s - retries: 3 - start_period: 10s - - # Alert Manager for managing alerts - alertmanager: - <<: *defaults - image: ${GLOBAL_REGISTRY:-}signoz/alertmanager:${SIGNOZ_ALERTMANAGER_VERSION:-0.23.5} - command: - - --queryService.url=http://query-service:8080 - - --storage.path=/data - volumes: - - alertmanager_data:/data - environment: - - TZ=${TZ:-UTC} - depends_on: - query-service: - condition: service_healthy - deploy: - resources: - limits: - cpus: ${SIGNOZ_ALERTMANAGER_CPU_LIMIT:-0.5} - memory: ${SIGNOZ_ALERTMANAGER_MEMORY_LIMIT:-512M} - reservations: - cpus: ${SIGNOZ_ALERTMANAGER_CPU_RESERVATION:-0.1} - memory: ${SIGNOZ_ALERTMANAGER_MEMORY_RESERVATION:-128M} - healthcheck: - test: - - CMD - - wget - - --no-verbose - - --tries=1 - - --spider - - 'http://localhost:9093/-/healthy' - - interval: 30s - timeout: 10s - retries: 3 - start_period: 10s volumes: clickhouse_data: + clickhouse_user_scripts: signoz_data: - alertmanager_data: + zookeeper_data: diff --git a/src/signoz/otel-collector-config.yaml b/src/signoz/otel-collector-config.yaml new file mode 100644 index 0000000..3964dc5 --- /dev/null +++ b/src/signoz/otel-collector-config.yaml @@ -0,0 +1,114 @@ +connectors: + signozmeter: + metrics_flush_interval: 1h + dimensions: + - name: service.name + - name: deployment.environment + - name: host.name +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + prometheus: + config: + global: + scrape_interval: 60s + scrape_configs: + - job_name: otel-collector + static_configs: + - targets: + - localhost:8888 + labels: + job_name: otel-collector +processors: + batch: + send_batch_size: 10000 + send_batch_max_size: 11000 + timeout: 10s + batch/meter: + send_batch_max_size: 25000 + send_batch_size: 20000 + timeout: 1s + resourcedetection: + detectors: [env, system] + timeout: 2s + signozspanmetrics/delta: + metrics_exporter: signozclickhousemetrics + metrics_flush_interval: 60s + latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s] + dimensions_cache_size: 100000 + aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA + enable_exp_histogram: true + dimensions: + - name: service.namespace + default: default + - name: deployment.environment + default: default + - name: signoz.collector.id + - name: service.version + - name: browser.platform + - name: browser.mobile + - name: k8s.cluster.name + - name: k8s.node.name + - name: k8s.namespace.name + - name: host.name + - name: host.type + - name: container.name +extensions: + health_check: + endpoint: 0.0.0.0:13133 + pprof: + endpoint: 0.0.0.0:1777 +exporters: + clickhousetraces: + datasource: tcp://clickhouse:9000/signoz_traces + low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING} + use_new_schema: true + signozclickhousemetrics: + dsn: tcp://clickhouse:9000/signoz_metrics + clickhouselogsexporter: + dsn: tcp://clickhouse:9000/signoz_logs + timeout: 10s + use_new_schema: true + signozclickhousemeter: + dsn: tcp://clickhouse:9000/signoz_meter + timeout: 45s + sending_queue: + enabled: false + metadataexporter: + cache: + provider: in_memory + dsn: tcp://clickhouse:9000/signoz_metadata + enabled: true + timeout: 45s +service: + telemetry: + logs: + encoding: json + extensions: + - health_check + - pprof + pipelines: + traces: + receivers: [otlp] + processors: [signozspanmetrics/delta, batch] + exporters: [clickhousetraces, metadataexporter, signozmeter] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [signozclickhousemetrics, metadataexporter, signozmeter] + metrics/prometheus: + receivers: [prometheus] + processors: [batch] + exporters: [signozclickhousemetrics, metadataexporter, signozmeter] + logs: + receivers: [otlp] + processors: [batch] + exporters: [clickhouselogsexporter, metadataexporter, signozmeter] + metrics/meter: + receivers: [signozmeter] + processors: [batch/meter] + exporters: [signozclickhousemeter] diff --git a/src/signoz/signoz/otel-collector-opamp-config.yaml b/src/signoz/signoz/otel-collector-opamp-config.yaml new file mode 100644 index 0000000..7267607 --- /dev/null +++ b/src/signoz/signoz/otel-collector-opamp-config.yaml @@ -0,0 +1 @@ +server_endpoint: ws://signoz:4320/v1/opamp