Add metrics monitoring stack with VictoriaMetrics, Grafana, and node_exporter
Implement complete monitoring infrastructure following rick-infra principles:

Components:
- VictoriaMetrics: Prometheus-compatible TSDB (7x less RAM usage)
- Grafana: Visualization dashboard with Authentik OAuth/OIDC integration
- node_exporter: System metrics collection (CPU, memory, disk, network)

Architecture:
- All services run as native systemd binaries (no containers)
- localhost-only binding for security
- Grafana uses native OAuth integration with Authentik (not forward_auth)
- Full systemd security hardening enabled
- Proxied via Caddy at metrics.jnss.me with HTTPS

Role Features:
- Unified metrics role (single role for complete stack)
- Automatic role mapping via Authentik groups:
  - authentik Admins OR grafana-admins -> Admin access
  - grafana-editors -> Editor access
  - All others -> Viewer access
- VictoriaMetrics auto-provisioned as default Grafana datasource
- 12-month metrics retention by default
- Comprehensive documentation included

Security:
- OAuth/OIDC SSO via Authentik
- All metrics services bind to 127.0.0.1 only
- systemd hardening (NoNewPrivileges, ProtectSystem, etc.)
- Grafana accessible only via Caddy HTTPS proxy

Documentation:
- roles/metrics/README.md: Complete role documentation
- docs/metrics-deployment-guide.md: Step-by-step deployment guide

Configuration:
- Updated rick-infra.yml to include metrics deployment
- Grafana port set to 3001 (Gitea uses 3000)
- Ready for multi-host expansion (designed for future node_exporter deployment to production hosts)
This commit is contained in:
12
roles/metrics/templates/dashboards.yml.j2
Normal file
12
roles/metrics/templates/dashboards.yml.j2
Normal file
@@ -0,0 +1,12 @@
|
||||
# Grafana dashboard provisioning: one file-based provider named "default".
apiVersion: 1

providers:
  - name: "default"
    orgId: 1
    # Empty folder name, as in the original provider definition.
    folder: ""
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      # Dashboards are read from a "dashboards" directory below the data dir.
      path: {{ grafana_data_dir }}/dashboards
|
||||
12
roles/metrics/templates/datasource-victoriametrics.yml.j2
Normal file
12
roles/metrics/templates/datasource-victoriametrics.yml.j2
Normal file
@@ -0,0 +1,12 @@
|
||||
# Grafana datasource provisioning: VictoriaMetrics registered through the
# Prometheus datasource type and marked as the default datasource.
apiVersion: 1

datasources:
  # name and url are emitted as JSON strings (which are valid YAML scalars) so
  # that values containing spaces, ": " or "#" cannot break the rendered file.
  - name: {{ grafana_datasource_vm_name | to_json }}
    type: prometheus
    access: proxy
    url: {{ grafana_datasource_vm_url | to_json }}
    isDefault: true
    editable: true
    jsonData:
      httpMethod: POST
      # Follow the scrape interval used by this role's scrape config so the
      # two cannot drift apart; falls back to the previous fixed value of 15s.
      timeInterval: {{ victoriametrics_scrape_interval | default('15s') }}
|
||||
26
roles/metrics/templates/grafana.caddy.j2
Normal file
26
roles/metrics/templates/grafana.caddy.j2
Normal file
@@ -0,0 +1,26 @@
|
||||
# Grafana Metrics Dashboard
{{ grafana_domain }} {
    # Caddy's reverse_proxy already passes the original Host header through
    # and sets X-Forwarded-For, X-Forwarded-Proto and X-Forwarded-Host on its
    # own, so those are no longer overridden by hand. Only the non-default
    # X-Real-IP header is added.
    reverse_proxy http://{{ grafana_listen_address }}:{{ grafana_listen_port }} {
        header_up X-Real-IP {remote_host}
    }

    # Security headers
    header {
        X-Frame-Options SAMEORIGIN
        X-Content-Type-Options nosniff
        X-XSS-Protection "1; mode=block"
        Referrer-Policy strict-origin-when-cross-origin
        Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
    }

    # Logging
    log {
        output file {{ caddy_log_dir }}/grafana.log
        level INFO
        format json
    }
}
|
||||
68
roles/metrics/templates/grafana.ini.j2
Normal file
68
roles/metrics/templates/grafana.ini.j2
Normal file
@@ -0,0 +1,68 @@
|
||||
# Grafana Configuration
# Managed by Ansible - DO NOT EDIT MANUALLY

[paths]
data = {{ grafana_data_dir }}
logs = {{ grafana_logs_dir }}
plugins = {{ grafana_plugins_dir }}
provisioning = {{ grafana_provisioning_dir }}

[server]
http_addr = {{ grafana_listen_address }}
http_port = {{ grafana_listen_port }}
domain = {{ grafana_domain }}
root_url = {{ grafana_root_url }}
enforce_domain = true
enable_gzip = true

[database]
type = {{ grafana_database_type }}
{% if grafana_database_type == 'sqlite3' %}
path = {{ grafana_database_path }}
{% endif %}

[security]
admin_user = {{ grafana_admin_user }}
# Secrets are wrapped in triple quotes so that a value containing '#' or ';'
# is read in full instead of being misparsed by Grafana's INI reader.
admin_password = """{{ grafana_admin_password }}"""
secret_key = """{{ vault_grafana_secret_key }}"""
cookie_secure = {{ grafana_cookie_secure | lower }}
cookie_samesite = {{ grafana_cookie_samesite }}
disable_gravatar = true
disable_initial_admin_creation = false

[users]
allow_sign_up = {{ grafana_allow_signup | lower }}
allow_org_create = false
auto_assign_org = true
auto_assign_org_role = Viewer

[auth]
disable_login_form = {{ grafana_disable_login_form | lower }}
oauth_auto_login = false

{% if grafana_oauth_enabled %}
[auth.generic_oauth]
enabled = true
name = {{ grafana_oauth_name }}
client_id = {{ grafana_oauth_client_id }}
# Triple-quoted for the same reason as the secrets in [security].
client_secret = """{{ grafana_oauth_client_secret }}"""
scopes = {{ grafana_oauth_scopes }}
auth_url = {{ grafana_oauth_auth_url }}
token_url = {{ grafana_oauth_token_url }}
api_url = {{ grafana_oauth_api_url }}
allow_sign_up = {{ grafana_oauth_allow_sign_up | lower }}
role_attribute_path = {{ grafana_oauth_role_attribute_path }}
use_pkce = true
{% endif %}

[log]
mode = console
level = info

[analytics]
reporting_enabled = false
check_for_updates = false
check_for_plugin_updates = false

[snapshots]
external_enabled = false
|
||||
36
roles/metrics/templates/grafana.service.j2
Normal file
36
roles/metrics/templates/grafana.service.j2
Normal file
@@ -0,0 +1,36 @@
|
||||
# systemd unit for the Grafana server, run as a dedicated user from /opt/grafana.
[Unit]
Description=Grafana visualization platform
Documentation=https://grafana.com/docs/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User={{ grafana_user }}
Group={{ grafana_group }}

WorkingDirectory=/opt/grafana
ExecStart=/opt/grafana/bin/grafana-server \
    --config=/etc/grafana/grafana.ini \
    --homepath=/opt/grafana

Restart=on-failure
RestartSec=5s

# Security hardening
{% if grafana_systemd_security %}
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
# ProtectSystem=strict leaves the file system read-only apart from the paths
# listed here. The plugins directory is configured separately in grafana.ini,
# so it is listed too; the leading "-" makes systemd ignore it if it does not
# exist. NOTE(review): redundant if the plugins dir lives under the data dir.
ReadWritePaths={{ grafana_data_dir }} {{ grafana_logs_dir }} -{{ grafana_plugins_dir }}
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictNamespaces=true
LockPersonality=true
{% endif %}

[Install]
WantedBy=multi-user.target
|
||||
42
roles/metrics/templates/node_exporter.service.j2
Normal file
42
roles/metrics/templates/node_exporter.service.j2
Normal file
@@ -0,0 +1,42 @@
|
||||
# systemd unit for Prometheus node_exporter. The collector flags are generated
# from the role's enabled/disabled collector lists.
[Unit]
Description=Prometheus Node Exporter
Documentation=https://github.com/prometheus/node_exporter
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User={{ node_exporter_user }}
Group={{ node_exporter_group }}

# The Jinja tags sit at column 0 so they add no stray whitespace to the
# backslash-continued command line.
ExecStart=/usr/local/bin/node_exporter \
    --web.listen-address={{ node_exporter_listen_address }} \
{% for collector in node_exporter_enabled_collectors %}
    --collector.{{ collector }} \
{% endfor %}
{% for collector in node_exporter_disabled_collectors %}
    --no-collector.{{ collector }} \
{% endfor %}
    --collector.filesystem.fs-types-exclude="{{ node_exporter_filesystem_ignored_fs_types | join('|') }}" \
    --collector.filesystem.mount-points-exclude="{{ node_exporter_filesystem_ignored_mount_points | join('|') }}"

Restart=on-failure
RestartSec=5s

# Security hardening
{% if node_exporter_systemd_security %}
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
# read-only rather than true: "true" makes /home, /root and /run/user
# inaccessible and empty for this service, which prevents the filesystem
# collector from reporting on file systems mounted there.
ProtectHome=read-only
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictNamespaces=true
LockPersonality=true
ReadOnlyPaths=/
{% endif %}

[Install]
WantedBy=multi-user.target
|
||||
22
roles/metrics/templates/scrape.yml.j2
Normal file
22
roles/metrics/templates/scrape.yml.j2
Normal file
@@ -0,0 +1,22 @@
|
||||
# Scrape configuration passed to VictoriaMetrics via -promscrape.config.
global:
  scrape_interval: {{ victoriametrics_scrape_interval }}
  scrape_timeout: {{ victoriametrics_scrape_timeout }}
  external_labels:
    # group_names lists the groups of the current host, so this still renders
    # when the inventory defines no "homelab" group at all (indexing
    # groups["homelab"] would fail in that case).
    environment: '{{ "homelab" if "homelab" in group_names else "production" }}'
    host: '{{ inventory_hostname }}'

scrape_configs:
  # VictoriaMetrics self-monitoring
  - job_name: 'victoriametrics'
    static_configs:
      - targets: ['{{ victoriametrics_listen_address }}']
        labels:
          service: 'victoriametrics'

  # Node exporter for system metrics
  - job_name: 'node'
    static_configs:
      - targets: ['{{ node_exporter_listen_address }}']
        labels:
          service: 'node_exporter'
          instance: '{{ inventory_hostname }}'
|
||||
41
roles/metrics/templates/victoriametrics.service.j2
Normal file
41
roles/metrics/templates/victoriametrics.service.j2
Normal file
@@ -0,0 +1,41 @@
|
||||
# systemd unit for the single-binary VictoriaMetrics server.
[Unit]
Description=VictoriaMetrics time-series database
Documentation=https://docs.victoriametrics.com/
Wants=network-online.target
After=network-online.target

[Service]
Type=simple
User={{ victoriametrics_user }}
Group={{ victoriametrics_group }}

# Storage location, retention, listen address, scrape config and resource
# limits all come from role variables.
ExecStart=/usr/local/bin/victoria-metrics-prod \
    -storageDataPath={{ victoriametrics_data_dir }} \
    -retentionPeriod={{ victoriametrics_retention_period }} \
    -httpListenAddr={{ victoriametrics_listen_address }} \
    -promscrape.config={{ victoriametrics_scrape_config_file }} \
    -memory.allowedPercent={{ victoriametrics_memory_allowed_percent }} \
    -storage.minFreeDiskSpaceBytes={{ victoriametrics_storage_min_free_disk_space_bytes }}

# "systemctl reload" sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID

RestartSec=5s
Restart=on-failure

# Security hardening
{% if victoriametrics_systemd_security %}
# With ProtectSystem=strict only the data directory stays writable.
ProtectSystem=strict
ReadWritePaths={{ victoriametrics_data_dir }}
ProtectHome=true
PrivateTmp=true
NoNewPrivileges=true
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictNamespaces=true
LockPersonality=true
{% endif %}

[Install]
WantedBy=multi-user.target
|
||||
Reference in New Issue
Block a user