Add metrics monitoring stack with VictoriaMetrics, Grafana, and node_exporter
Implement complete monitoring infrastructure following rick-infra principles:

Components:
- VictoriaMetrics: Prometheus-compatible TSDB (7x less RAM usage)
- Grafana: Visualization dashboard with Authentik OAuth/OIDC integration
- node_exporter: System metrics collection (CPU, memory, disk, network)

Architecture:
- All services run as native systemd binaries (no containers)
- localhost-only binding for security
- Grafana uses native OAuth integration with Authentik (not forward_auth)
- Full systemd security hardening enabled
- Proxied via Caddy at metrics.jnss.me with HTTPS

Role Features:
- Unified metrics role (single role for complete stack)
- Automatic role mapping via Authentik groups:
  - authentik Admins OR grafana-admins -> Admin access
  - grafana-editors -> Editor access
  - All others -> Viewer access
- VictoriaMetrics auto-provisioned as default Grafana datasource
- 12-month metrics retention by default
- Comprehensive documentation included

Security:
- OAuth/OIDC SSO via Authentik
- All metrics services bind to 127.0.0.1 only
- systemd hardening (NoNewPrivileges, ProtectSystem, etc.)
- Grafana accessible only via Caddy HTTPS proxy

Documentation:
- roles/metrics/README.md: Complete role documentation
- docs/metrics-deployment-guide.md: Step-by-step deployment guide

Configuration:
- Updated rick-infra.yml to include metrics deployment
- Grafana port set to 3001 (Gitea uses 3000)
- Ready for multi-host expansion (designed for future node_exporter deployment to production hosts)
This commit is contained in:
178
roles/metrics/defaults/main.yml
Normal file
178
roles/metrics/defaults/main.yml
Normal file
@@ -0,0 +1,178 @@
|
||||
---
|
||||
# =================================================================
|
||||
# Metrics Infrastructure Role - Complete Monitoring Stack
|
||||
# =================================================================
|
||||
# Provides VictoriaMetrics, Grafana, and node_exporter as unified stack
|
||||
|
||||
# =================================================================
|
||||
# VictoriaMetrics Configuration
|
||||
# =================================================================
|
||||
|
||||
# Service Management
|
||||
victoriametrics_service_enabled: true
|
||||
victoriametrics_service_state: "started"
|
||||
|
||||
# Version
|
||||
victoriametrics_version: "1.105.0"
|
||||
|
||||
# Network Security (localhost only)
|
||||
victoriametrics_listen_address: "127.0.0.1:8428"
|
||||
|
||||
# Storage Configuration
|
||||
victoriametrics_data_dir: "/var/lib/victoriametrics"
|
||||
victoriametrics_retention_period: "12" # months
|
||||
|
||||
# User/Group
|
||||
victoriametrics_user: "victoriametrics"
|
||||
victoriametrics_group: "victoriametrics"
|
||||
|
||||
# Performance Settings
|
||||
victoriametrics_memory_allowed_percent: "30"
|
||||
victoriametrics_storage_min_free_disk_space_bytes: "10GB"  # size with a unit suffix despite the _bytes name; presumably passed to -storage.minFreeDiskSpaceBytes, which accepts such suffixes — TODO confirm in the service template
|
||||
|
||||
# Scrape Configuration
|
||||
victoriametrics_scrape_config_dir: "/etc/victoriametrics"
|
||||
victoriametrics_scrape_config_file: "{{ victoriametrics_scrape_config_dir }}/scrape.yml"
|
||||
victoriametrics_scrape_interval: "15s"
|
||||
victoriametrics_scrape_timeout: "10s"
|
||||
|
||||
# systemd security
|
||||
victoriametrics_systemd_security: true
|
||||
|
||||
# =================================================================
|
||||
# Grafana Configuration
|
||||
# =================================================================
|
||||
|
||||
# Service Management
|
||||
grafana_service_enabled: true
|
||||
grafana_service_state: "started"
|
||||
|
||||
# Version
|
||||
grafana_version: "11.4.0"
|
||||
|
||||
# Network Security (localhost only - proxied via Caddy)
|
||||
grafana_listen_address: "127.0.0.1"
|
||||
grafana_listen_port: 3420  # NOTE(review): the commit message states port 3001 (chosen to avoid Gitea on 3000) — confirm which value is intended
|
||||
|
||||
# User/Group
|
||||
grafana_user: "grafana"
|
||||
grafana_group: "grafana"
|
||||
|
||||
# Directories
|
||||
grafana_data_dir: "/var/lib/grafana"
|
||||
grafana_logs_dir: "/var/log/grafana"
|
||||
grafana_plugins_dir: "/var/lib/grafana/plugins"
|
||||
grafana_provisioning_dir: "/etc/grafana/provisioning"
|
||||
|
||||
# Domain Configuration
|
||||
grafana_domain: "metrics.{{ caddy_domain }}"
|
||||
grafana_root_url: "https://{{ grafana_domain }}"
|
||||
|
||||
# Default admin (used only for initial setup)
|
||||
grafana_admin_user: "admin"
|
||||
grafana_admin_password: "{{ vault_grafana_admin_password }}"
|
||||
|
||||
# Disable local self-registration (new accounts come from OAuth only); the login form stays enabled as a fallback
|
||||
grafana_allow_signup: false
|
||||
grafana_disable_login_form: false # Keep fallback login
|
||||
|
||||
# OAuth/OIDC Configuration (Authentik)
|
||||
grafana_oauth_enabled: true
|
||||
grafana_oauth_name: "Authentik"
|
||||
grafana_oauth_client_id: "{{ vault_grafana_oauth_client_id }}"
|
||||
grafana_oauth_client_secret: "{{ vault_grafana_oauth_client_secret }}"
|
||||
|
||||
# Authentik OAuth endpoints
|
||||
grafana_oauth_auth_url: "https://{{ authentik_domain }}/application/o/authorize/"
|
||||
grafana_oauth_token_url: "https://{{ authentik_domain }}/application/o/token/"
|
||||
grafana_oauth_api_url: "https://{{ authentik_domain }}/application/o/userinfo/"
|
||||
|
||||
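# OAuth role mapping from Authentik groups (first match wins):
#   'authentik Admins' or 'grafana-admins' -> Admin
#   'grafana-editors'                      -> Editor
#   anything else                          -> Viewer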
|
||||
grafana_oauth_role_attribute_path: "(contains(groups, 'authentik Admins') || contains(groups, 'grafana-admins')) && 'Admin' || contains(groups, 'grafana-editors') && 'Editor' || 'Viewer'"
|
||||
grafana_oauth_allow_sign_up: true # Auto-create users from OAuth
|
||||
grafana_oauth_scopes: "openid profile email groups"
|
||||
|
||||
# Data Source Configuration
|
||||
grafana_datasource_vm_enabled: true
|
||||
grafana_datasource_vm_url: "http://{{ victoriametrics_listen_address }}"
|
||||
grafana_datasource_vm_name: "VictoriaMetrics"
|
||||
|
||||
# Security
|
||||
grafana_systemd_security: true
|
||||
grafana_cookie_secure: true
|
||||
grafana_cookie_samesite: "lax"
|
||||
|
||||
# Database (SQLite by default)
|
||||
grafana_database_type: "sqlite3"
|
||||
grafana_database_path: "{{ grafana_data_dir }}/grafana.db"
|
||||
|
||||
# =================================================================
|
||||
# Node Exporter Configuration
|
||||
# =================================================================
|
||||
|
||||
# Service Management
|
||||
node_exporter_service_enabled: true
|
||||
node_exporter_service_state: "started"
|
||||
|
||||
# Version
|
||||
node_exporter_version: "1.8.2"
|
||||
|
||||
# Network Security (localhost only)
|
||||
node_exporter_listen_address: "127.0.0.1:9100"
|
||||
|
||||
# User/Group
|
||||
node_exporter_user: "node_exporter"
|
||||
node_exporter_group: "node_exporter"
|
||||
|
||||
# Enabled collectors
|
||||
node_exporter_enabled_collectors:
|
||||
- cpu
|
||||
- diskstats
|
||||
- filesystem
|
||||
- loadavg
|
||||
- meminfo
|
||||
- netdev
|
||||
- netstat
|
||||
- stat
|
||||
- time
|
||||
- uname
|
||||
- vmstat
|
||||
- systemd
|
||||
|
||||
# Disabled collectors
|
||||
node_exporter_disabled_collectors:
|
||||
- mdadm
|
||||
|
||||
# Filesystem collector configuration
|
||||
node_exporter_filesystem_ignored_fs_types:
|
||||
- tmpfs
|
||||
- devtmpfs
|
||||
- devfs
|
||||
- iso9660
|
||||
- overlay
|
||||
- aufs
|
||||
- squashfs
|
||||
|
||||
node_exporter_filesystem_ignored_mount_points:
|
||||
- /var/lib/containers/storage/.*
|
||||
- /run/.*
|
||||
- /sys/.*
|
||||
- /proc/.*
|
||||
|
||||
# systemd security
|
||||
node_exporter_systemd_security: true
|
||||
|
||||
# =================================================================
|
||||
# Infrastructure Notes
|
||||
# =================================================================
|
||||
# Complete monitoring stack:
|
||||
# - VictoriaMetrics: Time-series database (Prometheus-compatible)
|
||||
# - Grafana: Visualization with Authentik OAuth integration
|
||||
# - node_exporter: System metrics collection
|
||||
#
|
||||
# Role mapping via Authentik groups:
|
||||
# - authentik Admins or grafana-admins: Full admin access
|
||||
# - grafana-editors: Can create/edit dashboards
|
||||
# - Default: Viewer access
|
||||
#
|
||||
# All services run on localhost only, proxied via Caddy
|
||||
Reference in New Issue
Block a user