From 4f8da38ca65f25a40f49563bf54f56cfba9ed2e4 Mon Sep 17 00:00:00 2001 From: Joakim Date: Sun, 14 Dec 2025 22:07:08 +0100 Subject: [PATCH] Add Nextcloud cloud storage role with split Redis caching strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## New Features - **Nextcloud Role**: Complete cloud storage deployment using Podman Quadlet - FPM variant with Caddy reverse proxy and FastCGI - PostgreSQL database via Unix socket - Valkey/Redis for app-level caching and file locking - Automatic HTTPS with Let's Encrypt via Caddy - Dual-root pattern: Caddy serves static assets, FPM handles PHP - **Split Caching Strategy**: Redis caching WITHOUT Redis sessions - Custom redis.config.php template for app-level caching only - File-based PHP sessions for stability (avoids session lock issues) - Prevents cascading failures from session lock contention - Documented in role README with detailed rationale ## Infrastructure Updates - **Socket Permissions**: Update PostgreSQL and Valkey to mode 777 - Required for containers that switch users (root → www-data) - Nextcloud container loses supplementary groups on user switch - Security maintained via password authentication (scram-sha-256, requirepass) - Documented socket permission architecture in docs/ - **PostgreSQL**: Export client group GID as fact for dependent roles - **Valkey**: Export client group GID as fact, update socket fix service ## Documentation - New: docs/socket-permissions-architecture.md - Explains 777 vs 770 socket permission trade-offs - Documents why group-based access doesn't work for user-switching containers - Provides TCP alternative for stricter security requirements - Updated: All role READMEs with socket permission notes - New: Nextcloud README with comprehensive deployment, troubleshooting, and Redis architecture documentation ## Configuration - host_vars: Add Nextcloud vault variables and configuration - site.yml: Include Nextcloud role in main playbook 
## Technical Details **Why disable Redis sessions?** The official Nextcloud container enables Redis session handling via REDIS_HOST env var, which causes severe performance issues: 1. Session lock contention under high concurrency (browser parallel asset requests) 2. Infinite lock retries (default lock_retries=-1) blocking FPM workers 3. Timeout orphaning: reverse proxy kills connection, worker keeps lock 4. Worker pool exhaustion: all 5 default workers blocked on same session lock 5. Cascading failure: new requests queue, more timeouts, more orphaned locks Solution: Use file-based sessions (reliable, fast for single-server) while keeping Redis for distributed cache and transactional file locking via custom config file. This provides optimal performance without the complexity of Redis session debugging. Tested: Fresh deployment on arch-vps (69.62.119.31) Domain: https://cloud.jnss.me/ --- docs/socket-permissions-architecture.md | 210 ++++++++++ host_vars/arch-vps/main.yml | 21 + roles/nextcloud/README.md | 361 ++++++++++++++++++ roles/nextcloud/VAULT_VARIABLES.md | 105 +++++ roles/nextcloud/defaults/main.yml | 95 +++++ roles/nextcloud/handlers/main.yml | 16 + roles/nextcloud/meta/main.yml | 17 + roles/nextcloud/tasks/cache.yml | 17 + roles/nextcloud/tasks/database.yml | 34 ++ roles/nextcloud/tasks/main.yml | 150 ++++++++ roles/nextcloud/tasks/user.yml | 54 +++ roles/nextcloud/templates/nextcloud.caddy.j2 | 71 ++++ roles/nextcloud/templates/nextcloud.container | 44 +++ roles/nextcloud/templates/nextcloud.env.j2 | 50 +++ .../templates/redis-session-override.ini.j2 | 16 + roles/nextcloud/templates/redis.config.php.j2 | 34 ++ roles/postgresql/README.md | 31 ++ roles/postgresql/defaults/main.yml | 6 +- roles/postgresql/tasks/main.yml | 2 +- roles/valkey/README.md | 35 +- roles/valkey/defaults/main.yml | 6 +- roles/valkey/tasks/main.yml | 2 +- .../templates/valkey-socket-fix.service.j2 | 4 +- site.yml | 6 +- 24 files changed, 1379 insertions(+), 8 deletions(-) create 
mode 100644 docs/socket-permissions-architecture.md create mode 100644 roles/nextcloud/README.md create mode 100644 roles/nextcloud/VAULT_VARIABLES.md create mode 100644 roles/nextcloud/defaults/main.yml create mode 100644 roles/nextcloud/handlers/main.yml create mode 100644 roles/nextcloud/meta/main.yml create mode 100644 roles/nextcloud/tasks/cache.yml create mode 100644 roles/nextcloud/tasks/database.yml create mode 100644 roles/nextcloud/tasks/main.yml create mode 100644 roles/nextcloud/tasks/user.yml create mode 100644 roles/nextcloud/templates/nextcloud.caddy.j2 create mode 100644 roles/nextcloud/templates/nextcloud.container create mode 100644 roles/nextcloud/templates/nextcloud.env.j2 create mode 100644 roles/nextcloud/templates/redis-session-override.ini.j2 create mode 100644 roles/nextcloud/templates/redis.config.php.j2 diff --git a/docs/socket-permissions-architecture.md b/docs/socket-permissions-architecture.md new file mode 100644 index 0000000..de144ad --- /dev/null +++ b/docs/socket-permissions-architecture.md @@ -0,0 +1,210 @@ +# Socket Permissions Architecture Decision + +## Context + +Rick-infra uses Unix domain sockets for PostgreSQL and Valkey (Redis) connections to maximize performance and security. Applications run in Podman containers and need to access these infrastructure services via sockets. + +## Problem + +Different container images have different user models: + +1. **Authentik**: Runs as a specific user (UID 966) from start to finish +2. **Nextcloud**: Starts as root, runs entrypoint scripts, then switches to www-data (UID 33) + +When using `--group-add` with Podman: +- Supplementary groups are added to the **initial user** the container runs as +- Groups are **NOT inherited** when a container switches users internally +- Nextcloud's www-data process ends up without socket access + +## Decision + +**Use 777 permissions on Unix sockets** for PostgreSQL and Valkey. + +## Rationale + +### Why 777 Works + +1. 
**Compatibility**: Any container user model can access the sockets +2. **Simplicity**: No complex user namespace mapping needed +3. **Security maintained**: Password authentication still required +4. **Local-only**: Sockets are not network-exposed + +### Security Analysis + +**What 777 allows:** +- ✅ Any local process can **attempt** to connect to the socket + +**What 777 does NOT allow:** +- ❌ Authentication bypass - PostgreSQL requires username + password (scram-sha-256) +- ❌ Network access - Sockets are local filesystem only +- ❌ Remote connections - Not exposed beyond localhost + +**Security layers:** +1. **Physical**: Server access required +2. **Process**: Must be running on the same host +3. **Authentication**: Must provide valid credentials +4. **Authorization**: Database/Redis permissions enforced + +### Comparison to TCP Localhost + +Using `127.0.0.1:5432` (TCP) has **identical security**: +- Localhost-only (not network) +- Requires authentication +- Any local process can attempt connection + +Socket 777 vs TCP localhost: +- **Same security model**: Both require credentials, both are local-only +- **Different performance**: Sockets are faster (no TCP/IP stack overhead) +- **Different permissions**: Sockets use filesystem permissions, TCP uses network + +## Alternatives Considered + +### Alternative 1: Group-based Permissions (770) + +**Implementation:** +```yaml +postgresql_unix_socket_permissions: "0770" +valkey_unix_socket_perm: "770" +``` + +**Why rejected:** +- Doesn't work for Nextcloud (www-data not in groups after su switch) +- Requires all containers to use `--group-add` +- Complex UID/GID management +- Breaks container user-switching patterns + +### Alternative 2: User Namespace Mapping + +**Implementation:** +``` +--uidmap 33:963:1 # Map www-data to nextcloud +--gidmap 33:963:1 +``` + +**Why rejected:** +- Container's root loses privileges (can't run entrypoint) +- Very complex configuration +- Fragile (breaks on image updates) +- Doesn't solve 
the fundamental user-switching problem + +### Alternative 3: TCP on Localhost + +**Implementation:** +```yaml +# PostgreSQL +postgresql_listen_addresses: "127.0.0.1" + +# Valkey +valkey_bind: "127.0.0.1" +valkey_port: 6379 +``` + +**Why not chosen (but valid alternative):** +- ✅ Same security as socket 777 +- ✅ No permission issues +- ❌ Abandons Unix socket performance benefits +- ❌ Goes against infrastructure design goal + +**Status:** Documented as alternative, available for users who prefer it + +### Alternative 4: Custom Entrypoint + +**Implementation:** +Create wrapper that adds www-data to groups before starting FPM. + +**Why rejected:** +- Requires custom Dockerfile +- Maintenance burden +- Breaks on upstream image updates +- Fragile and complex + +## Implementation + +### Files Changed + +1. `roles/postgresql/defaults/main.yml`: Set `postgresql_unix_socket_permissions: "0777"` +2. `roles/valkey/defaults/main.yml`: Set `valkey_unix_socket_perm: "777"` +3. Documentation updated in all affected role READMEs + +### Migration Path + +For existing deployments: +1. Update socket permissions: `chmod 777 /var/run/postgresql/.s.PGSQL.5432` +2. Update socket permissions: `chmod 777 /var/run/valkey/valkey.sock` +3. Restart services (permissions persist via role configuration) + +## Consequences + +### Positive + +- ✅ Works with all container user models (root-switching, single-user, etc.) +- ✅ Simple to understand and maintain +- ✅ No complex UID/GID mapping required +- ✅ Standard pattern, well-documented +- ✅ Authentication still enforced + +### Negative + +- ⚠️ Any local process can attempt socket connection +- ⚠️ Requires clear documentation of security model +- ⚠️ May surprise users expecting tighter filesystem permissions + +### Neutral + +- ℹ️ Same security model as TCP localhost +- ℹ️ Alternative (TCP) available for those who prefer it +- ℹ️ Follows "make it work, make it right, make it fast" philosophy + +## Validation + +### Security Validation + +1. 
**Authentication required**: ✅ Tested - connection requires credentials +2. **Password strength**: ✅ Enforced via scram-sha-256 and vault +3. **Local-only**: ✅ Sockets are filesystem objects, not network +4. **Process isolation**: ✅ Each service has separate database/namespace + +### Functional Validation + +1. **Authentik**: ✅ Works with 777 sockets +2. **Nextcloud**: ✅ Works with 777 sockets (www-data can access) +3. **Gitea**: ✅ Works with 777 sockets + +## Monitoring + +No additional monitoring required. Standard checks apply: +- Service authentication logs (failed login attempts) +- Connection monitoring via application logs +- Systemd service health + +## Documentation + +All relevant READMEs updated with: +- Explanation of 777 permission choice +- Security rationale +- TCP alternative configuration +- Clear security model explanation + +## Future Considerations + +This decision can be revisited if: +1. Container orchestration changes (e.g., Kubernetes with different security contexts) +2. New containers with different user models emerge +3. Network isolation requirements change +4. Regulatory compliance requires stricter filesystem permissions + +In such cases, the TCP alternative provides an equivalent security model without filesystem permission concerns. 
+ +## References + +- [PostgreSQL Role README](../roles/postgresql/README.md) +- [Valkey Role README](../roles/valkey/README.md) +- [Nextcloud Role README](../roles/nextcloud/README.md) +- [Podman User Namespaces Documentation](https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode) +- [Unix Socket Security](https://www.man7.org/linux/man-pages/man7/unix.7.html) + +--- + +**Decision Date**: December 14, 2025 +**Status**: Accepted +**Reviewers**: rick-infra maintainers diff --git a/host_vars/arch-vps/main.yml b/host_vars/arch-vps/main.yml index 686f4a6..d98acd6 100644 --- a/host_vars/arch-vps/main.yml +++ b/host_vars/arch-vps/main.yml @@ -69,6 +69,27 @@ postgresql_unix_socket_enabled: true postgresql_listen_addresses: "" # Socket-only mode (no TCP) valkey_unix_socket_enabled: true +# ================================================================= +# Nextcloud Configuration +# ================================================================= +nextcloud_domain: "cloud.jnss.me" + +# Database configuration +nextcloud_db_name: "nextcloud" +nextcloud_db_user: "nextcloud" +nextcloud_db_password: "{{ vault_nextcloud_db_password }}" + +# Cache configuration +nextcloud_valkey_db: 2 # Authentik uses 1 + +# Admin configuration +nextcloud_admin_user: "admin" +nextcloud_admin_password: "{{ vault_nextcloud_admin_password }}" + +# Service configuration +nextcloud_service_enabled: true +nextcloud_service_state: "started" + # ================================================================= # Security & Logging # ================================================================= diff --git a/roles/nextcloud/README.md b/roles/nextcloud/README.md new file mode 100644 index 0000000..44abf0b --- /dev/null +++ b/roles/nextcloud/README.md @@ -0,0 +1,361 @@ +# Nextcloud Cloud Storage Role + +Self-contained Nextcloud deployment using Podman Quadlet with FPM, PostgreSQL database, and Valkey cache via Unix sockets. 
+ +## Features + +- **Container**: Single Nextcloud FPM container via Podman Quadlet +- **Database**: Self-managed PostgreSQL database via Unix socket +- **Cache**: Valkey (Redis-compatible) for file locking and caching +- **Web Server**: Caddy reverse proxy with FastCGI and automatic HTTPS +- **Security**: Password-authenticated Unix socket access (mode 777 sockets, see Security Notes), separated data/config volumes +- **Size**: ~320MB FPM image (vs 1.1GB Apache variant) + +## Architecture + +``` +Internet → Caddy (HTTPS:443) → FastCGI → Nextcloud FPM Container (127.0.0.1:9001) + ↓ ↓ + Serves static files PostgreSQL (socket) + from /opt/nextcloud/html Valkey (socket) +``` + +### Volume Layout + +``` +/opt/nextcloud/ +├── html/ # Application code (755 - readable by Caddy for static files) +├── data/ # User files (700 - private to container) +├── config/ # Config with secrets (700 - private to container) +├── custom_apps/ # Installed apps (755 - readable) +└── .env # Environment variables (600) +``` + +**Security Model**: +- Caddy serves static assets (CSS/JS/images) directly from `/opt/nextcloud/html` +- Caddy cannot access `/data` or `/config` (mode 700) +- User files are only served through authenticated PHP requests via FPM + +## Dependencies + +- `postgresql` role (infrastructure) +- `valkey` role (infrastructure) +- `caddy` role (web server) +- `podman` role (container runtime) + +## Variables + +See `defaults/main.yml` for all configurable variables. 
+ +### Required Vault Variables + +Define these in your `host_vars/` with `ansible-vault`: + +```yaml +vault_nextcloud_db_password: "secure-database-password" +vault_nextcloud_admin_password: "secure-admin-password" +vault_valkey_password: "secure-valkey-password" +``` + +### Key Variables + +```yaml +# Domain +nextcloud_domain: "cloud.jnss.me" + +# Admin user +nextcloud_admin_user: "admin" + +# Database +nextcloud_db_name: "nextcloud" +nextcloud_db_user: "nextcloud" + +# Cache (use different DB number per service) +nextcloud_valkey_db: 2 # Authentik uses 1 + +# PHP limits +nextcloud_php_memory_limit: "512M" +nextcloud_php_upload_limit: "512M" +``` + +## Usage + +### Include in Playbook + +```yaml +- role: nextcloud + tags: ['nextcloud', 'cloud', 'storage'] +``` + +### Deploy + +```bash +# Deploy Nextcloud role +ansible-playbook -i inventory/hosts.yml site.yml --tags nextcloud --ask-vault-pass + +# Deploy only infrastructure dependencies +ansible-playbook -i inventory/hosts.yml site.yml --tags postgresql,valkey,caddy +``` + +## Verification + +After deployment: + +1. **Access Nextcloud**: + ```bash + https://cloud.jnss.me + ``` + +2. **Check service status**: + ```bash + ssh root@arch-vps + systemctl status nextcloud + podman ps | grep nextcloud + ``` + +3. **View logs**: + ```bash + # Container logs + journalctl -u nextcloud -f + podman logs nextcloud + + # Caddy logs + tail -f /var/log/caddy/nextcloud.log + ``` + +4. 
**Verify socket access**: + ```bash + # Check group memberships + id nextcloud + # Should show: postgres-clients, valkey-clients + + # Check socket permissions + ls -la /var/run/postgresql/.s.PGSQL.5432 + ls -la /var/run/valkey/valkey.sock + ``` + +## Maintenance + +### OCC Command (Nextcloud CLI) + +Run Nextcloud's OCC command-line tool: + +```bash +# General syntax +podman exec --user www-data nextcloud php occ <command> + +# Examples +podman exec --user www-data nextcloud php occ status +podman exec --user www-data nextcloud php occ app:list +podman exec --user www-data nextcloud php occ maintenance:mode --on +podman exec --user www-data nextcloud php occ files:scan --all +``` + +### Update Nextcloud + +The container's entrypoint upgrades the Nextcloud installation when it starts with a newer image, so restart the service after a new image has been pulled: + +```bash +systemctl restart nextcloud +``` + +Or pin a specific version: + +```yaml +# In host_vars or defaults +nextcloud_version: "32-fpm" # Pin to major version +# Or +nextcloud_version: "32.0.3-fpm" # Pin to exact version +``` + +### Backup Strategy + +Key directories to back up: + +1. **User data**: `/opt/nextcloud/data` +2. **Configuration**: `/opt/nextcloud/config` +3. **Database**: PostgreSQL `nextcloud` database +4. 
**Custom apps**: `/opt/nextcloud/custom_apps` (optional) + +Example backup script: + +```bash +#!/bin/bash +# Enable maintenance mode +podman exec --user www-data nextcloud php occ maintenance:mode --on + +# Backup data and config +tar -czf nextcloud-data-$(date +%Y%m%d).tar.gz /opt/nextcloud/data /opt/nextcloud/config + +# Backup database +sudo -u postgres pg_dump nextcloud > nextcloud-db-$(date +%Y%m%d).sql + +# Disable maintenance mode +podman exec --user www-data nextcloud php occ maintenance:mode --off +``` + +### Performance Tuning + +Adjust PHP limits in `host_vars`: + +```yaml +nextcloud_php_memory_limit: "1G" # For large files +nextcloud_php_upload_limit: "10G" # For large uploads +``` + +### Redis/Valkey Caching Architecture + +This role uses a **split caching strategy** for optimal performance and stability: + +**PHP Sessions**: File-based (default PHP session handler) +- Location: `/var/www/html/data/sessions/` +- Why: Redis session locking can cause cascading failures under high concurrency +- Performance: Excellent for single-server deployments + +**Nextcloud Application Cache**: Redis/Valkey +- `memcache.local`: APCu (in-process user data cache, local to each PHP-FPM instance) +- `memcache.distributed`: Redis (shared cache) +- `memcache.locking`: Redis (transactional file locking) +- Configuration: Via custom `redis.config.php` template + +**Why not Redis sessions?** + +The official Nextcloud Docker image enables Redis session handling when `REDIS_HOST` is set. However, this can cause severe performance issues: + +1. **Session lock contention**: Multiple parallel requests (browser loading CSS/JS/images) compete for the same session lock +2. **Infinite retries**: Default `lock_retries = -1` means workers block forever +3. **Timeout orphaning**: When reverse proxy times out, FPM workers keep running and hold locks +4. **Worker exhaustion**: Limited FPM workers (default 5) all become blocked +5. 
**Cascading failure**: New requests queue, timeouts accumulate, locks are orphaned + +This role disables Redis sessions by **not setting** `REDIS_HOST` in the environment, while still providing Redis caching via a custom `redis.config.php` that is deployed independently. + +**If you need Redis sessions** (e.g., multi-server setup with session sharing), you must: +1. Enable `REDIS_HOST` in `nextcloud.env.j2` +2. Set proper lock parameters in a custom PHP ini file +3. Increase FPM workers significantly (15-20+) +4. Monitor for orphaned session locks + +See `templates/redis-session-override.ini.j2` for an example of session lock tuning. + +## Troubleshooting + +### Container won't start + +```bash +# Check container logs +journalctl -u nextcloud -n 50 +podman logs nextcloud + +# Check systemd unit +systemctl status nextcloud +``` + +### Permission errors + +```bash +# Verify user groups +id nextcloud + +# Should be in: postgres-clients, valkey-clients +# If not, re-run user.yml tasks: +ansible-playbook -i inventory/hosts.yml site.yml --tags nextcloud,user +``` + +### Database connection errors + +```bash +# Test PostgreSQL socket +sudo -u nextcloud psql -h /var/run/postgresql -U nextcloud -d nextcloud + +# Check socket exists and permissions +ls -la /var/run/postgresql/.s.PGSQL.5432 +``` + +### Caddy FastCGI errors + +```bash +# Check Caddy can read app files +sudo -u caddy ls -la /opt/nextcloud/html + +# Verify FPM is listening (default nextcloud_fpm_port is 9001) +ss -tlnp | grep 9001 + +# Test that the FPM port accepts connections (FPM speaks FastCGI, not HTTP, so expect a connect but no valid HTTP response) +curl -v http://127.0.0.1:9001 +``` + +### "Trusted domain" errors + +Add domains to `nextcloud_trusted_domains`: + +```yaml +nextcloud_trusted_domains: "cloud.jnss.me localhost 69.62.119.31" +``` + +Or add via OCC: + +```bash +podman exec --user www-data nextcloud php occ config:system:set trusted_domains 1 --value=cloud.jnss.me +``` + +## Integration with Authentik SSO + +To integrate Nextcloud with Authentik for SSO, see the Authentik documentation for OAuth2/OIDC provider setup. 
+ +## Security Notes + +- User data (`/opt/nextcloud/data`) is mode 700 - only container can access +- Config (`/opt/nextcloud/config`) is mode 700 - contains database passwords +- Application files (`/opt/nextcloud/html`) are mode 755 - Caddy can read for static files +- All traffic is HTTPS via Caddy with automatic Let's Encrypt certificates +- Database and cache connections use Unix sockets (no TCP exposure) +- Container runs as root initially, then switches to www-data (UID 33) for PHP-FPM + +### Socket Access Pattern + +Nextcloud uses a different access pattern than other rick-infra services due to how the official Nextcloud container works: + +**How it works:** +1. Container starts as root (UID 0) +2. Entrypoint runs as root to write PHP configuration files +3. Entrypoint switches to www-data (UID 33) for PHP-FPM process +4. www-data accesses PostgreSQL and Valkey via Unix sockets + +**Why 777 socket permissions are needed:** +- The Nextcloud container cannot use `--group-add` effectively because: + - `--group-add` only adds groups to the **initial user** (root) + - When the container switches from root to www-data, supplementary groups are lost + - www-data (UID 33, GID 33) ends up with no access to group-restricted sockets +- Infrastructure sockets use mode 777 to allow access by any UID +- Security is maintained via password authentication (PostgreSQL: scram-sha-256, Valkey: requirepass) +- Sockets are local-only (not network-exposed) + +**Alternative (TCP)**: +If you prefer group-based socket access (770), you can configure PostgreSQL and Valkey to use TCP instead: + +```yaml +# In host_vars +postgresql_listen_addresses: "127.0.0.1" +postgresql_unix_socket_permissions: "0770" # Restrict to group + +valkey_bind: "127.0.0.1" +valkey_port: 6379 +valkey_unix_socket_enabled: false + +# In Nextcloud env +POSTGRES_HOST=127.0.0.1 +POSTGRES_PORT=5432 +REDIS_HOST=127.0.0.1 +REDIS_PORT=6379 +``` + +This provides the same security level (password-authenticated, 
localhost-only) but uses TCP instead of Unix sockets. The trade-off is slightly lower performance compared to Unix sockets. Note that setting `REDIS_HOST` re-enables the image's Redis session handler described under "Redis/Valkey Caching Architecture"; to keep file-based sessions, leave `REDIS_HOST` unset and point `redis.config.php` at the TCP endpoint instead. + +See infrastructure role documentation (PostgreSQL and Valkey READMEs) for more details on this architectural decision. + +## References + +- [Nextcloud Official Docker Image](https://hub.docker.com/_/nextcloud) +- [Nextcloud Documentation](https://docs.nextcloud.com/) +- [Caddy FastCGI Documentation](https://caddyserver.com/docs/caddyfile/directives/php_fastcgi) diff --git a/roles/nextcloud/VAULT_VARIABLES.md b/roles/nextcloud/VAULT_VARIABLES.md new file mode 100644 index 0000000..d4ca080 --- /dev/null +++ b/roles/nextcloud/VAULT_VARIABLES.md @@ -0,0 +1,105 @@ +# Nextcloud Role - Required Vault Variables + +This role requires the following encrypted variables to be defined in your vault file (typically `host_vars/<hostname>/vault.yml`). + +## Required Variables + +Add these to your encrypted vault file: + +```yaml +# Nextcloud database password +vault_nextcloud_db_password: "CHANGE_ME_secure_database_password" + +# Nextcloud admin account password +vault_nextcloud_admin_password: "CHANGE_ME_secure_admin_password" + +# Valkey/Redis password (shared infrastructure) +vault_valkey_password: "CHANGE_ME_secure_valkey_password" +``` + +## Creating/Editing Vault File + +### First Time Setup + +```bash +# Create encrypted vault file +ansible-vault create host_vars/arch-vps/vault.yml + +# Add the variables above, then save and exit +``` + +### Edit Existing Vault + +```bash +# Edit encrypted vault file +ansible-vault edit host_vars/arch-vps/vault.yml + +# Add the Nextcloud variables, then save and exit +``` + +### Password Generation + +Generate secure passwords: + +```bash +# Generate a password from 32 random bytes (44 base64 characters) +openssl rand -base64 32 + +# Or a 32-character password using pwgen +pwgen -s 32 1 +``` + +## Example Vault File + +Your `host_vars/arch-vps/vault.yml` should include: + +```yaml +--- +# Caddy TLS +vault_caddy_tls_email: "admin@jnss.me" +vault_cloudflare_api_token: 
"your-cloudflare-token" + +# Authentik +vault_authentik_db_password: "authentik-db-password" +vault_authentik_secret_key: "authentik-secret-key" +vault_authentik_admin_password: "authentik-admin-password" + +# Nextcloud (ADD THESE) +vault_nextcloud_db_password: "generated-password-1" +vault_nextcloud_admin_password: "generated-password-2" + +# Valkey (shared infrastructure) +vault_valkey_password: "valkey-password" +``` + +## Deployment + +When deploying, you'll need to provide the vault password: + +```bash +# Deploy with vault password prompt +ansible-playbook -i inventory/hosts.yml site.yml --tags nextcloud --ask-vault-pass + +# Or use a password file +ansible-playbook -i inventory/hosts.yml site.yml --tags nextcloud --vault-password-file ~/.vault_pass +``` + +## Security Notes + +- **Never commit unencrypted vault files** to git +- Use strong, randomly generated passwords (at least 32 characters) +- Each service should have unique database passwords +- Store vault password securely (password manager, encrypted file, etc.) 
+- Consider using `ansible-vault rekey` to change vault password periodically + +## Verification + +Check that variables are properly encrypted: + +```bash +# View encrypted file (should show encrypted content) +cat host_vars/arch-vps/vault.yml + +# Decrypt and view (requires password) +ansible-vault view host_vars/arch-vps/vault.yml +``` diff --git a/roles/nextcloud/defaults/main.yml b/roles/nextcloud/defaults/main.yml new file mode 100644 index 0000000..ae89eef --- /dev/null +++ b/roles/nextcloud/defaults/main.yml @@ -0,0 +1,95 @@ +--- +# ================================================================= +# Nextcloud Cloud Storage Role - Default Variables +# ================================================================= +# Self-contained Nextcloud deployment with FPM, PostgreSQL, and Valkey + +# ================================================================= +# Service Configuration +# ================================================================= + +# Service user and directories +nextcloud_user: nextcloud +nextcloud_group: nextcloud +nextcloud_home: /opt/nextcloud +nextcloud_html_dir: "{{ nextcloud_home }}/html" +nextcloud_data_dir: "{{ nextcloud_home }}/data" +nextcloud_config_dir: "{{ nextcloud_home }}/config" +nextcloud_custom_apps_dir: "{{ nextcloud_home }}/custom_apps" + +# Container configuration (FPM variant) +nextcloud_version: "stable-fpm" +nextcloud_image: "docker.io/library/nextcloud" +nextcloud_fpm_port: 9001 # Internal FPM port (published to 127.0.0.1, Authentik uses 9000) + +# Service management +nextcloud_service_enabled: true +nextcloud_service_state: "started" + +# ================================================================= +# Database Configuration (Self-managed) +# ================================================================= + +nextcloud_db_name: "nextcloud" +nextcloud_db_user: "nextcloud" +nextcloud_db_password: "{{ vault_nextcloud_db_password }}" + +# ================================================================= +# 
Cache Configuration (Self-managed) +# ================================================================= + +nextcloud_valkey_db: 2 # Use database 2 (Authentik uses 1) + +# ================================================================= +# Network Configuration +# ================================================================= + +nextcloud_domain: "cloud.jnss.me" + +# ================================================================= +# Nextcloud Core Configuration +# ================================================================= + +# Admin user (auto-configured on first run) +nextcloud_admin_user: "admin" +nextcloud_admin_password: "{{ vault_nextcloud_admin_password }}" + +# Trusted domains (space-separated) +nextcloud_trusted_domains: "{{ nextcloud_domain }}" + +# Reverse proxy configuration +nextcloud_overwriteprotocol: "https" + +# ================================================================= +# PHP Configuration +# ================================================================= + +nextcloud_php_memory_limit: "512M" +nextcloud_php_upload_limit: "512M" + +# ================================================================= +# Caddy Integration +# ================================================================= + +# Caddy configuration (assumes caddy role provides these variables) +caddy_sites_enabled_dir: "/etc/caddy/sites-enabled" +caddy_log_dir: "/var/log/caddy" +caddy_user: "caddy" + +# ================================================================= +# Infrastructure Dependencies (Read-only) +# ================================================================= +# These variables reference infrastructure services defined by their roles +# Applications MUST NOT modify these values - they are provided by infrastructure + +# PostgreSQL socket configuration (managed by postgresql role) +postgresql_unix_socket_directories: "/var/run/postgresql" +postgresql_client_group: "postgres-clients" +postgresql_port: 5432 +postgresql_unix_socket_enabled: true + +# 
Valkey socket configuration (managed by valkey role) +valkey_unix_socket_path: "/var/run/valkey/valkey.sock" +valkey_password: "{{ vault_valkey_password }}" +valkey_client_group: "valkey-clients" +valkey_unix_socket_enabled: true diff --git a/roles/nextcloud/handlers/main.yml b/roles/nextcloud/handlers/main.yml new file mode 100644 index 0000000..05cb932 --- /dev/null +++ b/roles/nextcloud/handlers/main.yml @@ -0,0 +1,16 @@ +--- +# Nextcloud Role Handlers + +- name: reload systemd + systemd: + daemon_reload: true + +- name: restart nextcloud + systemd: + name: nextcloud + state: restarted + +- name: reload caddy + systemd: + name: caddy + state: reloaded diff --git a/roles/nextcloud/meta/main.yml b/roles/nextcloud/meta/main.yml new file mode 100644 index 0000000..44c6cee --- /dev/null +++ b/roles/nextcloud/meta/main.yml @@ -0,0 +1,17 @@ +--- +dependencies: + - role: postgresql + - role: valkey + - role: caddy + - role: podman + +galaxy_info: + author: rick-infra + description: Nextcloud cloud storage with FPM, PostgreSQL, and Valkey + company: jnss.me + license: MIT + min_ansible_version: "2.9" + platforms: + - name: ArchLinux + versions: + - all diff --git a/roles/nextcloud/tasks/cache.yml b/roles/nextcloud/tasks/cache.yml new file mode 100644 index 0000000..562babb --- /dev/null +++ b/roles/nextcloud/tasks/cache.yml @@ -0,0 +1,17 @@ +--- +# Nextcloud Valkey Cache Setup +# Valkey configuration is done via environment variables in the container +# This file exists for consistency and future cache-specific tasks + +- name: Verify Valkey socket accessibility + stat: + path: "{{ valkey_unix_socket_path }}" + register: valkey_socket_stat + failed_when: not valkey_socket_stat.stat.exists + +- name: Display cache configuration + debug: + msg: | + Nextcloud will use Valkey database {{ nextcloud_valkey_db }} + Socket: {{ valkey_unix_socket_path }} + Access via valkey-clients group diff --git a/roles/nextcloud/tasks/database.yml b/roles/nextcloud/tasks/database.yml new file 
mode 100644 index 0000000..cb9b444 --- /dev/null +++ b/roles/nextcloud/tasks/database.yml @@ -0,0 +1,34 @@ +--- +# Nextcloud Database Setup - PostgreSQL via Unix Socket + +- name: Create Nextcloud PostgreSQL user + postgresql_user: + name: "{{ nextcloud_db_user }}" + password: "{{ nextcloud_db_password }}" + encrypted: true + login_unix_socket: "{{ postgresql_unix_socket_directories }}" + login_user: postgres + become: true + become_user: postgres + +- name: Create Nextcloud database + postgresql_db: + name: "{{ nextcloud_db_name }}" + owner: "{{ nextcloud_db_user }}" + encoding: UTF8 + template: template0 + login_unix_socket: "{{ postgresql_unix_socket_directories }}" + login_user: postgres + become: true + become_user: postgres + +- name: Grant Nextcloud database privileges + postgresql_privs: + db: "{{ nextcloud_db_name }}" + privs: ALL + type: database + role: "{{ nextcloud_db_user }}" + login_unix_socket: "{{ postgresql_unix_socket_directories }}" + login_user: postgres + become: true + become_user: postgres diff --git a/roles/nextcloud/tasks/main.yml b/roles/nextcloud/tasks/main.yml new file mode 100644 index 0000000..b873c6a --- /dev/null +++ b/roles/nextcloud/tasks/main.yml @@ -0,0 +1,150 @@ +--- +# Nextcloud Cloud Storage Role - Main Tasks +# Self-contained deployment with FPM, PostgreSQL, and Valkey via Unix sockets + +- name: Validate infrastructure facts are available + assert: + that: + - postgresql_client_group_gid is defined + - valkey_client_group_gid is defined + fail_msg: | + Required infrastructure facts are not available. + Ensure PostgreSQL and Valkey roles have run and exported client group GIDs. 
+ tags: [validation] + +- name: Setup nextcloud user and container namespaces + include_tasks: user.yml + tags: [user, setup] + +- name: Setup database access and permissions + include_tasks: database.yml + tags: [database, setup] + +- name: Setup cache access and permissions + include_tasks: cache.yml + tags: [cache, setup] + +- name: Pull nextcloud container image + containers.podman.podman_image: + name: "{{ nextcloud_image }}:{{ nextcloud_version }}" + state: present + tags: [containers, image-pull] + +- name: Create nextcloud application directories (container manages ownership) + file: + path: "{{ item }}" + state: directory + # Note: No owner/group/mode specified - let container entrypoint manage ownership + # The official Nextcloud container expects to chown these to www-data (UID:33) + loop: + - "{{ nextcloud_html_dir }}" + - "{{ nextcloud_data_dir }}" + - "{{ nextcloud_config_dir }}" + - "{{ nextcloud_custom_apps_dir }}" + tags: [setup, directories] + +- name: Deploy environment configuration + template: + src: nextcloud.env.j2 + dest: "{{ nextcloud_home }}/.env" + mode: '0600' + backup: true + notify: restart nextcloud + tags: [config] + +- name: Deploy custom Redis caching configuration + template: + src: redis.config.php.j2 + dest: "{{ nextcloud_config_dir }}/redis.config.php" + mode: '0640' + notify: restart nextcloud + tags: [config, redis] + +- name: Deploy Redis session lock override configuration + template: + src: redis-session-override.ini.j2 + dest: "{{ nextcloud_home }}/redis-session-override.ini" + mode: '0644' + notify: restart nextcloud + tags: [config, redis] + +- name: Create Quadlet systemd directory (system scope) + file: + path: /etc/containers/systemd + state: directory + mode: '0755' + +- name: Deploy Quadlet container file (system scope) + template: + src: nextcloud.container + dest: /etc/containers/systemd/nextcloud.container + mode: '0644' + notify: + - reload systemd + - restart nextcloud + tags: [containers, deployment] + +- 
name: Deploy Caddy configuration + template: + src: nextcloud.caddy.j2 + dest: "{{ caddy_sites_enabled_dir }}/nextcloud.caddy" + owner: root + group: "{{ caddy_user }}" + mode: '0644' + backup: true + notify: reload caddy + tags: [caddy, reverse-proxy] + +- name: Ensure system dependencies are running + systemd: + name: "{{ item }}" + state: started + loop: + - postgresql + - valkey + +- name: Wait for PostgreSQL socket to be ready + wait_for: + path: "{{ postgresql_unix_socket_directories }}/.s.PGSQL.{{ postgresql_port }}" + timeout: 30 + +- name: Wait for Valkey socket to be ready + wait_for: + path: "{{ valkey_unix_socket_path }}" + timeout: 30 + +- name: Enable and start Nextcloud service (system scope) + systemd: + name: nextcloud + enabled: "{{ nextcloud_service_enabled }}" + state: "{{ nextcloud_service_state }}" + daemon_reload: true + tags: [containers, service] + +- name: Wait for Nextcloud FPM to be ready + wait_for: + host: 127.0.0.1 + port: "{{ nextcloud_fpm_port }}" + timeout: 60 + retries: 5 + delay: 10 + tags: [verification] + +- name: Display Nextcloud deployment status + debug: + msg: | + ✅ Nextcloud Cloud Storage deployed successfully! + + 🌐 Domain: {{ nextcloud_domain }} + 🗄️ Database: {{ nextcloud_db_name }} (Unix socket) + 🗄️ Cache: Valkey DB {{ nextcloud_valkey_db }} (Unix socket) + 🐳 Container: FPM via Podman Quadlet + 🔒 Admin: {{ nextcloud_admin_user }} + + 🚀 Ready for file storage and collaboration! 
+ + 📋 Next Steps: + - Access https://{{ nextcloud_domain }} to complete setup + - Install desired Nextcloud apps + - Configure user accounts + tags: [verification] diff --git a/roles/nextcloud/tasks/user.yml b/roles/nextcloud/tasks/user.yml new file mode 100644 index 0000000..fd48330 --- /dev/null +++ b/roles/nextcloud/tasks/user.yml @@ -0,0 +1,54 @@ +--- +# Nextcloud User Management - Service-Specific User Setup + +- name: Check if nextcloud group exists + getent: + database: group + key: "{{ nextcloud_group }}" + register: nextcloud_group_check + failed_when: false + +- name: Create nextcloud group if not exists + group: + name: "{{ nextcloud_group }}" + system: true + when: nextcloud_group_check.ansible_facts.getent_group is not defined + +- name: Check if nextcloud user exists + getent: + database: passwd + key: "{{ nextcloud_user }}" + register: nextcloud_user_check + failed_when: false + +- name: Create or update nextcloud user + user: + name: "{{ nextcloud_user }}" + group: "{{ nextcloud_group }}" + groups: "{{ [postgresql_client_group, valkey_client_group] }}" + system: true + shell: /usr/bin/nologin + home: "{{ nextcloud_home }}" + create_home: true + comment: "Nextcloud cloud storage service" + append: true + +- name: Create nextcloud home directory + file: + path: "{{ nextcloud_home }}" + state: directory + owner: "{{ nextcloud_user }}" + group: "{{ nextcloud_group }}" + mode: '0755' + +- name: Get nextcloud user UID and GID for container configuration + shell: | + echo "uid=$(id -u {{ nextcloud_user }})" + echo "gid=$(id -g {{ nextcloud_user }})" + register: nextcloud_user_info + changed_when: false + +- name: Set nextcloud UID/GID facts for container templates + set_fact: + nextcloud_uid: "{{ nextcloud_user_info.stdout_lines[0] | regex_replace('uid=', '') }}" + nextcloud_gid: "{{ nextcloud_user_info.stdout_lines[1] | regex_replace('gid=', '') }}" diff --git a/roles/nextcloud/templates/nextcloud.caddy.j2 b/roles/nextcloud/templates/nextcloud.caddy.j2 
new file mode 100644 index 0000000..5fa267f --- /dev/null +++ b/roles/nextcloud/templates/nextcloud.caddy.j2 @@ -0,0 +1,71 @@ +# Nextcloud Cloud Storage Service +# Caddy reverse proxy to FPM container with FastCGI transport +# Based on official Caddy php_fastcgi Docker example and Nextcloud NGINX config +{{ nextcloud_domain }} { + # Caddy root - host path where static files exist for serving + # This allows Caddy to find files to serve directly (CSS, JS, images) + root * {{ nextcloud_html_dir }} + + # .well-known redirects for CalDAV/CardDAV (must be before php_fastcgi) + redir /.well-known/carddav /remote.php/dav 301 + redir /.well-known/caldav /remote.php/dav 301 + + # Handle .well-known requests that aren't explicitly redirected above + # Let Nextcloud's API handle all other /.well-known/* URIs + redir /.well-known/* /index.php{uri} 301 + + # Block access to sensitive directories (adapted from NGINX config) + # Match both the directory itself and anything under it + @forbidden { + path /build /build/* + path /tests /tests/* + path /config /config/* + path /lib /lib/* + path /3rdparty /3rdparty/* + path /templates /templates/* + path /data /data/* + path /.* /autotest* /occ* /issue* /indie* /db_* /console* + } + respond @forbidden 404 + + # PHP-FPM with container root for SCRIPT_FILENAME + # The nested 'root' directive tells FPM where files are in the container + # Per official Caddy docs: https://caddyserver.com/docs/caddyfile/directives/php_fastcgi + php_fastcgi 127.0.0.1:{{ nextcloud_fpm_port }} { + root /var/www/html + env front_controller_active true + env modHeadersAvailable true + } + + # Serve static files directly (CSS, JS, images, fonts, etc.) 
+ # Disable index serving to let php_fastcgi handle / and /index.php + # This prevents index.html from being served instead of routing to index.php + file_server { + index off + } + + # Security headers (adapted from Nextcloud NGINX config) + header { + # HSTS with preload + Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" + # Prevent embedding in frames from other origins + X-Frame-Options "SAMEORIGIN" + # Prevent MIME type sniffing + X-Content-Type-Options "nosniff" + # XSS protection + X-XSS-Protection "1; mode=block" + # Referrer policy + Referrer-Policy "no-referrer" + # Disable FLoC tracking + Permissions-Policy "interest-cohort=()" + # Remove server header + -Server + } + + # Logging + log { + output file {{ caddy_log_dir }}/nextcloud.log + level INFO + format json + } +} diff --git a/roles/nextcloud/templates/nextcloud.container b/roles/nextcloud/templates/nextcloud.container new file mode 100644 index 0000000..d36c272 --- /dev/null +++ b/roles/nextcloud/templates/nextcloud.container @@ -0,0 +1,44 @@ +[Unit] +Description=Nextcloud Cloud Storage Container (FPM) +After=network-online.target postgresql.service valkey.service +Wants=network-online.target + +[Container] +ContainerName=nextcloud +Image={{ nextcloud_image }}:{{ nextcloud_version }} +EnvironmentFile={{ nextcloud_home }}/.env + +# Note: Container runs as root initially for entrypoint scripts, +# then switches to www-data (UID 33) for PHP-FPM process. +# This is the default behavior of the official Nextcloud image. 
+# Socket access works via 777 permissions (see infrastructure role docs) + +# Volume mounts +# Application files (world-readable for Caddy to serve static assets) +Volume={{ nextcloud_html_dir }}:/var/www/html:Z + +# User data (private - only container can access) +Volume={{ nextcloud_data_dir }}:/var/www/html/data:Z + +# Configuration (private - contains secrets) +Volume={{ nextcloud_config_dir }}:/var/www/html/config:Z + +# Custom apps (world-readable) +Volume={{ nextcloud_custom_apps_dir }}:/var/www/html/custom_apps:Z + +# Redis session configuration override (zz- prefix ensures it loads last) +Volume={{ nextcloud_home }}/redis-session-override.ini:/usr/local/etc/php/conf.d/zz-redis-session-override.ini:Z,ro + +# Infrastructure sockets (mounted with world-readable permissions on host) +Volume={{ postgresql_unix_socket_directories }}:{{ postgresql_unix_socket_directories }}:Z +Volume={{ valkey_unix_socket_path | dirname }}:{{ valkey_unix_socket_path | dirname }}:Z + +# Expose FPM port to localhost only (Caddy will reverse proxy) +PublishPort=127.0.0.1:{{ nextcloud_fpm_port }}:9000 + +[Service] +Restart=always +TimeoutStartSec=300 + +[Install] +WantedBy=multi-user.target diff --git a/roles/nextcloud/templates/nextcloud.env.j2 b/roles/nextcloud/templates/nextcloud.env.j2 new file mode 100644 index 0000000..f92cb53 --- /dev/null +++ b/roles/nextcloud/templates/nextcloud.env.j2 @@ -0,0 +1,50 @@ +# Nextcloud Environment Configuration +# Generated by Ansible Nextcloud role + +# ================================================================= +# Database Configuration (PostgreSQL via Unix Socket) +# ================================================================= +POSTGRES_HOST={{ postgresql_unix_socket_directories }} +POSTGRES_DB={{ nextcloud_db_name }} +POSTGRES_USER={{ nextcloud_db_user }} +POSTGRES_PASSWORD={{ nextcloud_db_password }} + +# ================================================================= +# Admin Account (Auto-configured on first run) +# 
================================================================= +NEXTCLOUD_ADMIN_USER={{ nextcloud_admin_user }} +NEXTCLOUD_ADMIN_PASSWORD={{ nextcloud_admin_password }} + +# ================================================================= +# Trusted Domains +# ================================================================= +NEXTCLOUD_TRUSTED_DOMAINS={{ nextcloud_trusted_domains }} + +# ================================================================= +# Redis/Valkey Cache Configuration +# ================================================================= +# Note: Nextcloud uses REDIS_* variables even for Valkey (Redis-compatible) +# Socket access works because infrastructure sockets use 777 permissions +# Note: These are disabled since we've encountered slowdowns and issues with redis sessions. Instead nextcloud now uses file sessions. +# REDIS_HOST={{ valkey_unix_socket_path }} +# REDIS_HOST_PASSWORD={{ valkey_password }} + +# ================================================================= +# Reverse Proxy Configuration +# ================================================================= +# These settings tell Nextcloud it's behind a reverse proxy (Caddy) +OVERWRITEPROTOCOL={{ nextcloud_overwriteprotocol }} +OVERWRITEHOST={{ nextcloud_domain }} +TRUSTED_PROXIES=127.0.0.1 + +# ================================================================= +# PHP Configuration +# ================================================================= +PHP_MEMORY_LIMIT={{ nextcloud_php_memory_limit }} +PHP_UPLOAD_LIMIT={{ nextcloud_php_upload_limit }} + +# ================================================================= +# Application Settings +# ================================================================= +# Enable automatic updates during container restart +NEXTCLOUD_UPDATE=1 diff --git a/roles/nextcloud/templates/redis-session-override.ini.j2 b/roles/nextcloud/templates/redis-session-override.ini.j2 new file mode 100644 index 0000000..b5f4e1b --- /dev/null +++ 
b/roles/nextcloud/templates/redis-session-override.ini.j2 @@ -0,0 +1,16 @@ +; Redis Session Lock Override for Nextcloud +; Prevents orphaned session locks from causing infinite hangs +; +; Default Nextcloud container settings: +; redis.session.lock_expire = 0 (locks NEVER expire - causes infinite hangs) +; redis.session.lock_retries = -1 (infinite retries - causes worker exhaustion) +; redis.session.lock_wait_time = 10000 (microseconds, i.e. 10 ms per retry - with infinite retries a worker waits forever) +; +; These settings ensure locks auto-expire and failed requests don't block workers forever: +; - Locks expire after 30 seconds (prevents orphaned locks) +; - Max 100 retries = 5 seconds total wait time (prevents infinite loops) +; - 50ms wait between retries (lock_wait_time is in microseconds; reasonable balance) + +redis.session.lock_expire = 30 +redis.session.lock_retries = 100 +redis.session.lock_wait_time = 50000 diff --git a/roles/nextcloud/templates/redis.config.php.j2 b/roles/nextcloud/templates/redis.config.php.j2 new file mode 100644 index 0000000..7d6ff7f --- /dev/null +++ b/roles/nextcloud/templates/redis.config.php.j2 @@ -0,0 +1,34 @@ + '\OC\Memcache\Redis', + 'memcache.locking' => '\OC\Memcache\Redis', + 'redis' => array( + 'host' => '{{ valkey_unix_socket_path }}', + 'password' => '{{ valkey_password }}', + ), +); diff --git a/roles/postgresql/README.md b/roles/postgresql/README.md index da41b35..12fbe58 100644 --- a/roles/postgresql/README.md +++ b/roles/postgresql/README.md @@ -114,6 +114,37 @@ The role implements comprehensive systemd security restrictions: - Local connections only by default - Encrypted password storage +### Unix Socket Permissions + +**Current Configuration**: Socket permissions are set to `0777` (world-readable/writable) + +**Rationale**: +- Allows containers running as any UID to access the socket +- Needed for containers that start as root and switch to unprivileged users (e.g., Nextcloud's www-data) +- Security is maintained via password authentication (scram-sha-256) +- Sockets are local-only (not
network-exposed) + +**Security Considerations**: +- ⚠️ Any local process can connect to the socket (the trade-off of mode 0777) +- ✅ But still requires valid username + password to authenticate +- ✅ Limited to processes on same host (not network) +- ✅ Passwords stored encrypted with scram-sha-256 + +**Alternative Approach (TCP)**: +If you prefer more restrictive socket permissions, you can use TCP instead: + +```yaml +# In host_vars +postgresql_listen_addresses: "127.0.0.1" # Listen on localhost TCP +postgresql_unix_socket_permissions: "0770" # Restrict socket to group + +# In application configs +# Use: host=127.0.0.1 port=5432 +# Instead of: host=/var/run/postgresql +``` + +This provides the same security level (password-authenticated, localhost-only) but uses TCP instead of Unix sockets. + ### File System Security - Proper ownership and permissions diff --git a/roles/postgresql/defaults/main.yml b/roles/postgresql/defaults/main.yml index 6e26579..3571fd5 100644 --- a/roles/postgresql/defaults/main.yml +++ b/roles/postgresql/defaults/main.yml @@ -20,7 +20,11 @@ postgresql_port: 5432 # Unix Socket Configuration postgresql_unix_socket_enabled: true postgresql_unix_socket_directories: "/var/run/postgresql" -postgresql_unix_socket_permissions: "0770" +# Note: 0777 allows containers running as any UID to access the socket +# This is needed for containers that start as root and switch to unprivileged users (e.g., Nextcloud) +# Security is maintained via password authentication (scram-sha-256) +# Alternative: Use TCP on 127.0.0.1:5432 (see documentation) +postgresql_unix_socket_permissions: "0777" # Group-Based Access Control postgresql_client_group: "postgres-clients" diff --git a/roles/postgresql/tasks/main.yml b/roles/postgresql/tasks/main.yml index 4741ee6..13cee55 100644 --- a/roles/postgresql/tasks/main.yml +++ b/roles/postgresql/tasks/main.yml @@ -86,7 +86,7 @@ state: directory owner: postgres group: "{{ postgresql_client_group }}" - mode: '0770' + mode: '0777' when:
postgresql_unix_socket_enabled - name: Get PostgreSQL client group GID for containerized applications diff --git a/roles/valkey/README.md b/roles/valkey/README.md index 3e74b66..1a67dd5 100644 --- a/roles/valkey/README.md +++ b/roles/valkey/README.md @@ -102,12 +102,45 @@ Valkey maintains **100% Redis compatibility**: ## Security -- **Network isolation**: Binds only to localhost +- **Network isolation**: Binds only to localhost (or Unix socket only) - **Authentication**: Password protection required - **Command restrictions**: Dangerous commands disabled - **Systemd hardening**: Full security restrictions applied - **File permissions**: Restrictive access to configuration and data +### Unix Socket Permissions + +**Current Configuration**: Socket permissions are set to `777` (world-readable/writable) + +**Rationale**: +- Allows containers running as any UID to access the socket +- Needed for containers that start as root and switch to unprivileged users (e.g., Nextcloud's www-data) +- Security is maintained via password authentication (requirepass) +- Sockets are local-only (not network-exposed) + +**Security Considerations**: +- ⚠️ Any local process can connect to the socket (the trade-off of mode 777) +- ✅ But still requires valid password to authenticate +- ✅ Limited to processes on same host (not network) +- ✅ Password stored securely in vault + +**Alternative Approach (TCP)**: +If you prefer more restrictive socket permissions, you can use TCP instead: + +```yaml +# In host_vars +valkey_bind: "127.0.0.1" # Use TCP instead of socket +valkey_port: 6379 +valkey_unix_socket_enabled: false # Disable Unix socket +valkey_unix_socket_perm: "770" # Restrict socket to group (if enabled) + +# In application configs +# Use: host=127.0.0.1 port=6379 +# Instead of: host=/var/run/valkey/valkey.sock +``` + +This provides the same security level (password-authenticated, localhost-only) but uses TCP instead of Unix sockets. + ## Dependencies This is an infrastructure role with no dependencies.
Applications that need Valkey should declare this role as a dependency: diff --git a/roles/valkey/defaults/main.yml b/roles/valkey/defaults/main.yml index 0428d2a..84d0e05 100644 --- a/roles/valkey/defaults/main.yml +++ b/roles/valkey/defaults/main.yml @@ -21,7 +21,11 @@ valkey_protected_mode: false # Not needed for socket-only mode # Unix Socket Configuration valkey_unix_socket_enabled: true valkey_unix_socket_path: "/var/run/valkey/valkey.sock" -valkey_unix_socket_perm: "770" +# Note: 777 allows containers running as any UID to access the socket +# This is needed for containers that start as root and switch to unprivileged users (e.g., Nextcloud) +# Security is maintained via password authentication (requirepass) +# Alternative: Use TCP on 127.0.0.1:6379 (see documentation) +valkey_unix_socket_perm: "777" # Group-Based Access Control valkey_client_group: "valkey-clients" diff --git a/roles/valkey/tasks/main.yml b/roles/valkey/tasks/main.yml index f0eba18..ab5fced 100644 --- a/roles/valkey/tasks/main.yml +++ b/roles/valkey/tasks/main.yml @@ -47,7 +47,7 @@ state: directory owner: valkey group: "{{ valkey_client_group }}" - mode: '0770' + mode: '0777' when: valkey_unix_socket_enabled - name: Deploy Valkey configuration file diff --git a/roles/valkey/templates/valkey-socket-fix.service.j2 b/roles/valkey/templates/valkey-socket-fix.service.j2 index 3560b13..0e75ed7 100644 --- a/roles/valkey/templates/valkey-socket-fix.service.j2 +++ b/roles/valkey/templates/valkey-socket-fix.service.j2 @@ -8,7 +8,9 @@ Type=oneshot # Wait for socket to exist (max 10 seconds) ExecStart=/bin/sh -c 'i=0; while [ ! -S {{ valkey_unix_socket_path }} ] && [ $i -lt 100 ]; do sleep 0.1; i=$((i+1)); done' ExecStart=/bin/chgrp {{ valkey_client_group }} {{ valkey_unix_socket_path }} -ExecStart=/bin/chmod 770 {{ valkey_unix_socket_path }} +# Set to 777 for containers that switch users after startup (e.g. 
Nextcloud root->www-data) +# Security is maintained via password authentication (requirepass) +ExecStart=/bin/chmod 777 {{ valkey_unix_socket_path }} RemainAfterExit=yes [Install] diff --git a/site.yml b/site.yml index d310b49..25646f7 100644 --- a/site.yml +++ b/site.yml @@ -23,5 +23,7 @@ # tags: ['sigvild', 'gallery', 'wedding'] # - role: gitea # tags: ['gitea', 'git', 'development'] - - role: authentik - tags: ['authentik'] + - role: nextcloud + tags: ['nextcloud'] + # - role: authentik + # tags: ['authentik']