diff --git a/common/nfs-services-client.nix b/common/nfs-services-client.nix
index c879d2c..77c885f 100644
--- a/common/nfs-services-client.nix
+++ b/common/nfs-services-client.nix
@@ -4,15 +4,18 @@
   # Mounts from data-services.service.consul (Consul DNS for automatic failover)
   # The NFS server registers itself in Consul, so this will automatically
   # point to whichever host is currently running the NFS server
+  #
+  # Uses a persistent mount (not automount) with nofail to avoid blocking boot.
+  # The mount is established at boot time and persists - no auto-unmount.
+  # This prevents issues with Docker bind mounts seeing empty automount stubs.
   fileSystems."/data/services" = {
     device = "data-services.service.consul:/persist/services";
     fsType = "nfs";
     options = [
-      "x-systemd.automount"         # Auto-mount on access
-      "noauto"                      # Don't mount at boot (automount handles it)
-      "x-systemd.idle-timeout=60"   # Unmount after 60s of inactivity
-      "_netdev"                     # Network filesystem (wait for network)
+      "nofail"                      # Don't block boot if the mount fails
+      "x-systemd.mount-timeout=30s" # Timeout for mount attempts
+      "_netdev"                     # Network filesystem (wait for network)
     ];
   };

diff --git a/common/nomad.nix b/common/nomad.nix
index 279493e..9362b98 100644
--- a/common/nomad.nix
+++ b/common/nomad.nix
@@ -59,20 +59,81 @@ in
     extraSettingsPaths = [ "/etc/nomad-alo.json" ];
   };

-  # Fix race condition between NFS automount and Docker bind mounts:
-  # Without this, Docker can bind-mount the empty automount stub directory
-  # before NFS actually mounts, causing permission errors and missing data.
-  # - RequiresMountsFor: tells systemd that Nomad depends on /data/services
-  # - ExecStartPre: triggers the automount before Nomad starts
-  # Note: boot will still succeed if NFS is unavailable (Nomad just won't start)
-  # TODO: NFS mount uses Consul DNS which resolves to an IP at mount time.
-  #       If the NFS server moves to a different IP, the mount becomes stale
-  #       and needs to be remounted. Consider using a VIP or implementing
-  #       a health check that remounts on staleness detection.
+  # NFS mount dependency configuration for Nomad:
+  #
+  # Problem: Docker bind mounts need the real NFS mount, not an empty stub.
+  # If Nomad starts before NFS is mounted, containers get empty directories.
+  #
+  # Solution: use soft dependencies (wants/after) with health-checking recovery.
+  # - wants: Nomad wants the mount, but won't be killed if it goes away
+  # - after: Nomad waits for the mount to be attempted before starting
+  # - ExecStartPre with findmnt: blocks Nomad start until the mount is actually active
+  #
+  # This prevents the Docker race condition while allowing:
+  # - boot to proceed if NFS is unavailable (Nomad fails to start, systemd retries)
+  # - Nomad to keep running if NFS temporarily fails (containers may error)
+  # - the recovery service to auto-restart Nomad when NFS comes back or goes stale
+  #
+  # Note: the mount uses Consul DNS, which resolves at mount time. If the NFS server
+  # moves to a different IP, the mount becomes stale and needs a remount.
+  # The recovery service handles this by detecting stale mounts and restarting Nomad.
   systemd.services.nomad = {
-    wants = [ "network-online.target" ];
-    unitConfig.RequiresMountsFor = [ "/data/services" ];
-    serviceConfig.ExecStartPre = "${pkgs.coreutils}/bin/ls /data/services";
+    wants = [ "network-online.target" "data-services.mount" ];
+    after = [ "data-services.mount" ];
+    serviceConfig.ExecStartPre = "${pkgs.util-linux}/bin/findmnt --mountpoint /data/services";
+  };
+
+  # Recovery service: automatically restart Nomad when the NFS mount needs attention.
+  # This handles scenarios where:
+  # - the NFS server was down during boot (mount failed, Nomad hit its start limit)
+  # - the NFS server failed over to a different host with a new IP (mount went stale)
+  # - a network outage temporarily broke the mount
+  #
+  # The timer runs every 30s and checks:
+  # 1. Is the mount healthy (present and accessible)?
+  # 2. If the mount is stale/inaccessible → restart Nomad (triggers a remount)
+  # 3. If Nomad is in the failed state → restart Nomad (this also re-attempts the mount)
+  systemd.services.nomad-mount-watcher = {
+    description = "Restart Nomad when NFS mount needs attention";
+    serviceConfig = {
+      Type = "oneshot";
+      ExecStart = pkgs.writeShellScript "nomad-mount-watcher" ''
+        # Check whether the mount point is mounted at all
+        if ! ${pkgs.util-linux}/bin/findmnt --mountpoint /data/services >/dev/null 2>&1; then
+          # Mount not present (e.g. NFS was down at boot). If Nomad is failed,
+          # restart it; wants/after on data-services.mount re-attempts the mount.
+          if ${pkgs.systemd}/bin/systemctl is-failed nomad.service >/dev/null 2>&1; then
+            echo "NFS mount not present and Nomad is failed. Restarting Nomad to retry the mount..."
+            ${pkgs.systemd}/bin/systemctl restart nomad.service
+          fi
+          exit 0
+        fi
+
+        # Check whether the mount is actually accessible (not stale).
+        # Use timeout to avoid hanging on a stale NFS mount.
+        if ! ${pkgs.coreutils}/bin/timeout 5s ${pkgs.coreutils}/bin/stat /data/services >/dev/null 2>&1; then
+          echo "NFS mount is stale or inaccessible. Restarting Nomad to trigger remount..."
+          ${pkgs.systemd}/bin/systemctl restart nomad.service
+          exit 0
+        fi
+
+        # Mount is healthy - check whether Nomad needs recovery
+        if ${pkgs.systemd}/bin/systemctl is-failed nomad.service >/dev/null 2>&1; then
+          echo "NFS mount is healthy but Nomad is failed. Restarting Nomad..."
+          ${pkgs.systemd}/bin/systemctl restart nomad.service
+        fi
+      '';
+    };
+  };
+
+  systemd.timers.nomad-mount-watcher = {
+    description = "Timer for Nomad mount watcher";
+    wantedBy = [ "timers.target" ];
+    timerConfig = {
+      OnBootSec = "1min";      # First run 1 min after boot
+      OnUnitActiveSec = "30s"; # Then every 30s after each run
+      Unit = "nomad-mount-watcher.service";
+    };
   };

   environment.etc."nomad-alo.json".text = builtins.toJSON {
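To sanity-check the result on a client host after a rebuild, something like the following should work (a sketch, not part of the patch; it assumes only the mount point and unit names defined above):

    findmnt --mountpoint /data/services             # NFS share is mounted (same check ExecStartPre and the watcher use)
    systemctl list-timers 'nomad-mount-watcher*'    # the watcher timer is scheduled
    journalctl -u nomad-mount-watcher.service -n 20 # recent watcher runs
    systemctl start nomad-mount-watcher.service     # run one watcher check by hand
    systemctl status nomad.service                  # Nomad came up after the mount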