Compare commits

...

2 Commits

SHA1 Message Date
cf2210ec77 Another attempt at fixing the NFS race. 2025-10-24 13:59:39 +01:00
1dc219d08f Install killall everywhere. 2025-10-24 11:56:47 +01:00
3 changed files with 76 additions and 17 deletions

View File

@@ -3,6 +3,7 @@
   environment.systemPackages = with pkgs; [
     age
     file
+    killall
     lm_sensors # TODO: this shouldn't be installed on cloud nodes
     nodejs_20 # TODO: this is for one job on nomad, it should just be a dependency there
     neovim

View File

@@ -4,15 +4,18 @@
   # Mounts from data-services.service.consul (Consul DNS for automatic failover)
   # The NFS server registers itself in Consul, so this will automatically
   # point to whichever host is currently running the NFS server
+  #
+  # Uses persistent mount (not automount) with nofail to prevent blocking boot.
+  # The mount is established at boot time and persists - no auto-unmount.
+  # This prevents issues with Docker bind mounts seeing empty automount stubs.
   fileSystems."/data/services" = {
     device = "data-services.service.consul:/persist/services";
     fsType = "nfs";
     options = [
-      "x-systemd.automount" # Auto-mount on access
-      "noauto" # Don't mount at boot (automount handles it)
-      "x-systemd.idle-timeout=60" # Unmount after 60s of inactivity
-      "_netdev" # Network filesystem (wait for network)
+      "nofail" # Don't block boot if mount fails
+      "x-systemd.mount-timeout=30s" # Timeout for mount attempts
+      "_netdev" # Network filesystem (wait for network)
     ];
   };

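Not part of the change itself, but a quick sanity check after deploying it — assuming the mount unit systemd generates for /data/services is data-services.mount (the name the Nomad unit below depends on):

$ findmnt --mountpoint /data/services       # live NFS mount and its effective options
$ systemctl status data-services.mount      # mount unit should be active
$ systemctl list-units 'data-services.*'    # only the .mount unit should exist, no .automount anymore
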
View File

@@ -59,20 +59,75 @@ in
     extraSettingsPaths = [ "/etc/nomad-alo.json" ];
   };
-  # Fix race condition between NFS automount and Docker bind mounts:
-  # Without this, Docker can bind-mount the empty automount stub directory
-  # before NFS actually mounts, causing permission errors and missing data.
-  # - RequiresMountsFor: tells systemd that Nomad depends on /data/services
-  # - ExecStartPre: triggers the automount before Nomad starts
-  # Note: boot will still succeed if NFS is unavailable (Nomad just won't start)
-  # TODO: NFS mount uses Consul DNS which resolves to an IP at mount time.
-  # If the NFS server moves to a different IP, the mount becomes stale
-  # and needs to be remounted. Consider using a VIP or implementing
-  # a health check that remounts on staleness detection.
+  # NFS mount dependency configuration for Nomad:
+  #
+  # Problem: Docker bind mounts need the real NFS mount, not an empty stub.
+  # If Nomad starts before NFS is mounted, containers get empty directories.
+  #
+  # Solution: Use soft dependencies (wants/after) with health-checking recovery.
+  # - wants: Nomad wants the mount, but won't be killed if it goes away
+  # - after: Nomad waits for mount to be attempted before starting
+  # - ExecStartPre with findmnt: Blocks Nomad start until mount is actually active
+  #
+  # This prevents Docker race conditions while allowing:
+  # - Boot to proceed if NFS unavailable (Nomad fails to start, systemd retries)
+  # - Nomad to keep running if NFS temporarily fails (containers may error)
+  # - Recovery service to auto-restart Nomad when NFS comes back or becomes stale
+  #
+  # Note: Mount uses Consul DNS which resolves at mount time. If NFS server
+  # moves to different IP, mount becomes stale and needs remount.
+  # The recovery service handles this by detecting stale mounts and restarting Nomad.
   systemd.services.nomad = {
-    wants = [ "network-online.target" ];
-    unitConfig.RequiresMountsFor = [ "/data/services" ];
-    serviceConfig.ExecStartPre = "${pkgs.coreutils}/bin/ls /data/services";
+    wants = [ "network-online.target" "data-services.mount" ];
+    after = [ "data-services.mount" ];
+    serviceConfig.ExecStartPre = "${pkgs.util-linux}/bin/findmnt --mountpoint /data/services";
+  };
+  # Recovery service: automatically restart Nomad when NFS mount needs attention
+  # This handles scenarios where:
+  # - NFS server was down during boot (mount failed, Nomad hit start-limit)
+  # - NFS server failed over to different host with new IP (mount went stale)
+  # - Network outage temporarily broke the mount
+  #
+  # The timer runs every 30s and checks:
+  # 1. Is mount healthy (exists and accessible)?
+  # 2. If mount is stale/inaccessible → restart Nomad (triggers remount)
+  # 3. If mount is healthy but Nomad failed → restart Nomad (normal recovery)
+  systemd.services.nomad-mount-watcher = {
+    description = "Restart Nomad when NFS mount needs attention";
+    serviceConfig = {
+      Type = "oneshot";
+      ExecStart = pkgs.writeShellScript "nomad-mount-watcher" ''
+        # Check if mount point exists
+        if ! ${pkgs.util-linux}/bin/findmnt --mountpoint /data/services >/dev/null 2>&1; then
+          exit 0 # Mount not present, nothing to do
+        fi
+        # Check if mount is actually accessible (not stale)
+        # Use timeout to avoid hanging on stale NFS mounts
+        if ! ${pkgs.coreutils}/bin/timeout 5s ${pkgs.coreutils}/bin/stat /data/services >/dev/null 2>&1; then
+          echo "NFS mount is stale or inaccessible. Restarting Nomad to trigger remount..."
+          ${pkgs.systemd}/bin/systemctl restart nomad.service
+          exit 0
+        fi
+        # Mount is healthy - check if Nomad needs recovery
+        if ${pkgs.systemd}/bin/systemctl is-failed nomad.service >/dev/null 2>&1; then
+          echo "NFS mount is healthy but Nomad is failed. Restarting Nomad..."
+          ${pkgs.systemd}/bin/systemctl restart nomad.service
+        fi
+      '';
+    };
+  };
+  systemd.timers.nomad-mount-watcher = {
+    description = "Timer for Nomad mount watcher";
+    wantedBy = [ "timers.target" ];
+    timerConfig = {
+      OnBootSec = "1min"; # First run 1min after boot
+      OnUnitActiveSec = "30s"; # Then every 30s
+      Unit = "nomad-mount-watcher.service";
+    };
   };
   environment.etc."nomad-alo.json".text = builtins.toJSON {
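
On a node running this config, the new wiring can be exercised by hand; a rough checklist, not part of the change, assuming the unit names used above (nomad.service, data-services.mount, nomad-mount-watcher):

$ systemctl show nomad.service -p Wants -p After      # data-services.mount should appear in both
$ systemctl list-timers 'nomad-mount-watcher*'        # watcher timer scheduled (1min after boot, then every 30s)
$ systemctl start nomad-mount-watcher.service         # run a single check immediately
$ journalctl -u nomad-mount-watcher.service -n 20     # see whether it restarted Nomad and why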