Refactor common modules.
common/nomad.nix
@@ -1,174 +1,177 @@
 # inspiration: https://github.com/astro/skyflake/blob/main/nixos-modules/nomad.nix
-{ pkgs, config, ... }:
+{ pkgs, config, lib, ... }:
 let
   servers = [
     "c1"
     "c2"
     "c3"
   ];
-  server_enabled = builtins.elem config.networking.hostName servers;
 in
 {
+  options.clusterRole.nomadServer = lib.mkEnableOption "Nomad server mode";
+
+  config = {
   services.nomad = {
     enable = true;
     # true breaks at least CSI volumes
     # TODO: consider fixing
     dropPrivileges = false;

     settings = {
       datacenter = "alo";

       client = {
         enabled = true;
         server_join.retry_join = servers;
         host_network.tailscale = {
           interface = "tailscale0";
           cidr = "100.64.0.0/10";
         };
         host_volume = {
           services = {
             path = "/data/services";
             read_only = false;
           };
           nix-store = {
             path = "/nix/store";
             read_only = true;
           };
           sw = {
             path = "/run/current-system/sw";
             read_only = true;
           };
         };
       };

       server = {
-        enabled = server_enabled;
+        enabled = config.clusterRole.nomadServer;
         bootstrap_expect = (builtins.length servers + 2) / 2;
         server_join.retry_join = servers;
       };

       telemetry = {
         collection_interval = "1s";
         disable_hostname = true;
         prometheus_metrics = true;
         publish_allocation_metrics = true;
         publish_node_metrics = true;
       };
     };

     extraSettingsPaths = [ "/etc/nomad-alo.json" ];
   };

   # NFS mount dependency configuration for Nomad:
   #
   # Problem: Docker bind mounts need the real NFS mount, not an empty stub.
   # If Nomad starts before NFS is mounted, containers get empty directories.
   #
   # Solution: use soft dependencies (wants/after) plus a health-checking recovery service.
   # - wants: Nomad wants the mount, but won't be killed if it goes away
   # - after: Nomad waits for the mount to be attempted before starting
   # - ExecStartPre with findmnt: blocks Nomad's start until the mount is actually active
   #
   # This prevents Docker race conditions while allowing:
   # - boot to proceed if NFS is unavailable (Nomad fails to start, systemd retries)
   # - Nomad to keep running if NFS temporarily fails (containers may error)
   # - the recovery service to auto-restart Nomad when NFS comes back or the mount goes stale
   #
   # Note: the mount uses Consul DNS, which resolves at mount time. If the NFS server
   # moves to a different IP, the mount becomes stale and needs a remount.
   # The recovery service handles this by detecting stale mounts and restarting Nomad.
   systemd.services.nomad = {
     wants = [ "network-online.target" "data-services.mount" ];
     after = [ "data-services.mount" ];
     serviceConfig.ExecStartPre = "${pkgs.util-linux}/bin/findmnt --mountpoint /data/services";
   };

   # Recovery service: automatically restart Nomad when the NFS mount needs attention.
   # This handles scenarios where:
   # - the NFS server was down during boot (the mount failed and Nomad hit its start limit)
   # - the NFS server failed over to a different host with a new IP (the mount went stale)
   # - a network outage temporarily broke the mount
   #
   # The timer runs every 30s and checks:
   # 1. Is the mount healthy (present and accessible)?
   # 2. If the mount is stale/inaccessible → restart Nomad (triggers a remount)
   # 3. If the mount is healthy but Nomad has failed → restart Nomad (normal recovery)
   systemd.services.nomad-mount-watcher = {
     description = "Restart Nomad when NFS mount needs attention";
     serviceConfig = {
       Type = "oneshot";
       ExecStart = pkgs.writeShellScript "nomad-mount-watcher" ''
         # Check if the mount point exists
         if ! ${pkgs.util-linux}/bin/findmnt --mountpoint /data/services >/dev/null 2>&1; then
           exit 0 # Mount not present, nothing to do
         fi

         # Check if the mount is actually accessible (not stale).
         # Use timeout to avoid hanging on stale NFS mounts.
         if ! ${pkgs.coreutils}/bin/timeout 5s ${pkgs.coreutils}/bin/stat /data/services >/dev/null 2>&1; then
           echo "NFS mount is stale or inaccessible. Restarting Nomad to trigger a remount..."
           ${pkgs.systemd}/bin/systemctl restart nomad.service
           exit 0
         fi

         # Mount is healthy - check whether Nomad itself needs recovery
         if ${pkgs.systemd}/bin/systemctl is-failed nomad.service >/dev/null 2>&1; then
           echo "NFS mount is healthy but Nomad has failed. Restarting Nomad..."
           ${pkgs.systemd}/bin/systemctl restart nomad.service
         fi
       '';
     };
   };

   systemd.timers.nomad-mount-watcher = {
     description = "Timer for Nomad mount watcher";
     wantedBy = [ "timers.target" ];
     timerConfig = {
       OnBootSec = "1min"; # first run 1min after boot
       OnUnitActiveSec = "30s"; # then every 30s
       Unit = "nomad-mount-watcher.service";
     };
   };

   environment.etc."nomad-alo.json".text = builtins.toJSON {
     plugin.docker.config = {
       allow_privileged = true;
       # for keepalived, though it only really needs "NET_ADMIN", "NET_BROADCAST" and "NET_RAW" on top of the defaults
       # TODO: trim this down
       allow_caps = [ "all" ];
       volumes.enabled = true;
       extra_labels = [
         "job_name"
         "task_group_name"
         "task_name"
         "node_name"
       ];
     };

     plugin.raw_exec.config.enabled = true;
   };

   environment.persistence."/persist".directories = [
     "/var/lib/docker"
     "/var/lib/nomad"
   ];

   environment.systemPackages = with pkgs; [
     nomad
     wander
     damon
   ];

   networking.firewall = {
     allowedTCPPorts =
-      if server_enabled then
+      if config.clusterRole.nomadServer then
         [
           4646
           4647
           4648
         ]
       else
         [ 4646 ];
-    allowedUDPPorts = if server_enabled then [ 4648 ] else [ ];
+    allowedUDPPorts = if config.clusterRole.nomadServer then [ 4648 ] else [ ];
   };
+  };
 }
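
The refactor replaces the hostname-derived server_enabled flag with an explicit clusterRole.nomadServer option. As a rough sketch of how hosts might opt in after this change (the file names and import paths below are assumptions for illustration, not part of this commit), a server node sets the option while a client-only node just imports the module and keeps its default:

  # hosts/c1.nix (hypothetical path): a Nomad server node
  {
    imports = [ ../common/nomad.nix ];
    clusterRole.nomadServer = true;
  }

  # hosts/worker1.nix (hypothetical path): a client-only node
  {
    imports = [ ../common/nomad.nix ];
    # clusterRole.nomadServer defaults to false via mkEnableOption,
    # so only the Nomad client runs and only TCP port 4646 is opened here.
  }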
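
For reference, the bootstrap_expect expression in the diff works out to a Raft majority of the hard-coded three-node server list; a quick sketch of the arithmetic (Nix integer division truncates):

  # With servers = [ "c1" "c2" "c3" ]:
  #   builtins.length servers == 3
  #   (3 + 2) / 2             == 2   (integer division)
  # so the servers wait for 2 peers, a majority of 3, before bootstrapping the cluster.
  (builtins.length [ "c1" "c2" "c3" ] + 2) / 2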