Refactor common modules.

2025-10-24 15:34:31 +01:00
parent e5cd9bd98e
commit 53ef2f6293
17 changed files with 262 additions and 219 deletions

View File

@@ -6,17 +6,23 @@ NixOS cluster configuration using flakes. Homelab infrastructure with Nomad/Cons
 ```
 ├── common/
 │   ├── global/                   # Applied to all hosts (backup, sops, users, etc.)
-│   ├── compute-node.nix          # Nomad client + Consul agent + NFS client
-│   ├── cluster-node.nix          # Nomad server + Consul server (for quorum members)
-│   ├── nfs-services-server.nix   # NFS server + btrfs replication (zippy)
-│   └── nfs-services-standby.nix  # NFS standby + receive replication (c1, c2)
+│   ├── minimal-node.nix          # Base (ssh, user, boot, impermanence)
+│   ├── cluster-member.nix        # Consul + storage clients (NFS/CIFS/GlusterFS)
+│   ├── nomad-worker.nix          # Nomad client (runs jobs) + Docker + NFS deps
+│   ├── nomad-server.nix          # Enables Consul + Nomad server mode
+│   ├── cluster-tools.nix         # Just CLI tools (nomad, wander, damon)
+│   ├── workstation-node.nix      # Dev tools (wget, deploy-rs, docker, nix-ld)
+│   ├── desktop-node.nix          # Hyprland + GUI environment
+│   ├── nfs-services-server.nix   # NFS server + btrfs replication (zippy)
+│   └── nfs-services-standby.nix  # NFS standby + receive replication (c1)
 ├── hosts/
-│   ├── c1/, c2/, c3/             # Cattle nodes (compute, quorum members)
-│   ├── zippy/                    # Primary storage + NFS server + stateful workloads
+│   ├── c1/, c2/, c3/             # Cattle nodes (quorum + workers)
+│   ├── zippy/                    # Primary storage + NFS server + worker (not quorum)
+│   ├── chilly/                   # Home Assistant VM + cluster member (Consul only)
+│   ├── sparky/                   # Desktop + cluster member (Consul only)
 │   ├── fractal/                  # (Proxmox, will become NixOS storage node)
-│   ├── sunny/                    # (Standalone ethereum node, not in cluster)
-│   └── chilly/                   # (Home Assistant VM, not in cluster)
+│   └── sunny/                    # (Standalone ethereum node, not in cluster)
 ├── docs/
 │   ├── CLUSTER_REVAMP.md         # Master plan for architecture changes
 │   ├── MIGRATION_TODO.md         # Tracking checklist for migration
@@ -35,11 +41,32 @@ NixOS cluster configuration using flakes. Homelab infrastructure with Nomad/Cons
 - `/data/shared` - CIFS from fractal (existing, unchanged)
 
 ### Hosts
-- **c1, c2, c3**: Cattle nodes, run most workloads, Nomad/Consul quorum
-- **zippy**: Primary NFS server, runs databases (affinity), replicates to c1 every 5min
+- **c1, c2, c3**: Cattle nodes, run most workloads, Nomad/Consul quorum members
+- **zippy**: Primary NFS server, runs workloads (affinity), NOT quorum, replicates to c1 every 5min
+- **chilly**: Home Assistant VM, cluster member (Consul agent + CLI tools), no workloads
+- **sparky**: Desktop/laptop, cluster member (Consul agent + CLI tools), no workloads
 - **fractal**: Storage node (Proxmox/ZFS), will join quorum after GlusterFS removed
-- **sunny**: Standalone ethereum staking node
-- **chilly**: Home Assistant VM
+- **sunny**: Standalone ethereum staking node (not in cluster)
+
+## Config Architecture
+
+**Modular role-based configs** (compose as needed):
+- `minimal-node.nix` - Base for all systems (SSH, user, boot, impermanence)
+- `cluster-member.nix` - Consul agent + shared storage mounts (no Nomad)
+- `nomad-worker.nix` - Nomad client to run jobs (requires cluster-member)
+- `nomad-server.nix` - Enables Consul + Nomad server mode (for quorum members)
+- `cluster-tools.nix` - Just CLI tools (no services)
+
+**Machine type configs** (via flake profile):
+- `workstation-node.nix` - Dev tools (deploy-rs, docker, nix-ld, emulation)
+- `desktop-node.nix` - Extends workstation + Hyprland/GUI
+
+**Host composition examples**:
+- c1/c2/c3: `cluster-member + nomad-worker + nomad-server` (quorum + runs jobs)
+- zippy: `cluster-member + nomad-worker` (runs jobs, not quorum)
+- chilly/sparky: `cluster-member + cluster-tools` (Consul + CLI only)
+
+**Key insight**: Profiles (workstation/desktop) no longer imply cluster membership. Hosts explicitly declare roles via imports.
 
 ## Key Patterns
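The composition examples above map directly onto the host configs touched later in this commit. As a quick illustration, here is a minimal sketch of what a quorum member such as c1 ends up importing; the paths mirror the actual host diffs below, and host-specific extras are omitted:

```nix
# Sketch of a quorum member that also runs jobs (mirrors the c1/c2/c3 diffs below).
# Roles are explicit imports; the flake profile no longer implies cluster membership.
{ ... }:
{
  imports = [
    ../../common/encrypted-btrfs-layout.nix
    ../../common/global
    ../../common/cluster-member.nix # Consul agent + shared storage mounts
    ../../common/nomad-worker.nix   # Nomad client (runs jobs)
    ../../common/nomad-server.nix   # sets clusterRole.consulServer/nomadServer
    ./hardware.nix
  ];
}
```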

View File

@@ -1,10 +0,0 @@
{ pkgs, ... }:
{
  # Compute node: Cluster node with Nomad and GlusterFS server
  # Used by: c1, c2, c3
  imports = [
    ./cluster-node.nix
    ./glusterfs.nix
    ./nomad.nix
  ];
}

View File

@@ -1,44 +1,47 @@
{ pkgs, config, lib, ... }:
let
  servers = [
    "c1"
    "c2"
    "c3"
  ];
in
{
  options.clusterRole.consulServer = lib.mkEnableOption "Consul server mode";

  config = {
    services.consul = {
      enable = true;
      webUi = true;
      interface.advertise = "eno1";
      extraConfig = {
        client_addr = "0.0.0.0";
        datacenter = "alo";
        server = config.clusterRole.consulServer;
        bootstrap_expect = if config.clusterRole.consulServer then (builtins.length servers + 2) / 2 else null;
        retry_join = builtins.filter (elem: elem != config.networking.hostName) servers;
        telemetry = {
          prometheus_retention_time = "24h";
          disable_hostname = true;
        };
      };
    };

    environment.persistence."/persist".directories = [ "/var/lib/consul" ];

    networking.firewall = {
      allowedTCPPorts = [
        8600
        8500
        8301
        8302
        8300
      ];
      allowedUDPPorts = [
        8600
        8301
        8302
      ];
    };
  };
}
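The `bootstrap_expect` expression relies on Nix's truncating integer division, so it evaluates to the quorum majority rather than the full server count. A small worked example for the three-server list above (evaluable with `nix eval --expr`):

```nix
# (n + 2) / 2 with integer division equals floor(n / 2) + 1, i.e. the majority:
#   3 servers -> 2, 5 servers -> 3
let
  servers = [ "c1" "c2" "c3" ];
in
(builtins.length servers + 2) / 2 # evaluates to 2
```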

common/nomad-server.nix Normal file
View File

@@ -0,0 +1,9 @@
{ ... }:
{
  # Enable server mode for both Consul and Nomad
  # Used by: c1, c2, c3 (quorum members)
  clusterRole = {
    consulServer = true;
    nomadServer = true;
  };
}

common/nomad-worker.nix Normal file
View File

@@ -0,0 +1,9 @@
{ ... }:
{
  # Enable Nomad client to run workloads
  # Includes: Nomad client, Docker plugin, host volumes, NFS mount dependencies
  # Used by: c1, c2, c3, zippy (all nodes that run Nomad jobs)
  imports = [
    ./nomad.nix
  ];
}
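`cluster-member.nix` itself is not part of this diff, but it presumably follows the same thin-wrapper pattern as `nomad-worker.nix`. A hedged sketch, where only `./consul.nix` is confirmed by this commit and the storage-client module name is a placeholder:

```nix
# Hypothetical sketch of common/cluster-member.nix (not shown in this commit).
# Consul agent + shared storage mounts, no Nomad service.
{ ... }:
{
  imports = [
    ./consul.nix          # server mode stays off unless clusterRole.consulServer is set
    ./storage-clients.nix # placeholder name: NFS/CIFS/GlusterFS client mounts
  ];
}
```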

View File

@@ -1,174 +1,177 @@
# inspiration: https://github.com/astro/skyflake/blob/main/nixos-modules/nomad.nix
{ pkgs, config, lib, ... }:
let
  servers = [
    "c1"
    "c2"
    "c3"
  ];
in
{
  options.clusterRole.nomadServer = lib.mkEnableOption "Nomad server mode";

  config = {
    services.nomad = {
      enable = true;
      # true breaks at least CSI volumes
      # TODO: consider fixing
      dropPrivileges = false;

      settings = {
        datacenter = "alo";

        client = {
          enabled = true;
          server_join.retry_join = servers;
          host_network.tailscale = {
            interface = "tailscale0";
            cidr = "100.64.0.0/10";
          };
          host_volume = {
            services = {
              path = "/data/services";
              read_only = false;
            };
            nix-store = {
              path = "/nix/store";
              read_only = true;
            };
            sw = {
              path = "/run/current-system/sw";
              read_only = true;
            };
          };
        };

        server = {
          enabled = config.clusterRole.nomadServer;
          bootstrap_expect = (builtins.length servers + 2) / 2;
          server_join.retry_join = servers;
        };

        telemetry = {
          collection_interval = "1s";
          disable_hostname = true;
          prometheus_metrics = true;
          publish_allocation_metrics = true;
          publish_node_metrics = true;
        };
      };

      extraSettingsPaths = [ "/etc/nomad-alo.json" ];
    };

    # NFS mount dependency configuration for Nomad:
    #
    # Problem: Docker bind mounts need the real NFS mount, not an empty stub.
    # If Nomad starts before NFS is mounted, containers get empty directories.
    #
    # Solution: Use soft dependencies (wants/after) with health-checking recovery.
    # - wants: Nomad wants the mount, but won't be killed if it goes away
    # - after: Nomad waits for mount to be attempted before starting
    # - ExecStartPre with findmnt: Blocks Nomad start until mount is actually active
    #
    # This prevents Docker race conditions while allowing:
    # - Boot to proceed if NFS unavailable (Nomad fails to start, systemd retries)
    # - Nomad to keep running if NFS temporarily fails (containers may error)
    # - Recovery service to auto-restart Nomad when NFS comes back or becomes stale
    #
    # Note: Mount uses Consul DNS which resolves at mount time. If NFS server
    # moves to different IP, mount becomes stale and needs remount.
    # The recovery service handles this by detecting stale mounts and restarting Nomad.
    systemd.services.nomad = {
      wants = [ "network-online.target" "data-services.mount" ];
      after = [ "data-services.mount" ];
      serviceConfig.ExecStartPre = "${pkgs.util-linux}/bin/findmnt --mountpoint /data/services";
    };

    # Recovery service: automatically restart Nomad when NFS mount needs attention
    # This handles scenarios where:
    # - NFS server was down during boot (mount failed, Nomad hit start-limit)
    # - NFS server failed over to different host with new IP (mount went stale)
    # - Network outage temporarily broke the mount
    #
    # The timer runs every 30s and checks:
    # 1. Is mount healthy (exists and accessible)?
    # 2. If mount is stale/inaccessible → restart Nomad (triggers remount)
    # 3. If mount is healthy but Nomad failed → restart Nomad (normal recovery)
    systemd.services.nomad-mount-watcher = {
      description = "Restart Nomad when NFS mount needs attention";
      serviceConfig = {
        Type = "oneshot";
        ExecStart = pkgs.writeShellScript "nomad-mount-watcher" ''
          # Check if mount point exists
          if ! ${pkgs.util-linux}/bin/findmnt --mountpoint /data/services >/dev/null 2>&1; then
            exit 0 # Mount not present, nothing to do
          fi

          # Check if mount is actually accessible (not stale)
          # Use timeout to avoid hanging on stale NFS mounts
          if ! ${pkgs.coreutils}/bin/timeout 5s ${pkgs.coreutils}/bin/stat /data/services >/dev/null 2>&1; then
            echo "NFS mount is stale or inaccessible. Restarting Nomad to trigger remount..."
            ${pkgs.systemd}/bin/systemctl restart nomad.service
            exit 0
          fi

          # Mount is healthy - check if Nomad needs recovery
          if ${pkgs.systemd}/bin/systemctl is-failed nomad.service >/dev/null 2>&1; then
            echo "NFS mount is healthy but Nomad is failed. Restarting Nomad..."
            ${pkgs.systemd}/bin/systemctl restart nomad.service
          fi
        '';
      };
    };

    systemd.timers.nomad-mount-watcher = {
      description = "Timer for Nomad mount watcher";
      wantedBy = [ "timers.target" ];
      timerConfig = {
        OnBootSec = "1min"; # First run 1min after boot
        OnUnitActiveSec = "30s"; # Then every 30s
        Unit = "nomad-mount-watcher.service";
      };
    };

    environment.etc."nomad-alo.json".text = builtins.toJSON {
      plugin.docker.config = {
        allow_privileged = true;
        # for keepalived, though only really needing "NET_ADMIN","NET_BROADCAST","NET_RAW" on top of default
        # TODO: trim this down
        allow_caps = [ "all" ];
        volumes.enabled = true;
        extra_labels = [
          "job_name"
          "task_group_name"
          "task_name"
          "node_name"
        ];
      };
      plugin.raw_exec.config.enabled = true;
    };

    environment.persistence."/persist".directories = [
      "/var/lib/docker"
      "/var/lib/nomad"
    ];

    environment.systemPackages = with pkgs; [
      nomad
      wander
      damon
    ];

    networking.firewall = {
      allowedTCPPorts =
        if config.clusterRole.nomadServer then
          [
            4646
            4647
            4648
          ]
        else
          [ 4646 ];
      allowedUDPPorts = if config.clusterRole.nomadServer then [ 4648 ] else [ ];
    };
  };
}
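The `data-services.mount` unit that Nomad waits on is defined outside this file, on the NFS client side. A hedged sketch of what such a mount could look like, with the Consul DNS device name and the mount options being assumptions rather than values taken from this repo:

```nix
# Hypothetical NFS client mount that would generate data-services.mount.
# Device name and options are assumptions; only the /data/services path comes from this commit.
{
  fileSystems."/data/services" = {
    device = "nfs-services.service.consul:/data/services"; # resolved via Consul DNS at mount time
    fsType = "nfs";
    options = [
      "nofail" # let boot proceed even if the NFS server is down
      "soft"   # fail I/O instead of hanging forever on a stale server
    ];
  };
}
```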

View File

@@ -1,14 +0,0 @@
{ pkgs, ... }:
{
  # Server profile: Cluster-enabled system for server deployments
  # Extends cluster-node with server-specific configurations
  # Future: Add bare NixOS services here (mysql, postgres, etc.) when migrating from Nomad
  imports = [
    ./cluster-node.nix
  ];

  # Server-specific configurations can be added here
  # Example (for future use):
  # services.mysql.enable = lib.mkDefault false;
  # services.postgresql.enable = lib.mkDefault false;
}

View File

@@ -1,9 +1,9 @@
 { pkgs, inputs, ... }:
 {
   # Workstation profile: Development workstation configuration
-  # Extends server-node with development tools and emulation
+  # Adds development tools and emulation on top of minimal-node
   imports = [
-    ./server-node.nix
+    ./minimal-node.nix
     ./unattended-encryption.nix
   ];

View File

@@ -140,7 +140,7 @@
       c2 = mkHost "x86_64-linux" "server" [ ./hosts/c2 ];
       c3 = mkHost "x86_64-linux" "server" [ ./hosts/c3 ];
       alo-cloud-1 = mkHost "aarch64-linux" "cloud" [ ./hosts/alo-cloud-1 ];
-      zippy = mkHost "x86_64-linux" "workstation" [
+      zippy = mkHost "x86_64-linux" "minimal" [
         ethereum-nix.nixosModules.default
         ./hosts/zippy
       ];
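zippy moves from the `workstation` profile to `minimal`, which only works because its cluster roles now come from the host's own imports rather than from the profile. The `mkHost` helper is not shown in this hunk; a rough sketch of the shape its call sites suggest, with the profile lookup purely hypothetical:

```nix
# Hypothetical shape of mkHost, inferred only from the call sites above.
# profileModules is an assumed attrset mapping profile names (server, cloud,
# workstation, minimal) to base modules such as common/minimal-node.nix.
mkHost = system: profile: modules:
  nixpkgs.lib.nixosSystem {
    inherit system;
    modules = [ profileModules.${profile} ] ++ modules;
  };
```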

View File

@@ -3,7 +3,10 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
-    ../../common/compute-node.nix
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/nomad-worker.nix # Nomad client (runs jobs)
+    ../../common/nomad-server.nix # Consul + Nomad server mode
+    ../../common/glusterfs.nix # GlusterFS server (temp during migration)
     ../../common/nfs-services-standby.nix # NFS standby for /data/services
     # To promote to NFS server (during failover):
     # 1. Follow procedure in docs/NFS_FAILOVER.md

View File

@@ -3,7 +3,10 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
-    ../../common/compute-node.nix
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/nomad-worker.nix # Nomad client (runs jobs)
+    ../../common/nomad-server.nix # Consul + Nomad server mode
+    ../../common/glusterfs.nix # GlusterFS server (temp during migration)
     ./hardware.nix
   ];

View File

@@ -3,7 +3,10 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
-    ../../common/compute-node.nix
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/nomad-worker.nix # Nomad client (runs jobs)
+    ../../common/nomad-server.nix # Consul + Nomad server mode
+    ../../common/glusterfs.nix # GlusterFS server (temp during migration)
     ../../common/binary-cache-server.nix
     ./hardware.nix
   ];

View File

@@ -8,6 +8,8 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/cluster-tools.nix # Nomad CLI (no service)
     ./hardware.nix
   ];

View File

@@ -3,6 +3,8 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/cluster-tools.nix # Nomad CLI (no service)
     ./hardware.nix
   ];

View File

@@ -3,7 +3,10 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
-    ../../common/compute-node.nix
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/nomad-worker.nix # Nomad client (runs jobs)
+    # NOTE: zippy is NOT a server - no nomad-server.nix import
+    ../../common/glusterfs.nix # GlusterFS server (temp during migration)
     # ../../common/ethereum.nix
     ../../common/nfs-services-server.nix # NFS server for /data/services
     # To move NFS server role to another host: