# NixOS module: primary NFS server for cluster services data.
#
# Serves /persist/services over NFS, registers the service in Consul,
# guards against split-brain (two active NFS servers), and replicates
# the data to standby hosts via periodic btrfs send/receive.
{ config, lib, pkgs, ... }:

let
  cfg = config.nfsServicesServer;
in
{
  options.nfsServicesServer = {
    # Defaults to enabled: importing this module makes the host the primary.
    enable = lib.mkEnableOption "NFS services server" // { default = true; };

    standbys = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [];
      description = ''
        List of standby hostnames to replicate to (e.g. ["c1"]).
        Requires one-time setup on the NFS server:
          sudo mkdir -p /persist/root/.ssh
          sudo ssh-keygen -t ed25519 -f /persist/root/.ssh/btrfs-replication -N "" -C "root@$(hostname)-replication"
        Then add the public key to each standby's nfsServicesStandby.replicationKeys option.
      '';
    };
  };

  config = lib.mkIf cfg.enable {
    # Persist root SSH directory for replication key
    environment.persistence."/persist" = {
      directories = [ "/root/.ssh" ];
    };

    # Bind mount /persist/services to /data/services for local access
    # This makes the path consistent with NFS clients
    # Use mkForce to override the NFS client mount from cluster-node.nix
    fileSystems."/data/services" = lib.mkForce {
      device = "/persist/services";
      fsType = "none";
      options = [ "bind" ];
    };

    # Nomad node metadata: mark this as the primary storage node
    # Jobs can constrain to ${meta.storage_role} = "primary"
    services.nomad.settings.client.meta = {
      storage_role = "primary";
    };

    # NFS server configuration
    services.nfs.server = {
      enable = true;
      exports = ''
        /persist/services 192.168.1.0/24(rw,sync,no_subtree_check,no_root_squash)
      '';
    };

    # Consul service registration for NFS
    services.consul.extraConfig.services = [{
      name = "data-services";
      port = 2049;
      checks = [{
        tcp = "localhost:2049";
        interval = "30s";
      }];
    }];

    # Firewall for NFS (2049 = nfsd, 111 = portmapper, 20048 = mountd)
    networking.firewall.allowedTCPPorts = [ 2049 111 20048 ];
    networking.firewall.allowedUDPPorts = [ 2049 111 20048 ];

    # systemd services: NFS server split-brain check + replication services
    systemd.services = lib.mkMerge ([
      # Safety check: prevent split-brain by ensuring no other NFS server is active.
      # Best-effort: if Consul never answers, the dig below returns nothing
      # (|| true) and the check passes open rather than blocking startup.
      {
        nfs-server = {
          preStart = ''
            # Wait for Consul to be available
            for i in {1..30}; do
              if ${pkgs.netcat}/bin/nc -z localhost 8600; then
                break
              fi
              echo "Waiting for Consul DNS... ($i/30)"
              sleep 1
            done

            # Check if another NFS server is already registered in Consul
            CURRENT_SERVER=$(${pkgs.dnsutils}/bin/dig +short @localhost -p 8600 data-services.service.consul | head -1 || true)
            MY_IP=$(${pkgs.iproute2}/bin/ip -4 addr show | ${pkgs.gnugrep}/bin/grep -oP '(?<=inet\s)\d+(\.\d+){3}' | ${pkgs.gnugrep}/bin/grep -v '^127\.' | head -1)

            if [ -n "$CURRENT_SERVER" ] && [ "$CURRENT_SERVER" != "$MY_IP" ]; then
              echo "ERROR: Another NFS server is already active at $CURRENT_SERVER"
              echo "This host ($MY_IP) is configured as NFS server but should be standby."
              echo "To fix:"
              echo " 1. If this is intentional (failback), first demote the other server"
              echo " 2. Update this host's config to use nfs-services-standby.nix instead"
              echo " 3. Sync data from active server before promoting this host"
              exit 1
            fi
            echo "NFS server startup check passed (no other active server found)"
          '';
        };
      }
    ] ++ (lib.forEach cfg.standbys (standby: {
      # One oneshot replication unit per standby, driven by a matching timer.
      "replicate-services-to-${standby}" = {
        description = "Replicate /persist/services to ${standby}";
        path = [ pkgs.btrfs-progs pkgs.openssh pkgs.coreutils pkgs.findutils pkgs.gnugrep ];
        script = ''
          set -euo pipefail

          SSH_KEY="/persist/root/.ssh/btrfs-replication"
          if [ ! -f "$SSH_KEY" ]; then
            echo "ERROR: SSH key not found at $SSH_KEY"
            echo "Run: sudo ssh-keygen -t ed25519 -f $SSH_KEY -N \"\" -C \"root@$(hostname)-replication\""
            exit 1
          fi

          SNAPSHOT_NAME="services@$(date +%Y%m%d-%H%M%S)"
          SNAPSHOT_PATH="/persist/$SNAPSHOT_NAME"

          # Create readonly snapshot
          btrfs subvolume snapshot -r /persist/services "$SNAPSHOT_PATH"

          # Find previous snapshot on sender. Sort by NAME — the embedded
          # %Y%m%d-%H%M%S timestamp sorts lexicographically in
          # chronological order — rather than `ls -t`: a btrfs snapshot's
          # root mtime mirrors the source subvolume's content, so mtime
          # ordering does not reflect snapshot creation order.
          PREV_LOCAL=$(ls -1d /persist/services@* 2>/dev/null | grep -v "^$SNAPSHOT_PATH$" | sort | tail -1 || true)

          # Try incremental send if we have a parent, fall back to full send if parent missing on receiver
          if [ -n "$PREV_LOCAL" ]; then
            echo "Attempting incremental send from $(basename $PREV_LOCAL) to ${standby}"
            # Capture both stdout and stderr to check for parent missing error
            if OUTPUT=$(btrfs send -p "$PREV_LOCAL" "$SNAPSHOT_PATH" 2>&1 | \
                 ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
                   "btrfs receive /persist/services-standby" 2>&1); then
              echo "Incremental send completed successfully"
            elif echo "$OUTPUT" | grep -q "cannot find parent subvolume"; then
              echo "Parent snapshot not found on receiver, falling back to full send"
              btrfs send "$SNAPSHOT_PATH" | \
                ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
                  "btrfs receive /persist/services-standby"
            else
              echo "ERROR: Incremental send failed: $OUTPUT"
              exit 1
            fi
          else
            # First snapshot, do full send
            echo "Full send to ${standby} (first snapshot)"
            btrfs send "$SNAPSHOT_PATH" | \
              ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
                "btrfs receive /persist/services-standby"
          fi

          # Cleanup old snapshots on sender: keep the newest 288
          # (24 hours at 5-minute intervals). Select victims by
          # name-sorted age, not `find -mmin`: snapshot mtimes track the
          # source subvolume, so a snapshot of an idle subvolume looks
          # "old" the moment it is created — `-mmin +1440` would delete
          # it immediately and break the incremental-send parent chain.
          ls -1d /persist/services@* 2>/dev/null | sort | head -n -288 | \
            while IFS= read -r OLD_SNAPSHOT; do
              btrfs subvolume delete "$OLD_SNAPSHOT"
            done
        '';
        serviceConfig = {
          Type = "oneshot";
          User = "root";
        };
      };
    })));

    systemd.timers = lib.mkMerge (
      lib.forEach cfg.standbys (standby: {
        "replicate-services-to-${standby}" = {
          description = "Timer for replicating /persist/services to ${standby}";
          wantedBy = [ "timers.target" ];
          timerConfig = {
            OnCalendar = "*:0/5"; # Every 5 minutes
            Persistent = true;
          };
        };
      })
    );
  };
}