NFS server and client setup.

@@ -1,13 +1,14 @@
{ pkgs, ... }:
{
  # Cluster node configuration
  # Extends minimal-node with cluster-specific services (Consul, GlusterFS, CIFS)
  # Extends minimal-node with cluster-specific services (Consul, GlusterFS, CIFS, NFS)
  # Used by: compute nodes (c1, c2, c3)
  imports = [
    ./minimal-node.nix
    ./unattended-encryption.nix
    ./cifs-client.nix
    ./consul.nix
    ./glusterfs-client.nix
    ./glusterfs-client.nix # Keep during migration, will be removed in Phase 3
    ./nfs-services-client.nix # New: NFS client for /data/services
  ];
}

common/nfs-services-client.nix (new file, 21 lines)
@@ -0,0 +1,21 @@
{ pkgs, ... }:
{
  # NFS client for /data/services
  # Mounts from data-services.service.consul (Consul DNS for automatic failover)
  # The NFS server registers itself in Consul, so this will automatically
  # point to whichever host is currently running the NFS server

  fileSystems."/data/services" = {
    device = "data-services.service.consul:/persist/services";
    fsType = "nfs";
    options = [
      "x-systemd.automount"       # Auto-mount on access
      "noauto"                    # Don't mount at boot (automount handles it)
      "x-systemd.idle-timeout=60" # Unmount after 60s of inactivity
      "_netdev"                   # Network filesystem (wait for network)
    ];
  };

  # Ensure NFS client packages are available
  environment.systemPackages = [ pkgs.nfs-utils ];
}

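Once a client imports this module, the path can be exercised end to end. A quick check, assuming Consul's DNS interface is on its default port 8600 as elsewhere in this setup:

```bash
# Does Consul resolve the NFS service to the current server's IP?
dig +short @localhost -p 8600 data-services.service.consul

# Is the automount unit armed, and does touching the path trigger the mount?
systemctl status data-services.automount
ls /data/services
```
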
171
common/nfs-services-server.nix
Normal file
171
common/nfs-services-server.nix
Normal file
@@ -0,0 +1,171 @@
{ config, lib, pkgs, ... }:

let
  cfg = config.nfsServicesServer;
in
{
  options.nfsServicesServer = {
    enable = lib.mkEnableOption "NFS services server" // { default = true; };

    standbys = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [];
      description = ''
        List of standby hostnames to replicate to (e.g. ["c1"]).

        Requires one-time setup on the NFS server:
          sudo mkdir -p /persist/root/.ssh
          sudo ssh-keygen -t ed25519 -f /persist/root/.ssh/btrfs-replication -N "" -C "root@$(hostname)-replication"

        Then add the public key to each standby's nfsServicesStandby.replicationKeys option.
      '';
    };
  };

  config = lib.mkIf cfg.enable {
    # Persist root SSH directory for replication key
    environment.persistence."/persist" = {
      directories = [
        "/root/.ssh"
      ];
    };

    # Bind mount /persist/services to /data/services for local access
    # This makes the path consistent with NFS clients
    # Use mkForce to override the NFS client mount from cluster-node.nix
    fileSystems."/data/services" = lib.mkForce {
      device = "/persist/services";
      fsType = "none";
      options = [ "bind" ];
    };

    # Nomad node metadata: mark this as the primary storage node
    # Jobs can constrain to ${meta.storage_role} = "primary"
    services.nomad.settings.client.meta = {
      storage_role = "primary";
    };

    # NFS server configuration
    services.nfs.server = {
      enable = true;
      exports = ''
        /persist/services 192.168.1.0/24(rw,sync,no_subtree_check,no_root_squash)
      '';
    };

    # Consul service registration for NFS
    services.consul.extraConfig.services = [{
      name = "data-services";
      port = 2049;
      checks = [{
        tcp = "localhost:2049";
        interval = "30s";
      }];
    }];

    # Firewall for NFS
    networking.firewall.allowedTCPPorts = [ 2049 111 20048 ];
    networking.firewall.allowedUDPPorts = [ 2049 111 20048 ];

    # systemd services: NFS server split-brain check + replication services
    systemd.services = lib.mkMerge ([
      # Safety check: prevent split-brain by ensuring no other NFS server is active
      {
        nfs-server = {
          preStart = ''
            # Wait for Consul to be available
            for i in {1..30}; do
              if ${pkgs.netcat}/bin/nc -z localhost 8600; then
                break
              fi
              echo "Waiting for Consul DNS... ($i/30)"
              sleep 1
            done

            # Check if another NFS server is already registered in Consul
            CURRENT_SERVER=$(${pkgs.dnsutils}/bin/dig +short @localhost -p 8600 data-services.service.consul | head -1 || true)
            MY_IP=$(${pkgs.iproute2}/bin/ip -4 addr show | ${pkgs.gnugrep}/bin/grep -oP '(?<=inet\s)\d+(\.\d+){3}' | ${pkgs.gnugrep}/bin/grep -v '^127\.' | head -1)

            if [ -n "$CURRENT_SERVER" ] && [ "$CURRENT_SERVER" != "$MY_IP" ]; then
              echo "ERROR: Another NFS server is already active at $CURRENT_SERVER"
              echo "This host ($MY_IP) is configured as NFS server but should be standby."
              echo "To fix:"
              echo " 1. If this is intentional (failback), first demote the other server"
              echo " 2. Update this host's config to use nfs-services-standby.nix instead"
              echo " 3. Sync data from active server before promoting this host"
              exit 1
            fi

            echo "NFS server startup check passed (no other active server found)"
          '';
        };
      }
    ] ++ (lib.forEach cfg.standbys (standby: {
      "replicate-services-to-${standby}" = {
        description = "Replicate /persist/services to ${standby}";
        path = [ pkgs.btrfs-progs pkgs.openssh pkgs.coreutils pkgs.findutils pkgs.gnugrep ];

        script = ''
          set -euo pipefail

          SSH_KEY="/persist/root/.ssh/btrfs-replication"
          if [ ! -f "$SSH_KEY" ]; then
            echo "ERROR: SSH key not found at $SSH_KEY"
            echo "Run: sudo ssh-keygen -t ed25519 -f $SSH_KEY -N \"\" -C \"root@$(hostname)-replication\""
            exit 1
          fi

          SNAPSHOT_NAME="services@$(date +%Y%m%d-%H%M%S)"
          SNAPSHOT_PATH="/persist/$SNAPSHOT_NAME"

          # Create readonly snapshot
          btrfs subvolume snapshot -r /persist/services "$SNAPSHOT_PATH"

          # Find previous snapshot on sender
          PREV_LOCAL=$(ls -t /persist/services@* 2>/dev/null | grep -v "^$SNAPSHOT_PATH$" | head -1 || true)

          # Check what snapshots exist on the receiver
          # (non-fatal: a restricted key or an empty standby must not abort the run)
          REMOTE_SNAPSHOTS=$(ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
            "ls -t /persist/services-standby/services@* 2>/dev/null || true" || true)

          # Decide: incremental or full send
          if [ -n "$PREV_LOCAL" ] && echo "$REMOTE_SNAPSHOTS" | grep -q "$(basename "$PREV_LOCAL")"; then
            # Receiver has the parent snapshot, do incremental
            echo "Incremental send from $(basename "$PREV_LOCAL") to ${standby}"
            btrfs send -p "$PREV_LOCAL" "$SNAPSHOT_PATH" | \
              ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
                "btrfs receive /persist/services-standby"
          else
            # Receiver doesn't have parent (new standby or missing snapshot), do full send
            echo "Full send to ${standby} (new standby or parent snapshot not found on receiver)"
            btrfs send "$SNAPSHOT_PATH" | \
              ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
                "btrfs receive /persist/services-standby"
          fi

          # Cleanup old snapshots on sender (keep last 24 hours = 288 snapshots at 5min intervals)
          find /persist -maxdepth 1 -name 'services@*' -mmin +1440 -exec btrfs subvolume delete {} \;
        '';

        serviceConfig = {
          Type = "oneshot";
          User = "root";
        };
      };
    }))
    );

    systemd.timers = lib.mkMerge (
      lib.forEach cfg.standbys (standby: {
        "replicate-services-to-${standby}" = {
          description = "Timer for replicating /persist/services to ${standby}";
          wantedBy = [ "timers.target" ];
          timerConfig = {
            OnCalendar = "*:0/5"; # Every 5 minutes
            Persistent = true;
          };
        };
      })
    );
  };
}

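Each entry in `standbys` generates a matching `replicate-services-to-<host>` service and timer. A few commands to confirm the units exist and snapshots are accumulating; a sketch assuming `c1` is configured as a standby:

```bash
# Timer and service generated for the standby
systemctl list-timers 'replicate-services-to-*'
systemctl status replicate-services-to-c1.service

# Read-only snapshots created on the sender (pruned after 24 hours)
sudo btrfs subvolume list -s /persist | grep 'services@'
```
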
common/nfs-services-standby.nix (new file, 68 lines)
@@ -0,0 +1,68 @@
{ config, lib, pkgs, ... }:

let
  cfg = config.nfsServicesStandby;
in
{
  options.nfsServicesStandby = {
    enable = lib.mkEnableOption "NFS services standby" // { default = true; };

    replicationKeys = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [];
      description = ''
        SSH public keys authorized to replicate btrfs snapshots to this standby.
        These keys are restricted to only run 'btrfs receive /persist/services-standby'.

        Get the public key from the NFS server:
          ssh <nfs-server> sudo cat /persist/root/.ssh/btrfs-replication.pub
      '';
    };
  };

  config = lib.mkIf cfg.enable {
    # Allow root SSH login for replication (restricted by command= in authorized_keys)
    # This is configured in common/sshd.nix

    # Restricted SSH keys for btrfs replication
    users.users.root.openssh.authorizedKeys.keys =
      map (key: ''command="btrfs receive /persist/services-standby",restrict ${key}'') cfg.replicationKeys;

    # Mount point for services-standby subvolume
    # This is just declarative documentation - the subvolume must be created manually once:
    #   sudo btrfs subvolume create /persist/services-standby
    # After that, it will persist across reboots (it's under /persist)
    fileSystems."/persist/services-standby" = {
      device = "/persist/services-standby";
      fsType = "none";
      options = [ "bind" ];
      noCheck = true;
    };

    # Cleanup old snapshots on standby (keep last 48 hours for safety)
    systemd.services.cleanup-services-standby-snapshots = {
      description = "Cleanup old btrfs snapshots in services-standby";
      path = [ pkgs.btrfs-progs pkgs.findutils ];

      script = ''
        set -euo pipefail
        # Keep last 48 hours of snapshots (576 snapshots at 5min intervals)
        find /persist/services-standby -maxdepth 1 -name 'services@*' -mmin +2880 -exec btrfs subvolume delete {} \; || true
      '';

      serviceConfig = {
        Type = "oneshot";
        User = "root";
      };
    };

    systemd.timers.cleanup-services-standby-snapshots = {
      description = "Timer for cleaning up old snapshots on standby";
      wantedBy = [ "timers.target" ];
      timerConfig = {
        OnCalendar = "daily";
        Persistent = true;
      };
    };
  };
}

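For reference, each key in `replicationKeys` is rendered as one restricted `authorized_keys` entry on the standby, roughly of the following shape (`AAAA...` stands for the actual public key). On NixOS, keys set via `users.users.root.openssh.authorizedKeys` normally end up under `/etc/ssh/authorized_keys.d/root`; adjust the path if your setup differs:

```bash
# On the standby, after deploying
sudo cat /etc/ssh/authorized_keys.d/root
# Expected shape of each replication entry:
# command="btrfs receive /persist/services-standby",restrict ssh-ed25519 AAAA... root@zippy-replication
```
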
@@ -5,6 +5,7 @@
    settings = {
      PasswordAuthentication = false;
      KbdInteractiveAuthentication = false;
      PermitRootLogin = "prohibit-password"; # Allow root login with SSH keys only
    };
  };

docs/NFS_FAILOVER.md (new file, 438 lines)
@@ -0,0 +1,438 @@
# NFS Services Failover Procedures

This document describes how to fail over the `/data/services` NFS server between hosts and how to fail back.

## Architecture Overview

- **Primary NFS Server**: Typically `zippy`
  - Exports `/persist/services` via NFS
  - Has local bind mount: `/data/services` → `/persist/services` (same path as clients)
  - Registers `data-services.service.consul` in Consul
  - Sets Nomad node meta: `storage_role = "primary"`
  - Replicates snapshots to standbys every 5 minutes via btrfs send
  - **Safety check**: Refuses to start if another NFS server is already active in Consul

- **Standby**: Typically `c1`
  - Receives snapshots at `/persist/services-standby/services@<timestamp>`
  - Can be promoted to NFS server during failover
  - No special Nomad node meta (not primary)

- **Clients**: All cluster nodes (c1, c2, c3, zippy)
  - Mount `/data/services` from `data-services.service.consul:/persist/services`
  - Automatically connect to whichever host is currently registered in Consul

### Nomad Job Constraints

Jobs that need to run on the primary storage node should use:

```hcl
constraint {
  attribute = "${meta.storage_role}"
  value = "primary"
}
```

This is useful for:
- Database jobs (mysql, postgres, redis) that benefit from local storage
- Jobs that need guaranteed fast disk I/O

During failover, the `storage_role = "primary"` meta attribute moves to the new NFS server, and Nomad automatically reschedules constrained jobs to the new primary.

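To confirm which node currently carries the meta attribute, the Nomad CLI can be asked directly; a quick sketch, assuming the `nomad` CLI on the node can reach the local agent:

```bash
# Run on the node you expect to be the primary (e.g. zippy)
# The Meta section of the verbose output should include storage_role=primary
nomad node status -self -verbose | grep -i storage_role
```
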
## Prerequisites

- Standby has been receiving snapshots (check: `ls /persist/services-standby/services@*`)
- Last successful replication was recent (< 5-10 minutes)

---

## Failover: Promoting Standby to Primary

**Scenario**: `zippy` is down and you need to promote `c1` to be the NFS server.

### Step 1: Choose Latest Snapshot

On the standby (c1):

```bash
ssh c1
sudo ls -lt /persist/services-standby/services@* | head -5
```

Find the most recent snapshot. Note the timestamp to estimate data loss (typically < 5 minutes).

### Step 2: Promote Snapshot to Read-Write Subvolume

On c1:

```bash
# Find the latest snapshot
LATEST=$(sudo ls -t /persist/services-standby/services@* | head -1)

# Create writable subvolume from snapshot
sudo btrfs subvolume snapshot "$LATEST" /persist/services

# Verify
ls -la /persist/services
```

### Step 3: Update NixOS Configuration

Edit your configuration to swap the NFS server role:

**In `hosts/c1/default.nix`**:
```nix
imports = [
  # ... existing imports ...
  # ../../common/nfs-services-standby.nix  # REMOVE THIS
  ../../common/nfs-services-server.nix     # ADD THIS
];

# Add standbys if desired (optional - can leave empty during emergency)
nfsServicesServer.standbys = []; # Or ["c2"] to add a new standby
```

**Optional: Prepare zippy config for when it comes back**:

In `hosts/zippy/default.nix` (can do this later too):
```nix
imports = [
  # ... existing imports ...
  # ../../common/nfs-services-server.nix  # REMOVE THIS
  ../../common/nfs-services-standby.nix   # ADD THIS
];

# Add the replication key from c1 (get it from c1:/persist/root/.ssh/btrfs-replication.pub)
nfsServicesStandby.replicationKeys = [
  "ssh-ed25519 AAAA... root@c1-replication"
];
```

### Step 4: Deploy Configuration

```bash
# From your workstation
deploy -s '.#c1'

# If zippy is still down, updating its config will fail, but that's okay
# You can update it later when it comes back
```

### Step 5: Verify NFS Server is Running

On c1:

```bash
sudo systemctl status nfs-server
sudo showmount -e localhost
dig @localhost -p 8600 data-services.service.consul # Should show c1's IP
```

### Step 6: Verify Clients Can Access

From any node:

```bash
df -h | grep services
ls /data/services
```

The mount should automatically reconnect via Consul DNS.

### Step 7: Check Nomad Jobs

```bash
nomad job status mysql
nomad job status postgres
# Verify critical services are healthy

# Jobs constrained to ${meta.storage_role} = "primary" will automatically
# reschedule to c1 once it's deployed with the NFS server module
```

**Recovery Time Objective (RTO)**: ~10-15 minutes
**Recovery Point Objective (RPO)**: Last replication interval (5 minutes max)

**Note**: Jobs with the `storage_role = "primary"` constraint will automatically move to c1 because it now has that node meta attribute. No job spec changes needed!

---

## What Happens When zippy Comes Back?

**IMPORTANT**: If zippy reboots while still configured as NFS server, it will **refuse to start** the NFS service because it detects c1 is already active in Consul.

You'll see this error in `journalctl -u nfs-server`:

```
ERROR: Another NFS server is already active at 192.168.1.X
This host (192.168.1.2) is configured as NFS server but should be standby.
To fix:
 1. If this is intentional (failback), first demote the other server
 2. Update this host's config to use nfs-services-standby.nix instead
 3. Sync data from active server before promoting this host
```

This is a **safety feature** to prevent split-brain and data corruption.

### Options when zippy comes back:

**Option A: Keep c1 as primary** (zippy becomes standby)
1. Update zippy's config to use `nfs-services-standby.nix`
2. Deploy to zippy
3. c1 will start replicating to zippy

**Option B: Fail back to zippy as primary**
Follow the "Failing Back to Original Primary" procedure below.

---

## Failing Back to Original Primary

**Scenario**: `zippy` is repaired and you want to move the NFS server role back from `c1` to `zippy`.

### Step 1: Sync Latest Data from c1 to zippy

On c1 (current primary):

```bash
# Create readonly snapshot of current state
sudo btrfs subvolume snapshot -r /persist/services /persist/services@failback-$(date +%Y%m%d-%H%M%S)

# Find the snapshot
FAILBACK=$(sudo ls -t /persist/services@failback-* | head -1)

# Send to zippy (use root SSH key if available, or generate temporary key)
sudo btrfs send "$FAILBACK" | ssh root@zippy "btrfs receive /persist/"
```

On zippy:

```bash
# Verify snapshot arrived
ls -la /persist/services@failback-*

# Create writable subvolume from the snapshot
FAILBACK=$(ls -t /persist/services@failback-* | head -1)
sudo btrfs subvolume snapshot "$FAILBACK" /persist/services

# Verify
ls -la /persist/services
```

### Step 2: Update NixOS Configuration

Swap the roles back:

**In `hosts/zippy/default.nix`**:
```nix
imports = [
  # ... existing imports ...
  # ../../common/nfs-services-standby.nix  # REMOVE THIS
  ../../common/nfs-services-server.nix     # ADD THIS
];

nfsServicesServer.standbys = ["c1"];
```

**In `hosts/c1/default.nix`**:
```nix
imports = [
  # ... existing imports ...
  # ../../common/nfs-services-server.nix  # REMOVE THIS
  ../../common/nfs-services-standby.nix   # ADD THIS
];

nfsServicesStandby.replicationKeys = [
  "ssh-ed25519 AAAA... root@zippy-replication" # Get from zippy:/persist/root/.ssh/btrfs-replication.pub
];
```

### Step 3: Deploy Configurations

```bash
# IMPORTANT: Deploy c1 FIRST to demote it
deploy -s '.#c1'

# Wait for c1 to stop NFS server
ssh c1 sudo systemctl status nfs-server # Should be inactive

# Then deploy zippy to promote it
deploy -s '.#zippy'
```

The order matters! If you deploy zippy first, it will see c1 is still active and refuse to start.

### Step 4: Verify Failback

Check that Consul DNS points to zippy:

```bash
dig @c1 -p 8600 data-services.service.consul # Should show zippy's IP
```

Check that clients are mounting from zippy:

```bash
for host in c1 c2 c3; do
  ssh $host "df -h | grep services"
done
```

### Step 5: Clean Up Temporary Snapshots

On c1:

```bash
# Remove the failback snapshot and the promoted subvolume
sudo btrfs subvolume delete /persist/services@failback-*
sudo btrfs subvolume delete /persist/services
```

---

## Adding a New Standby

**Scenario**: You want to add `c2` as an additional standby.

### Step 1: Create Standby Subvolume on c2

```bash
ssh c2
sudo btrfs subvolume create /persist/services-standby
```

### Step 2: Update c2 Configuration

**In `hosts/c2/default.nix`**:
```nix
imports = [
  # ... existing imports ...
  ../../common/nfs-services-standby.nix
];

nfsServicesStandby.replicationKeys = [
  "ssh-ed25519 AAAA... root@zippy-replication" # Get from current NFS server
];
```

### Step 3: Update NFS Server Configuration

On the current NFS server (e.g., zippy), update the standbys list:

**In `hosts/zippy/default.nix`**:
```nix
nfsServicesServer.standbys = ["c1" "c2"]; # Added c2
```

### Step 4: Deploy

```bash
deploy -s '.#c2'
deploy -s '.#zippy'
```

The next replication cycle (within 5 minutes) will do a full send to c2, then switch to incremental.

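If you would rather not wait for the timer, the per-standby replication service can be started by hand; a sketch, assuming zippy is the current NFS server and c2 the new standby:

```bash
# On the current NFS server (zippy): kick off the first (full) send immediately
sudo systemctl start replicate-services-to-c2.service

# Watch its progress
sudo journalctl -u replicate-services-to-c2 -f
```
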
---

## Troubleshooting

### Replication Failed

Check the replication service logs:

```bash
# On NFS server
sudo journalctl -u replicate-services-to-c1 -f
```

Common issues:
- SSH key not found → Run key generation step (see stateful-commands.txt)
- Permission denied → Check authorized_keys on standby
- Snapshot already exists → Old snapshot with same timestamp, wait for next cycle

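A quick way to rule out the first two causes, and to re-run a cycle once the cause is fixed; this assumes c1 is the standby and the key paths from this repo are unchanged:

```bash
# On the NFS server: the replication key pair should exist at the expected path
sudo ls -l /persist/root/.ssh/btrfs-replication /persist/root/.ssh/btrfs-replication.pub

# After fixing the cause, run one replication cycle by hand and review the log
sudo systemctl start replicate-services-to-c1.service
sudo journalctl -u replicate-services-to-c1 -n 30 --no-pager
```
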
### Clients Can't Mount

Check Consul:

```bash
dig @localhost -p 8600 data-services.service.consul
consul catalog services | grep data-services
```

If Consul isn't resolving:
- NFS server might not have registered → Check `sudo systemctl status nfs-server`
- Consul agent might be down → Check `sudo systemctl status consul`

### Mount is Stale

Force remount:

```bash
sudo systemctl restart data-services.mount
```

Or unmount and let automount handle it:

```bash
sudo umount /data/services
ls /data/services # Triggers automount
```

### Split-Brain Prevention: NFS Server Won't Start

If you see:
```
ERROR: Another NFS server is already active at 192.168.1.X
```

This is **intentional** - the safety check is working! You have two options:

1. **Keep the other server as primary**: Update this host's config to be a standby instead
2. **Fail back to this host**: First demote the other server, sync data, then deploy both hosts in the correct order

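To see at a glance which host Consul currently considers active, and whether that matches the host you are on, the same checks the pre-start script performs can be run manually (assuming Consul DNS on port 8600):

```bash
# Which IP does Consul hand out for the NFS service?
dig +short @localhost -p 8600 data-services.service.consul

# Which IPs does this host own? They should match on the intended primary.
ip -4 addr show | grep inet
```
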
---

## Monitoring

### Check Replication Status

On the NFS server:

```bash
# List recent snapshots
ls -lt /persist/services@* | head

# Check last replication run
sudo systemctl status replicate-services-to-c1

# Check replication logs
sudo journalctl -u replicate-services-to-c1 --since "1 hour ago"
```

On the standby:

```bash
# List received snapshots
ls -lt /persist/services-standby/services@* | head

# Check how old the latest snapshot is
stat "$(ls -t /persist/services-standby/services@* | head -1)" | grep Modify
```

### Verify NFS Exports

```bash
sudo showmount -e localhost
```

Should show:
```
/persist/services 192.168.1.0/24
```

### Check Consul Registration

```bash
consul catalog services | grep data-services
dig @localhost -p 8600 data-services.service.consul
```

@@ -4,6 +4,11 @@
    ../../common/encrypted-btrfs-layout.nix
    ../../common/global
    ../../common/compute-node.nix
    ../../common/nfs-services-standby.nix # NFS standby for /data/services
    # To promote to NFS server (during failover):
    # 1. Follow procedure in docs/NFS_FAILOVER.md
    # 2. Replace above line with: ../../common/nfs-services-server.nix
    # 3. Add nfsServicesServer.standbys = [ "c2" ]; (or leave empty)
    ./hardware.nix
  ];

@@ -15,4 +20,9 @@

  networking.hostName = "c1";
  services.tailscaleAutoconnect.authkey = "tskey-auth-k2nQ771YHM11CNTRL-YVpoumL2mgR6nLPG51vNhRpEKMDN7gLAi";

  # NFS standby configuration: accept replication from zippy
  nfsServicesStandby.replicationKeys = [
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHyTKsMCbwCIlMcC/aopgz5Yfx/Q9QdlWC9jzMLgYFAV root@zippy-replication"
  ];
}

@@ -5,6 +5,11 @@
    ../../common/global
    ../../common/compute-node.nix
    # ../../common/ethereum.nix
    ../../common/nfs-services-server.nix # NFS server for /data/services
    # To move NFS server role to another host:
    # 1. Follow procedure in docs/NFS_FAILOVER.md
    # 2. Replace above line with: ../../common/nfs-services-standby.nix
    # 3. Add nfsServicesStandby.replicationKeys with the new server's public key
    ./hardware.nix
  ];

@@ -16,4 +21,7 @@

  networking.hostName = "zippy";
  services.tailscaleAutoconnect.authkey = "tskey-auth-ktKyQ59f2p11CNTRL-ut8E71dLWPXsVtb92hevNX9RTjmk4owBf";

  # NFS server configuration: replicate to c1 as standby
  nfsServicesServer.standbys = [ "c1" ];
}

@@ -39,3 +39,22 @@ kopia repository server setup (on a non-NixOS host at the time):
* kopia server start --address 0.0.0.0:51515 --tls-cert-file ~/kopia-certs/kopia.cert --tls-key-file ~/kopia-certs/kopia.key --tls-generate-cert (first time)
* kopia server start --address 0.0.0.0:51515 --tls-cert-file ~/kopia-certs/kopia.cert --tls-key-file ~/kopia-certs/kopia.key (subsequent)
[TLS is mandatory for this]

NFS services server setup (one-time on the NFS server host, e.g. zippy):
* sudo btrfs subvolume create /persist/services
* sudo mkdir -p /persist/root/.ssh
* sudo ssh-keygen -t ed25519 -f /persist/root/.ssh/btrfs-replication -N "" -C "root@$(hostname)-replication"
* Get the public key: sudo cat /persist/root/.ssh/btrfs-replication.pub
  Then add this public key to each standby's nfsServicesStandby.replicationKeys option

NFS services standby setup (one-time on each standby host, e.g. c1):
* sudo btrfs subvolume create /persist/services-standby

Moving NFS server role between hosts (e.g. from zippy to c1):
See docs/NFS_FAILOVER.md for detailed procedure
Summary:
1. On current primary: create final snapshot and send to new primary
2. On new primary: promote snapshot to /persist/services
3. Update configs: remove nfs-services-server.nix from old primary, add to new primary
4. Update configs: add nfs-services-standby.nix to old primary (with replication keys)
5. Deploy old primary first (to demote), then new primary (to promote)