{ config, lib, pkgs, ... }:

let
  cfg = config.nfsServicesServer;
in
{
  options.nfsServicesServer = {
    enable = lib.mkEnableOption "NFS services server" // { default = true; };

    standbys = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [];
      description = ''
        List of standby hostnames to replicate to (e.g. ["c1"]).

        Requires one-time setup on the NFS server:
          sudo mkdir -p /persist/root/.ssh
          sudo ssh-keygen -t ed25519 -f /persist/root/.ssh/btrfs-replication -N "" -C "root@$(hostname)-replication"

        Then add the public key to each standby's nfsServicesStandby.replicationKeys option.
      '';
    };
  };

  config = lib.mkIf cfg.enable {
    # Persist root SSH directory for replication key
    environment.persistence.${config.custom.impermanence.persistPath} = {
      directories = [
        "/root/.ssh"
      ];
    };

    # Bind mount /persist/services to /data/services for local access
    # This makes the path consistent with NFS clients
    # Use mkForce to override the NFS client mount from cluster-node.nix
    fileSystems."/data/services" = lib.mkForce {
      device = "/persist/services";
      fsType = "none";
      options = [ "bind" ];
    };

    # Nomad node metadata: mark this as the primary storage node
    # Jobs can constrain to ${meta.storage_role} = "primary"
    services.nomad.settings.client.meta = {
      storage_role = "primary";
    };

    # NFS server configuration
    services.nfs.server = {
      enable = true;
      exports = ''
        /persist/services 192.168.1.0/24(rw,sync,no_subtree_check,no_root_squash)
      '';
    };

    # Consul service registration for NFS
    services.consul.extraConfig.services = [{
      name = "data-services";
      port = 2049;
      checks = [{
        tcp = "localhost:2049";
        interval = "30s";
      }];
    }];

    # Firewall for NFS
    networking.firewall.allowedTCPPorts = [ 2049 111 20048 ];
    networking.firewall.allowedUDPPorts = [ 2049 111 20048 ];

    # systemd services: NFS server split-brain check + replication services
    systemd.services = lib.mkMerge ([
      # Safety check: prevent split-brain by ensuring no other NFS server is active
      {
        nfs-server = {
          preStart = ''
            # Wait for Consul to be available
            for i in {1..30}; do
              if ${pkgs.netcat}/bin/nc -z localhost 8600; then
                break
              fi
              echo "Waiting for Consul DNS... ($i/30)"
              sleep 1
            done

            # Check if another NFS server is already registered in Consul
            CURRENT_SERVER=$(${pkgs.dnsutils}/bin/dig +short @localhost -p 8600 data-services.service.consul | head -1 || true)
            MY_IP=$(${pkgs.iproute2}/bin/ip -4 addr show | ${pkgs.gnugrep}/bin/grep -oP '(?<=inet\s)\d+(\.\d+){3}' | ${pkgs.gnugrep}/bin/grep -v '^127\.' | head -1)

            if [ -n "$CURRENT_SERVER" ] && [ "$CURRENT_SERVER" != "$MY_IP" ]; then
              echo "ERROR: Another NFS server is already active at $CURRENT_SERVER"
              echo "This host ($MY_IP) is configured as NFS server but should be standby."
              echo "To fix:"
              echo "  1. If this is intentional (failback), first demote the other server"
              echo "  2. Update this host's config to use nfs-services-standby.nix instead"
              echo "  3. Sync data from active server before promoting this host"
              exit 1
            fi

            echo "NFS server startup check passed (no other active server found)"
          '';
        };
      }
    ] ++ (lib.forEach cfg.standbys (standby: {
        "replicate-services-to-${standby}" = {
          description = "Replicate /persist/services to ${standby}";
          path = [ pkgs.btrfs-progs pkgs.openssh pkgs.coreutils pkgs.findutils pkgs.gnugrep pkgs.curl ];

          script = ''
            set -euo pipefail

            START_TIME=$(date +%s)
            REPLICATION_SUCCESS=0

            SSH_KEY="/persist/root/.ssh/btrfs-replication"
            if [ ! -f "$SSH_KEY" ]; then
              echo "ERROR: SSH key not found at $SSH_KEY"
              echo "Run: sudo ssh-keygen -t ed25519 -f $SSH_KEY -N \"\" -C \"root@$(hostname)-replication\""
              exit 1
            fi

            SNAPSHOT_NAME="services@$(date +%Y%m%d-%H%M%S)"
            SNAPSHOT_PATH="/persist/$SNAPSHOT_NAME"

            # Create readonly snapshot
            btrfs subvolume snapshot -r /persist/services "$SNAPSHOT_PATH"

            # Find previous snapshot on sender (sort by name since readonly snapshots have same mtime)
            # Use -d to list directories only, not their contents
            PREV_LOCAL=$(ls -1d /persist/services@* 2>/dev/null | grep -v "^$SNAPSHOT_PATH$" | sort -r | head -1 || true)

            # Try incremental send if we have a parent, fall back to full send if it fails
            if [ -n "$PREV_LOCAL" ]; then
              echo "Attempting incremental send from $(basename $PREV_LOCAL) to ${standby}"

              # Try incremental send, if it fails (e.g., parent missing on receiver), fall back to full
              if btrfs send -p "$PREV_LOCAL" "$SNAPSHOT_PATH" | \
                 ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
                 "btrfs receive /persist/services-standby"; then
                echo "Incremental send completed successfully"
                REPLICATION_SUCCESS=1
              else
                echo "Incremental send failed (likely missing parent on receiver), falling back to full send with clone source"
                # Use -c to specify clone source, maintaining parent relationship even in full send
                btrfs send -c "$PREV_LOCAL" "$SNAPSHOT_PATH" | \
                  ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
                  "btrfs receive /persist/services-standby"
                REPLICATION_SUCCESS=1
              fi
            else
              # First snapshot, do full send
              echo "Full send to ${standby} (first snapshot)"
              btrfs send "$SNAPSHOT_PATH" | \
                ssh -i "$SSH_KEY" -o StrictHostKeyChecking=accept-new root@${standby} \
                "btrfs receive /persist/services-standby"
              REPLICATION_SUCCESS=1
            fi

            # Cleanup old snapshots on sender (keep last 10 snapshots, sorted by name/timestamp)
            ls -1d /persist/services@* 2>/dev/null | sort | head -n -10 | xargs -r btrfs subvolume delete

            # Calculate metrics
            END_TIME=$(date +%s)
            DURATION=$((END_TIME - START_TIME))
            SNAPSHOT_COUNT=$(ls -1d /persist/services@* 2>/dev/null | wc -l)

            # Push metrics to Prometheus pushgateway
            cat <<METRICS | curl -s --data-binary @- http://pushgateway.service.consul:9091/metrics/job/nfs_replication/instance/${standby} || true
            # TYPE nfs_replication_last_success_timestamp gauge
            nfs_replication_last_success_timestamp $END_TIME
            # TYPE nfs_replication_duration_seconds gauge
            nfs_replication_duration_seconds $DURATION
            # TYPE nfs_replication_snapshot_count gauge
            nfs_replication_snapshot_count $SNAPSHOT_COUNT
            # TYPE nfs_replication_success gauge
            nfs_replication_success $REPLICATION_SUCCESS
            METRICS
          '';

          serviceConfig = {
            Type = "oneshot";
            User = "root";
          };
        };
      }))
    );

    systemd.timers = lib.mkMerge (
      lib.forEach cfg.standbys (standby: {
        "replicate-services-to-${standby}" = {
          description = "Timer for replicating /persist/services to ${standby}";
          wantedBy = [ "timers.target" ];
          timerConfig = {
            OnCalendar = "*:0/5";  # Every 5 minutes
            Persistent = true;
          };
        };
      })
    );
  };
}