80 lines
2.9 KiB
Nix
80 lines
2.9 KiB
Nix
{ config, lib, pkgs, ... }:
|
|
|
|
let
|
|
cfg = config.nfsServicesStandby;
|
|
in
|
|
{
|
|
options.nfsServicesStandby =
  let
    inherit (lib) mkEnableOption mkOption types;
  in
  {
    # Master switch for the standby role. The `default = true` override means
    # the module is active unless a host explicitly turns it off.
    enable = mkEnableOption "NFS services standby" // { default = true; };

    # Public keys allowed to push snapshots to this host. Each entry is turned
    # into a command-restricted root authorized_keys line by this module.
    replicationKeys = mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = ''
        SSH public keys authorized to replicate btrfs snapshots to this standby.
        These keys are restricted to only run 'btrfs receive /persist/services-standby'.

        Get the public key from the NFS server:
        ssh <nfs-server> sudo cat /persist/root/.ssh/btrfs-replication.pub
      '';
    };
  };
|
|
|
|
config = lib.mkIf cfg.enable {
|
|
# Replication logs in as root over SSH. Root login itself is enabled in
# common/sshd.nix; here every configured key is pinned to a single forced
# command so it cannot be used for anything but receiving snapshots.
users.users.root.openssh.authorizedKeys.keys =
  let
    # Prefix one public key with the forced `btrfs receive` command and the
    # `restrict` option, yielding a complete authorized_keys line.
    lockToReceive = pubKey:
      ''command="btrfs receive /persist/services-standby",restrict ${pubKey}'';
  in
  map lockToReceive cfg.replicationKeys;
|
|
|
|
# Declares the services-standby location as a bind mount of the path onto
# itself. This entry mainly documents that the path is expected to exist; the
# btrfs subvolume behind it has to be created by hand once:
#   sudo btrfs subvolume create /persist/services-standby
# Because it lives under /persist it then survives reboots.
fileSystems =
  let
    standbyPath = "/persist/services-standby";
  in
  {
    ${standbyPath} = {
      device = standbyPath;
      fsType = "none";
      options = [ "bind" ];
      noCheck = true;
    };
  };
|
|
|
|
# Prune received snapshots on the standby (keep the 10 newest) and report the
# resulting snapshot count plus the run timestamp to the Prometheus pushgateway.
# Both the deletion and the metrics push are best-effort: failures are logged
# to the journal but do not fail the unit.
systemd.services.cleanup-services-standby-snapshots = {
  description = "Cleanup old btrfs snapshots in services-standby";
  path = [ pkgs.btrfs-progs pkgs.findutils pkgs.coreutils pkgs.curl ];
  script = ''
    set -euo pipefail

    # Print every received snapshot path, one per line, oldest first
    # (snapshot names sort by their timestamp suffix).
    # find exits 0 when nothing matches, so an empty standby is not treated as
    # an error under `set -e -o pipefail` (unlike `ls` on an unmatched glob).
    list_snapshots() {
      find /persist/services-standby -mindepth 1 -maxdepth 1 -name 'services@*' | sort
    }

    # Cleanup old snapshots on standby (keep last 10 snapshots).
    # `head -n -10` drops the 10 newest from the list; `xargs -r` skips the
    # delete entirely when nothing is left to remove.
    list_snapshots | head -n -10 | xargs -r btrfs subvolume delete ||
      echo "warning: failed to delete one or more old snapshots" >&2

    # Calculate metrics
    CLEANUP_TIME=$(date +%s)
    SNAPSHOT_COUNT=$(list_snapshots | wc -l)

    # The instance label is read from /proc so the unit does not depend on a
    # `hostname` binary being available on the service PATH.
    INSTANCE=$(cat /proc/sys/kernel/hostname)

    # Push metrics to Prometheus pushgateway. --fail turns HTTP errors into a
    # non-zero exit so they are reported; --max-time bounds a hung gateway.
    cat <<METRICS | curl -sS --fail --max-time 30 --data-binary @- "http://pushgateway.service.consul:9091/metrics/job/nfs_standby_cleanup/instance/$INSTANCE" ||
    # TYPE nfs_standby_snapshot_count gauge
    nfs_standby_snapshot_count $SNAPSHOT_COUNT
    # TYPE nfs_standby_cleanup_last_run_timestamp gauge
    nfs_standby_cleanup_last_run_timestamp $CLEANUP_TIME
    METRICS
      echo "warning: failed to push metrics to pushgateway" >&2
  '';
  serviceConfig = {
    Type = "oneshot";
    User = "root";
  };
};
|
|
|
|
# Schedules the snapshot cleanup unit of the same name once per hour.
# Persistent is set so that a run missed while the machine was off is
# triggered after the next boot (see systemd.timer(5)).
systemd.timers.cleanup-services-standby-snapshots = {
  wantedBy = [ "timers.target" ];
  description = "Timer for cleaning up old snapshots on standby";
  timerConfig.OnCalendar = "hourly";
  timerConfig.Persistent = true;
};
|
|
};
|
|
}
|