Only keep 10 snapshots, and push metrics.
This commit is contained in:
@@ -39,26 +39,27 @@ in
|
||||
noCheck = true;
|
||||
};
|
||||
|
||||
# Cleanup old snapshots on standby (keep last 4 hours for HA failover)
|
||||
# Clean up old snapshots on standby (keep last 10 snapshots)
|
||||
systemd.services.cleanup-services-standby-snapshots = {
|
||||
description = "Cleanup old btrfs snapshots in services-standby";
|
||||
path = [ pkgs.btrfs-progs pkgs.findutils pkgs.coreutils ];
|
||||
path = [ pkgs.btrfs-progs pkgs.findutils pkgs.coreutils pkgs.curl ];
|
||||
script = ''
|
||||
set -euo pipefail
|
||||
|
||||
# Keep at least 2 hours of snapshots (24 snapshots at 5min intervals)
|
||||
MIN_KEEP=24
|
||||
# Cleanup old snapshots on standby (keep last 10 snapshots, sorted by name/timestamp)
|
||||
ls -1d /persist/services-standby/services@* 2>/dev/null | sort | head -n -10 | xargs -r btrfs subvolume delete || true
|
||||
|
||||
# Count existing snapshots
|
||||
count=$(find /persist/services-standby -maxdepth 1 -name 'services@*' -type d | wc -l)
|
||||
# Calculate metrics
|
||||
CLEANUP_TIME=$(date +%s)
|
||||
SNAPSHOT_COUNT=$(ls -1d /persist/services-standby/services@* 2>/dev/null | wc -l)
|
||||
|
||||
# Only delete old snapshots if we have more than the minimum
|
||||
if [ $count -gt $MIN_KEEP ]; then
|
||||
# Delete snapshots older than 4 hours
|
||||
find /persist/services-standby -maxdepth 1 -name 'services@*' -mmin +240 -exec btrfs subvolume delete {} \; || true
|
||||
else
|
||||
echo "Only $count snapshots found, keeping all (minimum: $MIN_KEEP)"
|
||||
fi
|
||||
# Push metrics to Prometheus pushgateway
|
||||
cat <<METRICS | curl --data-binary @- http://pushgateway.service.consul:9091/metrics/job/nfs_standby_cleanup/instance/$(hostname)
|
||||
# TYPE nfs_standby_snapshot_count gauge
|
||||
nfs_standby_snapshot_count $SNAPSHOT_COUNT
|
||||
# TYPE nfs_standby_cleanup_last_run_timestamp gauge
|
||||
nfs_standby_cleanup_last_run_timestamp $CLEANUP_TIME
|
||||
METRICS
|
||||
'';
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
|
||||
Reference in New Issue
Block a user