Refactor common modules.
common/nomad.nix
@@ -1,174 +1,177 @@
 # inspiration: https://github.com/astro/skyflake/blob/main/nixos-modules/nomad.nix
-{ pkgs, config, ... }:
+{ pkgs, config, lib, ... }:
 let
   servers = [
     "c1"
     "c2"
     "c3"
   ];
-  server_enabled = builtins.elem config.networking.hostName servers;
 in
 {
+  options.clusterRole.nomadServer = lib.mkEnableOption "Nomad server mode";
+
+  config = {
   services.nomad = {
     enable = true;
     # true breaks at least CSI volumes
     # TODO: consider fixing
     dropPrivileges = false;

     settings = {
       datacenter = "alo";

       client = {
         enabled = true;
         server_join.retry_join = servers;
         host_network.tailscale = {
           interface = "tailscale0";
           cidr = "100.64.0.0/10";
         };
         host_volume = {
           services = {
             path = "/data/services";
             read_only = false;
           };
           nix-store = {
             path = "/nix/store";
             read_only = true;
           };
           sw = {
             path = "/run/current-system/sw";
             read_only = true;
           };
         };
       };

       server = {
-        enabled = server_enabled;
+        enabled = config.clusterRole.nomadServer;
         bootstrap_expect = (builtins.length servers + 2) / 2;
         server_join.retry_join = servers;
       };

       telemetry = {
         collection_interval = "1s";
         disable_hostname = true;
         prometheus_metrics = true;
         publish_allocation_metrics = true;
         publish_node_metrics = true;
       };
     };

     extraSettingsPaths = [ "/etc/nomad-alo.json" ];
   };

   # NFS mount dependency configuration for Nomad:
   #
   # Problem: Docker bind mounts need the real NFS mount, not an empty stub.
   # If Nomad starts before NFS is mounted, containers get empty directories.
   #
   # Solution: use soft dependencies (wants/after) plus a health-checking recovery service.
   # - wants: Nomad wants the mount, but won't be killed if it goes away
   # - after: Nomad waits for the mount to be attempted before starting
   # - ExecStartPre with findmnt: blocks Nomad's start until the mount is actually active
   #
   # This prevents Docker race conditions while allowing:
   # - boot to proceed if NFS is unavailable (Nomad fails to start, systemd retries)
   # - Nomad to keep running if NFS temporarily fails (containers may error)
   # - the recovery service to auto-restart Nomad when NFS comes back or the mount goes stale
   #
   # Note: the mount uses Consul DNS, which resolves at mount time. If the NFS server
   # moves to a different IP, the mount becomes stale and needs a remount.
   # The recovery service handles this by detecting stale mounts and restarting Nomad.
   systemd.services.nomad = {
     wants = [ "network-online.target" "data-services.mount" ];
     after = [ "data-services.mount" ];
     serviceConfig.ExecStartPre = "${pkgs.util-linux}/bin/findmnt --mountpoint /data/services";
   };

   # Recovery service: automatically restart Nomad when the NFS mount needs attention.
   # This handles scenarios where:
   # - the NFS server was down during boot (the mount failed and Nomad hit its start limit)
   # - the NFS server failed over to a different host with a new IP (the mount went stale)
   # - a network outage temporarily broke the mount
   #
   # The timer runs every 30s and checks:
   # 1. Is the mount healthy (present and accessible)?
   # 2. If the mount is stale/inaccessible → restart Nomad (triggers a remount)
   # 3. If the mount is healthy but Nomad has failed → restart Nomad (normal recovery)
   systemd.services.nomad-mount-watcher = {
     description = "Restart Nomad when NFS mount needs attention";
     serviceConfig = {
       Type = "oneshot";
       ExecStart = pkgs.writeShellScript "nomad-mount-watcher" ''
         # Check if the mount point exists
         if ! ${pkgs.util-linux}/bin/findmnt --mountpoint /data/services >/dev/null 2>&1; then
           exit 0 # Mount not present, nothing to do
         fi

         # Check if the mount is actually accessible (not stale).
         # Use timeout to avoid hanging on stale NFS mounts.
         if ! ${pkgs.coreutils}/bin/timeout 5s ${pkgs.coreutils}/bin/stat /data/services >/dev/null 2>&1; then
           echo "NFS mount is stale or inaccessible. Restarting Nomad to trigger a remount..."
           ${pkgs.systemd}/bin/systemctl restart nomad.service
           exit 0
         fi

         # Mount is healthy - check whether Nomad itself needs recovery
         if ${pkgs.systemd}/bin/systemctl is-failed nomad.service >/dev/null 2>&1; then
           echo "NFS mount is healthy but Nomad has failed. Restarting Nomad..."
           ${pkgs.systemd}/bin/systemctl restart nomad.service
         fi
       '';
     };
   };

   systemd.timers.nomad-mount-watcher = {
     description = "Timer for Nomad mount watcher";
     wantedBy = [ "timers.target" ];
     timerConfig = {
       OnBootSec = "1min"; # first run 1min after boot
       OnUnitActiveSec = "30s"; # then every 30s
       Unit = "nomad-mount-watcher.service";
     };
   };

   environment.etc."nomad-alo.json".text = builtins.toJSON {
     plugin.docker.config = {
       allow_privileged = true;
       # for keepalived, though it only really needs "NET_ADMIN", "NET_BROADCAST" and "NET_RAW" on top of the defaults
       # TODO: trim this down
       allow_caps = [ "all" ];
       volumes.enabled = true;
       extra_labels = [
         "job_name"
         "task_group_name"
         "task_name"
         "node_name"
       ];
     };

     plugin.raw_exec.config.enabled = true;
   };

   environment.persistence."/persist".directories = [
     "/var/lib/docker"
     "/var/lib/nomad"
   ];

   environment.systemPackages = with pkgs; [
     nomad
     wander
     damon
   ];

   networking.firewall = {
     allowedTCPPorts =
-      if server_enabled then
+      if config.clusterRole.nomadServer then
         [
           4646
           4647
           4648
         ]
       else
         [ 4646 ];
-    allowedUDPPorts = if server_enabled then [ 4648 ] else [ ];
+    allowedUDPPorts = if config.clusterRole.nomadServer then [ 4648 ] else [ ];
   };
+  };
 }
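
The refactor replaces the hostname-derived server_enabled flag with an explicit clusterRole.nomadServer option. As a rough sketch of how hosts might opt in after this change (the file names and import paths below are assumptions for illustration, not part of this commit), a server node sets the option while a client-only node just imports the module and keeps its default:

  # hosts/c1.nix (hypothetical path): a Nomad server node
  {
    imports = [ ../common/nomad.nix ];
    clusterRole.nomadServer = true;
  }

  # hosts/worker1.nix (hypothetical path): a client-only node
  {
    imports = [ ../common/nomad.nix ];
    # clusterRole.nomadServer defaults to false via mkEnableOption,
    # so only the Nomad client runs and only TCP port 4646 is opened here.
  }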
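
For reference, the bootstrap_expect expression in the diff works out to a Raft majority of the hard-coded three-node server list; a quick sketch of the arithmetic (Nix integer division truncates):

  # With servers = [ "c1" "c2" "c3" ]:
  #   builtins.length servers == 3
  #   (3 + 2) / 2             == 2   (integer division)
  # so the servers wait for 2 peers, a majority of 3, before bootstrapping the cluster.
  (builtins.length [ "c1" "c2" "c3" ] + 2) / 2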