120 lines
3.2 KiB
Nix
120 lines
3.2 KiB
Nix
# inspiration: https://github.com/astro/skyflake/blob/main/nixos-modules/nomad.nix
|
|
{ pkgs, config, ... }:
|
|
let
  # Hostnames of the machines that run the Nomad server role; the same list is
  # handed to both agent roles as their `retry_join` targets.
  servers = [ "c1" "c2" "c3" ];

  # Whether this machine is one of `servers`, judged by its configured hostname.
  server_enabled =
    let
      thisHost = config.networking.hostName;
    in
    builtins.any (candidate: candidate == thisHost) servers;
in
|
|
{
|
|
services.nomad =
  let
    # Every agent role joins the cluster through the same server list.
    joinServers = {
      retry_join = servers;
    };

    # Build one `host_volume` entry from a host path and its read-only flag.
    hostVolume = path: read_only: { inherit path read_only; };
  in
  {
    enable = true;

    # Kept off: turning privilege dropping on breaks at least CSI volumes.
    # TODO: consider fixing
    dropPrivileges = false;

    # Additional settings file; its content is defined via environment.etc
    # in this module.
    extraSettingsPaths = [ "/etc/nomad-alo.json" ];

    settings = {
      datacenter = "alo";

      # The client role is enabled on every host that imports this module.
      client = {
        enabled = true;
        server_join = joinServers;

        host_network = {
          tailscale = {
            cidr = "100.64.0.0/10";
            interface = "tailscale0";
          };
        };

        # Host directories exposed as host volumes; only `services` is writable.
        host_volume = {
          services = hostVolume "/data/services" false;
          nix-store = hostVolume "/nix/store" true;
          sw = hostVolume "/run/current-system/sw" true;
        };
      };

      # The server role is only switched on for hosts listed in `servers`.
      server = {
        enabled = server_enabled;
        server_join = joinServers;
        # Integer division: half the server count, rounded down, plus one
        # (2 for the three servers listed in `servers`).
        bootstrap_expect = (builtins.length servers) / 2 + 1;
      };

      telemetry = {
        collection_interval = "1s";
        disable_hostname = true;
        prometheus_metrics = true;
        publish_allocation_metrics = true;
        publish_node_metrics = true;
      };
    };
  };
|
|
|
|
# Work around a startup race between the NFS automount and Docker bind mounts:
# if Nomad comes up first, Docker can bind-mount the empty automount stub
# directory before NFS is really mounted, which shows up as permission errors
# and missing data.
#   - RequiresMountsFor makes the Nomad unit depend on /data/services.
#   - ExecStartPre lists the directory, which triggers the automount before
#     Nomad itself starts.
# Note: boot still succeeds when NFS is unavailable; Nomad just won't start.
# TODO: the NFS mount goes through Consul DNS, which is resolved to an IP at
# mount time. If the NFS server moves to another IP the mount goes stale and
# has to be remounted. Consider a VIP, or a health check that remounts when
# staleness is detected.
systemd.services.nomad = {
  serviceConfig = {
    ExecStartPre = "${pkgs.coreutils}/bin/ls /data/services";
  };
  unitConfig = {
    RequiresMountsFor = [ "/data/services" ];
  };
  wants = [ "network-online.target" ];
};
|
|
|
|
# Plugin settings serialized to /etc/nomad-alo.json, the file that
# services.nomad.extraSettingsPaths points at.
environment.etc."nomad-alo.json".text = builtins.toJSON {
  plugin = {
    raw_exec.config.enabled = true;

    docker.config = {
      allow_privileged = true;
      volumes.enabled = true;

      # Everything is allowed for keepalived's sake, although it really only
      # needs "NET_ADMIN", "NET_BROADCAST" and "NET_RAW" on top of the defaults.
      # TODO: trim this down
      allow_caps = [ "all" ];

      extra_labels = [ "job_name" "task_group_name" "task_name" "node_name" ];
    };
  };
};
|
|
|
|
# Register the Docker and Nomad /var/lib directories with the "/persist"
# persistence entry.
environment.persistence."/persist" = {
  directories = [ "/var/lib/docker" "/var/lib/nomad" ];
};
|
|
|
|
# Packages added to the system profile: nomad, wander and damon from pkgs.
environment.systemPackages = [
  pkgs.nomad
  pkgs.wander
  pkgs.damon
];
|
|
|
|
# Ports opened in the firewall: every host accepts TCP 4646, and hosts
# running the server role additionally accept TCP 4647/4648 and UDP 4648.
networking.firewall =
  let
    # Yields `ports` on server hosts and an empty list everywhere else.
    serverOnly = ports: if server_enabled then ports else [ ];
  in
  {
    allowedTCPPorts = [ 4646 ] ++ serverOnly [ 4647 4648 ];
    allowedUDPPorts = serverOnly [ 4648 ];
  };
|
|
}
|