Files
alo-cluster/common/nomad.nix
2025-03-10 18:38:03 +00:00

110 lines
2.4 KiB
Nix

# inspiration: https://github.com/astro/skyflake/blob/main/nixos-modules/nomad.nix
{ pkgs, config, ... }:
let
  # Names of the machines that run a Nomad server agent. They are compared
  # against networking.hostName and reused as retry_join addresses.
  servers = [ "c1" "c2" "c3" ];
  # True when the host being evaluated appears in `servers`.
  server_enabled = builtins.any (name: name == config.networking.hostName) servers;
in
{
# Nomad agent: the client role is always on; the server role is enabled only
# when `server_enabled` is true.
services.nomad = {
  enable = true;
  # dropPrivileges = true breaks at least CSI volumes, so it stays disabled.
  # TODO: consider fixing so that privileges can be dropped.
  dropPrivileges = false;
  # Additional agent configuration file passed to nomad next to `settings`.
  extraSettingsPaths = [ "/etc/nomad-alo.json" ];

  settings.datacenter = "alo";

  settings.client = {
    enabled = true;
    server_join = {
      retry_join = servers;
    };
    # Named host network bound to the tailscale0 interface / 100.64.0.0/10 range.
    host_network = {
      tailscale = {
        cidr = "100.64.0.0/10";
        interface = "tailscale0";
      };
    };
    # Host directories exposed to jobs as named host volumes.
    host_volume =
      let
        # Build one host_volume entry from its read_only flag and host path.
        mount = read_only: path: { inherit path read_only; };
      in
      {
        code = mount true "/data/compute/code";
        appdata = mount false "/data/compute/appdata";
        nix-store = mount true "/nix/store";
        sw = mount true "/run/current-system/sw";
      };
  };

  settings.server = {
    enabled = server_enabled;
    server_join = {
      retry_join = servers;
    };
    # floor(n / 2) + 1 with n = number of entries in `servers`, i.e. a majority
    # of the list (2 for the current three entries). Integer division.
    bootstrap_expect = (builtins.length servers) / 2 + 1;
  };

  settings.telemetry = {
    collection_interval = "1s";
    prometheus_metrics = true;
    publish_node_metrics = true;
    publish_allocation_metrics = true;
    disable_hostname = true;
  };
};
# Make the nomad unit want network-online.target.
# NOTE(review): `wants` by itself adds no start ordering; confirm the unit also
# carries a matching `after` (it may already come from the NixOS nomad module).
systemd.services.nomad = {
  wants = [ "network-online.target" ];
};
# Driver plugin configuration, serialised as JSON into the etc entry
# "nomad-alo.json".
environment.etc."nomad-alo.json".text =
  let
    # Settings for the docker task driver plugin.
    dockerConfig = {
      allow_privileged = true;
      # All capabilities are allowed for the sake of keepalived, which really
      # only needs "NET_ADMIN", "NET_BROADCAST" and "NET_RAW" on top of the
      # defaults.
      # TODO: trim this down
      allow_caps = [ "all" ];
      volumes = {
        enabled = true;
      };
      extra_labels = [
        "job_name"
        "task_group_name"
        "task_name"
        "node_name"
      ];
    };
  in
  builtins.toJSON {
    plugin = {
      docker = {
        config = dockerConfig;
      };
      raw_exec = {
        config = {
          enabled = true;
        };
      };
    };
  };
# Register the Docker and Nomad state directories under the "/persist"
# persistence entry.
environment.persistence."/persist" = {
  directories = [ "/var/lib/docker" "/var/lib/nomad" ];
};
# Packages installed system-wide: nomad plus the wander and damon packages.
environment.systemPackages = [
  pkgs.nomad
  pkgs.wander
  pkgs.damon
];
# Firewall openings for the agent: TCP 4646 on every host, plus TCP 4647/4648
# and UDP 4648 when this host is a server.
# NOTE(review): these look like Nomad's default HTTP (4646), RPC (4647) and
# Serf (4648) ports - confirm against the agent's `ports` configuration.
networking.firewall =
  let
    # Ports opened only when server_enabled is true.
    serverOnlyTcp = if server_enabled then [ 4647 4648 ] else [ ];
    serverOnlyUdp = if server_enabled then [ 4648 ] else [ ];
  in
  {
    allowedTCPPorts = [ 4646 ] ++ serverOnlyTcp;
    allowedUDPPorts = serverOnlyUdp;
  };
}