# inspiration: https://github.com/astro/skyflake/blob/main/nixos-modules/nomad.nix
#
# NixOS module that configures a Nomad agent on the importing host.
# Every host runs the client; hosts whose name is listed in `servers`
# additionally run the server role and open the server-side ports.
{ pkgs, config, ... }:
let
  # Hostnames of the Nomad servers. Also used as the retry_join list for
  # both the client and the server stanza.
  servers = [ "c1" "c2" "c3" ];

  # True when this host's networking.hostName is one of `servers`.
  server_enabled = builtins.elem config.networking.hostName servers;
in
{
  services.nomad = {
    enable = true;
    # NOTE(review): `pkgs.unstable` is presumably provided by an overlay
    # defined elsewhere in this configuration -- confirm.
    package = pkgs.unstable.nomad;

    # true breaks at least CSI volumes
    # TODO: consider fixing
    dropPrivileges = false;

    settings = {
      datacenter = "alo";

      client = {
        enabled = true;
        server_join.retry_join = servers;

        host_network.tailscale = {
          interface = "tailscale0";
          cidr = "100.64.0.0/10";
        };

        # Host paths exposed to jobs as read-only host volumes.
        host_volume = {
          code = {
            path = "/data/compute/code";
            read_only = true;
          };
          nix-store = {
            path = "/nix/store";
            read_only = true;
          };
        };
      };

      server = {
        enabled = server_enabled;
        # Integer division: floor(n / 2) + 1, i.e. a majority of the
        # server list (2 for the three servers above).
        bootstrap_expect = (builtins.length servers + 2) / 2;
        server_join.retry_join = servers;
      };

      telemetry = {
        collection_interval = "1s";
        disable_hostname = true;
        prometheus_metrics = true;
        publish_allocation_metrics = true;
        publish_node_metrics = true;
      };
    };

    # Additional agent configuration, generated below as /etc/nomad-alo.json.
    extraSettingsPaths = [ "/etc/nomad-alo.json" ];
  };

  systemd.services.nomad.wants = [ "network-online.target" ];

  # Docker task-driver plugin configuration, serialised to JSON and loaded
  # by the agent through extraSettingsPaths above.
  environment.etc."nomad-alo.json".text = builtins.toJSON {
    plugin.docker.config = {
      allow_privileged = true;
      # for keepalived, though only really needing "NET_ADMIN","NET_BROADCAST","NET_RAW" on top of default
      # TODO: trim this down
      allow_caps = [ "all" ];
      volumes.enabled = true;
      extra_labels = [
        "job_name"
        "task_group_name"
        "task_name"
        "node_name"
      ];
    };
  };

  # Keep Docker and Nomad state across reboots.
  # NOTE(review): `environment.persistence` is not a stock NixOS option;
  # presumably supplied by the impermanence module -- confirm.
  environment.persistence."/persist".directories = [
    "/var/lib/docker"
    "/var/lib/nomad"
  ];

  environment.systemPackages = [
    # Use the exact package the agent runs (services.nomad.package above)
    # rather than pkgs.nomad, so the CLI on PATH cannot drift to a
    # different version than the running agent.
    config.services.nomad.package
    pkgs.wander
    pkgs.damon
  ];

  # Servers open 4646/4647/4648 TCP and 4648 UDP; client-only hosts open
  # just 4646 TCP. (Per the Nomad docs these are the default HTTP API, RPC
  # and Serf gossip ports respectively.)
  networking.firewall = {
    allowedTCPPorts = if server_enabled then [ 4646 4647 4648 ] else [ 4646 ];
    allowedUDPPorts = if server_enabled then [ 4648 ] else [ ];
  };
}