Refactor common modules.

CLAUDE.md (49 changed lines)
@@ -7,16 +7,22 @@ NixOS cluster configuration using flakes. Homelab infrastructure with Nomad/Cons
 ```
 ├── common/
 │   ├── global/                  # Applied to all hosts (backup, sops, users, etc.)
-│   ├── compute-node.nix         # Nomad client + Consul agent + NFS client
-│   ├── cluster-node.nix         # Nomad server + Consul server (for quorum members)
+│   ├── minimal-node.nix         # Base (ssh, user, boot, impermanence)
+│   ├── cluster-member.nix       # Consul + storage clients (NFS/CIFS/GlusterFS)
+│   ├── nomad-worker.nix         # Nomad client (runs jobs) + Docker + NFS deps
+│   ├── nomad-server.nix         # Enables Consul + Nomad server mode
+│   ├── cluster-tools.nix        # Just CLI tools (nomad, wander, damon)
 │   ├── workstation-node.nix     # Dev tools (wget, deploy-rs, docker, nix-ld)
 │   ├── desktop-node.nix         # Hyprland + GUI environment
 │   ├── nfs-services-server.nix  # NFS server + btrfs replication (zippy)
-│   └── nfs-services-standby.nix # NFS standby + receive replication (c1, c2)
+│   └── nfs-services-standby.nix # NFS standby + receive replication (c1)
 ├── hosts/
-│   ├── c1/, c2/, c3/            # Cattle nodes (compute, quorum members)
-│   ├── zippy/                   # Primary storage + NFS server + stateful workloads
+│   ├── c1/, c2/, c3/            # Cattle nodes (quorum + workers)
+│   ├── zippy/                   # Primary storage + NFS server + worker (not quorum)
+│   ├── chilly/                  # Home Assistant VM + cluster member (Consul only)
+│   ├── sparky/                  # Desktop + cluster member (Consul only)
 │   ├── fractal/                 # (Proxmox, will become NixOS storage node)
-│   ├── sunny/                   # (Standalone ethereum node, not in cluster)
-│   └── chilly/                  # (Home Assistant VM, not in cluster)
+│   └── sunny/                   # (Standalone ethereum node, not in cluster)
 ├── docs/
+│   ├── CLUSTER_REVAMP.md        # Master plan for architecture changes
+│   ├── MIGRATION_TODO.md        # Tracking checklist for migration
@@ -35,11 +41,32 @@ NixOS cluster configuration using flakes. Homelab infrastructure with Nomad/Cons
 - `/data/shared` - CIFS from fractal (existing, unchanged)
 
 ### Hosts
-- **c1, c2, c3**: Cattle nodes, run most workloads, Nomad/Consul quorum
-- **zippy**: Primary NFS server, runs databases (affinity), replicates to c1 every 5min
+- **c1, c2, c3**: Cattle nodes, run most workloads, Nomad/Consul quorum members
+- **zippy**: Primary NFS server, runs workloads (affinity), NOT quorum, replicates to c1 every 5min
+- **chilly**: Home Assistant VM, cluster member (Consul agent + CLI tools), no workloads
+- **sparky**: Desktop/laptop, cluster member (Consul agent + CLI tools), no workloads
 - **fractal**: Storage node (Proxmox/ZFS), will join quorum after GlusterFS removed
-- **sunny**: Standalone ethereum staking node
-- **chilly**: Home Assistant VM
+- **sunny**: Standalone ethereum staking node (not in cluster)
 
+## Config Architecture
+
+**Modular role-based configs** (compose as needed):
+- `minimal-node.nix` - Base for all systems (SSH, user, boot, impermanence)
+- `cluster-member.nix` - Consul agent + shared storage mounts (no Nomad)
+- `nomad-worker.nix` - Nomad client to run jobs (requires cluster-member)
+- `nomad-server.nix` - Enables Consul + Nomad server mode (for quorum members)
+- `cluster-tools.nix` - Just CLI tools (no services)
+
+**Machine type configs** (via flake profile):
+- `workstation-node.nix` - Dev tools (deploy-rs, docker, nix-ld, emulation)
+- `desktop-node.nix` - Extends workstation + Hyprland/GUI
+
+**Host composition examples**:
+- c1/c2/c3: `cluster-member + nomad-worker + nomad-server` (quorum + runs jobs)
+- zippy: `cluster-member + nomad-worker` (runs jobs, not quorum)
+- chilly/sparky: `cluster-member + cluster-tools` (Consul + CLI only)
+
+**Key insight**: Profiles (workstation/desktop) no longer imply cluster membership. Hosts explicitly declare roles via imports.
+
 ## Key Patterns
 
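The composition this new section describes shows up concretely in the host diffs further down. As a minimal sketch (module paths taken from the tree listing above; the exact host file is hypothetical — see the hosts/c1 hunk below for the real one), a quorum member now declares its roles explicitly:

```nix
# Hypothetical sketch of a quorum member's host config; roles are
# opt-in via explicit imports rather than implied by a profile.
{ ... }:
{
  imports = [
    ../../common/global              # applied to all hosts
    ../../common/cluster-member.nix  # Consul agent + storage clients
    ../../common/nomad-worker.nix    # Nomad client (runs jobs)
    ../../common/nomad-server.nix    # quorum members only
  ];
}
```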
common/compute-node.nix (deleted)
@@ -1,10 +0,0 @@
-{ pkgs, ... }:
-{
-  # Compute node: Cluster node with Nomad and GlusterFS server
-  # Used by: c1, c2, c3
-  imports = [
-    ./cluster-node.nix
-    ./glusterfs.nix
-    ./nomad.nix
-  ];
-}
common/consul.nix
@@ -1,13 +1,15 @@
-{ pkgs, config, ... }:
+{ pkgs, config, lib, ... }:
 let
   servers = [
     "c1"
     "c2"
     "c3"
   ];
-  server_enabled = builtins.elem config.networking.hostName servers;
 in
 {
+  options.clusterRole.consulServer = lib.mkEnableOption "Consul server mode";
+
+  config = {
   services.consul = {
     enable = true;
     webUi = true;
@@ -15,8 +17,8 @@ in
     extraConfig = {
       client_addr = "0.0.0.0";
       datacenter = "alo";
-      server = server_enabled;
-      bootstrap_expect = if server_enabled then (builtins.length servers + 2) / 2 else null;
+      server = config.clusterRole.consulServer;
+      bootstrap_expect = if config.clusterRole.consulServer then (builtins.length servers + 2) / 2 else null;
       retry_join = builtins.filter (elem: elem != config.networking.hostName) servers;
       telemetry = {
         prometheus_retention_time = "24h";
@@ -41,4 +43,5 @@ in
       8302
     ];
   };
+  };
 }
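A note on the `bootstrap_expect` arithmetic: Nix's `/` truncates on integers, so `(builtins.length servers + 2) / 2` is `floor(n/2) + 1` — the majority of `n` servers, i.e. `2` for the three servers listed. A standalone check (the expression is copied from the module above; running it through `nix eval` is just an illustration):

```nix
# Evaluates the same quorum expression used in the module above:
#   nix eval --expr 'let servers = [ "c1" "c2" "c3" ]; in (builtins.length servers + 2) / 2'
let
  servers = [ "c1" "c2" "c3" ];
in
(builtins.length servers + 2) / 2  # => 2, a majority of 3
```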
common/nomad-server.nix (new file, 9 lines)
@@ -0,0 +1,9 @@
+{ ... }:
+{
+  # Enable server mode for both Consul and Nomad
+  # Used by: c1, c2, c3 (quorum members)
+  clusterRole = {
+    consulServer = true;
+    nomadServer = true;
+  };
+}
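Because `mkEnableOption` defaults to `false`, any host that omits this import keeps Consul and Nomad in client-only mode. A host could also (hypothetically — no host in this commit does) enable just one of the two roles:

```nix
# Hypothetical: enable only the Consul server role on some host.
{ ... }:
{
  clusterRole.consulServer = true;
}
```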
common/nomad-worker.nix (new file, 9 lines)
@@ -0,0 +1,9 @@
+{ ... }:
+{
+  # Enable Nomad client to run workloads
+  # Includes: Nomad client, Docker plugin, host volumes, NFS mount dependencies
+  # Used by: c1, c2, c3, zippy (all nodes that run Nomad jobs)
+  imports = [
+    ./nomad.nix
+  ];
+}
common/nomad.nix
@@ -1,14 +1,16 @@
 # inspiration: https://github.com/astro/skyflake/blob/main/nixos-modules/nomad.nix
-{ pkgs, config, ... }:
+{ pkgs, config, lib, ... }:
 let
   servers = [
     "c1"
     "c2"
     "c3"
   ];
-  server_enabled = builtins.elem config.networking.hostName servers;
 in
 {
+  options.clusterRole.nomadServer = lib.mkEnableOption "Nomad server mode";
+
+  config = {
   services.nomad = {
     enable = true;
     # true breaks at least CSI volumes
@@ -42,7 +44,7 @@ in
     };
 
     server = {
-      enabled = server_enabled;
+      enabled = config.clusterRole.nomadServer;
       bootstrap_expect = (builtins.length servers + 2) / 2;
       server_join.retry_join = servers;
     };
@@ -161,7 +163,7 @@ in
 
   networking.firewall = {
     allowedTCPPorts =
-      if server_enabled then
+      if config.clusterRole.nomadServer then
         [
           4646
           4647
@@ -169,6 +171,7 @@ in
         ]
       else
         [ 4646 ];
-    allowedUDPPorts = if server_enabled then [ 4648 ] else [ ];
+    allowedUDPPorts = if config.clusterRole.nomadServer then [ 4648 ] else [ ];
   };
+  };
 }
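The Consul and Nomad modules make the same structural change: once a module declares `options`, its settings must move under an explicit `config` attribute — that is what the added `config = {` and the extra closing `};` accomplish. Schematically (not verbatim from the repo):

```nix
# The NixOS module shape introduced by this commit (schematic).
{ lib, config, ... }:
{
  options.clusterRole.nomadServer = lib.mkEnableOption "Nomad server mode";

  config = {
    # All previously top-level settings now live under `config`
    # and may reference the option declared above.
    services.nomad.enable = true;
  };
}
```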
common/server-node.nix (deleted)
@@ -1,14 +0,0 @@
-{ pkgs, ... }:
-{
-  # Server profile: Cluster-enabled system for server deployments
-  # Extends cluster-node with server-specific configurations
-  # Future: Add bare NixOS services here (mysql, postgres, etc.) when migrating from Nomad
-  imports = [
-    ./cluster-node.nix
-  ];
-
-  # Server-specific configurations can be added here
-  # Example (for future use):
-  # services.mysql.enable = lib.mkDefault false;
-  # services.postgresql.enable = lib.mkDefault false;
-}
common/workstation-node.nix
@@ -1,9 +1,9 @@
 { pkgs, inputs, ... }:
 {
   # Workstation profile: Development workstation configuration
-  # Extends server-node with development tools and emulation
+  # Adds development tools and emulation on top of minimal-node
   imports = [
-    ./server-node.nix
+    ./minimal-node.nix
     ./unattended-encryption.nix
   ];
 
flake.nix
@@ -140,7 +140,7 @@
       c2 = mkHost "x86_64-linux" "server" [ ./hosts/c2 ];
       c3 = mkHost "x86_64-linux" "server" [ ./hosts/c3 ];
       alo-cloud-1 = mkHost "aarch64-linux" "cloud" [ ./hosts/alo-cloud-1 ];
-      zippy = mkHost "x86_64-linux" "workstation" [
+      zippy = mkHost "x86_64-linux" "minimal" [
         ethereum-nix.nixosModules.default
         ./hosts/zippy
       ];
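`mkHost` itself is outside this diff. Assuming it maps the profile name onto one of the `common/*-node.nix` profiles ("minimal" → `minimal-node.nix`, "workstation" → `workstation-node.nix`) and appends the host modules, it plausibly looks something like:

```nix
# Hypothetical sketch of mkHost (not shown in this commit): build a
# nixosSystem from an architecture, a profile name, and extra modules.
mkHost = system: profile: modules:
  nixpkgs.lib.nixosSystem {
    inherit system;
    modules = [ (./common + "/${profile}-node.nix") ] ++ modules;
  };
```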
hosts/c1/default.nix
@@ -3,7 +3,10 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
-    ../../common/compute-node.nix
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/nomad-worker.nix # Nomad client (runs jobs)
+    ../../common/nomad-server.nix # Consul + Nomad server mode
+    ../../common/glusterfs.nix # GlusterFS server (temp during migration)
     ../../common/nfs-services-standby.nix # NFS standby for /data/services
     # To promote to NFS server (during failover):
     # 1. Follow procedure in docs/NFS_FAILOVER.md
hosts/c2/default.nix
@@ -3,7 +3,10 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
-    ../../common/compute-node.nix
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/nomad-worker.nix # Nomad client (runs jobs)
+    ../../common/nomad-server.nix # Consul + Nomad server mode
+    ../../common/glusterfs.nix # GlusterFS server (temp during migration)
     ./hardware.nix
   ];
 
hosts/c3/default.nix
@@ -3,7 +3,10 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
-    ../../common/compute-node.nix
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/nomad-worker.nix # Nomad client (runs jobs)
+    ../../common/nomad-server.nix # Consul + Nomad server mode
+    ../../common/glusterfs.nix # GlusterFS server (temp during migration)
     ../../common/binary-cache-server.nix
     ./hardware.nix
   ];
hosts/chilly/default.nix
@@ -8,6 +8,8 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/cluster-tools.nix # Nomad CLI (no service)
     ./hardware.nix
   ];
 
hosts/sparky/default.nix
@@ -3,6 +3,8 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/cluster-tools.nix # Nomad CLI (no service)
     ./hardware.nix
   ];
 
hosts/zippy/default.nix
@@ -3,7 +3,10 @@
   imports = [
     ../../common/encrypted-btrfs-layout.nix
     ../../common/global
-    ../../common/compute-node.nix
+    ../../common/cluster-member.nix # Consul + storage clients
+    ../../common/nomad-worker.nix # Nomad client (runs jobs)
+    # NOTE: zippy is NOT a server - no nomad-server.nix import
+    ../../common/glusterfs.nix # GlusterFS server (temp during migration)
     # ../../common/ethereum.nix
     ../../common/nfs-services-server.nix # NFS server for /data/services
     # To move NFS server role to another host: