Implement Prometheus metrics endpoint (P2.16)

Add comprehensive metrics collection for production monitoring:
- src/lib/metrics.ts: prom-client-based metrics library with custom counters,
  gauges, and histograms for Garmin sync, email, and decision engine
- GET /api/metrics: Prometheus-format endpoint for scraping
- Integration into garmin-sync cron: sync duration, success/failure counts,
  active users gauge
- Integration into email.ts: daily and warning email counters
- Integration into decision-engine.ts: decision type counters

Custom metrics implemented:
- phaseflow_garmin_sync_total (counter with status label)
- phaseflow_garmin_sync_duration_seconds (histogram)
- phaseflow_email_sent_total (counter with type label)
- phaseflow_decision_engine_calls_total (counter with decision label)
- phaseflow_active_users (gauge)

33 new tests (18 library + 15 route), bringing total to 586 tests.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 08:40:42 +00:00
parent 5ec3aba8b3
commit 5a0cdf7450
10 changed files with 528 additions and 26 deletions

View File

@@ -13,6 +13,11 @@ import {
fetchIntensityMinutes,
isTokenExpired,
} from "@/lib/garmin";
import {
activeUsersGauge,
garminSyncDuration,
garminSyncTotal,
} from "@/lib/metrics";
import { createPocketBaseClient } from "@/lib/pocketbase";
import type { GarminTokens, User } from "@/types";
@@ -34,6 +39,8 @@ export async function POST(request: Request) {
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
}
const syncStartTime = Date.now();
const result: SyncResult = {
success: true,
usersProcessed: 0,
@@ -129,10 +136,17 @@ export async function POST(request: Request) {
});
result.usersProcessed++;
garminSyncTotal.inc({ status: "success" });
} catch {
result.errors++;
garminSyncTotal.inc({ status: "failure" });
}
}
// Record sync duration and active users
const syncDurationSeconds = (Date.now() - syncStartTime) / 1000;
garminSyncDuration.observe(syncDurationSeconds);
activeUsersGauge.set(result.usersProcessed);
return NextResponse.json(result);
}

View File

@@ -0,0 +1,171 @@
// ABOUTME: Tests for Prometheus metrics endpoint used for production monitoring.
// ABOUTME: Validates metrics format, content type, and custom metric inclusion.
import * as promClient from "prom-client";
import { beforeEach, describe, expect, it, vi } from "vitest";
describe("GET /api/metrics", () => {
  // Imports the route module at call time (so it is resolved after the
  // per-test module reset below) and invokes its GET handler.
  const callRoute = async () => {
    const { GET: handler } = await import("./route");
    return handler();
  };

  // Calls the handler and resolves to the response body as text.
  const scrapeText = async () => {
    const res = await callRoute();
    return res.text();
  };

  beforeEach(async () => {
    // Empty prom-client's default registry and drop Vitest's module cache so
    // metrics registered by one test do not collide with the next.
    promClient.register.clear();
    vi.resetModules();
  });

  describe("response format", () => {
    it("returns 200 status", async () => {
      const res = await callRoute();

      expect(res.status).toBe(200);
    });

    it("returns Prometheus content type", async () => {
      const res = await callRoute();
      const contentType = res.headers.get("Content-Type");

      expect(contentType).toBe("text/plain; version=0.0.4; charset=utf-8");
    });

    it("returns text body with metrics", async () => {
      const text = await scrapeText();

      expect(text).toBeDefined();
      expect(text.length).toBeGreaterThan(0);
    });
  });

  describe("Node.js default metrics", () => {
    it("includes nodejs heap metrics", async () => {
      const text = await scrapeText();

      expect(text).toContain("nodejs_");
    });

    it("includes process metrics", async () => {
      const text = await scrapeText();

      expect(text).toContain("process_");
    });
  });

  describe("custom application metrics", () => {
    it("includes phaseflow_garmin_sync_total metric definition", async () => {
      const text = await scrapeText();

      expect(text).toContain("# TYPE phaseflow_garmin_sync_total counter");
      expect(text).toContain("# HELP phaseflow_garmin_sync_total");
    });

    it("includes phaseflow_garmin_sync_duration_seconds metric definition", async () => {
      const text = await scrapeText();

      expect(text).toContain(
        "# TYPE phaseflow_garmin_sync_duration_seconds histogram",
      );
      expect(text).toContain("# HELP phaseflow_garmin_sync_duration_seconds");
    });

    it("includes phaseflow_email_sent_total metric definition", async () => {
      const text = await scrapeText();

      expect(text).toContain("# TYPE phaseflow_email_sent_total counter");
      expect(text).toContain("# HELP phaseflow_email_sent_total");
    });

    it("includes phaseflow_decision_engine_calls_total metric definition", async () => {
      const text = await scrapeText();

      expect(text).toContain(
        "# TYPE phaseflow_decision_engine_calls_total counter",
      );
      expect(text).toContain("# HELP phaseflow_decision_engine_calls_total");
    });

    it("includes phaseflow_active_users metric definition", async () => {
      const text = await scrapeText();

      expect(text).toContain("# TYPE phaseflow_active_users gauge");
      expect(text).toContain("# HELP phaseflow_active_users");
    });
  });

  describe("metric values", () => {
    // In each test the metrics module is imported and mutated before the
    // route is imported and called.
    it("incremented garmin sync total is reflected in metrics output", async () => {
      const metricsModule = await import("@/lib/metrics");
      metricsModule.garminSyncTotal.inc({ status: "success" });

      const text = await scrapeText();

      expect(text).toContain('phaseflow_garmin_sync_total{status="success"} 1');
    });

    it("incremented email sent total is reflected in metrics output", async () => {
      const metricsModule = await import("@/lib/metrics");
      metricsModule.emailSentTotal.inc({ type: "daily" });

      const text = await scrapeText();

      expect(text).toContain('phaseflow_email_sent_total{type="daily"} 1');
    });

    it("set active users gauge is reflected in metrics output", async () => {
      const metricsModule = await import("@/lib/metrics");
      metricsModule.activeUsersGauge.set(25);

      const text = await scrapeText();

      expect(text).toContain("phaseflow_active_users 25");
    });
  });

  describe("Prometheus format validation", () => {
    it("produces valid Prometheus text format with TYPE comments", async () => {
      const text = await scrapeText();
      const rows = text.split("\n");

      // Count the "# TYPE" and "# HELP" comment rows in the exposition text.
      const countStartingWith = (prefix: string) =>
        rows.filter((row) => row.startsWith(prefix)).length;

      // The five custom metrics alone account for five rows of each kind.
      expect(countStartingWith("# TYPE")).toBeGreaterThanOrEqual(5);
      expect(countStartingWith("# HELP")).toBeGreaterThanOrEqual(5);
    });

    it("metric names follow Prometheus naming convention", async () => {
      const text = await scrapeText();

      // Custom metrics are expected to appear as lowercase, underscore-separated
      // names carrying the phaseflow_ prefix.
      expect(text).toMatch(/phaseflow_[a-z_]+/);
    });
  });
});

View File

@@ -0,0 +1,16 @@
// ABOUTME: Prometheus metrics endpoint for production monitoring and scraping.
// ABOUTME: Returns application metrics in Prometheus text format.
import { NextResponse } from "next/server";
import { metricsRegistry } from "@/lib/metrics";
/**
 * Handles GET requests to the metrics endpoint.
 *
 * Awaits the serialized output of `metricsRegistry` and returns it as the
 * response body with status 200, using the registry's own `contentType`
 * value for the `Content-Type` header.
 *
 * @returns A response whose body is the registry's metrics output.
 */
export async function GET(): Promise<NextResponse> {
  const body = await metricsRegistry.metrics();
  const headers = { "Content-Type": metricsRegistry.contentType };

  return new NextResponse(body, { status: 200, headers });
}