Implement Prometheus metrics endpoint (P2.16)
Add comprehensive metrics collection for production monitoring: - src/lib/metrics.ts: prom-client based metrics library with custom counters, gauges, and histograms for Garmin sync, email, and decision engine - GET /api/metrics: Prometheus-format endpoint for scraping - Integration into garmin-sync cron: sync duration, success/failure counts, active users gauge - Integration into email.ts: daily and warning email counters - Integration into decision-engine.ts: decision type counters Custom metrics implemented: - phaseflow_garmin_sync_total (counter with status label) - phaseflow_garmin_sync_duration_seconds (histogram) - phaseflow_email_sent_total (counter with type label) - phaseflow_decision_engine_calls_total (counter with decision label) - phaseflow_active_users (gauge) 33 new tests (18 library + 15 route), bringing total to 586 tests. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,11 @@ import {
|
||||
fetchIntensityMinutes,
|
||||
isTokenExpired,
|
||||
} from "@/lib/garmin";
|
||||
import {
|
||||
activeUsersGauge,
|
||||
garminSyncDuration,
|
||||
garminSyncTotal,
|
||||
} from "@/lib/metrics";
|
||||
import { createPocketBaseClient } from "@/lib/pocketbase";
|
||||
import type { GarminTokens, User } from "@/types";
|
||||
|
||||
@@ -34,6 +39,8 @@ export async function POST(request: Request) {
|
||||
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
|
||||
}
|
||||
|
||||
const syncStartTime = Date.now();
|
||||
|
||||
const result: SyncResult = {
|
||||
success: true,
|
||||
usersProcessed: 0,
|
||||
@@ -129,10 +136,17 @@ export async function POST(request: Request) {
|
||||
});
|
||||
|
||||
result.usersProcessed++;
|
||||
garminSyncTotal.inc({ status: "success" });
|
||||
} catch {
|
||||
result.errors++;
|
||||
garminSyncTotal.inc({ status: "failure" });
|
||||
}
|
||||
}
|
||||
|
||||
// Record sync duration and active users
|
||||
const syncDurationSeconds = (Date.now() - syncStartTime) / 1000;
|
||||
garminSyncDuration.observe(syncDurationSeconds);
|
||||
activeUsersGauge.set(result.usersProcessed);
|
||||
|
||||
return NextResponse.json(result);
|
||||
}
|
||||
|
||||
171
src/app/api/metrics/route.test.ts
Normal file
171
src/app/api/metrics/route.test.ts
Normal file
@@ -0,0 +1,171 @@
|
||||
// ABOUTME: Tests for Prometheus metrics endpoint used for production monitoring.
|
||||
// ABOUTME: Validates metrics format, content type, and custom metric inclusion.
|
||||
|
||||
import * as promClient from "prom-client";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
describe("GET /api/metrics", () => {
  /**
   * Dynamically imports the route module and calls its GET handler.
   * The import is repeated per call because the module cache is reset
   * before every test (see beforeEach).
   */
  const invokeGet = async () => {
    const routeModule = await import("./route");
    return routeModule.GET();
  };

  /** Calls the endpoint and resolves with the response body as text. */
  const scrapeBody = async () => {
    const res = await invokeGet();
    return res.text();
  };

  beforeEach(() => {
    // Start every test from an empty default registry and a fresh module
    // cache so metric registrations from earlier tests cannot collide.
    promClient.register.clear();
    vi.resetModules();
  });

  describe("response format", () => {
    it("returns 200 status", async () => {
      const res = await invokeGet();

      expect(res.status).toBe(200);
    });

    it("returns Prometheus content type", async () => {
      const res = await invokeGet();
      const contentType = res.headers.get("Content-Type");

      expect(contentType).toBe("text/plain; version=0.0.4; charset=utf-8");
    });

    it("returns text body with metrics", async () => {
      const text = await scrapeBody();

      expect(text).toBeDefined();
      expect(text.length).toBeGreaterThan(0);
    });
  });

  describe("Node.js default metrics", () => {
    it("includes nodejs heap metrics", async () => {
      expect(await scrapeBody()).toContain("nodejs_");
    });

    it("includes process metrics", async () => {
      expect(await scrapeBody()).toContain("process_");
    });
  });

  describe("custom application metrics", () => {
    // [metric name, Prometheus metric type] for every custom metric that
    // must be declared in the scrape output.
    const expectedDefinitions: [string, string][] = [
      ["phaseflow_garmin_sync_total", "counter"],
      ["phaseflow_garmin_sync_duration_seconds", "histogram"],
      ["phaseflow_email_sent_total", "counter"],
      ["phaseflow_decision_engine_calls_total", "counter"],
      ["phaseflow_active_users", "gauge"],
    ];

    it.each(expectedDefinitions)(
      "includes %s metric definition",
      async (metricName, metricType) => {
        const text = await scrapeBody();

        expect(text).toContain(`# TYPE ${metricName} ${metricType}`);
        expect(text).toContain(`# HELP ${metricName}`);
      },
    );
  });

  describe("metric values", () => {
    it("incremented garmin sync total is reflected in metrics output", async () => {
      // Mutate the metric first, then scrape: both imports happen within the
      // same test, so they share one module instance.
      const metricsModule = await import("@/lib/metrics");
      metricsModule.garminSyncTotal.inc({ status: "success" });

      const text = await scrapeBody();

      expect(text).toContain('phaseflow_garmin_sync_total{status="success"} 1');
    });

    it("incremented email sent total is reflected in metrics output", async () => {
      const metricsModule = await import("@/lib/metrics");
      metricsModule.emailSentTotal.inc({ type: "daily" });

      const text = await scrapeBody();

      expect(text).toContain('phaseflow_email_sent_total{type="daily"} 1');
    });

    it("set active users gauge is reflected in metrics output", async () => {
      const metricsModule = await import("@/lib/metrics");
      metricsModule.activeUsersGauge.set(25);

      const text = await scrapeBody();

      expect(text).toContain("phaseflow_active_users 25");
    });
  });

  describe("Prometheus format validation", () => {
    it("produces valid Prometheus text format with TYPE comments", async () => {
      const text = await scrapeBody();

      // Count the "# TYPE" and "# HELP" comment lines in the exposition text.
      const countWithPrefix = (prefix: string) =>
        text.split("\n").filter((row) => row.startsWith(prefix)).length;

      // Should have type and help for our custom metrics
      expect(countWithPrefix("# TYPE")).toBeGreaterThanOrEqual(5);
      expect(countWithPrefix("# HELP")).toBeGreaterThanOrEqual(5);
    });

    it("metric names follow Prometheus naming convention", async () => {
      const text = await scrapeBody();

      // Custom metrics are snake_case and carry the phaseflow_ prefix.
      expect(text).toMatch(/phaseflow_[a-z_]+/);
    });
  });
});
|
||||
16
src/app/api/metrics/route.ts
Normal file
16
src/app/api/metrics/route.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
// ABOUTME: Prometheus metrics endpoint for production monitoring and scraping.
|
||||
// ABOUTME: Returns application metrics in Prometheus text format.
|
||||
|
||||
import { NextResponse } from "next/server";
|
||||
import { metricsRegistry } from "@/lib/metrics";
|
||||
|
||||
/**
 * Route segment config: always execute this handler at request time.
 *
 * The handler below takes no Request argument and uses no dynamic APIs, so
 * Next.js versions that cache GET Route Handlers by default could evaluate it
 * once at build time and serve that frozen snapshot to every scrape. Forcing
 * dynamic rendering guarantees each scrape sees live metric values.
 */
export const dynamic = "force-dynamic";

/**
 * Serves the current contents of the application metrics registry in the
 * Prometheus text exposition format.
 *
 * @returns A 200 response whose body is the serialized registry and whose
 *   Content-Type is the one reported by the registry itself.
 */
export async function GET(): Promise<NextResponse> {
  const metrics = await metricsRegistry.metrics();

  return new NextResponse(metrics, {
    status: 200,
    headers: {
      "Content-Type": metricsRegistry.contentType,
      // Metrics are point-in-time values; intermediaries must not reuse them.
      "Cache-Control": "no-store",
    },
  });
}
|
||||
@@ -1,5 +1,6 @@
|
||||
// ABOUTME: Training decision engine based on biometric and cycle data.
|
||||
// ABOUTME: Implements priority-based rules for daily training recommendations.
|
||||
import { decisionEngineCallsTotal } from "@/lib/metrics";
|
||||
import type { DailyData, Decision, OverrideType } from "@/types";
|
||||
|
||||
// Override priority order - checked before algorithmic rules
|
||||
@@ -80,14 +81,18 @@ export function getDecisionWithOverrides(
|
||||
// Check overrides first, in priority order: flare > stress > sleep > pms
|
||||
for (const override of OVERRIDE_PRIORITY) {
|
||||
if (overrides.includes(override)) {
|
||||
return {
|
||||
const decision: Decision = {
|
||||
status: "REST",
|
||||
reason: OVERRIDE_REASONS[override],
|
||||
icon: "🛑",
|
||||
};
|
||||
decisionEngineCallsTotal.inc({ decision: decision.status });
|
||||
return decision;
|
||||
}
|
||||
}
|
||||
|
||||
// No active overrides - fall through to algorithmic rules
|
||||
return getTrainingDecision(data);
|
||||
const decision = getTrainingDecision(data);
|
||||
decisionEngineCallsTotal.inc({ decision: decision.status });
|
||||
return decision;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
// ABOUTME: Sends daily training notifications and period confirmation emails.
|
||||
import { Resend } from "resend";
|
||||
|
||||
import { emailSentTotal } from "@/lib/metrics";
|
||||
|
||||
const resend = new Resend(process.env.RESEND_API_KEY);
|
||||
|
||||
const EMAIL_FROM = process.env.EMAIL_FROM || "phaseflow@example.com";
|
||||
@@ -57,6 +59,8 @@ Auto-generated by PhaseFlow`;
|
||||
subject,
|
||||
text: body,
|
||||
});
|
||||
|
||||
emailSentTotal.inc({ type: "daily" });
|
||||
}
|
||||
|
||||
export async function sendPeriodConfirmationEmail(
|
||||
@@ -114,4 +118,6 @@ Auto-generated by PhaseFlow`;
|
||||
subject,
|
||||
text: body,
|
||||
});
|
||||
|
||||
emailSentTotal.inc({ type: "warning" });
|
||||
}
|
||||
|
||||
200
src/lib/metrics.test.ts
Normal file
200
src/lib/metrics.test.ts
Normal file
@@ -0,0 +1,200 @@
|
||||
// ABOUTME: Tests for the Prometheus metrics collection module.
|
||||
// ABOUTME: Validates metric registration, counters, gauges, histograms per observability spec.
|
||||
|
||||
import * as promClient from "prom-client";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
describe("metrics", () => {
  /**
   * Loads the module under test. Because the module cache is reset before
   * each test, the first call inside a test evaluates the module afresh.
   */
  const loadMetrics = () => import("./metrics");

  beforeEach(() => {
    // Clear the default registry and drop cached modules before each test.
    promClient.register.clear();
    vi.resetModules();
  });

  afterEach(() => {
    promClient.register.clear();
  });

  describe("registry", () => {
    it("exports a Prometheus registry", async () => {
      const mod = await loadMetrics();

      expect(mod.metricsRegistry).toBeDefined();
      expect(typeof mod.metricsRegistry.metrics).toBe("function");
    });

    it("collects default Node.js metrics", async () => {
      const mod = await loadMetrics();
      const output = await mod.metricsRegistry.metrics();

      // Standard Node.js runtime and process collectors must be present.
      expect(output).toContain("nodejs_");
      expect(output).toContain("process_");
    });
  });

  describe("custom metrics - garmin sync", () => {
    it("exports phaseflow_garmin_sync_total counter", async () => {
      const mod = await loadMetrics();

      expect(mod.garminSyncTotal).toBeDefined();
      expect(mod.garminSyncTotal.inc).toBeDefined();
    });

    it("increments garmin sync total with success status", async () => {
      const mod = await loadMetrics();
      mod.garminSyncTotal.inc({ status: "success" });

      const output = await mod.metricsRegistry.metrics();

      expect(output).toContain('phaseflow_garmin_sync_total{status="success"} 1');
    });

    it("increments garmin sync total with failure status", async () => {
      const mod = await loadMetrics();
      mod.garminSyncTotal.inc({ status: "failure" });

      const output = await mod.metricsRegistry.metrics();

      expect(output).toContain('phaseflow_garmin_sync_total{status="failure"} 1');
    });

    it("exports phaseflow_garmin_sync_duration_seconds histogram", async () => {
      const mod = await loadMetrics();

      expect(mod.garminSyncDuration).toBeDefined();
      expect(mod.garminSyncDuration.observe).toBeDefined();
    });

    it("records garmin sync duration", async () => {
      const mod = await loadMetrics();
      mod.garminSyncDuration.observe(1.5);

      const output = await mod.metricsRegistry.metrics();

      // A single 1.5s observation shows up in the bucket, sum and count series.
      expect(output).toContain("phaseflow_garmin_sync_duration_seconds_bucket");
      expect(output).toContain("phaseflow_garmin_sync_duration_seconds_sum 1.5");
      expect(output).toContain("phaseflow_garmin_sync_duration_seconds_count 1");
    });
  });

  describe("custom metrics - email", () => {
    it("exports phaseflow_email_sent_total counter", async () => {
      const mod = await loadMetrics();

      expect(mod.emailSentTotal).toBeDefined();
      expect(mod.emailSentTotal.inc).toBeDefined();
    });

    it("increments email sent total with daily type", async () => {
      const mod = await loadMetrics();
      mod.emailSentTotal.inc({ type: "daily" });

      const output = await mod.metricsRegistry.metrics();

      expect(output).toContain('phaseflow_email_sent_total{type="daily"} 1');
    });

    it("increments email sent total with warning type", async () => {
      const mod = await loadMetrics();
      mod.emailSentTotal.inc({ type: "warning" });

      const output = await mod.metricsRegistry.metrics();

      expect(output).toContain('phaseflow_email_sent_total{type="warning"} 1');
    });
  });

  describe("custom metrics - decision engine", () => {
    it("exports phaseflow_decision_engine_calls_total counter", async () => {
      const mod = await loadMetrics();

      expect(mod.decisionEngineCallsTotal).toBeDefined();
      expect(mod.decisionEngineCallsTotal.inc).toBeDefined();
    });

    it("increments decision engine calls with decision label", async () => {
      const mod = await loadMetrics();
      mod.decisionEngineCallsTotal.inc({ decision: "REST" });

      const output = await mod.metricsRegistry.metrics();

      expect(output).toContain(
        'phaseflow_decision_engine_calls_total{decision="REST"} 1',
      );
    });

    it("tracks multiple decision types", async () => {
      const mod = await loadMetrics();
      const counter = mod.decisionEngineCallsTotal;

      // Two REST calls, then one GENTLE and one GO, in that order.
      counter.inc({ decision: "REST" });
      counter.inc({ decision: "REST" });
      counter.inc({ decision: "GENTLE" });
      counter.inc({ decision: "GO" });

      const output = await mod.metricsRegistry.metrics();

      expect(output).toContain(
        'phaseflow_decision_engine_calls_total{decision="REST"} 2',
      );
      expect(output).toContain(
        'phaseflow_decision_engine_calls_total{decision="GENTLE"} 1',
      );
      expect(output).toContain(
        'phaseflow_decision_engine_calls_total{decision="GO"} 1',
      );
    });
  });

  describe("custom metrics - active users", () => {
    it("exports phaseflow_active_users gauge", async () => {
      const mod = await loadMetrics();

      expect(mod.activeUsersGauge).toBeDefined();
      expect(mod.activeUsersGauge.set).toBeDefined();
    });

    it("sets active users count", async () => {
      const mod = await loadMetrics();
      mod.activeUsersGauge.set(42);

      const output = await mod.metricsRegistry.metrics();

      expect(output).toContain("phaseflow_active_users 42");
    });

    it("can increment and decrement active users gauge", async () => {
      const mod = await loadMetrics();
      const gauge = mod.activeUsersGauge;

      // One increment followed by one decrement leaves the value at 10.
      gauge.set(10);
      gauge.inc();
      gauge.dec();

      const output = await mod.metricsRegistry.metrics();

      expect(output).toContain("phaseflow_active_users 10");
    });
  });

  describe("metrics format", () => {
    it("produces valid Prometheus text format", async () => {
      const mod = await loadMetrics();
      mod.garminSyncTotal.inc({ status: "success" });
      mod.emailSentTotal.inc({ type: "daily" });

      const output = await mod.metricsRegistry.metrics();

      // TYPE and HELP comment lines must accompany the custom metrics.
      expect(output).toContain("# TYPE phaseflow_garmin_sync_total counter");
      expect(output).toContain("# HELP phaseflow_garmin_sync_total");
      expect(output).toContain("# TYPE phaseflow_email_sent_total counter");
      expect(output).toContain("# HELP phaseflow_email_sent_total");
    });

    it("returns content type for Prometheus", async () => {
      const mod = await loadMetrics();

      expect(mod.metricsRegistry.contentType).toBe(
        "text/plain; version=0.0.4; charset=utf-8",
      );
    });
  });
});
|
||||
49
src/lib/metrics.ts
Normal file
49
src/lib/metrics.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
// ABOUTME: Prometheus metrics collection module for production monitoring.
|
||||
// ABOUTME: Exposes counters, gauges, and histograms for Garmin sync, email, and decision engine.
|
||||
|
||||
import * as promClient from "prom-client";
|
||||
|
||||
/**
 * Application-owned Prometheus registry. Every metric defined in this module
 * is attached to it rather than to prom-client's global default registry.
 */
export const metricsRegistry = new promClient.Registry();

// Registration target shared by all custom metrics declared below.
const appRegisters = [metricsRegistry];

// Attach prom-client's default Node.js collectors (heap, event loop, etc.)
// to the application registry.
promClient.collectDefaultMetrics({ register: metricsRegistry });

/** Counter of Garmin sync operations, partitioned by the `status` label. */
export const garminSyncTotal = new promClient.Counter({
  registers: appRegisters,
  name: "phaseflow_garmin_sync_total",
  help: "Total number of Garmin sync operations",
  labelNames: ["status"] as const,
});

/** Histogram of Garmin sync durations, observed in seconds. */
export const garminSyncDuration = new promClient.Histogram({
  registers: appRegisters,
  name: "phaseflow_garmin_sync_duration_seconds",
  help: "Duration of Garmin sync operations in seconds",
  buckets: [0.1, 0.5, 1, 2, 5, 10],
});

/** Counter of emails sent, partitioned by the `type` label. */
export const emailSentTotal = new promClient.Counter({
  registers: appRegisters,
  name: "phaseflow_email_sent_total",
  help: "Total number of emails sent",
  labelNames: ["type"] as const,
});

/** Counter of decision engine calls, partitioned by the `decision` label. */
export const decisionEngineCallsTotal = new promClient.Counter({
  registers: appRegisters,
  name: "phaseflow_decision_engine_calls_total",
  help: "Total number of decision engine calls",
  labelNames: ["decision"] as const,
});

/** Gauge holding the active user count; it has no labels. */
export const activeUsersGauge = new promClient.Gauge({
  registers: appRegisters,
  name: "phaseflow_active_users",
  help: "Number of users with activity in the last 24 hours",
});
|
||||
Reference in New Issue
Block a user