Skip to main content

Overview

The health check module monitors system resources (disk, memory, process count) and validates that critical environment variables are configured. Location: src/ops/health_check.js

Function

runHealthCheck()

Execute comprehensive health checks. Location: src/ops/health_check.js:30
function runHealthCheck()
status
string
Overall status: ok, warning, or error
timestamp
string
ISO timestamp of check execution
checks
object[]
Array of individual check results

Check Categories

1. Secret Check

Location: src/ops/health_check.js:36
// Required secrets: an unset or blank value is recorded as a warning.
const criticalSecrets = ['FEISHU_APP_ID', 'FEISHU_APP_SECRET'];
for (const key of criticalSecrets) {
  const value = process.env[key];
  const isPresent = Boolean(value) && value.trim() !== '';
  if (isPresent) {
    checks.push({ name: `env:${key}`, ok: true, status: 'present' });
  } else {
    checks.push({
      name: `env:${key}`,
      ok: false,
      status: 'missing',
      severity: 'warning', // Downgraded to prevent restart loops
    });
    warnings++;
  }
}

// Optional secrets: an unset or blank value is recorded as informational
// only and does not touch the warning counter.
const optionalSecrets = ['CLAWHUB_TOKEN', 'OPENAI_API_KEY'];
for (const key of optionalSecrets) {
  const value = process.env[key];
  const isPresent = Boolean(value) && value.trim() !== '';
  if (isPresent) {
    checks.push({ name: `env:${key}`, ok: true, status: 'present' });
  } else {
    checks.push({
      name: `env:${key}`,
      ok: false,
      status: 'missing',
      severity: 'info',
    });
  }
}
Critical Secrets (a missing value is reported with severity `warning`, not `critical`, to prevent restart loops):
  • FEISHU_APP_ID - Feishu application ID
  • FEISHU_APP_SECRET - Feishu application secret
Optional Secrets (a missing value is reported with severity `info` and does not change the overall status):
  • CLAWHUB_TOKEN - ClawHub API token for ecosystem integration
  • OPENAI_API_KEY - OpenAI API key for LLM review

2. Disk Space Check

Location: src/ops/health_check.js:6
/**
 * Report how full a filesystem is.
 *
 * Uses `fs.statfsSync` when the runtime provides it; otherwise shells out to
 * `df` and parses its last output line.
 *
 * @param {string} [mount] - Path on the filesystem to inspect. Any falsy
 *   value (undefined, null, '') means '/'.
 * @returns {{pct: number, freeMb: number}} `pct` is the rounded percentage of
 *   space in use; `freeMb` is the rounded free space in MiB.
 */
function getDiskUsage(mount) {
  const target = mount || '/';

  // Use Node 18+ statfs if available
  if (typeof fs.statfsSync === 'function') {
    const stats = fs.statfsSync(target);
    const total = stats.blocks * stats.bsize;
    const free = stats.bavail * stats.bsize; // Available to unprivileged users
    const used = total - free;
    return {
      // A filesystem that reports zero blocks would otherwise produce NaN.
      pct: total > 0 ? Math.round((used / total) * 100) : 0,
      freeMb: Math.round(free / 1024 / 1024),
    };
  }

  // Fallback to df command.
  // Single-quote the path (escaping embedded quotes) so characters such as
  // ", $ or backticks in it cannot be interpreted by the shell; `--` stops a
  // path beginning with '-' from being read as an option.
  const quotedTarget = `'${String(target).replace(/'/g, "'\\''")}'`;
  // -k forces 1024-byte units; plain `df -P` reports 512-byte blocks on some
  // platforms, which would double the computed free space.
  const [pctField, availKb] = execSync(
    `df -Pk -- ${quotedTarget} | tail -1 | awk '{print $5, $4}'`
  )
    .toString()
    .trim()
    .split(' ');
  return {
    pct: Number.parseInt(pctField.replace('%', ''), 10),
    freeMb: Math.round(Number.parseInt(availKb, 10) / 1024),
  };
}
Thresholds: Location: src/ops/health_check.js:56
// Grade root-filesystem usage: above 90% is critical, above 80% is a
// warning, anything else passes.
const disk = getDiskUsage('/');
const diskReport = {
  name: 'disk_space',
  ok: true,
  status: `${disk.pct}% used`,
};
if (disk.pct > 90) {
  diskReport.ok = false;
  diskReport.severity = 'critical';
  criticalErrors++;
} else if (disk.pct > 80) {
  diskReport.ok = false;
  diskReport.severity = 'warning';
  warnings++;
}
checks.push(diskReport);
  • Critical: >90% used
  • Warning: >80% used
  • OK: ≤80% used

3. Memory Check

Location: src/ops/health_check.js:67
// Memory usage is derived from the OS-reported free and total byte counts;
// only usage above 95% is flagged, and it is flagged as critical.
const memFree = os.freemem();
const memTotal = os.totalmem();
const memUsed = memTotal - memFree;
const memPct = Math.round((memUsed / memTotal) * 100);
const memIsCritical = memPct > 95;

checks.push(
  memIsCritical
    ? { name: 'memory', ok: false, status: `${memPct}% used`, severity: 'critical' }
    : { name: 'memory', ok: true, status: `${memPct}% used` }
);
if (memIsCritical) {
  criticalErrors++;
}
Threshold:
  • Critical: >95% used
  • OK: ≤95% used

4. Process Count Check (Linux only)

Location: src/ops/health_check.js:80
// Count running processes by listing the all-digit (PID) entries in /proc.
// Only attempted when the platform is Linux.
if (process.platform === 'linux') {
  try {
    const pids = fs.readdirSync('/proc').filter(f => /^\d+$/.test(f));
    if (pids.length > 2000) {
      checks.push({
        name: 'process_count',
        ok: false,
        status: `${pids.length} procs`,
        severity: 'warning'
      });
      warnings++;
    } else {
      checks.push({
        name: 'process_count',
        ok: true,
        status: `${pids.length} procs`
      });
    }
  } catch (err) {
    // Best-effort check: a failure to read /proc must not fail the whole run,
    // but it is recorded (info level, no counter change) instead of being
    // silently dropped from the results.
    checks.push({
      name: 'process_count',
      ok: false,
      status: `unavailable: ${err && err.message ? err.message : String(err)}`,
      severity: 'info'
    });
  }
}
Threshold:
  • Warning: >2000 processes
  • OK: ≤2000 processes

Status Determination

Location: src/ops/health_check.js:94
let status = 'ok';
if (criticalErrors > 0) status = 'error';
else if (warnings > 0) status = 'warning';

return {
  status,
  timestamp: new Date().toISOString(),
  checks
};

Example Usage

const { runHealthCheck } = require('./src/ops/health_check');

const result = runHealthCheck();

console.log('Health status:', result.status);
console.log('Timestamp:', result.timestamp);

// Print one line per check that did not pass.
result.checks
  .filter((check) => !check.ok)
  .forEach(({ severity, name, status }) => {
    console.error(`[${severity}] ${name}: ${status}`);
  });

// Example output:
// {
//   status: 'warning',
//   timestamp: '2026-03-09T12:34:56.789Z',
//   checks: [
//     { name: 'env:FEISHU_APP_ID', ok: true, status: 'present' },
//     { name: 'env:FEISHU_APP_SECRET', ok: true, status: 'present' },
//     { name: 'env:CLAWHUB_TOKEN', ok: false, status: 'missing', severity: 'info' },
//     { name: 'env:OPENAI_API_KEY', ok: true, status: 'present' },
//     { name: 'disk_space', ok: false, status: '85% used', severity: 'warning' },
//     { name: 'memory', ok: true, status: '62% used' },
//     { name: 'process_count', ok: true, status: '1247 procs' }
//   ]
// }

Integration with Lifecycle

const lifecycle = require('./src/ops/lifecycle');
const { runHealthCheck } = require('./src/ops/health_check');

/**
 * Start a periodic health monitor that runs every 5 minutes.
 *
 * On an 'error' status it logs and calls `lifecycle.restart()`; on a
 * 'warning' status it logs the checks that did not pass.
 *
 * @returns {object} The handle returned by `setInterval`, so the caller can
 *   stop the monitor with `clearInterval`.
 */
function monitorLoop() {
  const intervalMs = 5 * 60 * 1000; // Check every 5 minutes

  return setInterval(() => {
    let health;
    try {
      health = runHealthCheck();
    } catch (err) {
      // An exception thrown inside a timer callback would otherwise take the
      // whole process down; log it and try again on the next tick.
      console.error('[Monitor] Health check failed to run:', err);
      return;
    }

    if (health.status === 'error') {
      console.error('[Monitor] Critical health issues detected');
      lifecycle.restart();
    } else if (health.status === 'warning') {
      console.warn('[Monitor] Health warnings:', health.checks.filter(c => !c.ok));
    }
  }, intervalMs);
}

monitorLoop();

Build docs developers (and LLMs) love