Published on

Node.js Cluster Mode — Using All CPU Cores Without Kubernetes

Authors

Introduction

A single Node.js process uses one CPU core. Cluster mode forks multiple processes to use all cores. This post covers cluster.fork(), PM2 cluster mode, sticky sessions for WebSockets, and the tradeoffs with worker threads and Kubernetes.

cluster.fork() and IPC (Inter-Process Communication)

The cluster module forks child processes sharing a single server port.

import cluster from 'cluster';
import os from 'os';
import express from 'express';
import http from 'http';

const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  // Primary process: fork one worker per CPU core.
  console.log(`Primary ${process.pid} starting ${numCPUs} workers`);

  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  // Auto-restart any worker that dies (crash or kill).
  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} died (${signal || code})`);

    cluster.fork();
  });

  // IPC from workers to primary. The cluster-level 'message' event fires
  // for EVERY worker — including workers forked later by the auto-restart
  // above. (Attaching per-worker listeners once at startup would silently
  // miss restarted workers.)
  cluster.on('message', (worker, msg) => {
    console.log(`Primary received: ${JSON.stringify(msg)}`);

    // Broadcast to all currently-alive workers.
    for (const w of Object.values(cluster.workers!)) {
      w?.send({ from: 'primary', data: msg });
    }
  });
} else {
  // Worker process: each worker runs its own HTTP server; the cluster
  // module shares the single listening port between them.
  const app = express();
  const server = http.createServer(app);

  app.get('/data', (req, res) => {
    res.json({
      pid: process.pid,
      timestamp: Date.now(),
    });
  });

  // Send a message up to the primary. process.send only exists in forked
  // child processes, so guard with optional chaining.
  app.get('/notify', (req, res) => {
    process.send?.({ event: 'user-action', data: req.query });
    res.json({ sent: true });
  });

  // Messages pushed down from the primary.
  process.on('message', (msg) => {
    console.log(`Worker ${process.pid} received:`, msg);
  });

  server.listen(3000, () => {
    console.log(`Worker ${process.pid} listening on port 3000`);
  });
}

Sticky Sessions for WebSocket with Cluster

A WebSocket is one long-lived TCP connection, so every request from a given client — the initial upgrade handshake and any reconnects — must reach the same worker. Sticky sessions achieve this by hashing the client IP to pick a worker deterministically.

import cluster from 'cluster';
import os from 'os';
import express from 'express';
import http from 'http';
import WebSocket from 'ws';
import crypto from 'crypto';

const numCPUs = os.cpus().length;
const workerPorts: Record<number, number> = {};

if (cluster.isPrimary) {
  // Primary: load balancer with sticky sessions
  const app = express();
  const server = http.createServer(app);
  const http_proxy = require('http-proxy');
  const proxy = new http_proxy.createProxyServer({
    ws: true, // Enable WebSocket proxying
  });

  // Start workers on different ports
  for (let i = 0; i < numCPUs; i++) {
    const worker = cluster.fork();
    workerPorts[worker.process.pid!] = 3001 + i;
  }

  // Sticky session: hash client IP to worker
  function getWorkerPort(clientIp: string): number {
    const hash = crypto.createHash('md5').update(clientIp).digest('hex');
    const hashNum = parseInt(hash, 16);
    const ports = Object.values(workerPorts);
    return ports[hashNum % ports.length];
  }

  // Route HTTP requests with sticky session
  app.use((req, res, next) => {
    const clientIp = req.ip || '127.0.0.1';
    const workerPort = getWorkerPort(clientIp);

    proxy.web(req, res, { target: `http://localhost:${workerPort}` });
  });

  // Route WebSocket with sticky session
  server.on('upgrade', (req, socket, head) => {
    const clientIp = req.socket.remoteAddress || '127.0.0.1';
    const workerPort = getWorkerPort(clientIp);

    proxy.ws(req, socket, head, { target: `http://localhost:${workerPort}` });
  });

  server.listen(3000, () => {
    console.log('Load balancer listening on port 3000');
  });
} else {
  // Workers: handle connections
  const app = express();
  const server = http.createServer(app);
  const wss = new WebSocket.Server({ server });

  const connections = new Map<string, WebSocket>();

  wss.on('connection', (ws, req) => {
    const clientIp = req.socket.remoteAddress!;
    console.log(`${process.pid}: Client ${clientIp} connected`);

    connections.set(clientIp, ws);

    ws.on('message', (data) => {
      console.log(`${process.pid}: Received from ${clientIp}:`, data);
      ws.send(JSON.stringify({ echo: data, pid: process.pid }));
    });

    ws.on('close', () => {
      connections.delete(clientIp);
    });
  });

  const port = 3001 + (Array.from(Object.values(workerPorts)).indexOf(3001) || 0);
  server.listen(port, () => {
    console.log(`Worker ${process.pid} listening on port ${port}`);
  });
}

PM2 Cluster Mode vs Node.js Cluster Module

PM2 provides production-grade cluster management and is simpler than rolling your own with the cluster module.

# Start in cluster mode (uses all CPUs)
pm2 start server.ts -i max

# Start with specific count
pm2 start server.ts -i 4

# Monitor
pm2 monit

# Logs across all instances
pm2 logs

# Graceful reload (zero-downtime)
pm2 reload server

# Restart
pm2 restart server

# Stop
pm2 stop server

# Delete
pm2 delete server
// server.ts: a plain Express app — PM2's cluster mode supplies the forking,
// so the application itself needs no cluster code at all.
import express from 'express';

const app = express();

// Echo the worker PID so you can watch requests spread across instances.
const dataHandler = (req, res) => res.json({ pid: process.pid });
app.get('/data', dataHandler);

app.listen(3000, () =>
  console.log(`Process ${process.pid} listening on port 3000`)
);

PM2 ecosystem config:

// ecosystem.config.js
// PM2 process definition for the API server. Load with:
//   pm2 start ecosystem.config.js
module.exports = {
  apps: [
    {
      name: 'api-server',
      script: './server.ts',
      instances: 'max', // Use all CPU cores
      exec_mode: 'cluster', // PM2's built-in cluster mode (vs the default 'fork')
      max_memory_restart: '500M', // Restart a worker whose memory exceeds 500 MB
      env: {
        NODE_ENV: 'production',
      },
      error_file: './logs/error.log', // stderr from all instances
      out_file: './logs/out.log', // stdout from all instances
      log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
      merge_logs: true, // One combined log stream instead of per-instance files
      autorestart: true, // Bring crashed workers back automatically
      max_restarts: 10, // Stop retrying after 10 unstable restarts
      min_uptime: '10s', // A crash before 10s of uptime counts as unstable
      listen_timeout: 3000, // ms to wait for the app to start listening on reload
      kill_timeout: 5000, // ms of grace after SIGTERM before SIGKILL
    },
  ],
};

Start: pm2 start ecosystem.config.js

Zero-Downtime Reload with pm2 reload

Reload workers one-by-one without dropping connections.

# Rolling restart: each worker restarted sequentially
pm2 reload server

# Shows which worker is reloading
pm2 monit

The reload process:

  1. PM2 sends SIGTERM to a worker
  2. Worker gracefully shuts down
  3. New worker started on same port
  4. Load balancer switches traffic
  5. Repeat for next worker

Ensure your app handles SIGTERM:

import express from 'express';
import http from 'http';

const app = express();
const server = http.createServer(app);

// Number of requests currently being processed; drained on shutdown.
let inFlight = 0;

app.use((req, res, next) => {
  inFlight++;
  // 'close' fires exactly once per response, whether it completed normally
  // OR the client aborted mid-request. 'finish' alone would leak the
  // counter on aborted requests and stall the shutdown drain loop below.
  res.on('close', () => {
    inFlight--;
  });
  next();
});

app.get('/data', async (req, res) => {
  // Simulate 1s of work per request
  await new Promise((resolve) => setTimeout(resolve, 1000));
  res.json({ pid: process.pid });
});

// PM2 (and Kubernetes) send SIGTERM before force-killing: drain gracefully.
process.on('SIGTERM', async () => {
  console.log('SIGTERM: shutting down gracefully');

  // Stop accepting new connections; in-flight requests keep running.
  server.close();

  // Poll until all in-flight requests finish, capped at 30s.
  const timeout = 30000;
  const start = Date.now();

  while (inFlight > 0) {
    if (Date.now() - start > timeout) {
      console.error('Shutdown timeout');
      break;
    }

    console.log(`Waiting for ${inFlight} requests...`);
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }

  process.exit(0);
});

server.listen(3000, () => {
  console.log(`Process ${process.pid} listening`);
});

Shared State Problem and Redis Solution

Cluster workers are separate OS processes and share no memory, so any state that must be visible to every worker belongs in an external store such as Redis.

import cluster from 'cluster';
import os from 'os';
import express from 'express';
import Redis from 'ioredis';

if (cluster.isPrimary) {
  // The primary only forks: it must not open a Redis connection of its own —
  // an unused connection here would sit idle and keep the primary alive.
  for (let i = 0; i < os.cpus().length; i++) {
    cluster.fork();
  }
} else {
  // One Redis connection per worker. Cluster workers are separate processes
  // and share no memory, so all cross-worker state lives in Redis.
  const redis = new Redis('redis://localhost:6379');
  const app = express();

  // Required by the POST /session handler below: without a body parser,
  // req.body is undefined and reading .userId throws.
  app.use(express.json());

  // Atomically increment a counter shared across all workers.
  app.get('/increment', async (req, res) => {
    const count = await redis.incr('counter');
    res.json({ count, pid: process.pid });
  });

  // Read the shared counter — any worker sees the same value.
  app.get('/count', async (req, res) => {
    const count = await redis.get('counter');
    res.json({ count: parseInt(count || '0', 10), pid: process.pid });
  });

  // Session storage (user -> worker mapping for sticky sessions)
  app.post('/session', async (req, res) => {
    const userId = req.body.userId;
    const workerId = process.pid;

    // Store session in Redis with a 1-hour TTL
    await redis.setex(`session:${userId}`, 3600, workerId);

    res.json({ userId, workerId });
  });

  app.get('/session/:userId', async (req, res) => {
    const workerId = await redis.get(`session:${req.params.userId}`);
    res.json({
      userId: req.params.userId,
      workerPid: workerId,
      currentPid: process.pid,
    });
  });

  app.listen(3000, () => {
    console.log(`Worker ${process.pid} listening`);
  });
}

Cluster vs Worker Threads Decision Matrix

When to use cluster vs worker threads:

// Use CLUSTER for:
// - Scaling CPU-bound HTTP APIs (multiple cores)
// - Process isolation (crash one worker, others survive)
// - Deploying on bare metal servers
// - Long-running HTTP services

// Use WORKER THREADS for:
// - CPU-bound tasks (image processing, crypto) within request
// - Shared memory (zero-copy data transfers)
// - Lower overhead than forking
// - Background job workers within same process

// Use KUBERNETES for:
// - Container orchestration (better than PM2)
// - Multi-machine scaling
// - Auto-recovery, rolling updates
// - Production deployments

// Quick lookup: deployment scenario -> recommended scaling approach.
const decisionTree = {
  "Single machine, all CPUs": "cluster (PM2)",
  "CPU-intensive request tasks": "worker threads",
  "HTTP API on bare metal": "cluster (PM2)",
  "Containerized deployment": "Kubernetes (one replica per pod)",
  "Image/video processing": "worker threads", // CPU-bound, offload from event loop
  "Long-running background jobs": "cluster or separate service",
  "WebSocket server": "cluster with sticky sessions",
  "Crypto/hashing on request path": "worker threads (async version)",
};

CPU Affinity and Monitoring

Node.js has no built-in CPU-affinity API, so true core pinning requires an external tool (e.g. `taskset` on Linux). The example below does the portable part: fork one worker per core and monitor them.

import cluster from 'cluster';
import os from 'os';
import express from 'express';

const coreList = os.cpus();
const workerCount = coreList.length;

if (cluster.isPrimary) {
  // Fork one worker per core.
  for (let core = 0; core < workerCount; core++) {
    const w = cluster.fork();
    console.log(`Forked worker ${w.process.pid} (CPU ${core})`);
  }

  // Periodic liveness report for every registered worker.
  setInterval(() => {
    console.log('=== Cluster Metrics ===');
    Object.entries(cluster.workers!).forEach(([id, worker]) => {
      if (worker) {
        console.log(`Worker ${id} (PID ${worker.process.pid}): active`);
      }
    });
  }, 30000);

  // Replace any worker that exits, for whatever reason.
  cluster.on('exit', (worker, code, signal) => {
    console.log(
      `Worker ${worker.process.pid} died (${signal || code}), restarting...`
    );
    cluster.fork();
  });
} else {
  const app = express();

  // Lightweight per-worker health endpoint.
  app.get('/metrics', (req, res) => {
    const { heapUsed } = process.memoryUsage();
    res.json({
      pid: process.pid,
      heapUsed: `${(heapUsed / 1024 / 1024).toFixed(2)}MB`,
      uptime: process.uptime(),
    });
  });

  app.listen(3000);
}

Monitoring with PM2:

# Terminal 1: Start with PM2
pm2 start server.ts -i max

# Terminal 2: Monitor
pm2 monit

# Shows CPU, memory, PID of each worker

Checklist

  • ✓ Use PM2 cluster mode in production (simpler than manual clustering)
  • ✓ Implement sticky sessions if using WebSockets (hash client IP to worker)
  • ✓ Handle SIGTERM for graceful zero-downtime reloads
  • ✓ Use Redis for shared state between workers
  • ✓ Choose cluster vs worker threads based on workload
  • ✓ Monitor worker memory and restart on threshold
  • ✓ Set max_memory_restart to prevent unbounded growth
  • ✓ Use Kubernetes if deploying containers (simpler than PM2 clustering)

Conclusion

Cluster mode unlocks multi-core scalability on single machines. PM2 makes it production-ready with zero-downtime reloads and auto-restart. Match your deployment model—PM2 for bare metal, Kubernetes for containers. Master both patterns and you'll scale Node.js reliably across architectures.