Published on

Node.js Graceful Shutdown — Draining In-Flight Requests Before Your Pod Dies

Authors

Introduction

Graceful shutdown separates reliable systems from ones that lose data on deployment. When Kubernetes sends SIGTERM, you have seconds to finish requests, close database connections, and exit cleanly. Many teams skip this step and silently drop in-flight requests on every deploy. This post covers production-grade shutdown patterns.

SIGTERM vs SIGINT Handling

SIGTERM is how production systems signal shutdown. SIGINT is Ctrl+C. Handle both.

import express from 'express';
import http from 'http';

const app = express();
const server = http.createServer(app);

// Flipped on the first shutdown signal; read by the request-rejecting middleware.
let isShuttingDown = false;

/**
 * Shared graceful-shutdown path for SIGTERM (production) and SIGINT (Ctrl+C).
 * Stops accepting new connections, lets in-flight requests drain, runs
 * cleanup, then exits. A hard timeout guarantees the process exits even if
 * a connection never closes.
 */
function shutdown(signal: string): void {
  console.log(`${signal} received, initiating graceful shutdown...`);
  isShuttingDown = true;

  // Stop accepting new connections; the callback fires once existing
  // connections have drained. cleanup() is async, so chain explicitly
  // rather than passing an async callback whose promise is dropped.
  server.close(() => {
    console.log('HTTP server closed');
    void cleanup().then(() => process.exit(0));
  });

  // Safety net: force exit if draining takes too long.
  setTimeout(() => {
    console.error('Shutdown timeout, forcing exit');
    process.exit(1);
  }, 30000); // 30 second timeout
}

// One implementation for both signals — the previous copy-paste handlers
// had already drifted (different log lines).
process.on('SIGTERM', () => shutdown('SIGTERM'));
process.on('SIGINT', () => shutdown('SIGINT'));

// During shutdown, reject new requests arriving on already-open
// (keep-alive) connections that server.close() cannot refuse.
app.use((req, res, next) => {
  if (isShuttingDown) {
    res.status(503).json({ error: 'Server shutting down' });
  } else {
    next();
  }
});

// Close database connections, caches, etc. before exiting.
async function cleanup(): Promise<void> {
  console.log('Cleaning up resources...');
  // Close database, caches, etc.
}

server.listen(3000);

HTTP Server close() and In-Flight Request Tracking

server.close() stops accepting new connections but lets existing ones drain. Track them.

import express from 'express';
import http from 'http';
import pg from 'pg';

const app = express();
const server = http.createServer(app);

// Express request augmented with a per-request tracking id.
interface RequestWithId extends express.Request {
  id?: string;
}

// All requests currently being processed, keyed by tracking id.
const inFlightRequests = new Map<string, express.Request>();
let requestId = 0;

// Register every request on entry; deregister on 'finish' (response fully
// sent) or 'close' (client disconnected before the response completed).
app.use((req: RequestWithId, res, next) => {
  req.id = String(requestId++);
  inFlightRequests.set(req.id, req);

  const untrack = (): void => {
    inFlightRequests.delete(req.id!);
  };
  res.on('finish', untrack);
  res.on('close', untrack);

  next();
});

// Example endpoint that takes time
app.post('/process', async (req, res) => {
  try {
    // Simulate long operation
    await new Promise((resolve) => setTimeout(resolve, 5000));
    res.json({ status: 'completed' });
  } catch (err) {
    res.status(500).json({ error: 'Processing failed' });
  }
});

process.on('SIGTERM', async () => {
  console.log('SIGTERM: stopping new connections');

  // Records whether we hit the drain deadline, so the close callback can
  // exit non-zero instead of reporting a clean shutdown after force-killing
  // requests (the original always exited 0 here, masking the timeout).
  let forced = false;

  // Stop accepting new connections; the callback fires only after every
  // remaining socket has closed.
  server.close(async () => {
    console.log('Server closed');
    await cleanup();
    process.exit(forced ? 1 : 0);
  });

  // Wait for existing requests to finish
  const shutdownTimeout = 30000;
  const startTime = Date.now();

  while (inFlightRequests.size > 0) {
    if (Date.now() - startTime > shutdownTimeout) {
      console.error(
        `Shutdown timeout: ${inFlightRequests.size} requests still in-flight`
      );
      forced = true;

      // Destroy the remaining sockets so server.close() can complete.
      for (const [id, req] of inFlightRequests) {
        console.warn(`Force-closing request ${id}`);
        req.socket.destroy();
      }

      break;
    }

    console.log(
      `Waiting for ${inFlightRequests.size} requests to complete...`
    );
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }
});

async function cleanup(): Promise<void> {
  console.log('Cleanup complete');
}

server.listen(3000);

Database Connection Pool Draining

Close all connections to databases, caches, and external services.

import pg from 'pg';
import Redis from 'ioredis';

// PostgreSQL connection pool
const pool = new pg.Pool({
  host: 'localhost',
  port: 5432,
  database: 'mydb',
  max: 20, // Connection limit
  idleTimeoutMillis: 30000,
});

// Redis client
const redis = new Redis('redis://localhost:6379');

/**
 * Close every outbound connection (PostgreSQL, Redis) before process exit.
 *
 * pool.end() returns a promise that resolves only after all checked-out
 * clients are returned and every connection is closed, so it must be
 * awaited — the original fired it and then polled the private
 * `pool._clients` field, which is both a floating promise and a dependency
 * on pg internals. A Promise.race bounds the drain at 10s so a stuck
 * query cannot block shutdown forever.
 */
async function drainConnections(): Promise<void> {
  console.log('Draining database connections...');

  // PostgreSQL: end() rejects new checkouts immediately and resolves when
  // in-flight queries finish and their connections close.
  try {
    const drainDeadline = new Promise<never>((_, reject) =>
      setTimeout(
        () => reject(new Error('Query drain timeout exceeded')),
        10000
      )
    );
    await Promise.race([pool.end(), drainDeadline]);
    console.log('PostgreSQL pool closed');
  } catch (err) {
    console.error('Error closing pool:', err);
  }

  // Redis: quit() sends QUIT and waits for pending replies to flush.
  try {
    await redis.quit();
    console.log('Redis client closed');
  } catch (err) {
    console.error('Error closing Redis:', err);
  }
}

process.on('SIGTERM', async () => {
  console.log('Graceful shutdown initiated');

  await drainConnections();

  console.log('All connections closed');
  process.exit(0);
});

process.on('SIGTERM', async () => {
  console.log('Graceful shutdown initiated');

  await drainConnections();

  console.log('All connections closed');
  process.exit(0);
});

Message Queue Consumer Shutdown

For services consuming from Kafka, RabbitMQ, or SQS: finish current job, stop consuming, exit.

import amqplib from 'amqplib';

let connection: amqplib.Connection | null = null;
let channel: amqplib.Channel | null = null;
let consumerTag: string | null = null; // needed to cancel delivery on shutdown
let currentJob: any = null; // job currently being processed; null when idle
let isShuttingDown = false;

/**
 * Connect to RabbitMQ and start consuming jobs one at a time.
 *
 * prefetch(1) makes the broker deliver at most one unacknowledged message,
 * so the single `currentJob` variable accurately reflects the one in-flight
 * job — without it the broker pushes many messages at once and this
 * tracking scheme silently loses jobs during shutdown.
 */
async function startConsumer(): Promise<void> {
  connection = await amqplib.connect('amqp://localhost');
  channel = await connection.createChannel();

  await channel.assertQueue('jobs', { durable: true });
  await channel.prefetch(1);

  // Consume messages; keep the consumerTag so shutdown() can cancel.
  const consumer = await channel.consume('jobs', async (msg) => {
    if (!msg) return;

    if (isShuttingDown) {
      // Requeue anything delivered in the race window before cancel() lands.
      channel?.nack(msg, false, true);
      return;
    }

    try {
      currentJob = JSON.parse(msg.content.toString());
      console.log('Processing job:', currentJob.id);

      // Do the work
      await processJob(currentJob);

      // Acknowledge after completion
      channel?.ack(msg);
      currentJob = null;
    } catch (err) {
      console.error('Job failed:', err);
      // Negative acknowledge: dead-letter or retry
      channel?.nack(msg, false, false);
      currentJob = null;
    }
  });
  consumerTag = consumer.consumerTag;
}

async function processJob(job: any): Promise<void> {
  // Simulate work
  await new Promise((resolve) => setTimeout(resolve, 5000));
  console.log('Job completed');
}

/**
 * Stop consuming, wait (bounded at 60s) for the current job to finish,
 * then close the AMQP channel and connection.
 */
async function shutdown(): Promise<void> {
  console.log('Consumer shutdown: stopping new messages');
  isShuttingDown = true;

  // Tell the broker to stop delivering instead of relying only on the
  // nack-requeue flag — requeued messages can be redelivered straight back
  // to this still-open consumer in a tight loop.
  if (channel && consumerTag) {
    await channel.cancel(consumerTag);
  }

  // Wait for current job to finish (max 60s)
  const timeout = 60000;
  const start = Date.now();

  while (currentJob) {
    if (Date.now() - start > timeout) {
      console.error('Job timeout, forcing shutdown');
      break;
    }

    console.log('Waiting for current job to finish...');
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }

  // Close connection
  if (channel) await channel.close();
  if (connection) await connection.close();

  console.log('Consumer shutdown complete');
}

process.on('SIGTERM', async () => {
  await shutdown();
  process.exit(0);
});

startConsumer().catch(console.error);

Kubernetes terminationGracePeriodSeconds Alignment

Align your shutdown timeout with K8s termination period.

# kubernetes deployment spec
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-service
spec:
  replicas: 3
  template:
    metadata:
      labels:
        app: api
    spec:
      # Grace period: K8s waits this long after SIGTERM before sending SIGKILL
      terminationGracePeriodSeconds: 45

      containers:
      - name: api
        image: my-api:latest
        lifecycle:
          preStop:
            # Optional: delay before SIGTERM (for load balancer draining)
            exec:
              command: ["/bin/sh", "-c", "sleep 5"]

        # Readiness probe: used for the load balancing decision.
        # Must be a field of the container, not the pod spec.
        readinessProbe:
          httpGet:
            path: /readiness
            port: 3000
          initialDelaySeconds: 5
          periodSeconds: 10

Align Node.js shutdown timeout with K8s:

import express from 'express';
import http from 'http';

const app = express();
const server = http.createServer(app);

// Flipped on SIGTERM so /readiness starts failing and K8s stops routing here.
// (The previous separate `isShuttingDown` flag was never read — removed.)
let isReadyForRequests = true;

// Readiness endpoint: used by K8s
app.get('/readiness', (req, res) => {
  if (isReadyForRequests) {
    res.status(200).json({ ready: true });
  } else {
    res.status(503).json({ ready: false });
  }
});

// Health endpoint: simple liveness
app.get('/liveness', (req, res) => {
  res.status(200).json({ alive: true });
});

// Requests currently being processed. Registered after the probe routes so
// probe traffic does not hold up draining.
const inFlightRequests = new Set<http.IncomingMessage>();

app.use((req, res, next) => {
  inFlightRequests.add(req);
  res.on('finish', () => inFlightRequests.delete(req));
  res.on('close', () => inFlightRequests.delete(req));
  next();
});

process.on('SIGTERM', async () => {
  console.log('SIGTERM received');

  // Immediately fail readiness checks
  // K8s removes pod from load balancer within ~10s
  isReadyForRequests = false;

  // Wait briefly for LB to drain (K8s preStop grace)
  await new Promise((resolve) => setTimeout(resolve, 5000));

  // Now close server (no more new connections)
  server.close();

  // Drain existing requests (timeout: 30s).
  // 5s LB drain + 30s request drain = 35s, inside the 45s grace period.
  const timeout = 30000;
  const start = Date.now();

  while (inFlightRequests.size > 0) {
    if (Date.now() - start > timeout) {
      console.error('Shutdown timeout, killing remaining requests');
      break;
    }

    console.log(`Draining ${inFlightRequests.size} requests...`);
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }

  process.exit(0);
});

server.listen(3000, () => {
  console.log('Server listening on port 3000');
  console.log('terminationGracePeriodSeconds: 45s');
  console.log('Shutdown timeout: 30s (within 45s K8s limit)');
});

Health Check /readiness During Shutdown

Readiness checks should fail immediately during shutdown to trigger load-balancer draining. Liveness checks must keep passing until you exit, or Kubernetes will kill the pod mid-drain.

import express from 'express';
import http from 'http';

const app = express();
let isReadyForRequests = true;
let isHealthy = true;

// Liveness: is the process itself alive?
app.get('/liveness', (req, res) => {
  const code = isHealthy ? 200 : 503;
  res.status(code).json({ alive: isHealthy });
});

// Readiness: can we accept traffic? K8s uses this for load balancing.
app.get('/readiness', (req, res) => {
  if (!isReadyForRequests) {
    res.status(503).json({ ready: false, reason: 'Shutting down' });
    return;
  }
  res.status(200).json({ ready: true });
});

// Startup: has initialization (migrations, warmup) completed?
app.get('/startup', (req, res) => {
  res.status(200).json({ startup: true });
});

const server = http.createServer(app);

process.on('SIGTERM', async () => {
  console.log('SIGTERM: marking as not ready');

  // Fail readiness right away so the pod drops out of the load balancer.
  // NOTE(review): removal time depends on the probe's periodSeconds and
  // failureThreshold — confirm against the deployment's probe config.
  isReadyForRequests = false;

  // Give the load balancer time to drain before refusing connections.
  await new Promise((resolve) => setTimeout(resolve, 10000));

  // Stop accepting connections; exit cleanly once existing ones finish.
  server.close(() => {
    isHealthy = false;
    process.exit(0);
  });

  // Hard deadline in case connections never drain.
  setTimeout(() => {
    console.error('Forced exit after timeout');
    isHealthy = false;
    process.exit(1);
  }, 30000);
});

server.listen(3000);

Testing Shutdown with Kill Signals

Write tests to verify graceful shutdown works.

import { spawn, ChildProcess } from 'child_process';

async function testGracefulShutdown(): Promise<void> {
  // Start server
  const server: ChildProcess = spawn('node', ['server.ts']);

  const logs: string[] = [];
  server.stdout?.on('data', (data) => {
    const line = data.toString();
    logs.push(line);
    console.log(line);
  });

  // Wait for server to start
  await new Promise((resolve) => setTimeout(resolve, 1000));

  // Send in-flight request
  const requestPromise = fetch('http://localhost:3000/process');

  // Send SIGTERM after brief delay
  setTimeout(() => {
    console.log('Sending SIGTERM...');
    server.kill('SIGTERM');
  }, 500);

  // Request should complete
  try {
    const response = await requestPromise;
    console.log('Request completed:', response.status);
    expect(response.status).toBe(200);
  } catch (err) {
    console.error('Request failed:', err);
    throw err;
  }

  // Wait for process to exit
  const exitCode = await new Promise<number>((resolve) => {
    server.on('exit', (code) => resolve(code || 0));
  });

  expect(exitCode).toBe(0);

  // Verify graceful messages in logs
  const shutdownLog = logs.join('');
  expect(shutdownLog).toContain('SIGTERM received');
  expect(shutdownLog).toContain('graceful shutdown');
  expect(shutdownLog).toContain('Server closed');
}

// Run test
testGracefulShutdown().catch(console.error);

Shutdown Timeout as Safety Net

Always include a forced exit timeout.

import express from 'express';
import http from 'http';

const app = express();
const server = http.createServer(app);

const GRACEFUL_SHUTDOWN_TIMEOUT = 30000; // 30 seconds
const FORCED_EXIT_TIMEOUT = 45000; // 45 seconds (for K8s terminationGracePeriodSeconds)

process.on('SIGTERM', async () => {
  console.log('SIGTERM: initiating shutdown');

  // True once server.close() has completed (listener stopped and all
  // connections drained).
  let closed = false;

  server.close(() => {
    console.log('Server closed');
    closed = true;
  });

  // Graceful shutdown timer: warn that draining is slow. Do NOT call
  // server.close() again here — a second close() on an already-closing
  // server emits an unhandled ERR_SERVER_NOT_RUNNING error.
  const gracefulTimer = setTimeout(() => {
    console.warn(
      `Graceful shutdown timeout (${GRACEFUL_SHUTDOWN_TIMEOUT}ms)`
    );
  }, GRACEFUL_SHUTDOWN_TIMEOUT);

  // Forced exit timer (absolute deadline)
  const forcedTimer = setTimeout(() => {
    console.error(
      `Forced exit timeout (${FORCED_EXIT_TIMEOUT}ms), killing process`
    );
    process.exit(1);
  }, FORCED_EXIT_TIMEOUT);

  // Clean exit clears both timers
  const cleanExit = (): void => {
    clearTimeout(gracefulTimer);
    clearTimeout(forcedTimer);
    process.exit(0);
  };

  // Poll until close() has finished and no sockets remain.
  // server.connections is deprecated; use the async getConnections().
  const checkShutdown = setInterval(() => {
    server.getConnections((err, count) => {
      if (closed && !err && count === 0) {
        clearInterval(checkShutdown);
        cleanExit();
      }
    });
  }, 1000);
});

server.listen(3000);

Checklist

  • ✓ Handle both SIGTERM (production) and SIGINT (Ctrl+C)
  • ✓ Call server.close() to stop accepting new connections
  • ✓ Track in-flight requests and wait for completion
  • ✓ Drain database connection pools before exit
  • ✓ Stop consuming from message queues during shutdown
  • ✓ Fail readiness health checks immediately when shutting down
  • ✓ Set shutdown timeout: at least 5-10 seconds less than K8s terminationGracePeriodSeconds
  • ✓ Test shutdown with kill signals to verify in-flight requests complete
  • ✓ Include forced exit timeout as safety net against hangs

Conclusion

Graceful shutdown is the difference between zero data loss on deployment and silent failures. Align with Kubernetes, drain your resources, and always test shutdown. Every second matters when you're racing the clock.