In development, an error crashes your bot and you simply restart it. In production, with the bot serving 10,000 users, crashes mean downtime, lost data, and frustrated communities.
Crash Prevention
Global Error Handlers
/**
 * Logs a promise rejection that nothing else handled.
 * The handler only logs; it deliberately does NOT call process.exit(),
 * so the bot keeps running.
 *
 * @param {unknown} reason - Value the promise was rejected with.
 */
function logUnhandledRejection(reason) {
  // Hook point: also forward to a log file or an external service here.
  console.error('[FATAL] Unhandled rejection:', reason);
}

// Unhandled promise rejections (most common crash cause)
process.on('unhandledRejection', logUnhandledRejection);
/**
 * Last-resort handler for exceptions that no try/catch intercepted.
 *
 * Logs the error and then terminates the process with exit code 1.
 * The Node.js documentation states that it is not safe to resume normal
 * operation after 'uncaughtException', because application state is
 * undefined at that point. Exiting lets the process manager (for example
 * PM2 or systemd) start a clean instance instead of running on in a
 * possibly corrupted state.
 *
 * @param {Error} error - The exception that was not caught.
 */
process.on('uncaughtException', (error) => {
  console.error('[FATAL] Uncaught exception:', error);
  // Only synchronous cleanup belongs here; anything asynchronous may never
  // complete because the process exits immediately afterwards.
  process.exit(1);
});
/**
 * Logs an error emitted by the Discord.js client.
 *
 * @param {Error} err - Error passed to the client's 'error' event.
 */
function logClientError(err) {
  console.error('[CLIENT] Error:', err);
}

// Discord.js errors
client.on('error', logClientError);
/**
 * Logs an error reported for a single shard, tagged with the shard id.
 *
 * @param {Error} err - Error passed to the 'shardError' event.
 * @param {number} id - Id of the shard the error was reported for.
 */
function logShardError(err, id) {
  console.error(`[SHARD ${id}] Error:`, err);
}

// Shard errors
client.on('shardError', logShardError);
Command Error Handling
/**
 * Runs the handler registered for a chat-input (slash) command and makes
 * sure a failing command never escapes as an unhandled rejection.
 *
 * - Interactions that are not chat-input commands are ignored.
 * - Commands missing from `commands` are ignored.
 * - If `command.execute` throws or rejects, the error is logged and the
 *   user gets a generic ephemeral message: via `followUp` when the
 *   interaction was already replied to or deferred, via `reply` otherwise.
 * - If sending that message fails as well, the failure is logged rather
 *   than silently discarded; it is never rethrown.
 */
client.on('interactionCreate', async (interaction) => {
  if (!interaction.isChatInputCommand()) return;

  const { commandName } = interaction;

  try {
    const command = commands.get(commandName);
    if (!command) return;
    await command.execute(interaction);
  } catch (error) {
    console.error(`Command error (${commandName}):`, error);

    const payload = {
      content: 'Something went wrong. Please try again.',
      ephemeral: true,
    };

    try {
      if (interaction.replied || interaction.deferred) {
        await interaction.followUp(payload);
      } else {
        await interaction.reply(payload);
      }
    } catch (notifyError) {
      // Best effort only: telling the user can itself fail, and that must
      // not crash the handler. Log it so the failure stays visible.
      console.error(`Failed to send error message (${commandName}):`, notifyError);
    }
  }
});
Structured Logging
Using Winston
const winston = require('winston');
// Format applied by the logger: add a timestamp, then serialise as JSON.
const timestampedJson = winston.format.combine(
  winston.format.timestamp(),
  winston.format.json(),
);

// Destinations, in the order they are registered on the logger.
const logDestinations = [
  // error.log is restricted to the 'error' level
  new winston.transports.File({ filename: 'error.log', level: 'error' }),
  // combined.log has no level of its own
  new winston.transports.File({ filename: 'combined.log' }),
  // Console output uses winston's "simple" format
  new winston.transports.Console({ format: winston.format.simple() }),
];

// Application-wide logger; minimum level is 'info'.
const logger = winston.createLogger({
  level: 'info',
  format: timestampedJson,
  transports: logDestinations,
});
// Usage: a short message plus a metadata object carrying the context.

// Startup, with the number of cached guilds
logger.info('Bot started', {
  guilds: client.guilds.cache.size,
});

// Rate limit hit, with the route and the retry delay
logger.warn('Rate limited', {
  route: '/api/channels',
  retryAfter: 5000,
});

// Command failure, with the command name, error message and guild
logger.error('Command failed', {
  command: 'ban',
  error: error.message,
  guild: guildId,
});
What to Log
Always log:
- Bot startup/shutdown
- Errors with context
- Command usage statistics
- API rate limits
- Shard connections/disconnections
Never log:
- User tokens or credentials
- Full message content (privacy)
- Personally identifiable information, unless strictly necessary
Log Rotation
Prevent logs from filling the disk:
const { createLogger, transports } = require('winston');
require('winston-daily-rotate-file');
// Rotation policy handed to the DailyRotateFile transport.
const rotationOptions = {
  filename: 'logs/bot-%DATE%.log', // %DATE% placeholder in the file name
  datePattern: 'YYYY-MM-DD',
  maxSize: '20m',
  maxFiles: '14d', // Keep 14 days
};

const transport = new transports.DailyRotateFile(rotationOptions);
Health Monitoring
Heartbeat
// Every 60 seconds: log a health snapshot and warn when a threshold is crossed.
setInterval(function reportHealth() {
  const snapshot = {
    uptime: process.uptime(),
    // heapUsed is reported in bytes; two divisions by 1024 give megabytes
    memory: process.memoryUsage().heapUsed / 1024 / 1024,
    guilds: client.guilds.cache.size,
    ping: client.ws.ping,
    timestamp: new Date().toISOString(),
  };

  logger.info('Health check', snapshot);

  // [condition, warning message] pairs, checked in order:
  // memory above 500 (MB), then ping above 500.
  const alerts = [
    [snapshot.memory > 500, 'High memory usage'],
    [snapshot.ping > 500, 'High latency'],
  ];

  for (const [triggered, message] of alerts) {
    if (triggered) {
      logger.warn(message, snapshot);
    }
  }
}, 60000);
Restart Recovery
After a crash and restart, detect the recovery on startup and log the time elapsed since the last recorded status entry:
/**
 * Runs when the client emits 'ready'.
 *
 * Reads the most recent row of `bot_status` (highest id). If one exists,
 * logs 'Bot recovered' with the milliseconds elapsed since that row's
 * timestamp. It then always inserts a new 'online' row stamped with the
 * current time.
 *
 * NOTE(review): the only writer visible here inserts 'online' rows at
 * startup, so `downtimeMs` may be the time since the previous start, not
 * true downtime. Confirm whether a shutdown or heartbeat writer exists.
 */
function handleReady() {
  const latestStatus = db
    .prepare('SELECT timestamp FROM bot_status ORDER BY id DESC LIMIT 1')
    .get();

  if (latestStatus) {
    const nowMs = Date.now();
    const previousMs = new Date(latestStatus.timestamp).getTime();
    logger.info('Bot recovered', { downtimeMs: nowMs - previousMs });
  }

  const insertStatus = db.prepare('INSERT INTO bot_status (status, timestamp) VALUES (?, ?)');
  insertStatus.run('online', new Date().toISOString());
}

client.on('ready', handleReady);
Production Deployment
Deploy with PM2 for automatic restarts:
# Start the bot under PM2; restart it when memory use exceeds 500 MB
pm2 start bot.js --name discord-bot --max-memory-restart 500M

# View logs
pm2 logs discord-bot

# Real-time monitoring
pm2 monit
On Space-Node's Discord Bot hosting, PM2 or systemd restarts your bot automatically after a crash. Combined with proper error handling, your bot maintains near-perfect uptime.
