Skip to content

VPS Health Check Script

Description: Monitors VPS system metrics, including memory usage, disk usage, and CPU load average. Sends email alerts if thresholds are crossed. Designed to run on Linux hosts via cron or manually.

bash
#!/bin/bash


# ===== Configuration =====
EMAIL="youremail@example.com"     # Sample email for alerts
HOSTNAME=$(hostname)
DATE_NOW=$(date '+%Y-%m-%d %H:%M:%S')   # Captured once at startup; every alert reuses it

# ===== Thresholds =====
# Each value below is a default; an environment variable of the same name
# overrides it, e.g. in a crontab entry:
#   DISK_THRESHOLD=90 /path/to/this/script
MEM_THRESHOLD=${MEM_THRESHOLD:-20}              # Memory alert threshold, in percent (compared in check_memory)
DISK_THRESHOLD=${DISK_THRESHOLD:-80}            # More than this % disk usage triggers alert
LOAD_THRESHOLD_MULT=${LOAD_THRESHOLD_MULT:-2}   # Load avg > this multiple of CPU cores triggers alert

# ===== Email Sender Function =====
# Send one alert mail through msmtp.
# Globals:   EMAIL (read) - recipient address handed to msmtp
# Arguments: $1 - subject line
#            $2 - message body
# Notes:     backslash escapes (such as \n) in both arguments are expanded
# Returns:   exit status of msmtp
send_email() {
    local -r mail_text="Subject: $1\n\n$2"
    # %b expands backslash escapes the same way `echo -e` does
    printf '%b\n' "$mail_text" | msmtp "$EMAIL"
}

# ===== Memory Check Function =====
# Email an alert when the share of available memory falls below
# MEM_THRESHOLD percent.
# Globals:   MEM_THRESHOLD, HOSTNAME, DATE_NOW (read)
# Outputs:   at most one alert via send_email; a diagnostic on stderr when
#            the output of `free` cannot be parsed
# Returns:   1 when `free` output cannot be parsed; otherwise the status of
#            the last command run
check_memory() {
    local mem_total mem_free mem_used_percent mem_avail_percent
    local -r uint_re='^[0-9]+$'

    # From the "Mem:" row: field 2 is the total, field 7 is the "available"
    # column in current procps-ng `free` output (both in MiB because of -m).
    read -r mem_total mem_free < <(free -m | awk '/^Mem:/ {print $2, $7}')

    # Refuse to do arithmetic on missing/garbled values or a zero total
    # (the latter would be a division by zero).
    if [[ ! $mem_total =~ $uint_re || ! $mem_free =~ $uint_re ]] \
        || (( mem_total == 0 )); then
        echo "check_memory: could not parse output of 'free -m'" >&2
        return 1
    fi

    mem_used_percent=$(( (mem_total - mem_free) * 100 / mem_total ))
    mem_avail_percent=$(( mem_free * 100 / mem_total ))

    # "Low memory" means little memory is left, so compare the available
    # share (not the used share) against the threshold.
    if (( mem_avail_percent < MEM_THRESHOLD )); then
        send_email "⚠️ LOW MEMORY on $HOSTNAME" \
        "[$DATE_NOW] Available memory is low:\n\nUsed: ${mem_used_percent}%\nTotal: ${mem_total} MiB\nFree: ${mem_free} MiB"
    fi
}

# ===== Disk Check Function =====
# Email an alert for every mounted filesystem whose usage exceeds
# DISK_THRESHOLD percent.
# Globals:   DISK_THRESHOLD, HOSTNAME, DATE_NOW (read)
# Outputs:   one alert via send_email per filesystem over the threshold
# NOTE(review): every filesystem df lists is checked; mounts that are always
# full by design would alert on each run — consider filtering with `df -x`.
check_disk() {
    local usage mount_point

    # With --output=pcent,target each row is "<pcent>% <mount point>", so
    # the percentage is the FIRST field and the rest of the line is the
    # mount point (which may contain spaces). `read` with two names splits
    # exactly that way.
    while read -r usage mount_point; do
        usage=${usage%\%}
        # Skip rows without a numeric percentage (df prints "-" for those)
        [[ $usage =~ ^[0-9]+$ ]] || continue
        if (( usage > DISK_THRESHOLD )); then
            send_email "⚠️ LOW DISK SPACE on $HOSTNAME" \
            "[$DATE_NOW] Disk usage is high on $mount_point:\n\nUsage: ${usage}%"
        fi
    done < <(df -h --output=pcent,target | tail -n +2)
}

# ===== Load Average Check Function =====
# Email an alert when the 1-minute load average exceeds
# (number of CPU cores) x LOAD_THRESHOLD_MULT.
# Globals:   LOAD_THRESHOLD_MULT, HOSTNAME, DATE_NOW (read)
# Outputs:   at most one alert via send_email; a diagnostic on stderr when
#            the core count or load average cannot be determined
# Returns:   1 when inputs cannot be determined; otherwise the status of the
#            last command run
check_load_average() {
    local cpu_cores load_avg_1min load_limit
    local -r count_re='^[1-9][0-9]*$'
    local -r load_re='^[0-9]+(\.[0-9]+)?$'

    cpu_cores=$(nproc)
    # The first whitespace-separated field of /proc/loadavg is the
    # 1-minute load average; the remaining fields are discarded.
    read -r load_avg_1min _ < /proc/loadavg

    # Without valid inputs the comparison below would be meaningless
    if [[ ! $cpu_cores =~ $count_re || ! $load_avg_1min =~ $load_re ]]; then
        echo "check_load_average: could not determine CPU count or load average" >&2
        return 1
    fi

    # awk does the floating-point math, so no extra tool (bc) is required
    load_limit=$(awk -v cores="$cpu_cores" -v mult="$LOAD_THRESHOLD_MULT" \
        'BEGIN { print cores * mult }')

    # Compare as float: awk exits 0 only when load > limit
    if awk -v load="$load_avg_1min" -v limit="$load_limit" \
        'BEGIN { exit !(load + 0 > limit + 0) }'; then
        send_email "⚠️ HIGH LOAD on $HOSTNAME" \
        "[$DATE_NOW] Load average is high:\n\nLoad (1 min): $load_avg_1min\nCPU Cores: $cpu_cores\nThreshold: $load_limit"
    fi
}

# ===== Run All Checks =====
# Entry point: runs the memory, disk and load checks once each, in that order.
main() {
    check_memory
    check_disk
    check_load_average
}

main "$@"