ntfy_scripts/disks_checks.sh

86 lines
3.5 KiB
Bash
Executable File

#!/bin/bash
### The used parts of this script were written by tech_at_hangar.org
## The unused (commented-out) part below checks for a given minimum free space on a mounted volume and alerts ntfy when the free space drops below that limit. It stays here as a reference only. I copied it from the internet but I lost the reference, sorry!
## root volume
#mingigs=10
#avail=$(df | awk '$6 == "/" && $4 < '$mingigs' * 1024*1024 { print $4/1024/1024 }')
#if [ -n "$avail" ]; then
# curl -k --retry 3 \
# -d "Only $avail GB available on the root disk. Better clean that up." \
# -H "Title: Low disk space alert on $(hostname)" \
# -H "Priority: default" \
# -H "Tags: warning,cd" \
# $topicurl
#fi
## The first part of this script checks for disk usage and alerts if it exceeds 95%
## The second part of this script checks for missing disks on btrfs raid1 devices and alerts ntfy in case of missing devices
## The third part of this script checks for read/write errors on disks in mounted partitions and alerts ntfy in case of errors
# The ntfy topic URL that every alert in this script is sent to.
# Replace NTFY_SERVER_IP with the address of your ntfy instance, or export
# NTFY_TOPIC_URL to point the script at another topic without editing this file.
topicurl="${NTFY_TOPIC_URL:-https://NTFY_SERVER_IP/disk_alerts}"
### Check disk usage and notify ntfy when a partition is more than 95% full.

# Reads `df -P` style output on stdin and prints "<device> <used-percent>" for
# every filesystem whose device name contains /dev/. Rows whose capacity column
# is not a plain percentage (for example "-") are skipped, so the caller only
# ever compares integers.
_disk_usage_by_device() {
  awk 'index($1, "/dev/") {
    pct = $5
    sub(/%$/, "", pct)
    if (pct ~ /^[0-9]+$/) print $1, pct
  }'
}

# A single df call covers every mounted filesystem; -P keeps each entry on one
# line so the columns cannot shift. The list is read on fd 3 so that curl keeps
# the script's own stdin.
while read -r -u 3 device used_pct; do
  # The 95 threshold matches the "5%" wording of the alert text below.
  if (( used_pct > 95 )); then
    # NOTE: -k disables TLS certificate verification; drop it if the ntfy
    # server presents a certificate that this host trusts.
    curl -k --retry 3 \
      -d "Less then 5% space available on the $device partition. Better clean that up." \
      -H "Title: Low disk space alert on $(hostname)" \
      -H "Priority: default" \
      -H "Tags: warning,cd" \
      "$topicurl"
  fi
done 3< <(df -P | _disk_usage_by_device)
### Check btrfs raid status and notify ntfy when a filesystem is missing devices.

# Succeeds when the `btrfs filesystem show` output on stdin contains the
# "Some devices missing" marker. -q keeps grep silent, so a healthy run prints
# nothing (a match count on stdout would be mailed by cron on every run).
_btrfs_reports_missing_devices() {
  grep -qwi "Some devices missing"
}

if btrfs fi show | _btrfs_reports_missing_devices; then
  echo "Bad: - Btrfs is missing devices, some disk is broken"
  echo "Sending alert..."
  # NOTE: -k disables TLS certificate verification; drop it if the ntfy
  # server presents a certificate that this host trusts.
  curl -k --retry 3 \
    -d "Bad - Some devices are missing in btrfs: maybe a broken disk?" \
    -H "Title: Missing disks in btrfs on $(hostname)" \
    -H "Priority: high" \
    -H "Tags: warning,boar" \
    "$topicurl"
fi
### Check the error counters of every mounted btrfs filesystem and notify ntfy
### when any counter is non-zero.

# Reads `btrfs device stats` output on stdin and prints how many counters hold
# a non-zero value. The value is compared as a number: matching the character
# "0" would treat counters such as 10 or 100 as healthy.
_count_nonzero_btrfs_counters() {
  awk '$NF ~ /^[0-9]+$/ && $NF + 0 > 0 { bad++ } END { print bad + 0 }'
}

# Mount points are selected by filesystem type (5th field of `mount` output),
# so a non-btrfs mount that merely has "btrfs" in its path is not checked.
# The list is read on fd 3 so that curl keeps the script's own stdin.
while IFS= read -r -u 3 mount_point; do
  # Report a failing stats call instead of silently treating it as "no errors".
  if ! stats_output=$(btrfs device stats "$mount_point"); then
    echo "Could not read btrfs device stats for $mount_point" >&2
    continue
  fi

  error_counters=$(printf '%s\n' "$stats_output" | _count_nonzero_btrfs_counters)
  if (( error_counters > 0 )); then
    echo "Bad: - Btrfs is detecting errors on partition $mount_point, some disk is about to be broken"
    echo "Sending alert..."
    # NOTE: -k disables TLS certificate verification; drop it if the ntfy
    # server presents a certificate that this host trusts.
    curl -k --retry 3 \
      -d "Bad - Errors on btrfs mounted device $mount_point: you will have to replace a disk. Check the status of mounted disks with: btrfs device stats $mount_point" \
      -H "Title: btrfs errors on partition $mount_point of $(hostname)" \
      -H "Priority: high" \
      -H "Tags: warning,boar" \
      "$topicurl"
  fi
done 3< <(mount | awk '$5 == "btrfs" { print $3 }')