added dynamic btrfs partition device errors check

This commit is contained in:
andrea.noni 2024-10-30 16:56:34 +01:00
parent 275e1dc5dc
commit 065e5a524c
1 changed files with 31 additions and 4 deletions

View File

@ -1,9 +1,10 @@
#!/bin/bash
### I lost the link where I copied this script from, sorry, i'll update as soon as I find it back! The btrfs raid1 check part is written by tech_at_hangar.org
### I lost the link to where I copied the first part of this script from, sorry; I'll update this as soon as I find it again! The btrfs raid1 check part (second) and the btrfs device errors part (third) were written by tech_at_hangar.org
## The first part of this script checks for given minimum space on a mounted volume, and contacts ntfy in case the limit is surpassed.
## The second part of this script checks for missing disks on btrfs raid1 devices and contacts ntfy in case of missing devices
## The first part of this script checks for given minimum space on a mounted volume, and alerts ntfy in case the limit is surpassed.
## The second part of this script checks for missing disks on btrfs raid1 devices and alerts ntfy in case of missing devices
## The third part of this script checks for read/write errors on disks in mounted partitions and alerts ntfy in case of errors
# root volume
mingigs=10
@ -33,7 +34,7 @@ if [ -n "$avail" ]; then
$topicurl
fi
# check btrfs raid 1 status on /mnt/OTHER_VOLUME NAME
### check btrfs raid status on btrfs devices
if (btrfs fi show | grep -wic "Some devices missing"); then
echo Bad: - Btrfs is missing devices, some disk is broken
@ -45,3 +46,29 @@ if (btrfs fi show | grep -wic "Some devices missing"); then
-H "Tags: warning,boar" \
$topicurl
fi
### check errors on btrfs devices
topicurl=https://NTFY_SERVER_IP/disk_alerts

# Reads `btrfs device stats` output on stdin and prints how many error
# counters are non-zero. Each line is expected to look like
# "[/dev/sdX].write_io_errs   0": the second field is the counter value.
# The value is compared numerically, so counters such as 10 or 100 (which
# contain the digit 0) still count as errors; blank or non-numeric lines
# are ignored.
count_btrfs_error_counters() {
  awk '$2 ~ /^[0-9]+$/ && $2 + 0 != 0 { n++ } END { print n + 0 }'
}

# get the list of mounted btrfs partitions: `mount -t btrfs` only lists btrfs
# filesystems, and the third field of each line is the mount point
# (mount points containing whitespace are not supported by this parsing)
mapfile -t mounted_partitions < <(mount -t btrfs | awk '{print $3}')

for i in "${mounted_partitions[@]}"; do
  [ -n "$i" ] || continue

  # a failure to read the stats (missing tool, no permission) is reported on
  # stderr instead of being mistaken for a disk error
  if ! device_stats=$(btrfs device stats "$i"); then
    echo "Warning: could not read btrfs device stats for $i" >&2
    continue
  fi

  # number of error counters that are not zero on this mounted device
  check_disk_errors=$(printf '%s\n' "$device_stats" | count_btrfs_error_counters)

  if [ "$check_disk_errors" -gt 0 ]; then
    echo "Bad: - Btrfs is detecting errors on partition $i, some disk is about to be broken"
    echo Sending alert...
    curl -k --retry 3 \
      -d "Bad - Errors on btrfs mounted device $i: you will have to replace a disk. Check the status of mounted disks with: btrfs device stats $i" \
      -H "Title: btrfs errors on partition $i of $(hostname)" \
      -H "Priority: high" \
      -H "Tags: warning,boar" \
      "$topicurl"
  fi
done