diff --git a/Bash/Storage/README.md b/Bash/Storage/README.md new file mode 100644 index 0000000..c3db203 --- /dev/null +++ b/Bash/Storage/README.md @@ -0,0 +1,69 @@ +# Top Disk Consumer Report Generator +#### Copyright 2022-2023 Red Hat, Inc. +#### Author: Kimberly Lazarski (klazarsk@redhat.com) + +#### [ORIGINAL REPOSITORY](https://github.com/klazarsk/storagetoolkit) + +```bash +./topdiskconsumer -l 10 +``` +```bash +du -hcx --max-depth=6 / 2>/dev/null | sort -rh | head -n 20 +``` + +Top 30 file +```bash +find / -mount -ignore_readdir_race -type f -exec du -h "{}" + 2>&1 \ +| sort -rh | head -n 20 +``` + +```bash +find / -mount -ignore_readdir_race -type f -mtime +30 -exec du -h "{}" + 2>&1 \ +| sort -rh | head -20 +``` + +```txt +-f --format [format] + Format headings with the addition of bold markup for use in ticketing system. + + Valid options: + html - format headings with html bold tags + bbcode - format headings with bbcode bold tags + ansi - [DEFAULT] format headings with ansi bold tags {for terminals and + richtext. This help screen always in ANSI format.} + +-p --path [path] explicitly set path to run from (it will identify the mount + hosting the specified directory and run from that mount point. Mutually + exclusive with the -A | --alt-root option. + + +-A --alt-root - treat the specified directory as an alternate root; in other words + report the top disk consumers from the specified point, deeper; do not check for + the actual mount point. Musually exclusive with --path option. This option is very + when used in combination of a bind mount of / if you suspect mounts are hiding large + disk consumers from view. + +-l --limit [number] + Report limited to top [number] largest files for each report section [default=20] + +-t --timeout [duration] set a timeout for each section of the report. For + example, 60 default time unit is seconds so this would timeout after 60 seconds, + 30s (for a 30 second timeout), 15m (timeout after 15 minutes), 2h (timeout after + 2 hours). Accepts same values as _timeout_ command. Please note that specifying a + timeout will result in incomplete and likely inaccurate and misleading results. + +-o --skipold skip report of files aged over 30 days + +-d --skipdir skip report of largest directories + +-m --skipmeta omit metadata such as reserve blocks, start and end time, duration, etc. + +-u --skipunlinked skip report of open handles to deleted files + +-f --skipfiles skip report of largest files + +-t --temp [directory] Specify an an alternate temp directory when /tmp is too full for temp + files to allow report generation. + +-v --version display the version number then exit. +``` \ No newline at end of file diff --git a/Bash/Storage/topdiskconsumer b/Bash/Storage/topdiskconsumer new file mode 100755 index 0000000..d66165f --- /dev/null +++ b/Bash/Storage/topdiskconsumer @@ -0,0 +1,401 @@ +#!/bin/bash +###################################### +# +# Top Disk Consumer Report Generator +# Copyright 2022-2023 Red Hat, Inc. +# Author: Kimberly Lazarski (klazarsk@redhat.com) +# +# This script eases the pain of identifying where space is consumed on +# a given disk mount and its predecessor (also authored by Kimberly) has +# been widely used in responding to alert tickets at a major hosting company. +# +# Usage: run it in any directory on a filesystem you need to diagnose. The script +# will find the mount point of that filesystem and run from that point. This script +# can be a good training tool for system administrators. +# +# This program is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software Foundation, +# either version 3 of the License, or (at your option) any later version. +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with this program. +# If not, see . + +strVersion="0.6"; +cmdLine="$0 $@ "; +dbgEcho="false"; +dbgSleep="false"; +cmdTimeout=""; +intNumFiles="20"; +optFormat=bbcode; +otagBold="\033[1m"; +ctagBold="\033[0m"; +otagRed='\033[0;31m' +otagRevRed='\e[0;101m' +ctagNc='\033[0m' +optSkipDir=0; +optSkipFiles=0; +optSkipMeta=0; +optSkipOld=0; +optSkipUnlinked=0; +unset cmdTimeout; +unset optPath; +unset strAltRoot; + + +# Find out if we have new or old sort +if echo -e "2\n1\n3" | sort -rh > /dev/null 2>&1; + then \ + bNewSort=1; +fi; + +# test for resize +# By the way headless systems very often have xterm installed for shooting a client window to a workstation, or to simply make resize available. Nevertheless... + + +fnUsage() +{ + optFormat=ansi; fnFormat; + echo -e "Your command line: \n"; + echo -e "\t $cmdLine"; + echo -e "$otagBold\ntopdiskconsumer -- Top Disk Consumer Report [Version $strVersion]\n\n$ctagBold"; + echo -e "This script reports on the top disk consumers to help identify where cleanup is required.\n\n"; + echo -e "$otagBold \bUsage:$ctagBold\n\n"; + echo -e "$otagBold \btopdiskconsumer$ctagBold\n"; + echo -en "\tWhen executed with no arguments, the script will identify the mount point \n"; + echo -en "\tcontaining the current working directory,then execute the report starting \n"; + echo -en "\tfrom the mount point, identifying and listing the top 20 largest files, the \n"; + echo -en "\ttop 20 largest directories, and then the top 20 largest files aged over 30 \n"; + echo -en "\tdays.\n\n"; + echo -en "$otagBold \bCommand line arguments:\n\n$ctagBold"; + echo -en "\t-f --format [format]\n"; + echo -en "\t\tFormat headings with the addition of bold markup for use in ticketing system. \n\n"; + echo -en "\t\t$otagBold \bValid options:$ctagBold\n"; + echo -en "\t\t html - format headings with html bold tags\n"; + echo -en "\t\t bbcode - format headings with bbcode bold tags\n"; + echo -en "\t\t ansi - [DEFAULT] format headings with ansi bold tags {for terminals and \n"; + echo -en "\t\t richtext. This help screen always in ANSI format.}\n\n"; + echo -en "\t-p --path [path] explicitly set path to run from (it will identify the mount \n"; + echo -en "\t\t hosting the specified directory and run from that mount point. Mutually \n"; + echo -en "\t\t exclusive with the -A | --alt-root option.\n"; + echo -en "\t\t\n\n"; + echo -en "\t-A --alt-root - treat the specified directory as an alternate root; in other words\n"; + echo -en "\t\t report the top disk consumers from the specified point, deeper; do not check for\n"; + echo -en "\t\t the actual mount point. Musually exclusive with --path option. This option is very \n"; + echo -en "\t\t when used in combination of a bind mount of / if you suspect mounts are hiding large\n"; + echo -en "\t\t disk consumers from view."; + echo -en "\t\t\n\n"; + echo -en "\t-l --limit [number]\n"; + echo -en "\t\t Report limited to top [number] largest files for each report section [default=20]\n\n"; + echo -en "\t-t --timeout [duration] set a timeout for each section of the report. For \n"; + echo -en "\t\t example, 60 default time unit is seconds so this would timeout after 60 seconds,\n"; + echo -en "\t\t 30s (for a 30 second timeout), 15m (timeout after 15 minutes), 2h (timeout after\n"; + echo -en "\t\t 2 hours). Accepts same values as _timeout_ command. Please note that specifying a \n"; + echo -en "\t\t timeout will result in incomplete and likely inaccurate and misleading results.\n\n "; + echo -en "\t-o --skipold skip report of files aged over 30 days \n\n"; + echo -en "\t-d --skipdir skip report of largest directories\n\n"; + echo -en "\t-m --skipmeta omit metadata such as reserve blocks, start and end time, duration, etc.\n\n"; + echo -en "\t-u --skipunlinked skip report of open handles to deleted files\n\n"; + echo -en "\t-f --skipfiles skip report of largest files\n\n"; + echo -en "\t-t --temp [directory] Specify an an alternate temp directory when /tmp is too full for temp\n"; + echo -en "\t\t files to allow report generation.\n\n"; + echo -en "\t-v --version display the version number then exit.\n\n" + echo -en "Please note: the Top [number] Directories report module includes the \"total\" size. This is intentional.\n"; + echo -en "This script can only access files that you have access to; either use sudo or root \n"; + echo -en "to execute it. You could alternatively chmod the script with suid rights, but that may be a\n"; + echo -en "security risk in finance, medical, and government environments by revealing confidential\n"; + echo -en "filenames to unprivileged users. Choose to suid with care.\n"; + echo -e "\n"; +} # end of help screen + +fnFormat() +{ + if [[ "$optFormat" =~ html|bbcode|ansi ]] ; then \ + if [ "$optFormat" == "html" ] ; then \ + $dbgEcho "Output will be html formatted."; + otagBold="";--limit + ctagBold=""; + elif [ "$optFormat" == "bbcode" ] ; then \ + $dbgEcho "Output will be bbcode formatted."; + otagBold="[b]"; + ctagBold="[/b]"; + elif [ "$optFormat" == "ansi" ] ; then \ + $dbgEcho "Output will be ansi formatted."; + otagBold="\033[1m"; + ctagBold="\033[0m"; + fi; + else echo -ne "\nBad format specified in "; + fnUsage; + exit 1; + fi; + +} + +fnDebug() +{ + dbgEcho="echo"; + dbgSleep="sleep"; +} + + + + +fnMetadata() +{ +resize > /dev/null 2>&1; +echo -e "#_# BEGIN REPORT"; + +echo -e "$otagBold== Server Time at start: ==$ctagBold"; +date; + +echo -e "\n$otagBold== Filesystem Utilization on [ $strFsMount ]: ==$ctagBold"; +df -PTh "$strFsMount" | column -t; + +echo -e "\n$otagBold== Inode Information for [ $strFsMount ]: ==$ctagBold"; +df -PTi "$strFsMount" | column -t; echo -e "\n$otagBold== Storage device behind directory [ $strFsMount ]: ==$ctagBold"; +echo "$strFsDev"; +$dbgSleep 3; + +} + + +fnReserveBlock() +{ +if blkid $strFsDev | grep -q "ext[3,4]"; + then arrBRCalc=($(tune2fs -l $strFsDev | grep -E '^Block count|^Reserved block count' | sed 's/.*:\s*//g')); + echo -e "\n$otagBold== Reserve block allocation for $strFsDev: ==$ctagBold\n $(echo "scale=3; ${arrBRCalc[1]} / ${arrBRCalc[0]} * 100" | bc)%"; +fi ; + +$dbgEcho "finished with reserve block reporting... moving on..."; +$dbgSleep 3; +} + + +fnLargestDirs() +{ +echo -e "\n$otagBold== $intNumFiles Largest Directories on [ $strFsMount ]: ==$ctagBold"; +if [ $bNewSort == 1 ]; +then \ + bNewSort=1; $cmdTimeout du -hcx --max-depth=6 "$strFsMount" 2>/dev/null | sort -rh | head -n $(($intNumFiles+1)); + else \ + $cmdTimeout du -hcx --max-depth=6 "$strFsMount" 2>/dev/null | grep -P '^([0-9]\.*)*G(?!.*(\btotal\b|\./$))' | sort -rnk1,1 | head -n $(($intNumFiles+1)) | column -t; +fi; +$dbgEcho "Finished reporting largest directories. Moving on..."; +} + + +fnLargestFiles() +{ +echo -e "\n$otagBold== $intNumFiles Largest Files on [ $strFsMount ]: ==$ctagBold"; +if [ $bNewSort == 1 ]; + then \ + $cmdTimeout find "$strFsMount" -mount -ignore_readdir_race -type f -exec du -h "{}" + 2>&1 | sort -rh | head -n $intNumFiles; + else \ + $cmdTimeout find "$strFsMount" -mount -ignore_readdir_race -type f -exec du "{}" + 2>&1 | sort -rnk1,1 | head -n $intNumFiles | awk 'BEGIN{ CONVFMT="%.2f"; }{ $1=( $1 / 1024 )"M"; print;}' | column -t; +fi; +} + + +fnLargestOldFiles() +{ +echo -e "\n$otagBold== $intNumFiles Largest Files on [ $strFsMount ] Older Than 30 Days: ==$ctagBold"; +if [ $bNewSort == 1 ]; + then \ + $cmdTimeout find "$strFsMount" -mount -ignore_readdir_race -type f -mtime +30 -exec du -h "{}" + 2>&1 | sort -rh | head -$intNumFiles; + else \ + $cmdTimeout find "$strFsMount" -mount -ignore_readdir_bNewSortrace -type f -mtime +30 -exec du "{}" + 2>&1 | sort -rnk1,1 | head -$intNumFiles | awk 'BEGIN{ CONVFMT="%.2f";}{ $1=( $1 / 1024 )"M"; print; }' | column -t; + fi; +} + + +fnLargestUnlinked() +{ +echo -e "\n$otagBold== $intNumFiles Largest Deleted Files on [ $strFsMount ] With Open Handles: ==$ctagBold"; +unset arrDeletedFiles; mapfile -t arrDeletedFiles < <(lsof $strFsMount 2>/dev/null| grep '\(deleted\)' | sort -rnk7 | awk '{print ($7/1024)/1024"MB*"$1"*"$2"*"$9"*"$10"*"$11}' | uniq -f 4 | head -$intNumFiles); +for ((i=0; i < ${#arrDeletedFiles[@]}; i++ )); + do \ + if [ $i == 0 ] ; + then echo -e "Size*COMMAND*File Handle*Filename" ; + fi; + strFileHandle=$(echo "/proc/$(echo ${arrDeletedFiles[$i]} | awk -F "*" '{print $3}')/fd/$(ls -lh /proc/$(echo ${arrDeletedFiles[$i]} | awk -F "*" '{print $3}')/fd | grep "$(echo ${arrDeletedFiles[$i]} | awk -F "*" '{print $4}')" | awk '{print $9}')";); + arrDeletedFiles[$i]=$(echo "$( echo ${arrDeletedFiles[$i]} | awk -F "*" -v filehandle=$strFileHandle '{print $1"*"$2"*"filehandle"*"$4}')"); echo ${arrDeletedFiles[$i]}; +done | column -t -s"*" ; +} + + +fnOverview() +{ +echo -e "\n$otagBold== Disk Utilization Report for All Mounts: ==$ctagBold"; +df -hTP; +} + +fnEndReport() +{ +echo -e "\n$otagBold== Elapsed Time: ==$ctagBold"; printf '%dh:%dm:%ds\n' $(($SECONDS/3600)) $(($SECONDS%3600/60)) $(($SECONDS%60)); +echo -e "\n$otagBold== Server Time at completion: ==$ctagBold"; +date; +echo -e "\n\n#_# END REPORT"; +} + + +fnSetTemp() +{ +# Is there enough space on the specified --temp directory for sort to run? +if [ $(df --output=avail $strTemp | sed '$!d') -lt 100000 ] ; then \ + if [[ -v TMPDIR ]]; then echo "TMPDIR Already set, backing it up."; strBkTMPDIR="$TMPDIR"; fi; + echo -e "\a$otagRed\nThere may not be enough space in $otagBold \b$strTemp \b$ctagBold \b$otagRed for $otagBold \bsort$ctagBold \b$otagRed to complete; there is only $(df -h --output=avail $strTemp | grep -v Avail) free; please choose a different location. $ctagNc" ; + echo -e "\nThe script will continue after a 15-second pause but the reports may be incomplete. "; + export TMPDIR="$strTemp"; + $dbgEcho "$(env | grep TMPDIR)"; + sleep 15; +else echo -e "$otagBold \busing $strTemp as an alternate temp directory." ; + if [[ -v TMPDIR ]]; then echo "TMPDIR Already set, backing it up."; strBkTMPDIR="$TMPDIR"; fi; + export TMPDIR="$strTemp"; + $dbgEcho "$(env | grep TMPDIR)"; +fi ; +} + +while [ "$1" != "" ] ; do \ + case $1 in + -D | --debug ) fnDebug + ;; + -F | --format ) shift; + optFormat="$1"; + fnFormat; + ;; + -d | --skipdir ) optSkipDir=1; + ;; + -f | --skipfiles ) optSkipFiles=1; + ;; + -l | --limit ) shift; + intNumFiles="$1"; + if ! [[ $1 =~ ^[0-9]+$ ]] ; then echo -ne "$otagBold\n Error: --limit must be followed by a natural number (an integer >= 1). $intNumFiles is invalid $ctagBold; " ; fnUsage; exit 1; fi; + ;; + -m | --skipmeta ) optSkipMeta=1; + ;; + -o | --skipold ) optSkipOld=1; + ;; + -p | --path ) shift; + optPath="$1"; + ;; + -t | --timeout ) shift; + intTimeout="$1"; + cmdTimeout="timeout $intTimeout"; + ;; + -u | --skipunlinked ) optSkipUnlinked=1; + ;; + -h | --help ) echo -e "\n"; + fnUsage; + exit; + ;; + -t | --temp ) shift; + strTemp="$1"; + if [[ ! -d "$strTemp" ]] ; then \ + echo -e "\a\n$otagBold $otagRevRed>>> \t \bSpecified temp directory "$strTemp" does not exist. Please check and correct the path.$ctagBold"; + exit 1; + echo -e "Your command line: \n"; + echo -e "\t $cmdLine"; + fi; + fnSetTemp; + ;; + -A | --alt-root ) shift; + strAltRoot="$1"; + if [[ ! -d "$strAltRoot" ]] ; then \ + echo -e "\a\n$strAltRoot does not exist. Please check the path you specified."; + exit 1; + fi; + ;; + -V | --version ) + echo "topdiskconsumer version $strVersion."; + exit 0; + ;; + + * ) + echo -en "$otagBold \n\tI couldn't understand your command. Please note if you specified a command with spaces \n" + echo -e "\t \or extended ASCII, you will need to escape those characters or use quotes around the path.\n$ctagBold"; + fnUsage; + exit 1 + ;; + esac ; + shift ; +done; + +# find the mount point if strAltRoot is not set, otherise set optPath to strAltRoot: +if [[ -v strAltRoot ]]; then \ + # if both --alt-root and --path are set, return error and quit. + $dbgEcho "--alt-root specified, checking if --path was also set."; + if [[ -v optPath ]]; then \ + echo "--alt-root and --path are mutually exclusive."; + exit 1; + fi; + optPath="$strAltRoot"; + strFsMount="$strAltRoot"; +else \ + if [ ! -v optPath ]; then \ + $dbgEcho "optPath is not set, so setting path automagically." + $dbgEcho "Current working directory = $PWD" + strFsMount="$(stat --format="%m" "$PWD")"; + # find the device behind the mount point: + strFsDev="$(findmnt -n --output=SOURCE --target="$strFsMount")"; + $dbgEcho "strFsDev=$strFsDev"; + $dbgEcho "strFsMount=$strFsMount"; + $dbgSleep 5; + else \ + if [ ! -d "$optPath" ]; then \ + echo -e "\n\n\t\tYou specified \"$optPath\" which is not extant.\n\n"; + $dbgSleep 5; + fnUsage; + exit 1; + else \ + $dbgEcho "You specified the path \"$optPath\", which is extant. OK."; + strFsMount="$(stat --format="%m" "$optPath")"; + # find the device behind the mount point: + strFsDev="$(findmnt -n --output=SOURCE --target="$strFsMount")"; + $dbgEcho "The filesystem behind \"$optPath\" is \"$strFsMount\"; running from $strFsMount."; + fi; + fi; +fi; + +# Is there enough space for sort to run? +if [[ ! -v strTemp ]]; then \ + if [ $(df --output=avail /tmp | sed '$!d') -lt 100000 ] ; then \ + echo -e "\a\a$otagRed\nThere may not be enough space in /tmp for $otagBold \bsort$ctagBold \b$otagRed \b\bto complete; there is only $(df -h --output=avail /tmp | grep -v Avail) free; consider leveraging the --temp option. $ctagNc" ; + echo -e "\nThe script will continue after a 15-second pause but the reports may be incomplete."; + sleep 15; + export TMPDIR="$strTemp"; + fi ; +fi; + + +if [ $optSkipMeta -ne 1 ]; then \ + fnMetadata; + else $dbgEcho "Skipping metadata"; +fi; +if [ $optSkipFiles -ne 1 ] ; then \ + fnLargestFiles; + else $dbgEcho "Skipping largest files..."; +fi; +if [ $optSkipDir -ne 1 ]; then \ + fnLargestDirs; + else $dbgEcho "Skipping largest dirs..."; +fi; +if [ $optSkipOld -ne 1 ]; then \ + fnLargestOldFiles; + else $dbgEcho "Skipping old files..."; +fi; +if [ $optSkipUnlinked -ne 1 ] ; then \ + fnLargestUnlinked; + else $dbgEcho "Skipping open handles to unlinked files"; +fi; +if [ $optSkipMeta -ne 1 ]; then \ + fnEndReport; + else $dbgEcho "Skipping metadata"; +fi; +if [[ -v strBkTMPDIR ]]; then export TMPDIR="$strBkTMPDIR"; else \ + unset TMPDIR; +fi; + +exit 0; +