#!/bin/bash
#
# pacache - flexible pacman cache cleaning
#
# Copyright (C) 2011 Dave Reisner <dreisner@archlinux.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


# extended globs are needed by the package-file pattern used when the cache
# directory is scanned
shopt -s extglob

# Global state. needsroot and verbose are initialised here as well, so that
# same-named variables inherited from the environment cannot leak into the
# arithmetic tests that read them.
declare -a candidates=() cmdopts=() whitelist=() blacklist=()
declare -i delete=0 dryrun=0 filecount=0 keep=3 move=0 needsroot=0 totalsaved=0 verbose=0
declare    cachedir=@localstatedir@/cache/pacman/pkg delim=$'\n' movedir= scanarch=

# Print an informational message on stderr.
# $1 is a printf format; the "==> " prefix and the trailing newline are
# added here. Any further arguments are the values for that format.
msg() {
	local fmt="==> ${1}\n"
	shift
	printf "$fmt" "$@" >&2
}

# Print an error message on stderr.
# $1 is a printf format; the "==> ERROR: " prefix and the trailing newline
# are added here. Any further arguments are the values for that format.
error() {
	local fmt="==> ERROR: ${1}\n"
	shift
	printf "$fmt" "$@" >&2
}

# Report a fatal error and terminate the script.
# All arguments are handed to error unchanged; the exit status is always 1.
die() {
	error "$@"; exit 1
}

# reads a list of files on stdin and prints out deletion candidates
#
# Arguments: $1   - number of entries to retain per package/arch pair; the
#                   last $1 entries in input order are the ones kept
#            $2   - architecture to restrict pruning to (empty: all)
#            $3   - whitelist length N, followed by N package names
#            then - blacklist length M, followed by M package names
# Outputs:   one candidate file name per line on stdout; packages are
#            emitted in whatever order awk iterates its array
pkgfilter() {
	# the whitelist and blacklist parameters are handed to awk as trailing
	# arguments (after the program text) and picked out of ARGV in BEGIN.

	awk -v keep="$1" -v scanarch="$2" '
	function parse_filename(filename,     parts, count, i, pkgname, arch) {

		# the last three dash-separated fields are version, release and
		# arch.ext; everything before them is the package name, which may
		# itself contain dashes
		count = split(filename, parts, "-")

		i = 1
		pkgname = parts[i++]
		while (i <= count - 3) {
			pkgname = pkgname "-" parts[i++]
		}

		# arch is the final field up to its first dot
		arch = substr(parts[count], 1, index(parts[count], ".") - 1)

		# filter on whitelist or blacklist
		if (wlen && !(pkgname in whitelist)) return
		if (blen && (pkgname in blacklist)) return

		# append to the SUBSEP-joined file list of this package/arch pair
		if ("" == packages[pkgname,arch]) {
			packages[pkgname,arch] = filename
		} else {
			packages[pkgname,arch] = packages[pkgname,arch] SUBSEP filename
		}
	}

	BEGIN {
		# create whitelist
		wlen = ARGV[1]; delete ARGV[1]
		for (i = 2; i < 2 + wlen; i++) {
			whitelist[ARGV[i]] = 1
			delete ARGV[i]
		}

		# create blacklist from every argument after its length, stopping
		# at the last real argument (ARGV[ARGC - 1])
		blen = ARGV[i]; delete ARGV[i]
		for (i++; i < ARGC; i++) {
			blacklist[ARGV[i]] = 1
			delete ARGV[i]
		}

		# read package filenames; compare against 0 so that a read error
		# (getline returning -1) ends the loop instead of spinning forever
		while ((getline < "/dev/stdin") > 0) {
			parse_filename($0)
		}

		for (pkglist in packages) {
			# idx[1,2] = idx[pkgname,arch]
			split(pkglist, idx, SUBSEP)

			# enforce architecture match if specified
			if (!scanarch || scanarch == idx[2]) {
				count = split(packages[idx[1], idx[2]], pkgs, SUBSEP)
				if (count > keep) {
					# everything except the last "keep" entries goes
					for (i = 1; i <= count - keep; i++) {
						print pkgs[i]
					}
				}
			}
		}
	}' "${@:3}"
}

# Format a byte count for display, e.g. 1536 -> "1.5 KiB".
#
# Arguments: $1 - size in bytes
# Outputs:   the scaled value with at most two decimals (trailing zeros
#            removed), a space and a binary unit, without a trailing newline
size_to_human() {
	awk -v size="$1" '
	BEGIN {
		nsuffix = split("B KiB MiB GiB TiB", suffix, " ")
		count = 1

		# scale down in steps of 1024, but never past the largest unit
		while (size >= 1024 && count < nsuffix) {
			size /= 1024
			count++
		}

		sizestr = sprintf("%.2f", size)
		# strip trailing zeros of the fraction and a then-dangling dot;
		# the dot has to be escaped or it would match any character
		sub(/\.?0+$/, "", sizestr)
		printf("%s %s", sizestr, suffix[count])
	}'
}

# Run the given command line, as root when the global needsroot is set.
#
# Arguments: the command and its arguments
# Returns:   the exit status of the command (or of the sudo/su wrapper)
runcmd() {
	# no escalation needed: run the command as ourselves
	if (( ! needsroot )); then
		"$@"
		return
	fi

	msg "Privilege escalation required"

	# fall back to su when sudo cannot validate or list our privileges
	if ! sudo -v &>/dev/null || ! sudo -l &>/dev/null; then
		# NOTE(review): presumably printed so that the password prompt su
		# shows next reads as asking for root's password - confirm
		printf '%s ' 'root'
		# su -c takes a single string, so re-quote every argument for it
		su -c "$(printf '%q ' "$@")"
		return
	fi

	sudo "$@"
}

# Print the end-of-run report.
#
# Arguments: $1   - number of packages acted upon
#            $2.. - candidate file names
# Globals:   delete, move, dryrun, verbose, movedir, delim, totalsaved (read)
# Outputs:   on a verbose dry run the candidate list goes to stdout, laid
#            out according to the verbosity level; the status line goes to
#            stderr through msg
summarize() {
	local -i filecount=$1; shift
	local seenarch= seen= arch= name= pkg= output=
	# name-version-release-arch.pkg...; the release may be an integer or
	# of the form major.minor, hence the dot in its character class
	local -r pkg_re='(.+)-[^-]+-[0-9.]+-([^.]+)\.pkg.*'

	if (( delete )); then
		printf -v output 'finished: %d packages removed' "$filecount"
	elif (( move )); then
		printf -v output "finished: %d packages moved to \`%s'" "$filecount" "$movedir"
	elif (( dryrun )); then
		if (( verbose )); then
			msg "Candidate packages:"
			while IFS= read -r pkg; do
				if (( verbose >= 3 )); then
					# grouped listing: one header per package/arch pair
					if [[ $pkg =~ $pkg_re ]]; then
						name=${BASH_REMATCH[1]} arch=${BASH_REMATCH[2]}
					fi
					if [[ -z $seen || $seenarch != "$arch" || $seen != "$name" ]]; then
						# remember the group so its header is printed once
						seen=$name seenarch=$arch
						printf '%s (%s):\n' "$name" "$arch"
					fi
					printf '  %s\n' "$pkg"
				elif (( verbose >= 2 )); then
					# absolute paths; PWD is passed as data so that a '%'
					# in it cannot be taken for a format directive
					printf "%s/%s$delim" "$PWD" "$pkg"
				else
					printf "%s$delim" "$pkg"
				fi
			done < <(printf '%s\n' "$@" | pacsort)
		fi
		printf -v output 'finished dry run: %d candidates' "$filecount"
	fi

	printf '\n' >&2
	# output may contain movedir, so it is passed as an argument rather
	# than being spliced into the format string
	msg '%s (diskspace saved: %s)' "$output" "$(size_to_human "$totalsaved")"
}

# Print the help text on stdout; the program name is taken from $0.
usage() {
	cat <<EOF
usage: ${0##*/} <operation> [options] [targets...]

${0##*/} is a flexible pacman cache cleaning utility, which has numerous
options to help control how much, and what, is deleted from any directory
containing pacman package tarballs.

  Operations:
    -d               perform a dry run, only finding candidate packages.
    -m  <movedir>    move candidate packages to 'movedir'.
    -r               remove candidate packages.

  Options:
    -a  <arch>       scan for 'arch' (default: all architectures).
    -c  <cachedir>   scan 'cachedir' for packages (default: @localstatedir@/cache/pacman/pkg).
    -f               apply force to mv(1) and rm(1) operations.
    -h               display this help message.
    -i  <pkgs>       ignore 'pkgs', which is a comma separated list. Alternatively,
                     specify '-' to read package names from stdin, newline delimited.
    -k  <num>        keep 'num' of each package in 'cachedir' (default: 3).
    -u               target uninstalled packages.
    -v               increase verbosity. specify up to 3 times.
    -z               use null delimiters for candidate names (only with -v and -vv)

EOF
}

# Refuse to start as root (UID 0); privileges are requested later, and only
# for the command that needs them.
if (( UID == 0 )); then
	error "Do not run this script as root. You will be prompted for privilege escalation."
	exit 42
fi

# Parse the command line. The leading ':' in the option string selects
# getopts' silent mode, so the ':' and '?' arms below produce the
# diagnostics for a missing argument and an unknown option.
while getopts ':a:c:dfhi:k:m:ruvz' opt; do
	case $opt in
		a) scanarch=$OPTARG ;;
		c) cachedir=$OPTARG ;;
		d) dryrun=1 ;;
		f) cmdopts=(-f) ;;
		h) usage
			exit 0 ;;
		i) ign=()
			if [[ $OPTARG = '-' ]]; then
				# only consume stdin when it is not a terminal
				[[ ! -t 0 ]] && IFS=$'\n' read -r -d '' -a ign
			else
				IFS=',' read -r -a ign <<< "$OPTARG"
			fi
			blacklist+=("${ign[@]}")
			unset ign ;;
		k) # validate before assigning: keep is an integer variable, so
			# assigning unchecked text would arithmetic-evaluate it first
			[[ $OPTARG == +([0-9]) ]] ||
				die 'argument to option -k must be a non-negative integer'
			# force base 10 so that leading zeros are not read as octal
			keep=$(( 10#$OPTARG )) ;;
		m) move=1 movedir=$OPTARG ;;
		r) delete=1 ;;
		u) # every name printed by pacman -Qq is added to the blacklist
			IFS=$'\n' read -r -d '' -a ign < <(pacman -Qq)
			blacklist+=("${ign[@]}")
			unset ign ;;
		v) (( ++verbose )) ;;
		z) delim='\0' ;;
		:) die "option '-%s' requires an argument" "$OPTARG" ;;
		\?) die "invalid option -- '%s'" "$OPTARG" ;;
	esac
done
shift $(( OPTIND - 1 ))

# remaining args are a whitelist
whitelist=("$@")

# sanity checks: exactly one of -d, -r and -m has to be given
case $(( dryrun+delete+move )) in
	0) die "no operation specified (use -h for help)" ;;
	[^1]) die "only one operation may be used at a time" ;;
esac

[[ -d $cachedir ]] ||
	die "cachedir \`%s' does not exist or is not a directory" "$cachedir"

[[ $movedir && ! -d $movedir ]] &&
	die "move-to directory \`%s' does not exist or is not a directory" "$movedir"

if (( move || delete )); then
	# make it an absolute path since we're about to chdir. Only done for a
	# move: when deleting, movedir is empty and must stay empty, otherwise
	# it would become "$PWD/" and an unwritable current directory would
	# wrongly request root below.
	(( move )) && [[ ${movedir:0:1} != '/' ]] && movedir=$PWD/$movedir
	[[ ! -w $cachedir || ( $movedir && ! -w $movedir ) ]] && needsroot=1
fi

# unlikely that this will fail, but better make sure
cd "$cachedir" || die "failed to chdir to \`%s'" "$cachedir"

# note that these results are returned in an arbitrary order from awk, but
# they'll be resorted (in summarize) iff we have a verbosity level set.
#
# The glob is expanded with nullglob, scoped to the process substitution, so
# that a directory without packages yields no input at all instead of the
# literal pattern, which could otherwise end up as a candidate with -k 0.
IFS=$'\n' read -r -d '' -a candidates < <(
	shopt -s nullglob
	pkgfiles=(*.pkg.tar?(.+([^.])))
	if (( ${#pkgfiles[@]} )); then
		printf '%s\n' "${pkgfiles[@]}" | pacsort |
			pkgfilter "$keep" "$scanarch" \
				"${#whitelist[*]}" "${whitelist[@]}" \
				"${#blacklist[*]}" "${blacklist[@]}"
	fi
)

if (( ! ${#candidates[*]} )); then
	msg 'no candidate packages found for pruning'
	exit 1
fi

# grab this prior to signature scavenging
pkgcount=${#candidates[*]}

# copy the list, merging in any found sigs. Start from an explicitly empty
# array so that nothing inherited from the environment can end up in the
# list handed to rm/mv.
candtemp=()
for cand in "${candidates[@]}"; do
	candtemp+=("$cand")
	[[ -f $cand.sig ]] && candtemp+=("$cand.sig")
done
candidates=("${candtemp[@]}")
unset candtemp

# do this before we destroy anything
totalsaved=$(@SIZECMD@ "${candidates[@]}" | awk '{ sum += $1 } END { print sum }')

# crush. kill. destroy.
# "--" ends option parsing so that no file name can be taken for an option
(( verbose )) && cmdopts+=(-v)
if (( delete )); then
	runcmd rm "${cmdopts[@]}" -- "${candidates[@]}"
elif (( move )); then
	runcmd mv "${cmdopts[@]}" -- "${candidates[@]}" "$movedir"
fi

summarize "$pkgcount" "${candidates[@]}"