#!/bin/bash
# Imports packages from upstream Arch repos.
#
# License: GPLv3

# Abort on any unhandled command failure, and make a pipeline fail
# when any one of its stages fails.
set -e -o pipefail
# extglob/globstar enable extended and recursive glob patterns;
# nullglob makes a glob that matches nothing expand to nothing
# (main() relies on this for its "move:<tag>:*.txt" glob).
shopt -s extglob globstar nullglob
# Load the "messages" library located by librelib; presumably this is
# where msg/msg2/plain/print/setup_traps come from -- TODO confirm.
source "$(librelib messages)"
setup_traps

# usage: indent <INPUT >OUTPUT
#
# Filter stdin through the libretools chroot/indent helper, handing
# it '    | ' as its only argument (presumably the prefix that gets
# put in front of each line -- verify against the helper).
indent() {
	local -r prefix='    | '
	/usr/lib/libretools/chroot/indent "$prefix"
}

# usage: expac_file <file.db> <expac_args>
#
# Query a single on-disk database file with expac.
#
# A throw-away pacman.conf is written to ${WORKDIR}/expac/; it
# declares exactly one repo, named after the file (its basename up to
# the first '.') and served from the directory containing the file.
# That repo is refreshed with `pacman -Syy` under fakeroot and then
# queried with `expac --sync <expac_args>`.
#
# Uses the ${WORKDIR} global
expac_file() {
	local -r db_path=$1
	local -a expac_args=("${@:2}")
	local -r scratch="${WORKDIR}/expac"

	local repo_name
	repo_name=${db_path##*/}
	repo_name=${repo_name%%.*}

	mkdir -p -- "${scratch}/root"
	printf '%s\n' \
		'[options]' \
		"RootDir = ${scratch}/root" \
		"DBPath = ${scratch}/root" \
		'' \
		"[${repo_name}]" \
		"Server = file://$(realpath --no-symlinks -- "${db_path%/*}")" \
		>"${scratch}/pacman.conf"

	fakeroot pacman --config="${scratch}/pacman.conf" -Syy >/dev/null
	# expac exits with non-zero on empty databases, so ignore errors
	expac --config="${scratch}/pacman.conf" --sync "${expac_args[@]}" || true
}

# usage: fetch_dbs <from> <into>
#
# rsync <from> to <into>, transferring nothing but '*.db' files
# (directories are traversed, everything else is excluded).  Passes
# -v to rsync when ${arg_verbose} is "true".
#
# TODO: we could be doing without things other than what is in
#       ${ARCHTAGS[@]}
fetch_dbs() {
	local -r src=$1 dest=$2

	local -a rsync_args=()
	[[ $arg_verbose != true ]] || rsync_args+=(-v)
	rsync_args+=(
		--no-motd -mrtLH --no-p
		--include="*/"
		--include="*.db"
		--exclude="*"
		--delete-after
	)

	rsync "${rsync_args[@]}" "$src" "$dest"
}

# usage: get_repo_dir <repo> <arch>
#
# Expand the ${ARCHPATH} template for one <repo> <arch> pair and print
# the resulting repo directory path, relative to the rsync root.
# Only the $repo and $arch placeholders are substituted; they are
# passed to envsubst through its environment only.
get_repo_dir() {
	printf '%s\n' "$ARCHPATH" \
		| repo=$1 arch=$2 envsubst '$repo $arch'
}

# usage: db_list_pkgs <path-to-db>
#
# Prints the packages found in the database at <path-to-db>, sorted
# and de-duplicated, one per line, as:
#
#     pkgname [epoch:]pkgver-pkgrel
db_list_pkgs() {
	local -r db_path=$1
	expac_file "$db_path" '%n %v' \
		| sort -u
}

# usage: filter_blacklisted <FULL_LIST >FILTERED_LIST
#
# Given a list of packages in the format:
#
#     pkgname [epoch:]pkgver-pkgrel
#
# filter out all of the packages named in blacklist.txt.
#
# The output is sorted and de-duplicated by whole line (`sort -u`).
filter_blacklisted() {
	# join(1) requires both inputs to be sorted on the join field
	# (the pkgname), which is not guaranteed to be the same order as
	# a whole-line sort outside of the C locale.  So sort on the key
	# for join, then restore whole-line `sort -u` order on the way
	# out, since consumers feed this output to comm(1).
	sort -k 1b,1 \
		| join -v1 \
			- \
			<(libreblacklist cat | libreblacklist get-pkg | sort -k 1b,1) \
		| sort -u
}

# usage: sync_pool <from> <path-to-whitelist> <into>
#
# rsync from the <from> pool to <into> (created if missing),
# transferring only what matches the include patterns listed in
# <path-to-whitelist>.  Passes -v to rsync when ${arg_verbose} is
# "true".
sync_pool() {
	local -r src=$1 whitelist=$2 dest=$3

	# Deliberately no --delete-after: removing stale files is the
	# job of the cleanup scripts, and it would remove our own
	# packages too.
	local -a rsync_args=()
	[[ $arg_verbose != true ]] || rsync_args+=(-v)
	rsync_args+=(
		--no-motd -rtlH --no-t
		--delay-updates
		--safe-links
		--include-from="$whitelist"
		--exclude="*"
	)

	mkdir -p -- "$dest"
	msg2 "Retrieving up to %d files from %s pool" \
		"$(wc -l < "$whitelist")" \
		"$(basename "$dest")"

	rsync "${rsync_args[@]}" "$src" "$dest"
}

# Main function. Process the databases and get the libre packages
#
# usage: main  (takes no arguments; reads V and DBSCRIPTS_CONFIG from
#               the environment)
#
# Exits with $EXIT_INVALIDARGUMENT on bad invocation and with
# $EXIT_NOTCONFIGURED when a required setting is missing or has the
# wrong type.
#
# Outline:
#  1. Fetch package info
#     * Get blacklist.txt
#     * Get repo.db from an Arch-like repo
#  2. Figure out what we want
#     * Generate textfiles describing the current repo state, (using
#       blacklist.txt) the desired repo state, and how to get from one
#       to the other.
#  3. Fetch the packages we want
#     * Symlink to files with the same name in INHERIT pools
#     * sync_pool to download the others
#  4. Modify the repos
#     * db-move
#     * db-update
#     * db-remove
#
# Files:
#   (misc)
#     - ${WORKDIR}/expac/                 : Scratch directory for expac_file()
#   (download)
#     - ${WORKDIR}/rsync/                 : Where we download '.db' files to
#     - ${WORKDIR}/staging/${repo}        : Where we download packages to
#     - ${WORKDIR}/staging/${SRCPOOL}     : Where we download sources to
#   (analysis)
#     - ${FTP_BASE}/${INHERIT}            : Where we look for duplicate files
#     - ${FTP_BASE}/.../${repo}.db        : Where we generate ${WORKDIR}/old/ from
#     - ${WORKDIR}/old/                   : .txt files describing the way the repos are
#     - ${WORKDIR}/new/                   : .txt files describing the way we want them to be
#     - ${WORKDIR}/dif/                   : .txt files describing how to make it happen
#     - ${WORKDIR}/${tag}.pkg.whitelist   : List of package filenames to download
#     - ${WORKDIR}/all.src.whitelist      : Glob list of source-package filenames to download
#   (release)
#     - ${WORKDIR}/staging/               : STAGING= directory for db-update
main() {
	##############################################################
	# 0. Initialization                                          #
	##############################################################

	# Run as `V=true db-import-pkg` to get verbose output.
	# Declared inside main(), so it is local to main() but still
	# visible to the functions main() calls (fetch_dbs, sync_pool).
	declare -r arg_verbose="$V"

	# Print usage message
	if [[ $# -ne 0 ]] || [[ -z "$DBSCRIPTS_CONFIG" ]] || ! grep -q ARCHMIRROR -- "$DBSCRIPTS_CONFIG"; then
		msg 'usage: [V=true] DBSCRIPTS_CONFIG=/path/to/file %s' "${0##*/}"
		exit $EXIT_INVALIDARGUMENT
	fi

	# The main config lives next to the (symlink-resolved) script.
	local config_file
	config_file="$(dirname "$(readlink -e "$0")")/config"
	source "$config_file"

	# Validate the configuration.  Every problem is reported before
	# giving up, so the user can fix them all in one go.
	local ret=0 varname varref
	for varname in PKGEXTS FTP_BASE PKGPOOL SRCPOOL; do
		if [[ -z ${!varname:-} ]] || is_array "$varname"; then
			print "Configure '%s' as a non-empty string in %q (or %q):" "$varname" "$config_file" "$LOCAL_CONFIG" >&2
			ret=$EXIT_NOTCONFIGURED
		fi
	done
	for varname in ARCHMIRROR ARCHPATH; do # optional: ARCHPKGPOOL ARCHSRCPOOL
		if [[ -z ${!varname:-} ]] || is_array "$varname"; then
			print "Configure '%s' as a non-empty string in DBSCRIPTS_CONFIG=%q (did you set DBSCRIPTS_CONFIG correctly?):" "$varname" "$LOCAL_CONFIG" >&2
			ret=$EXIT_NOTCONFIGURED
		fi
	done
	for varname in ARCHTAGS; do # optional: INHERIT
		declare -n varref="$varname"
		if (( ${#varref[*]} == 0 )) || ! is_array "$varname"; then
			print "Configure '%s' as a non-empty array in DBSCRIPTS_CONFIG=%q (did you set DBSCRIPTS_CONFIG correctly?):" "$varname" "$LOCAL_CONFIG" >&2
			ret=$EXIT_NOTCONFIGURED
		fi
	done
	# Stop here if any check above failed, rather than carrying on
	# and modifying the repos with a broken configuration.
	if [[ $ret != 0 ]]; then
		exit "${ret:-1}"
	fi

	WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX")
	readonly WORKDIR
	# ${WORKDIR@Q} is expanded (shell-quoted) now, when the trap is
	# installed, not when it fires.
	trap "rm -rf -- ${WORKDIR@Q}" EXIT

	##############################################################
	# 1. Fetch package info                                      #
	##############################################################

	# Get the blacklisted packages
	libreblacklist update

	# Sync the repos databases
	# NOTE: despite what the message says, fetch_dbs only includes
	# '*.db' files; no '.files' databases are downloaded.
	msg 'Downloading .db and .files files to import'
	mkdir "${WORKDIR}/rsync"
	fetch_dbs "${ARCHMIRROR}/" "${WORKDIR}/rsync"

	##############################################################
	# 2. Figure out what we want                                 #
	##############################################################

	mkdir "${WORKDIR}"/{old,new,dif}
	# Each tag has the form <repo>-<arch>; the arch is whatever
	# follows the last '-'.
	local _tag _repo _arch db_file
	for _tag in "${ARCHTAGS[@]}"; do
		_repo=${_tag%-*}
		_arch=${_tag##*-}
		# FIXME: this should use db-functions to lock the
		# repos while we read them.
		db_file="${FTP_BASE}/${_repo}/os/${_arch}/${_repo}.db"
		db_list_pkgs "$db_file" > "${WORKDIR}/old/${_tag}.txt"

		db_file="${WORKDIR}/rsync/$(get_repo_dir "${_repo}" "${_arch}")/${_repo}.db"
		db_list_pkgs "$db_file" | filter_blacklisted > "${WORKDIR}/new/${_tag}.txt"
	done

	# We now have ${WORKDIR}/old/ describing the way the repos
	# are, and ${WORKDIR}/new/ describing the way we want them to
	# be.  We now create ${WORKDIR}/dif/ describing how to get
	# from point A to point B.
	cat "${WORKDIR}"/old/*-*.txt | sort -u > "${WORKDIR}/old/all.txt"
	# db-move <repo-from> <repo-to> <pkgname|pkgbase> ...
	#
	# db-move doesn't allow us to limit the operation to a
	# specific arch, but the DBSCRIPTS_CONFIG will limit what
	# arches it applies to, and we currently only import 1 arch
	# from each upstream.
	local tag_from tag_to arch_from arch_to
	for tag_from in "${ARCHTAGS[@]}"; do
		arch_from=${tag_from##*-}
		for tag_to in "${ARCHTAGS[@]}"; do
			arch_to=${tag_to##*-}
			# Right-hand sides are quoted so they are compared
			# as strings, not matched as glob patterns.
			[[ $tag_from != "$tag_to" ]] || continue
			[[ $arch_from == "$arch_to" ]] || continue

			# Identical "name version" lines that are currently
			# in tag_from and wanted in tag_to.
			comm -12 \
				"${WORKDIR}/old/${tag_from}.txt" \
				"${WORKDIR}/new/${tag_to}.txt" \
				>> "${WORKDIR}/dif/move:${tag_from}:${tag_to}.txt"
		done
	done
	# db-update
	local tag
	for tag in "${ARCHTAGS[@]}"; do
		# "name version" lines wanted in this tag that are not
		# currently present in any tag.
		comm -13 \
			"${WORKDIR}/old/all.txt" \
			"${WORKDIR}/new/${tag}.txt" \
			> "${WORKDIR}/dif/update:${tag}.txt"
	done
	# db-remove <repo> <arch> <pkgname|pkgbase> ...
	for tag in "${ARCHTAGS[@]}"; do
		# pkgnames (that need to leave this tag) AND (haven't
		# already been removed from this tag by db-move).
		# With nullglob set, the move:* glob simply vanishes when
		# there are no such files.
		comm -23 \
			<(cut -d' ' -f1 -- "${WORKDIR}/old/${tag}.txt" | sort -u) \
			<(cut -d' ' -f1 -- "${WORKDIR}/new/${tag}.txt" "${WORKDIR}/dif/move:${tag}":*.txt | sort -u) \
			> "${WORKDIR}/dif/remove:${tag}.txt"
	done

	##############################################################
	# 3. Fetch the packages we want                              #
	##############################################################

	# For some packages, "fetch" means to create a symlink to a
	# pool we INHERIT from.  For others, it means to actually
	# download it from ${ARCHMIRROR} with rsync.
	local repo arch
	local pkgname filename pgpsig pool staged filepath
	for tag in "${ARCHTAGS[@]}"; do
		msg "Processing %s" "$tag"
		repo=${tag%-*}
		arch=${tag##*-}
		mkdir -p -- "${WORKDIR}/staging/${repo}"

		# pgpsig is read only to soak up the rest of the line.
		while read -r pkgname filename pgpsig; do
			staged=false
			for pool in "$PKGPOOL" "${INHERIT[@]}"; do
				filepath="${FTP_BASE}/${pool}/${filename}"
				# Only reuse real files, never symlinks.
				if [[ -f $filepath && ! -h $filepath ]]; then
					ln -srT -- "$filepath"     "${WORKDIR}/staging/${repo}/${filename}"
					ln -srT -- "$filepath".sig "${WORKDIR}/staging/${repo}/${filename}".sig
					staged=true
					break
				fi
			done
			if [[ $staged != true ]]; then
				printf '%s\n' "$filename"{,.sig} >> "${WORKDIR}/${tag}.pkg.whitelist"
				# $PKGEXTS is left unquoted on purpose: it is
				# used as a pattern for the suffix to strip.
				printf '%s\n' "${filename%$PKGEXTS}*.src.tar*" >> "${WORKDIR}/all.src.whitelist"
			fi
		done < <(
			# Runs in a subshell, so these assignments do not
			# leak into main().
			mapfile -t pkgnames < <(cut -d' ' -f1 <"${WORKDIR}/dif/update:${tag}.txt")
			db_file="${WORKDIR}/rsync/$(get_repo_dir "${repo}" "${arch}")/${repo}.db"
			if (( ${#pkgnames[@]} > 0 )); then
				expac_file "$db_file" '%n %f %g' "${pkgnames[@]}"
			fi
		)
		if [[ -f "${WORKDIR}/${tag}.pkg.whitelist" ]]; then
			sync_pool \
				"${ARCHMIRROR}/${ARCHPKGPOOL:-$(get_repo_dir "${repo}" "${arch}")}/" \
				"${WORKDIR}/${tag}.pkg.whitelist" \
				"${WORKDIR}/staging/${repo}/"
		fi
	done
	if [[ -n ${ARCHSRCPOOL:-} && -f "${WORKDIR}/all.src.whitelist" ]]; then
		sync_pool \
			"${ARCHMIRROR}/${ARCHSRCPOOL}/" \
			"${WORKDIR}/all.src.whitelist" \
			"${WORKDIR}/staging/${SRCPOOL}/"
	fi

	##############################################################
	# 4. Modify the repos                                        #
	##############################################################

	msg "Modifying the actual repos"

	# db-move
	msg2 'Step 1 of 3: db-move'
	local repo_from repo_to
	for tag_from in "${ARCHTAGS[@]}"; do
		repo_from=${tag_from%-*}
		arch_from=${tag_from##*-}
		for tag_to in "${ARCHTAGS[@]}"; do
			repo_to=${tag_to%-*}
			arch_to=${tag_to##*-}
			[[ $tag_from != "$tag_to" ]] || continue
			[[ $arch_from == "$arch_to" ]] || continue

			plain '%s -> %s' "$tag_from" "$tag_to"
			# xargs -r: don't run db-move at all when there is
			# nothing to move.
			< "${WORKDIR}/dif/move:${tag_from}:${tag_to}.txt" \
				cut -d' ' -f1 | \
				sed 's/^/pkgname=/' | \
				xargs -r -d $'\n' db-move "$repo_from" "$repo_to" |& \
				indent
		done
	done
	# db-update
	msg2 'Step 2 of 3: db-update'
	STAGING=${WORKDIR}/staging db-update |& indent
	# db-remove
	msg2 'Step 3 of 3: db-remove'
	for tag in "${ARCHTAGS[@]}"; do
		repo=${tag%-*}
		arch=${tag##*-}

		plain '%s' "$tag"
		< "${WORKDIR}/dif/remove:${tag}.txt" \
			cut -d' ' -f1 | \
			sed 's/^/pkgname=/' | \
			xargs -r -d $'\n' db-remove "$repo" "$arch" |& \
			indent
	done
}

# Script entry point: run main() with the script's own arguments.
main "$@"
