1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
#!/bin/bash
# Name: verify-digests.sh
# Title: Gentoo Linux release digest verification
# Author: Robin H Johnson <robbat2@gentoo.org>
# Copyright 2016 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
#
# Description:
# This script exists to help mirrors verify raw digests of release files, to
# detect possible disk and filesystem corruptions. By design, it does NOT check
# GPG signatures.
#
# Usage:
# verify-digests.sh [FILES-OR-DIRECTORIES...]
#
# If passed a digest file:
# - it will be checked.
# If passed a non-digest file:
# - that immediate directory will be checked for all digest files.
# If passed a directory:
# - it and all subdirs will be checked for all digest files.
# If passed no arguments:
# - it will act like the directory '.' was passed.
#
# Return value:
# On success, exits zero.
# On failures, exits non-zero, and writes a file of errors to $TMPDIR.
# Take Gentoo digest files and convert to a plain BSD-format digest file.
# - strip any PGP signing
# - pass existing BSD-format digest
# - convert coreutils-format to BSD-format
#
# Input:  digest file contents on stdin.
# Output: one "HASH (filename) = hexdigest" line per digest on stdout.
#
# Recognised input lines:
#   "# <HASH> HASH"            names the algorithm of the coreutils-format
#                              lines that follow it
#   "<hex>  <file>"            coreutils format; ignored until a header has
#                              been seen; a leading "*" (binary-mode marker)
#                              is dropped from the file name
#   "<HASH> (<file>) = <hex>"  BSD format, passed through unchanged
#   "<HASH> <hex> <file>"      three-column format, reordered to BSD format
transform_digest() {
  # Remove the cleartext-signature wrapper: the armor header up to the first
  # blank line, and the whole trailing signature block.
  sed -E \
    -e '/BEGIN (PGP|GPG) SIGNED MESSAGE/,/^$/d' \
    -e '/BEGIN (PGP|GPG) SIGNATURE/,/END (PGP|GPG) SIGNATURE/d' \
    | awk '
      # A single program text is used (no -e) so any POSIX awk can run it.
      /^# .* HASH$/ { hash = $2 }
      # Normalise the BLAKE2 names to their mixed-case BSD-tag spelling.
      (hash == "BLAKE2B") { hash = "BLAKE2b" }
      (hash == "BLAKE2S") { hash = "BLAKE2s" }
      # coreutils format: needs the algorithm from the preceding header.
      /^[[:xdigit:]]+[[:space:]]+.+/ {
        if (hash != "") {
          name = $2
          sub(/^\*/, "", name)
          printf "%s (%s) = %s\n", hash, name, $1
        }
      }
      # BSD format: already in the target shape.
      /^([A-Z]+[0-9A-Za-z-]+) \(.*\) = [[:xdigit:]]+/ { print $0 }
      # Three-column "HASH hex file" format.
      /^([A-Z]+[0-9A-Za-z-]+) [[:xdigit:]]+ [^[:space:]]+$/ {
        printf "%s (%s) = %s\n", $1, $3, $2
      }
    '
}
# Sort the command-line arguments into two lists:
# - DIGESTS_FIND: directories, searched with find later on
# - DIGESTS_ARGS: everything else, kept as-is (so you can pass .asc files
#   directly)
# When the script is called without arguments, '.' is searched.
DIGESTS_ARGS=()
DIGESTS_FIND=()
for f in "$@"; do
  if [[ -d "$f" ]]; then
    DIGESTS_FIND+=("$f")
    continue
  fi
  DIGESTS_ARGS+=("$f")
done
if (($# == 0)); then
  DIGESTS_FIND+=(.)
fi
# Check if non-dir arguments were digest files or files that you want to get checked

# Succeed when path $1 is itself a digest file: either its name contains
# "DIGEST", or its content shows a "# <HASH> HASH" header (coreutils format)
# or a ") = <hex>" tail (BSD format). Unreadable or missing files whose name
# does not contain "DIGEST" are reported as "not a digest file".
is_digest_file() {
  local path=$1
  if [[ "$path" == *DIGEST* ]]; then
    return 0
  fi
  # Both patterns are extended regular expressions: the parenthesis must be
  # escaped to be literal and "+" is written without a backslash.
  grep -sq -m 1 -E \
    -e '# [A-Z]+[0-9A-Za-z-]+ HASH' \
    -e '\) = [[:xdigit:]]+' \
    -- "$path"
}

DIGESTS_ARGS2=()
for f in "${DIGESTS_ARGS[@]}"; do
  if is_digest_file "$f"; then
    DIGESTS_ARGS2+=("$f")
  else
    # A regular release file: check every digest file sitting next to it.
    d=$(dirname -- "$f")
    DIGESTS_FIND2=()
    # Read line by line so paths keep their spaces and an empty find result
    # adds nothing (a here-string would add one empty element).
    while IFS= read -r digest_path; do
      if [[ -n "$digest_path" ]]; then
        DIGESTS_FIND2+=("$digest_path")
      fi
    done < <(find "$d" -maxdepth 1 ! -type d \( -name '*.DIGESTS' -o -name '*.DIGESTS.asc' \) | sort -u)
    DIGESTS_ARGS2+=("${DIGESTS_FIND2[@]}")
    DIGESTS_FIND2=()
  fi
done
# Print every *.DIGESTS / *.DIGESTS.asc non-directory found under the given
# directories ("$@"), one path per line, sorted and de-duplicated.
find_digest_files() {
  find "$@" ! -type d \( -name '*.DIGESTS' -o -name '*.DIGESTS.asc' \) | sort -u
}

# Replace the list of directories to search with the digest files found in them.
if [[ "${#DIGESTS_FIND[@]}" -gt 0 ]]; then
  digest_dirs=("${DIGESTS_FIND[@]}")
  DIGESTS_FIND=()
  # Line-wise read: paths keep embedded spaces, and an empty result leaves the
  # array empty instead of holding a single empty string.
  while IFS= read -r digest_path; do
    if [[ -n "$digest_path" ]]; then
      DIGESTS_FIND+=("$digest_path")
    fi
  done < <(find_digest_files "${digest_dirs[@]}")
fi
# merge all items
DIGESTS=("${DIGESTS_ARGS2[@]}" "${DIGESTS_FIND[@]}")
# Prefer signed digests where possible, but sometimes they were in the original
# .DIGESTS file, and other times there was a separate .asc file.

# Print the variant of digest path $1 that should be checked:
# - "<base>.asc" when it exists (split signature, or clearsigned with extension)
# - "<base>" when only that exists (clearsigned, no extension)
# - nothing when neither exists
# <base> is $1 with one literal trailing ".asc" removed, so the function gives
# the same answer whichever of the two names it is handed.
prefer_signed_digest() {
  local base=${1%.asc}
  if [[ -e "${base}.asc" ]]; then
    printf '%s\n' "${base}.asc"
  elif [[ -e "${base}" ]]; then
    printf '%s\n' "${base}"
  fi
}

# Reduce every candidate to its base name so "x.DIGESTS" and "x.DIGESTS.asc"
# collapse into one entry; empty placeholders are skipped.
digest_bases=()
for d in "${DIGESTS[@]}"; do
  if [[ -n "$d" ]]; then
    digest_bases+=("${d%.asc}")
  fi
done
DIGESTS=()
if [[ "${#digest_bases[@]}" -gt 0 ]]; then
  # One path per line keeps names with spaces or glob characters intact.
  while IFS= read -r d; do
    chosen=$(prefer_signed_digest "$d")
    if [[ -n "$chosen" ]]; then
      DIGESTS+=("$chosen")
    fi
  done < <(printf '%s\n' "${digest_bases[@]}" | sort -u)
fi
# Setup storage for digest conversion & results
# - tmp1: the digest file currently being checked, converted to BSD format
# - tmp2: output of the checksum tool for that digest file
# - failures: accumulated tool output of every failed digest file; it is not
#   removed by the cleanup trap so it can be inspected after the run
T=$(date -u +%Y%m%dT%H%M%SZ)
tmp1=$(mktemp --tmpdir) || {
  echo "Could not create temporary file" 1>&2
  exit 1
}
# Scratch files are removed on every exit path. The signal traps only turn the
# signal into an exit (128 + signal number) so the EXIT trap does the cleanup
# and the script actually stops instead of carrying on with the next file.
trap 'rm -f -- "${tmp1}" "${tmp2}"' EXIT
trap 'exit 130' SIGINT
trap 'exit 143' SIGTERM
tmp2=$(mktemp --tmpdir) || {
  echo "Could not create temporary file" 1>&2
  exit 1
}
failures=$(mktemp --tmpdir "gentoo-failures.$T.XXXXXXXXXX") || {
  echo "Could not create failure report file" 1>&2
  exit 1
}
# Now check them

# Decide how to verify the BSD-format digest list stored in file $1.
# The known hash algorithms are tried from strongest to weakest; for the first
# one that has lines in the file AND can be verified with an installed tool,
# "<HASH> <tool>" is printed and zero is returned. The tool is "<hash>sum" in
# lower case (b2sum for BLAKE2), falling back to rhash when that is missing.
# When an algorithm is present but no tool can handle it, a message goes to
# stderr and the next weaker algorithm is tried. Returns non-zero, printing
# nothing on stdout, when no algorithm is usable.
select_digest_hash() {
  local list=$1 algo tool
  # "BLAKE2b" is spelled exactly as transform_digest writes it.
  for algo in BLAKE2b SHA3-512 WHIRLPOOL SHA512 SHA384 SHA256 SHA224; do
    # Whole-field match on the algorithm column, not a substring test.
    grep -q -- "^${algo} " "$list" || continue
    # Special case, the tool name is different than the hash.
    case "${algo,,}" in
      blake2b | blake2s) tool='b2sum' ;;
      *) tool="${algo,,}sum" ;;
    esac
    # Check we have the tooling to validate
    if command -v "$tool" > /dev/null; then
      :
    elif command -v rhash > /dev/null; then
      tool=rhash
    else
      echo "Could not find $tool or rhash to verify ${algo} hashes" 1>&2
      continue
    fi
    printf '%s %s\n' "$algo" "$tool"
    return 0
  done
  return 1
}

failed_digests=()
# Sorted, de-duplicated work list; read line-wise so paths are never split.
sorted_digests=()
while IFS= read -r d; do
  if [[ -n "$d" ]]; then
    sorted_digests+=("$d")
  fi
done < <(printf '%s\n' "${DIGESTS[@]}" | sort -u)

for d in "${sorted_digests[@]}"; do
  sleep 0.01
  printf 'Checking digests from %s: ' "$d"
  transform_digest < "$d" > "$tmp1"
  if selection=$(select_digest_hash "$tmp1"); then
    h=${selection%% *}
    cmd=${selection#* }
    echo "using $h"
    # Run the tooling now.
    # Only the checksum tool runs inside the digest file's directory (file
    # names in the list are relative to it); the subshell keeps the script's
    # own working directory untouched, so later relative paths stay valid.
    # TODO: if we assume rhash is available always, it could check all the hashes at once
    # but that means rewriting this loop of strength-ordering
    grep -- "^${h} " "$tmp1" \
      | (cd -- "$(dirname -- "$d")" > /dev/null && ionice -c 3 --ignore "${cmd}" -c -) \
      | tee "$tmp2"
    # Status of the middle stage: the checksum tool (or the failed cd).
    rc=${PIPESTATUS[1]}
    if [[ "$rc" -ne 0 ]]; then
      failed_digests+=("$d")
      cat "$tmp2" >> "$failures"
    fi
  else
    echo " FAIL - no usable digest"
  fi
done
# Handle output of errors
# Success: exit zero, and do not leave an empty failure report behind.
# Failure: list the affected digest files and the collected tool output on
# stderr, keep the report file, and exit non-zero.
if [[ "${#failed_digests[@]}" -eq 0 ]]; then
  # Only an empty report is removed; anything with content is kept.
  if [[ -n "${failures:-}" && ! -s "${failures}" ]]; then
    rm -f -- "${failures}"
  fi
  exit 0
fi
# The whole report goes to stderr, including the opening separator.
{
  echo "----"
  echo "Failures detected in the following DIGESTS:"
  for f in "${failed_digests[@]}"; do
    echo "$f"
  done
  echo "----"
  echo "Complete output of failed DIGESTS, stored in $failures:"
  cat "$failures"
} 1>&2
exit 1
|