-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathglacierupload
executable file
·279 lines (224 loc) · 6.49 KB
/
glacierupload
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#!/bin/bash
# Debug
# set -x

############
# Constants
############

# Absolute path of the directory holding this script; the sibling helper
# programs (treehash, glacierabort) are invoked relative to it.
SCRIPT="$(cd "$(dirname "$0")"; pwd)"
# Value handed to GNU parallel's -j option (number of parallel uploads).
JOBS="100%"
# Value handed to GNU parallel's --retries option.
RETRIES=10
# Number of bytes in one MiB; part sizes are multiples of this.
MB=1048576

# Freeze all of the above in a single statement.
readonly SCRIPT JOBS RETRIES MB
########
# Parse command line options
########
# Print the command line synopsis and a description of every option to
# stdout. Takes no arguments.
function print_usage {
  # One printf call; each argument becomes one output line.
  printf '%s\n' \
    "usage: glacierupload -v|--vault <vault> [-p|--profile <profile>]" \
    " [-d|--description <description>] [-s|--split-size <level>] <file>" \
    "" \
    "Upload a file to a vault in AWS glacier." \
    "" \
    " --vault name of the vault to which the file should be uploaded" \
    " --profile optional profile name to use for the upload. The profile" \
    " name must be configured with the aws cli client." \
    " --description optional description of the file" \
    " --split-size level that determines the size of the parts used for" \
    " uploading the file. The level can be a number between" \
    " 0 and 12 and results in part size of (2^level) MBytes." \
    " If not specified the default is 0, i.e. the file is" \
    " uploaded in 1MByte parts. Maximum is 4GiB" \
    " --help print this message"
}
# Walk over the command line. Options that take a value store it in the
# matching arg_* variable; every other word is collected as a file to upload.
arg_positional=()
while [[ $# -gt 0 ]]; do
  key="$1"
  case "$key" in
    -h|--help)
      print_usage
      exit 0
      ;;
    -s|--split-size|-p|--profile|-d|--description|-v|--vault)
      # Each of these options consumes the following word as its value, so
      # refuse an option that is the last word instead of silently treating
      # its value as empty.
      if [[ $# -lt 2 ]]; then
        echo "Option $key requires a value!" >&2
        print_usage >&2
        exit 1
      fi
      case "$key" in
        -s|--split-size)
          arg_split="$2"
          ;;
        -p|--profile)
          arg_profile="$2"
          ;;
        -d|--description)
          arg_description="$2"
          ;;
        -v|--vault)
          arg_vault="$2"
          ;;
      esac
      shift 2 # past argument and value
      ;;
    *) # not a known option
      arg_positional+=("$1") # save it in an array for later
      shift # past argument
      ;;
  esac
done

if [[ -z "${arg_vault:-}" ]]; then
  echo "No vault specified!" >&2
  print_usage >&2
  exit 1
fi

if [[ ${#arg_positional[@]} -eq 0 ]]; then
  echo "No file specified!" >&2
  print_usage >&2
  exit 1
fi

# The split level is later used inside shell arithmetic (2**level), so only
# accept a plain decimal number in the documented range 0..12. An empty value
# keeps meaning "use the default".
if [[ -n "${arg_split:-}" ]]; then
  if [[ ! "$arg_split" =~ ^0*([0-9]|1[0-2])$ ]]; then
    echo "Invalid split size '$arg_split': must be a number between 0 and 12!" >&2
    print_usage >&2
    exit 1
  fi
  # Force base 10 so that a value such as "08" is not read as invalid octal.
  arg_split=$(( 10#$arg_split ))
fi
####################
# Script Parameters
####################
# Effective settings derived from the parsed command line. Optional values
# fall back to an empty string, the split level falls back to 0.
files=( "${arg_positional[@]}" )
profile="${arg_profile:-}"
vault="$arg_vault"
description="${arg_description:-}"
split_size=${arg_split:-0}

# None of these may change once the upload starts.
readonly files profile vault description split_size
##########
# Utility functions
##########
# Print the inclusive byte range "<first>-<last>" covered by one part.
# Globals:   part_size (read), file_size (read)
# Arguments: $1 - 1-based number of the part
# Outputs:   the range on stdout, without a trailing newline
function byte_range {
  # Keep all working variables local so a direct call cannot overwrite
  # variables of the same name in the caller.
  local -r index=$1
  local -r first=$(( (index - 1) * part_size ))
  local last=$(( index * part_size - 1 ))

  # The last part is usually shorter: clamp to the final byte of the file.
  if (( last > file_size - 1 )); then
    last=$(( file_size - 1 ))
  fi

  printf '%s-%s' "$first" "$last"
}
#########
# Upload functions
#########
# Upload one file to the vault as a glacier multipart upload.
#
# Steps: initiate the upload, let GNU parallel cut the file into parts and
# pipe each part into upload_part while the treehash helper writes the
# checksum to treehash.sha, then complete the upload with that checksum.
# On success the output of the complete call is printed and also written to
# "<file name>.<first 8 characters of that output>.upload.json" in the
# directory the function was called from.
#
# Globals:   split_size, MB, profile, vault, description, SCRIPT, JOBS,
#            RETRIES (all read)
# Arguments: $1 - path of the file to upload
# Returns:   0 on success; otherwise the status of the step that failed
function multipart_upload {
  local archive
  archive="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
  # Only powers of 2 are allowed, max is 2**12
  local part_size=$(( 2**split_size * MB ))
  local file_size
  file_size=$(wc -c < "$archive") || return 1
  # Some wc implementations pad the number with blanks; keep the bare number.
  file_size=$(( file_size ))
  local failure

  echo ""
  echo "---------------------------------------"
  echo "Upload $archive"
  echo "Total upload (bytes): $file_size"
  echo "---------------------------------------"
  echo ""

  # initiate multipart upload connection to glacier
  local -a init_args=()
  if [[ -n "$profile" ]]; then
    init_args+=('--profile' "$profile")
  fi
  init_args+=(
    'glacier' 'initiate-multipart-upload'
    '--output' 'text'
    '--query' '"uploadId"'
    '--account-id' '-'
    '--part-size' "$part_size"
    '--vault-name' "$vault"
    '--archive-description' "$description"
  )
  # Declared separately from the assignment so that $? is the status of aws
  # and not the status of 'local'.
  local upload_id
  upload_id="$(aws "${init_args[@]}")"
  failure=$?
  if (( failure > 0 )) || [[ -z "$upload_id" ]]; then
    echo "Failed to initiate multipart upload for $archive" >&2
    return $(( failure > 0 ? failure : 1 ))
  fi

  #########
  # Upload parts
  #########
  echo "Start upload $upload_id"

  # Create parts in a temporary directory
  local -r origin_dir="$PWD"
  local tmp_dir
  tmp_dir="$(mktemp -d)" || return 1
  if ! cd "$tmp_dir"; then
    rm -rf -- "$tmp_dir"
    return 1
  fi
  echo "From temporary directory: $tmp_dir"
  echo ""

  # parallel runs in a subprocess, so everything the jobs need has to be
  # exported
  export -f upload_part byte_range
  export SCRIPT JOBS RETRIES MB
  export upload_id
  export file_size part_size split_size
  export profile description archive vault
  parallel --no-notice --halt now,fail=1 --line-buffer -j "$JOBS" ::: \
    'parallel -a "$archive" --no-notice --pipepart --block $part_size --recend "" -j $JOBS --line-buffer --halt now,fail=1 --retries $RETRIES "upload_part {#}"' \
    '"$SCRIPT"/treehash "$archive" > treehash.sha'
  failure=$?

  local treehash=""
  if (( failure == 0 )); then
    treehash="$(< treehash.sha)"
  fi

  # Return to original directory and drop the temporary one on every path,
  # so a failed upload neither leaks it nor leaves the caller inside it.
  cd "$origin_dir" || return 1
  echo "Returning to $origin_dir"
  rm -rf -- "$tmp_dir"
  if (( failure > 0 )); then
    return "$failure"
  fi

  #########
  # Finish upload
  #########
  echo ""
  echo "Complete upload $upload_id :"
  echo ""
  local -a complete_args=()
  if [[ -n "$profile" ]]; then
    complete_args+=('--profile' "$profile")
  fi
  complete_args+=(
    'glacier' 'complete-multipart-upload'
    '--output' 'text'
    '--query' '"archiveId"'
    '--account-id' '-'
    '--checksum' "$treehash"
    '--archive-size' "$file_size"
    '--vault-name' "$vault"
    "--upload-id=$upload_id"
  )
  # Same split of declaration and assignment as above: otherwise a failed
  # completion would go unnoticed.
  local result
  result="$(aws "${complete_args[@]}")"
  failure=$?
  if (( failure > 0 )); then
    return "$failure"
  fi

  echo "$result"
  echo "$result" > "$(basename "$archive").${result:0:8}.upload.json"
}
# Upload a single part of the archive. The bytes of the part are read from
# stdin (GNU parallel pipes them in) and stored in a temporary file, which is
# then passed to 'aws glacier upload-multipart-part' as the request body.
#
# Globals:   part_size, file_size, profile, vault, upload_id (all read)
# Arguments: $1 - 1-based number of the part
# Outputs:   progress messages on stdout
# Exits:     with the status of aws if the upload fails, or 1 if the part
#            could not be written to the temporary file
function upload_part {
  local -r count=$1
  local part range failure

  # Write part data to tmp file
  part="$(mktemp)" || exit 1
  if ! cat > "$part"; then
    rm -f -- "$part"
    exit 1
  fi

  range="$(byte_range "$count")"
  local -r progress="($(( ${range##*-}/part_size ))/$(( file_size/part_size )))"
  echo "Uploading range $range $progress"

  local -a upload_args=()
  if [[ -n "$profile" ]]; then
    upload_args+=('--profile' "$profile")
  fi
  upload_args+=(
    'glacier' 'upload-multipart-part'
    '--account-id' '-'
    '--body' "$part"
    '--range' "bytes $range/*"
    '--vault-name' "$vault"
    "--upload-id=$upload_id"
  )
  aws "${upload_args[@]}"
  failure=$?

  # Remove tmp file before looking at the result: a part can be as large as
  # the whole part size, and failed attempts may be retried, so leaving it
  # behind on failure would fill up the temp directory.
  rm -f -- "$part"

  if (( failure > 0 )); then
    exit "$failure"
  fi
  echo "Finished upload of range $range $progress"
}
#########
# Upload files
#########
# Upload every requested file in turn. When an upload fails, glacierabort is
# called for the vault and the failure is remembered, so that the script
# does not report success just because the abort itself worked.
exit_status=0
for file in "${files[@]}"; do
  if ! multipart_upload "$file"; then
    exit_status=1
    closeupload_args=('--vault' "$vault")
    # Only pass a profile when one was given, like the aws calls do.
    if [[ -n "$profile" ]]; then
      closeupload_args+=('--profile' "$profile")
    fi
    "$SCRIPT"/glacierabort "${closeupload_args[@]}"
  fi
done
exit "$exit_status"