...
1#!/usr/bin/env bats
2
3load helpers
4
# Per-test setup hook: check prerequisites and prepare the busybox bundle.
function setup() {
	# XXX: CRIU currently requires root containers, hence the
	# 'root' requirement in addition to 'criu'.
	requires criu root

	setup_busybox
}
11
# Per-test teardown hook: delegate cleanup to teardown_bundle.
function teardown() {
	teardown_bundle
}
15
# Prepare the container config and the stdio pipes for the "with pipes" tests.
#
# Sets the globals err_r/err_w, out_r/out_w and in_r/in_w to the read and
# write ends of three fresh pipes (for stderr, stdout and stdin). They are
# consumed by runc_run_with_pipes, runc_restore_with_pipes and check_pipes.
function setup_pipes() {
	# Running in detached mode needs 'terminal' turned off. The container
	# command loops up to 10 times, reading a line from stdin and echoing
	# it back prefixed with "ponG".
	# shellcheck disable=SC2016
	update_config '	(.. | select(.terminal? != null)) .terminal |= false
			| (.. | select(.[]? == "sh")) += ["-c", "for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"]'

	# Create three sets of pipes for __runc run. Each time, a pipe is
	# opened read-write on a scratch fd, reopened via /proc/self/fd as
	# separate read-only and write-only fds, and the scratch fd is closed.

	# stderr
	exec {pipe}<> <(:)
	# shellcheck disable=SC2094
	exec {err_r}<"/proc/self/fd/$pipe" {err_w}>"/proc/self/fd/$pipe"
	exec {pipe}>&-

	# stdout
	exec {pipe}<> <(:)
	# shellcheck disable=SC2094
	exec {out_r}<"/proc/self/fd/$pipe" {out_w}>"/proc/self/fd/$pipe"
	exec {pipe}>&-

	# stdin
	exec {pipe}<> <(:)
	# shellcheck disable=SC2094
	exec {in_r}<"/proc/self/fd/$pipe" {in_w}>"/proc/self/fd/$pipe"
	exec {pipe}>&-
}
39
# Check that the container is working by talking to it over the pipes
# created by setup_pipes: write "Ping" to its stdin, require "ponG Ping"
# somewhere in its stdout, and call fail if anything showed up on stderr.
# Closes all six pipe fds (in_r/in_w, out_r/out_w, err_r/err_w).
function check_pipes() {
	local output stderr

	echo Ping >&"${in_w}"
	# Close this shell's copies of the write ends (and the unused stdin
	# read end) before draining stdout and stderr with cat.
	exec {in_w}>&-
	exec {out_w}>&-
	exec {err_w}>&-

	exec {in_r}>&-
	output=$(cat <&"${out_r}")
	exec {out_r}>&-
	stderr=$(cat <&"${err_r}")
	exec {err_r}>&-

	[[ $output == *"ponG Ping"* ]]
	[ -z "$stderr" ] || fail "runc stderr: $stderr"
}
59
# Usage: runc_run_with_pipes container-name
#
# Start a detached container to be checkpointed, with its stdio connected
# to the pipes created by setup_pipes (globals in_r, out_w, err_w, err_r).
# If runc fails, print its exit status and captured stderr, then call fail;
# otherwise assert that the container is running.
function runc_run_with_pipes() {
	# Keep the exit status local so it does not leak into the calling test.
	local ret=0

	# stdin/stdout are redirected so that check_pipes can be used to check
	# the container is working fine. stderr has to be redirected as well,
	# because otherwise it goes to a bats log file, which is not accessible
	# to CRIU (i.e. outside of container), so checkpointing will fail.
	__runc run -d "$1" <&"${in_r}" >&"${out_w}" 2>&"${err_w}" || ret=$?
	if [ "$ret" -ne 0 ]; then
		echo "runc run -d $1 (status: $ret):"
		# Close our write end first so the cat below can reach EOF.
		exec {err_w}>&-
		cat <&"${err_r}"
		fail "runc run failed"
	fi

	testcontainer "$1" running
}
78
# Usage: runc_restore_with_pipes work-dir container-name [optional-arguments ...]
#
# Restore a detached container from ./image-dir with its stdio connected to
# the pipes created by setup_pipes (globals in_r, out_w, err_w, err_r). Any
# optional arguments are passed to "runc restore" before the container name.
# If runc fails, print its captured stderr plus any "Error" lines from the
# CRIU logs, then call fail. Otherwise assert that the container is running
# and that "runc exec" works inside it.
function runc_restore_with_pipes() {
	# Keep all scratch variables local so they do not leak into (or
	# clobber variables of) the calling test.
	local workdir name ret=0

	workdir="$1"
	shift
	name="$1"
	shift

	__runc restore -d --work-path "$workdir" --image-path ./image-dir "$@" "$name" <&"${in_r}" >&"${out_w}" 2>&"${err_w}" || ret=$?
	if [ "$ret" -ne 0 ]; then
		echo "__runc restore $name failed (status: $ret)"
		# Close our write end first so the cat below can reach EOF.
		exec {err_w}>&-
		cat <&"${err_r}"
		echo "CRIU log errors (if any):"
		grep -B 5 Error "$workdir"/*.log ./image-dir/*.log || true
		fail "runc restore failed"
	fi

	testcontainer "$name" running

	# Make sure the restored container can run a new process.
	runc exec --cwd /bin "$name" echo ok
	[ "$status" -eq 0 ]
	[[ ${output} == "ok" ]]
}
103
# Usage: simple_cr [runc-global-options ...]
#
# Start test_busybox, then checkpoint and restore it twice in a row,
# asserting the container state after every step. Any arguments are
# passed to runc ahead of the checkpoint/restore subcommand.
function simple_cr() {
	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
	[ "$status" -eq 0 ]

	testcontainer test_busybox running

	for _ in 1 2; do
		# Checkpoint the running container; show CRIU errors (if any)
		# before the status assertion can abort the test.
		runc "$@" checkpoint --work-path ./work-dir test_busybox
		grep -B 5 Error ./work-dir/dump.log || true
		[ "$status" -eq 0 ]

		# Once checkpointed, busybox is no longer running.
		testcontainer test_busybox checkpointed

		# Bring it back from the checkpoint.
		runc "$@" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
		grep -B 5 Error ./work-dir/restore.log || true
		[ "$status" -eq 0 ]

		# busybox should be up and running again.
		testcontainer test_busybox running
	done
}
128
129@test "checkpoint and restore" {
130 simple_cr
131}
132
133@test "checkpoint and restore (bind mount, destination is symlink)" {
134 mkdir -p rootfs/real/conf
135 ln -s /real/conf rootfs/conf
136 update_config ' .mounts += [{
137 source: ".",
138 destination: "/conf",
139 options: ["bind"]
140 }]'
141 simple_cr
142}
143
144@test "checkpoint and restore (with --debug)" {
145 simple_cr --debug
146}
147
148@test "checkpoint and restore (cgroupns)" {
149 # cgroupv2 already enables cgroupns so this case was tested above already
150 requires cgroups_v1 cgroupns
151
152 # enable CGROUPNS
153 update_config '.linux.namespaces += [{"type": "cgroup"}]'
154
155 simple_cr
156}
157
158@test "checkpoint --pre-dump (bad --parent-path)" {
159 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
160 [ "$status" -eq 0 ]
161
162 testcontainer test_busybox running
163
164 # runc should fail with absolute parent image path.
165 runc checkpoint --parent-path "$(pwd)"/parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox
166 [[ "${output}" == *"--parent-path"* ]]
167 [ "$status" -ne 0 ]
168
169 # runc should fail with invalid parent image path.
170 runc checkpoint --parent-path ./parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox
171 [[ "${output}" == *"--parent-path"* ]]
172 [ "$status" -ne 0 ]
173}
174
175@test "checkpoint --pre-dump and restore" {
176 setup_pipes
177 runc_run_with_pipes test_busybox
178
179 #test checkpoint pre-dump
180 mkdir parent-dir
181 runc checkpoint --pre-dump --image-path ./parent-dir test_busybox
182 [ "$status" -eq 0 ]
183
184 # busybox should still be running
185 testcontainer test_busybox running
186
187 # checkpoint the running container
188 mkdir image-dir
189 mkdir work-dir
190 runc checkpoint --parent-path ../parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox
191 grep -B 5 Error ./work-dir/dump.log || true
192 [ "$status" -eq 0 ]
193
194 # check parent path is valid
195 [ -e ./image-dir/parent ]
196
197 # after checkpoint busybox is no longer running
198 testcontainer test_busybox checkpointed
199
200 runc_restore_with_pipes ./work-dir test_busybox
201 check_pipes
202}
203
204@test "checkpoint --lazy-pages and restore" {
205 # check if lazy-pages is supported
206 if ! criu check --feature uffd-noncoop; then
207 skip "this criu does not support lazy migration"
208 fi
209
210 setup_pipes
211 runc_run_with_pipes test_busybox
212
213 # checkpoint the running container
214 mkdir image-dir
215 mkdir work-dir
216
217 # For lazy migration we need to know when CRIU is ready to serve
218 # the memory pages via TCP.
219 exec {pipe}<> <(:)
220 # shellcheck disable=SC2094
221 exec {lazy_r}</proc/self/fd/$pipe {lazy_w}>/proc/self/fd/$pipe
222 exec {pipe}>&-
223
224 # TCP port for lazy migration
225 port=27277
226
227 __runc checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_w} --work-path ./work-dir --image-path ./image-dir test_busybox &
228 cpt_pid=$!
229
230 # wait for lazy page server to be ready
231 out=$(timeout 2 dd if=/proc/self/fd/${lazy_r} bs=1 count=1 2>/dev/null | od)
232 exec {lazy_r}>&-
233 exec {lazy_w}>&-
234 # shellcheck disable=SC2116,SC2086
235 out=$(echo $out) # rm newlines
236 # show errors if there are any before we fail
237 grep -B5 Error ./work-dir/dump.log || true
238 # expecting \0 which od prints as
239 [ "$out" = "0000000 000000 0000001" ]
240
241 # Check if inventory.img was written
242 [ -e image-dir/inventory.img ]
243
244 # Start CRIU in lazy-daemon mode
245 criu lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &
246 lp_pid=$!
247
248 # Restore lazily from checkpoint.
249 # The restored container needs a different name (as well as systemd
250 # unit name, in case systemd cgroup driver is used) as the checkpointed
251 # container is not yet destroyed. It is only destroyed at that point
252 # in time when the last page is lazily transferred to the destination.
253 # Killing the CRIU on the checkpoint side will let the container
254 # continue to run if the migration failed at some point.
255 [ -n "$RUNC_USE_SYSTEMD" ] && set_cgroups_path
256 runc_restore_with_pipes ./image-dir test_busybox_restore --lazy-pages
257
258 wait $cpt_pid
259
260 wait $lp_pid
261
262 check_pipes
263}
264
265@test "checkpoint and restore in external network namespace" {
266 # check if external_net_ns is supported; only with criu 3.10++
267 if ! criu check --feature external_net_ns; then
268 # this criu does not support external_net_ns; skip the test
269 skip "this criu does not support external network namespaces"
270 fi
271
272 # create a temporary name for the test network namespace
273 tmp=$(mktemp)
274 rm -f "$tmp"
275 ns_name=$(basename "$tmp")
276 # create network namespace
277 ip netns add "$ns_name"
278 ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/')
279 # shellcheck disable=SC2012
280 ns_inode=$(ls -iL "$ns_path" | awk '{ print $1 }')
281
282 # tell runc which network namespace to use
283 update_config '(.. | select(.type? == "network")) .path |= "'"$ns_path"'"'
284
285 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
286 [ "$status" -eq 0 ]
287
288 testcontainer test_busybox running
289
290 for _ in $(seq 2); do
291 # checkpoint the running container; this automatically tells CRIU to
292 # handle the network namespace defined in config.json as an external
293 runc checkpoint --work-path ./work-dir test_busybox
294 grep -B 5 Error ./work-dir/dump.log || true
295 [ "$status" -eq 0 ]
296
297 # after checkpoint busybox is no longer running
298 testcontainer test_busybox checkpointed
299
300 # restore from checkpoint; this should restore the container into the existing network namespace
301 runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
302 grep -B 5 Error ./work-dir/restore.log || true
303 [ "$status" -eq 0 ]
304
305 # busybox should be back up and running
306 testcontainer test_busybox running
307
308 # container should be running in same network namespace as before
309 pid=$(__runc state test_busybox | jq '.pid')
310 ns_inode_new=$(readlink /proc/"$pid"/ns/net | sed -e 's/.*\[\(.*\)\]/\1/')
311 echo "old network namespace inode $ns_inode"
312 echo "new network namespace inode $ns_inode_new"
313 [ "$ns_inode" -eq "$ns_inode_new" ]
314 done
315 ip netns del "$ns_name"
316}
317
318@test "checkpoint and restore with container specific CRIU config" {
319 tmp=$(mktemp /tmp/runc-criu-XXXXXX.conf)
320 # This is the file we write to /etc/criu/default.conf
321 tmplog1=$(mktemp /tmp/runc-criu-log-XXXXXX.log)
322 unlink "$tmplog1"
323 tmplog1=$(basename "$tmplog1")
324 # That is the actual configuration file to be used
325 tmplog2=$(mktemp /tmp/runc-criu-log-XXXXXX.log)
326 unlink "$tmplog2"
327 tmplog2=$(basename "$tmplog2")
328 # This adds the annotation 'org.criu.config' to set a container
329 # specific CRIU config file.
330 update_config '.annotations += {"org.criu.config": "'"$tmp"'"}'
331
332 # Tell CRIU to use another configuration file
333 mkdir -p /etc/criu
334 echo "log-file=$tmplog1" >/etc/criu/default.conf
335 # Make sure the RPC defined configuration file overwrites the previous
336 echo "log-file=$tmplog2" >"$tmp"
337
338 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
339 [ "$status" -eq 0 ]
340
341 testcontainer test_busybox running
342
343 # checkpoint the running container
344 runc checkpoint --work-path ./work-dir test_busybox
345 grep -B 5 Error ./work-dir/dump.log || true
346 [ "$status" -eq 0 ]
347 run ! test -f ./work-dir/"$tmplog1"
348 test -f ./work-dir/"$tmplog2"
349
350 # after checkpoint busybox is no longer running
351 testcontainer test_busybox checkpointed
352
353 test -f ./work-dir/"$tmplog2" && unlink ./work-dir/"$tmplog2"
354 # restore from checkpoint
355 runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
356 grep -B 5 Error ./work-dir/restore.log || true
357 [ "$status" -eq 0 ]
358 run ! test -f ./work-dir/"$tmplog1"
359 test -f ./work-dir/"$tmplog2"
360
361 # busybox should be back up and running
362 testcontainer test_busybox running
363 unlink "$tmp"
364 test -f ./work-dir/"$tmplog2" && unlink ./work-dir/"$tmplog2"
365}
366
367@test "checkpoint and restore with nested bind mounts" {
368 bind1=$(mktemp -d -p .)
369 bind2=$(mktemp -d -p .)
370 update_config ' .mounts += [{
371 type: "bind",
372 source: "'"$bind1"'",
373 destination: "/test",
374 options: ["rw", "bind"]
375 },
376 {
377 type: "bind",
378 source: "'"$bind2"'",
379 destination: "/test/for/nested",
380 options: ["rw", "bind"]
381 }]'
382
383 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
384 [ "$status" -eq 0 ]
385
386 testcontainer test_busybox running
387
388 # checkpoint the running container
389 runc checkpoint --work-path ./work-dir test_busybox
390 grep -B 5 Error ./work-dir/dump.log || true
391 [ "$status" -eq 0 ]
392
393 # after checkpoint busybox is no longer running
394 testcontainer test_busybox checkpointed
395
396 # cleanup mountpoints created by runc during creation
397 # the mountpoints should be recreated during restore - that is the actual thing tested here
398 rm -rf "${bind1:?}"/*
399
400 # restore from checkpoint
401 runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
402 grep -B 5 Error ./work-dir/restore.log || true
403 [ "$status" -eq 0 ]
404
405 # busybox should be back up and running
406 testcontainer test_busybox running
407}
View as plain text