CoCalc -- test_snapshot

GitHub Repository: aos/firecracker
Path: blob/main/tests/integration_tests/functional/test_snapshot_basic.py
1958 views
1
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
# SPDX-License-Identifier: Apache-2.0
3
"""Basic tests scenarios for snapshot save/restore."""
4

5
import filecmp
6
import logging
7
import os
8
import tempfile
9
from pathlib import Path
10

11
from conftest import _test_images_s3_bucket
12
from framework.artifacts import ArtifactCollection, ArtifactSet
13
from framework.builder import MicrovmBuilder, SnapshotBuilder, SnapshotType
14
from framework.matrix import TestMatrix, TestContext
15
from framework.utils import wait_process_termination
16
from framework.utils_vsock import make_blob, \
17
    check_host_connections, check_guest_connections
18

19
import host_tools.network as net_tools  # pylint: disable=import-error
20
import host_tools.drive as drive_tools
21

22
# File name of the vsock Unix domain socket. It is prefixed with "/" when
# configuring the device and joined onto the jailer chroot path when the
# host side connects to it.
VSOCK_UDS_PATH = "v.sock"
23
# Port suffix appended to the UDS file name ("<uds>_<port>") to build the
# socket path used when checking guest-initiated vsock connections.
# NOTE(review): presumably the port the host-side echo server listens on.
ECHO_SERVER_PORT = 5252
24

25

26
def _guest_run_fio_iteration(ssh_connection, iteration):
    """Start one fio random-read job on the guest inside a `screen` session.

    The session is named `test<iteration>` and its output is logged to
    `/tmp/fio<iteration>`. Only the exit code of the `screen` launch command
    is checked.
    """
    fio_cmdline = """fio --filename=/dev/vda --direct=1 --rw=randread --bs=4k \
        --ioengine=libaio --iodepth=16 --runtime=10 --numjobs=4 --time_based \
        --group_reporting --name=iops-test-job --eta-newline=1 --readonly"""
    launch_cmd = "screen -L -Logfile /tmp/fio{} -dmS test{} {}".format(
        iteration, iteration, fio_cmdline
    )
    status, _, _ = ssh_connection.execute_command(launch_cmd)
    assert status == 0
34

35

36
def _get_guest_drive_size(ssh_connection, guest_dev_name='/dev/vdb'):
    """Return the size `lsblk -b` reports for a guest block device.

    The result is the stripped text of the second line printed by the
    command; the command must not write anything to stderr.
    """
    # The command is expected to print two lines: a "SIZE" header followed
    # by the size of the device, in bytes.
    _, out, err = ssh_connection.execute_command(
        "lsblk -b {} --output SIZE".format(guest_dev_name)
    )
    assert err.read() == ''
    out.readline()  # discard the "SIZE" header
    size_line = out.readline()
    return size_line.strip()
44

45

46
def _copy_vsock_data_to_guest(ssh_connection,
                              blob_path,
                              vm_blob_path,
                              vsock_helper):
    """Stage the vsock payload and the helper binary inside the guest.

    Mounts a tmpfs at /tmp/vsock on the guest, then copies `vsock_helper`
    to /bin/vsock_helper and `blob_path` to `vm_blob_path`.
    """
    setup_cmd = "mkdir -p /tmp/vsock && mount -t tmpfs tmpfs /tmp/vsock"
    status, _, _ = ssh_connection.execute_command(setup_cmd)
    assert status == 0, "Failed to set up tmpfs drive on the guest."

    # (host path, guest path) pairs, copied in this order.
    transfers = (
        (vsock_helper, '/bin/vsock_helper'),
        (blob_path, vm_blob_path),
    )
    for host_path, guest_path in transfers:
        ssh_connection.scp_file(host_path, guest_path)
57

58

59
def _test_seq_snapshots(context):
    """Run a chain of snapshot/restore cycles for one matrix combination.

    Builds and starts a microVM from the context artifacts, stages the vsock
    test data in the guest and takes a base snapshot. Then, `seq_len` times:
    restore the latest snapshot, check guest- and host-initiated vsock
    connections, start a fio job in the guest and snapshot again. For diff
    snapshots each new layer is merged with the previous snapshot via
    `rebase_snapshot`.

    `context.custom` must provide the keys 'logger', 'seq_len', 'builder',
    'snapshot_type', 'test_fc_session_root_path' and 'bin_vsock_path'.
    """
    logger = context.custom['logger']
    seq_len = context.custom['seq_len']
    vm_builder = context.custom['builder']
    snapshot_type = context.custom['snapshot_type']
    diff_snapshots = snapshot_type == SnapshotType.DIFF

    # Log calls pass lazy %-style arguments, matching the other tests in
    # this file, instead of formatting the message eagerly.
    logger.info("Testing %s with microvm: \"%s\", kernel %s, disk %s ",
                snapshot_type,
                context.microvm.name(),
                context.kernel.name(),
                context.disk.name())

    # Create a rw copy artifact.
    root_disk = context.disk.copy()
    # Get ssh key from read-only artifact.
    ssh_key = context.disk.ssh_key()
    # Create a fresh microvm from artifacts.
    vm_instance = vm_builder.build(kernel=context.kernel,
                                   disks=[root_disk],
                                   ssh_key=ssh_key,
                                   config=context.microvm,
                                   diff_snapshots=diff_snapshots)
    basevm = vm_instance.vm
    basevm.vsock.put(
        vsock_id="vsock0",
        guest_cid=3,
        uds_path="/{}".format(VSOCK_UDS_PATH)
    )

    basevm.start()
    ssh_connection = net_tools.SSHConnection(basevm.ssh_config)

    # Verify if guest can run commands.
    exit_code, _, _ = ssh_connection.execute_command("sync")
    assert exit_code == 0

    test_fc_session_root_path = context.custom['test_fc_session_root_path']
    vsock_helper = context.custom['bin_vsock_path']
    vm_blob_path = "/tmp/vsock/test.blob"
    # Generate a random data file for vsock.
    blob_path, blob_hash = make_blob(test_fc_session_root_path)
    # Copy the data file and a vsock helper to the guest.
    _copy_vsock_data_to_guest(ssh_connection,
                              blob_path,
                              vm_blob_path,
                              vsock_helper)

    logger.info("Create %s #0.", snapshot_type)
    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    # Create base snapshot.
    snapshot = snapshot_builder.create([root_disk.local_path()],
                                       ssh_key,
                                       snapshot_type)

    base_snapshot = snapshot
    basevm.kill()

    for i in range(seq_len):
        logger.info("Load snapshot #%s, mem %s", i, snapshot.mem)
        microvm, _ = vm_builder.build_from_snapshot(snapshot,
                                                    True,
                                                    diff_snapshots)

        # Attempt to connect to resumed microvm.
        ssh_connection = net_tools.SSHConnection(microvm.ssh_config)

        # Test vsock guest-initiated connections.
        path = os.path.join(
            microvm.path,
            "{}_{}".format(VSOCK_UDS_PATH, ECHO_SERVER_PORT)
        )
        check_guest_connections(microvm, path, vm_blob_path, blob_hash)
        # Test vsock host-initiated connections.
        path = os.path.join(microvm.jailer.chroot_path(), VSOCK_UDS_PATH)
        check_host_connections(microvm, path, blob_path, blob_hash)

        # Start a new instance of fio on each iteration.
        _guest_run_fio_iteration(ssh_connection, i)

        logger.info("Create snapshot #%s.", i + 1)

        # Create a snapshot builder from the currently running microvm.
        snapshot_builder = SnapshotBuilder(microvm)

        snapshot = snapshot_builder.create([root_disk.local_path()],
                                           ssh_key,
                                           snapshot_type)

        # If we are testing incremental snapshots we must merge the base with
        # current layer. `diff_snapshots` already holds the DIFF check.
        if diff_snapshots:
            logger.info("Base: %s, Layer: %s",
                        base_snapshot.mem,
                        snapshot.mem)
            snapshot.rebase_snapshot(base_snapshot)
            # Update the base for next iteration.
            base_snapshot = snapshot

        microvm.kill()
160

161

162
def _test_compare_mem_files(context):
    """Check that a full snapshot and the first diff snapshot match in memory.

    Builds and starts a microVM with diff snapshots enabled, takes a full
    snapshot and then a diff snapshot from the same snapshot builder, and
    asserts that the two memory files have identical content.

    `context.custom` must provide the keys 'logger' and 'builder'.
    """
    logger = context.custom['logger']
    vm_builder = context.custom['builder']

    # Create a rw copy artifact.
    root_disk = context.disk.copy()
    # Get ssh key from read-only artifact.
    ssh_key = context.disk.ssh_key()
    # Create a fresh microvm from artifacts.
    vm_instance = vm_builder.build(kernel=context.kernel,
                                   disks=[root_disk],
                                   ssh_key=ssh_key,
                                   config=context.microvm,
                                   diff_snapshots=True)
    basevm = vm_instance.vm
    basevm.start()
    ssh_connection = net_tools.SSHConnection(basevm.ssh_config)

    # Verify if guest can run commands.
    exit_code, _, _ = ssh_connection.execute_command("sync")
    assert exit_code == 0

    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    logger.info("Create full snapshot.")
    # Create full snapshot.
    full_snapshot = snapshot_builder.create([root_disk.local_path()],
                                            ssh_key,
                                            SnapshotType.FULL)

    logger.info("Create diff snapshot.")
    # Create diff snapshot.
    diff_snapshot = snapshot_builder.create([root_disk.local_path()],
                                            ssh_key,
                                            SnapshotType.DIFF,
                                            mem_file_name="diff_vm.mem",
                                            snapshot_name="diff_vm.vmstate")
    # shallow=False forces a byte-by-byte comparison. The default shallow
    # mode reports equality as soon as the os.stat() signatures (type, size,
    # mtime) match, without reading the file contents.
    assert filecmp.cmp(full_snapshot.mem, diff_snapshot.mem, shallow=False)

    basevm.kill()
203

204

205
def test_patch_drive_snapshot(bin_cloner_path):
    """Test scenario: snapshot and restore a microVM after patching a drive.

    A scratch drive is attached, the microVM is started, and the drive is
    patched to a backing file twice the size. A full snapshot is then taken
    and loaded into a new microVM, whose guest must report the size of the
    patched backing file for the scratch device.
    """
    logger = logging.getLogger("snapshot_sequence")

    vm_builder = MicrovmBuilder(bin_cloner_path)
    snapshot_type = SnapshotType.FULL
    diff_snapshots = False

    # Use a predefined vm instance.
    vm_instance = vm_builder.build_vm_nano()
    basevm = vm_instance.vm
    root_disk = vm_instance.disks[0]
    ssh_key = vm_instance.ssh_key

    # Add a scratch 128MB RW non-root block device.
    # NOTE(review): tempfile.mktemp() is deprecated because the returned name
    # can be claimed by another process before it is used. It is kept here
    # since FilesystemFile is handed a path -- confirm whether it requires a
    # non-existent path before switching to mkstemp().
    scratchdisk1 = drive_tools.FilesystemFile(tempfile.mktemp(), size=128)
    basevm.add_drive('scratch', scratchdisk1.path)

    basevm.start()
    ssh_connection = net_tools.SSHConnection(basevm.ssh_config)

    # Verify if guest can run commands.
    exit_code, _, _ = ssh_connection.execute_command("sync")
    assert exit_code == 0

    # Update drive to have another backing file, double in size.
    new_file_size_mb = 2 * int(scratchdisk1.size()/(1024*1024))
    logger.info("Patch drive, new file: size %sMB.", new_file_size_mb)
    scratchdisk1 = drive_tools.FilesystemFile(tempfile.mktemp(),
                                              new_file_size_mb)
    basevm.patch_drive('scratch', scratchdisk1)

    logger.info("Create %s #0.", snapshot_type)
    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    disks = [root_disk.local_path(), scratchdisk1.path]
    # Create base snapshot.
    snapshot = snapshot_builder.create(disks,
                                       ssh_key,
                                       snapshot_type)

    basevm.kill()

    # Load snapshot in a new Firecracker microVM.
    logger.info("Load snapshot, mem %s", snapshot.mem)
    microvm, _ = vm_builder.build_from_snapshot(snapshot,
                                                True,
                                                diff_snapshots)
    # Attempt to connect to resumed microvm.
    ssh_connection = net_tools.SSHConnection(microvm.ssh_config)

    # Verify the new microVM has the right scratch drive: the size seen by
    # the guest must equal the size of the patched backing file.
    guest_drive_size = _get_guest_drive_size(ssh_connection)
    assert guest_drive_size == str(scratchdisk1.size())

    microvm.kill()
262

263

264
def test_5_full_snapshots(network_config,
                          bin_cloner_path,
                          bin_vsock_path,
                          test_fc_session_root_path):
    """Test scenario: 5 full sequential snapshots."""
    logger = logging.getLogger("snapshot_sequence")

    artifacts = ArtifactCollection(_test_images_s3_bucket())
    # Testing matrix (as selected by the keywords below):
    # - Microvm: configurations matching "2vcpu_256mb"
    # - Guest kernel: kernels matching "vmlinux-4.14"
    # - Rootfs: disks matching "ubuntu"
    # TODO: Multiple microvm sizes must be tested in the async pipeline.
    microvm_artifacts = ArtifactSet(artifacts.microvms(keyword="2vcpu_256mb"))
    kernel_artifacts = ArtifactSet(artifacts.kernels(keyword="vmlinux-4.14"))
    disk_artifacts = ArtifactSet(artifacts.disks(keyword="ubuntu"))

    # Create a test context and add builder, logger, network, plus the
    # snapshot type, sequence length and vsock inputs read by
    # _test_seq_snapshots.
    test_context = TestContext()
    test_context.custom = {
        'builder': MicrovmBuilder(bin_cloner_path),
        'network_config': network_config,
        'logger': logger,
        'snapshot_type': SnapshotType.FULL,
        'seq_len': 5,
        'bin_vsock_path': bin_vsock_path,
        'test_fc_session_root_path': test_fc_session_root_path
    }

    # Create the test matrix.
    test_matrix = TestMatrix(context=test_context,
                             artifact_sets=[
                                 microvm_artifacts,
                                 kernel_artifacts,
                                 disk_artifacts
                             ])

    test_matrix.run_test(_test_seq_snapshots)
302

303

304
def test_5_inc_snapshots(network_config,
                         bin_cloner_path,
                         bin_vsock_path,
                         test_fc_session_root_path):
    """Test scenario: 5 incremental snapshots with disk intensive workload."""
    logger = logging.getLogger("snapshot_sequence")

    artifacts = ArtifactCollection(_test_images_s3_bucket())
    # Testing matrix (as selected by the keywords below):
    # - Microvm: configurations matching "2vcpu_4096mb"
    # - Guest kernel: kernels matching "vmlinux-4.14"
    # - Rootfs: disks matching "ubuntu"
    # TODO: Multiple microvm sizes must be tested in the async pipeline.
    microvm_artifacts = ArtifactSet(artifacts.microvms(keyword="2vcpu_4096mb"))
    kernel_artifacts = ArtifactSet(artifacts.kernels(keyword="vmlinux-4.14"))
    disk_artifacts = ArtifactSet(artifacts.disks(keyword="ubuntu"))

    # Create a test context and add builder, logger, network, plus the
    # snapshot type, sequence length and vsock inputs read by
    # _test_seq_snapshots.
    test_context = TestContext()
    test_context.custom = {
        'builder': MicrovmBuilder(bin_cloner_path),
        'network_config': network_config,
        'logger': logger,
        'snapshot_type': SnapshotType.DIFF,
        'seq_len': 5,
        'bin_vsock_path': bin_vsock_path,
        'test_fc_session_root_path': test_fc_session_root_path
    }

    # Create the test matrix.
    test_matrix = TestMatrix(context=test_context,
                             artifact_sets=[
                                 microvm_artifacts,
                                 kernel_artifacts,
                                 disk_artifacts
                             ])

    test_matrix.run_test(_test_seq_snapshots)
342

343

344
def test_load_snapshot_failure_handling(test_microvm_with_api):
    """
    Test scenario.

    1. Create two empty files standing in for the snapshot memory and the
    microvm state.
    2. Ask Firecracker to load a snapshot from those empty files.
    3. Check that the API reports an error and the FC process terminates.
    """
    logger = logging.getLogger("snapshot_load_failure")
    vm = test_microvm_with_api
    vm.spawn(log_level='Info')

    # The placeholder files live in a "snapshot" directory under the chroot.
    snapshot_dir = os.path.join(vm.jailer.chroot_path(), "snapshot")
    Path(snapshot_dir).mkdir(parents=True, exist_ok=True)

    # Create (or truncate) the memory file first, then the state file.
    placeholder_paths = []
    for file_name in ("snapshot_mem", "snapshot_vmstate"):
        file_path = os.path.join(snapshot_dir, file_name)
        with open(file_path, "w+"):
            pass
        placeholder_paths.append(file_path)
    snapshot_mem, snapshot_vmstate = placeholder_paths

    # Make both files available inside the microvm jail.
    jailed_mem = vm.create_jailed_resource(snapshot_mem)
    jailed_vmstate = vm.create_jailed_resource(snapshot_vmstate)

    # Attempt the load; it is expected to be rejected.
    response = vm.snapshot.load(mem_file_path=jailed_mem,
                                snapshot_path=jailed_vmstate)

    logger.info("Response status code %d, content: %s.",
                response.status_code,
                response.text)
    assert vm.api_session.is_status_bad_request(response.status_code)
    assert "Cannot deserialize the microVM state" in response.text

    # Wait for the FC process to exit.
    wait_process_termination(vm.jailer_clone_pid)
383

384

385
def test_cmp_full_and_first_diff_mem(network_config,
                                     bin_cloner_path):
    """Test scenario: cmp memory of 2 consecutive full and diff snapshots."""
    logger = logging.getLogger("snapshot_sequence")

    artifacts = ArtifactCollection(_test_images_s3_bucket())
    # One artifact set per matrix dimension, selected by keyword:
    # microvm configuration, guest kernel and root filesystem.
    microvm_artifacts = ArtifactSet(artifacts.microvms(keyword="2vcpu_512mb"))
    kernel_artifacts = ArtifactSet(artifacts.kernels(keyword="vmlinux-4.14"))
    disk_artifacts = ArtifactSet(artifacts.disks(keyword="ubuntu"))

    # Shared context handed to every matrix run: builder, network, logger.
    test_context = TestContext()
    custom_settings = {
        'builder': MicrovmBuilder(bin_cloner_path),
        'network_config': network_config,
        'logger': logger
    }
    test_context.custom = custom_settings

    # Build the matrix over the three artifact sets and run the comparison.
    artifact_sets = [microvm_artifacts, kernel_artifacts, disk_artifacts]
    test_matrix = TestMatrix(context=test_context,
                             artifact_sets=artifact_sets)

    test_matrix.run_test(_test_compare_mem_files)
416

417

418
def test_negative_postload_api(bin_cloner_path):
    """Test APIs fail after loading from snapshot."""
    logger = logging.getLogger("snapshot_api_fail")

    vm_builder = MicrovmBuilder(bin_cloner_path)
    vm_instance = vm_builder.build_vm_nano(diff_snapshots=True)
    basevm = vm_instance.vm
    root_disk = vm_instance.disks[0]
    ssh_key = vm_instance.ssh_key

    basevm.start()
    ssh_connection = net_tools.SSHConnection(basevm.ssh_config)

    # The guest must be able to run a command before it is snapshotted.
    sync_status, _, _ = ssh_connection.execute_command("sync")
    assert sync_status == 0

    logger.info("Create snapshot")
    # Take the base (diff) snapshot of the running microvm.
    snapshot = SnapshotBuilder(basevm).create([root_disk.local_path()],
                                              ssh_key,
                                              SnapshotType.DIFF)

    basevm.kill()

    logger.info("Load snapshot, mem %s", snapshot.mem)
    # Do not resume, just load, so we can still call APIs that work.
    microvm, _ = vm_builder.build_from_snapshot(snapshot,
                                                False,
                                                True)
    fail_msg = "The requested operation is not supported after starting " \
        "the microVM"

    start_response = microvm.actions.put(action_type='InstanceStart')
    assert fail_msg in start_response.text

    # basic_config() must fail with an AssertionError carrying fail_msg.
    config_error = None
    try:
        microvm.basic_config()
    except AssertionError as error:
        config_error = error
    assert config_error is not None, "Negative test failed"
    assert fail_msg in str(config_error)

    microvm.kill()
465

466

467
def test_negative_snapshot_permissions(bin_cloner_path):
    """Test missing permission error scenarios."""
    logger = logging.getLogger("snapshot_negative")
    vm_builder = MicrovmBuilder(bin_cloner_path)

    def expect_assertion(action, expected_text):
        """Call `action`; it must raise AssertionError with `expected_text`."""
        try:
            action()
        except AssertionError as error:
            # Check if proper error is returned.
            assert expected_text in str(error)
        else:
            assert False, "Negative test failed"

    # Use a predefined vm instance.
    vm_instance = vm_builder.build_vm_nano()
    basevm = vm_instance.vm
    root_disk = vm_instance.disks[0]
    ssh_key = vm_instance.ssh_key

    basevm.start()

    logger.info("Create snapshot")
    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    disks = [root_disk.local_path()]

    def create_full_snapshot():
        return snapshot_builder.create(disks,
                                       ssh_key,
                                       SnapshotType.FULL)

    # With write permissions removed from the chroot, snapshot creation
    # has to fail.
    os.chmod(basevm.jailer.chroot_path(), 0o444)
    expect_assertion(create_full_snapshot, "Permission denied")

    # Restore proper permissions.
    os.chmod(basevm.jailer.chroot_path(), 0o744)

    # Create base snapshot.
    snapshot = create_full_snapshot()

    logger.info("Load snapshot, mem %s", snapshot.mem)

    basevm.kill()

    def load_snapshot():
        _, _ = vm_builder.build_from_snapshot(snapshot, True, True)

    # Remove permissions for mem file.
    os.chmod(snapshot.mem, 0o000)
    expect_assertion(load_snapshot,
                     "Cannot open the memory file: Permission denied")

    # Remove permissions for state file.
    os.chmod(snapshot.vmstate, 0o000)
    expect_assertion(load_snapshot,
                     "Cannot perform open on the snapshot backing file:"
                     " Permission denied")

    # Restore permissions for both the state file and the mem file.
    os.chmod(snapshot.vmstate, 0o744)
    os.chmod(snapshot.mem, 0o744)

    # Remove permissions for block file.
    os.chmod(snapshot.disks[0], 0o000)
    expect_assertion(load_snapshot,
                     "Block(Os { code: 13, kind: PermissionDenied")
548

549

550
def test_negative_snapshot_create(bin_cloner_path):
    """Test create snapshot before pause."""
    vm = MicrovmBuilder(bin_cloner_path).build_vm_nano().vm

    vm.start()

    # A snapshot request against a running microvm must be rejected.
    running_response = vm.snapshot.create(mem_file_path='memfile',
                                          snapshot_path='statefile',
                                          diff=False)

    assert vm.api_session.is_status_bad_request(running_response.status_code)
    assert "save/restore unavailable while running" in running_response.text

    pause_response = vm.vm.patch(state='Paused')
    assert vm.api_session.is_status_no_content(pause_response.status_code)

    # Try diff with dirty pages tracking disabled.
    diff_response = vm.snapshot.create(mem_file_path='memfile',
                                       snapshot_path='statefile',
                                       diff=True)
    msg = "Diff snapshots are not allowed on uVMs with dirty page" \
          " tracking disabled"
    assert msg in diff_response.text
    # Neither output file may exist at these relative paths afterwards.
    for leftover in ('statefile', 'memfile'):
        assert not os.path.exists(leftover)

    vm.kill()
579

580
Product

Resources

Company