Path: blob/main/tests/integration_tests/functional/test_snapshot_basic.py
1958 views
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Basic tests scenarios for snapshot save/restore."""

import filecmp
import logging
import os
import tempfile
from pathlib import Path

from conftest import _test_images_s3_bucket
from framework.artifacts import ArtifactCollection, ArtifactSet
from framework.builder import MicrovmBuilder, SnapshotBuilder, SnapshotType
from framework.matrix import TestMatrix, TestContext
from framework.utils import wait_process_termination
from framework.utils_vsock import make_blob, \
    check_host_connections, check_guest_connections

import host_tools.network as net_tools  # pylint: disable=import-error
import host_tools.drive as drive_tools

VSOCK_UDS_PATH = "v.sock"
ECHO_SERVER_PORT = 5252


def _new_scratch_path():
    """Return a path to a not-yet-existing file in a private temp directory.

    Replacement for the deprecated, race-prone `tempfile.mktemp()`: the
    directory is created atomically by `mkdtemp()`, so nobody else can
    pre-create the returned file name.
    """
    return os.path.join(tempfile.mkdtemp(), "scratch")


def _guest_run_fio_iteration(ssh_connection, iteration):
    """Launch a detached `fio` random-read job on /dev/vda in the guest.

    The job is started inside a `screen` session named `test<iteration>`
    which logs to `/tmp/fio<iteration>`; only the launch command's exit
    code is checked.
    """
    fio = """fio --filename=/dev/vda --direct=1 --rw=randread --bs=4k \
        --ioengine=libaio --iodepth=16 --runtime=10 --numjobs=4 --time_based \
        --group_reporting --name=iops-test-job --eta-newline=1 --readonly"""
    ssh_cmd = "screen -L -Logfile /tmp/fio{} -dmS test{} {}"
    ssh_cmd = ssh_cmd.format(iteration, iteration, fio)
    exit_code, _, _ = ssh_connection.execute_command(ssh_cmd)
    assert exit_code == 0


def _get_guest_drive_size(ssh_connection, guest_dev_name='/dev/vdb'):
    """Return the size in bytes, as a string, of a guest block device."""
    # `lsblk` command outputs 2 lines to STDOUT:
    # "SIZE" and the size of the device, in bytes.
    blksize_cmd = "lsblk -b {} --output SIZE".format(guest_dev_name)
    _, stdout, stderr = ssh_connection.execute_command(blksize_cmd)
    assert stderr.read() == ''
    stdout.readline()  # skip "SIZE"
    return stdout.readline().strip()


def _copy_vsock_data_to_guest(ssh_connection,
                              blob_path,
                              vm_blob_path,
                              vsock_helper):
    """Mount a tmpfs in the guest and copy the vsock helper and blob to it.

    The helper binary is copied to `/bin/vsock_helper` and the blob to
    `vm_blob_path`.
    """
    # Copy the data file and a vsock helper to the guest.
    cmd = "mkdir -p /tmp/vsock && mount -t tmpfs tmpfs /tmp/vsock"
    ecode, _, _ = ssh_connection.execute_command(cmd)
    assert ecode == 0, "Failed to set up tmpfs drive on the guest."

    ssh_connection.scp_file(vsock_helper, '/bin/vsock_helper')
    ssh_connection.scp_file(blob_path, vm_blob_path)


def _assert_snapshot_load_fails(vm_builder, snapshot, expected_error):
    """Assert that loading `snapshot` fails with `expected_error`.

    The builder is expected to signal the failure with an AssertionError
    whose message contains `expected_error`; a successful load fails the
    test.
    """
    try:
        vm_builder.build_from_snapshot(snapshot, True, True)
    except AssertionError as error:
        # Check if proper error is returned.
        assert expected_error in str(error)
    else:
        assert False, "Negative test failed"


def _run_snapshot_matrix(test_fn, microvm_keyword, bin_cloner_path,
                         network_config, logger, **extra_custom):
    """Run `test_fn` on every artifact combination of the test matrix.

    The matrix is made of the microvm configs matching `microvm_keyword`,
    the "vmlinux-4.14" kernels and the "ubuntu" disks. `extra_custom`
    entries are added to the context's `custom` dict next to the builder,
    the network config and the logger.
    """
    artifacts = ArtifactCollection(_test_images_s3_bucket())
    microvm_artifacts = ArtifactSet(
        artifacts.microvms(keyword=microvm_keyword)
    )
    kernel_artifacts = ArtifactSet(artifacts.kernels(keyword="vmlinux-4.14"))
    disk_artifacts = ArtifactSet(artifacts.disks(keyword="ubuntu"))

    # Create a test context and add builder, logger, network.
    test_context = TestContext()
    test_context.custom = {
        'builder': MicrovmBuilder(bin_cloner_path),
        'network_config': network_config,
        'logger': logger,
    }
    test_context.custom.update(extra_custom)

    # Create the test matrix.
    test_matrix = TestMatrix(context=test_context,
                             artifact_sets=[
                                 microvm_artifacts,
                                 kernel_artifacts,
                                 disk_artifacts
                             ])

    test_matrix.run_test(test_fn)


def _test_seq_snapshots(context):
    """Create and restore a chain of `seq_len` snapshots.

    A base snapshot is taken from a freshly booted microvm. Each iteration
    then loads the latest snapshot, checks vsock connections in both
    directions, starts a fio job in the guest and snapshots again. For
    diff snapshots every new layer is rebased on the previous one.
    """
    logger = context.custom['logger']
    seq_len = context.custom['seq_len']
    vm_builder = context.custom['builder']
    snapshot_type = context.custom['snapshot_type']
    diff_snapshots = snapshot_type == SnapshotType.DIFF

    logger.info('Testing %s with microvm: "%s", kernel %s, disk %s ',
                snapshot_type,
                context.microvm.name(),
                context.kernel.name(),
                context.disk.name())

    # Create a rw copy artifact.
    root_disk = context.disk.copy()
    # Get ssh key from read-only artifact.
    ssh_key = context.disk.ssh_key()
    # Create a fresh microvm from artifacts.
    vm_instance = vm_builder.build(kernel=context.kernel,
                                   disks=[root_disk],
                                   ssh_key=ssh_key,
                                   config=context.microvm,
                                   diff_snapshots=diff_snapshots)
    basevm = vm_instance.vm
    basevm.vsock.put(
        vsock_id="vsock0",
        guest_cid=3,
        uds_path="/{}".format(VSOCK_UDS_PATH)
    )

    basevm.start()
    ssh_connection = net_tools.SSHConnection(basevm.ssh_config)

    # Verify if guest can run commands.
    exit_code, _, _ = ssh_connection.execute_command("sync")
    assert exit_code == 0

    test_fc_session_root_path = context.custom['test_fc_session_root_path']
    vsock_helper = context.custom['bin_vsock_path']
    vm_blob_path = "/tmp/vsock/test.blob"
    # Generate a random data file for vsock.
    blob_path, blob_hash = make_blob(test_fc_session_root_path)
    # Copy the data file and a vsock helper to the guest.
    _copy_vsock_data_to_guest(ssh_connection,
                              blob_path,
                              vm_blob_path,
                              vsock_helper)

    logger.info("Create %s #0.", snapshot_type)
    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    # Create base snapshot.
    snapshot = snapshot_builder.create([root_disk.local_path()],
                                       ssh_key,
                                       snapshot_type)

    base_snapshot = snapshot
    basevm.kill()

    for i in range(seq_len):
        logger.info("Load snapshot #%s, mem %s", i, snapshot.mem)
        microvm, _ = vm_builder.build_from_snapshot(snapshot,
                                                    True,
                                                    diff_snapshots)

        # Attempt to connect to resumed microvm.
        ssh_connection = net_tools.SSHConnection(microvm.ssh_config)

        # Test vsock guest-initiated connections.
        path = os.path.join(
            microvm.path,
            "{}_{}".format(VSOCK_UDS_PATH, ECHO_SERVER_PORT)
        )
        check_guest_connections(microvm, path, vm_blob_path, blob_hash)
        # Test vsock host-initiated connections.
        path = os.path.join(microvm.jailer.chroot_path(), VSOCK_UDS_PATH)
        check_host_connections(microvm, path, blob_path, blob_hash)

        # Start a new instance of fio on each iteration.
        _guest_run_fio_iteration(ssh_connection, i)

        logger.info("Create snapshot #%s.", i + 1)

        # Create a snapshot builder from the currently running microvm.
        snapshot_builder = SnapshotBuilder(microvm)

        snapshot = snapshot_builder.create([root_disk.local_path()],
                                           ssh_key,
                                           snapshot_type)

        # If we are testing incremental snapshots we must merge the base with
        # current layer.
        if diff_snapshots:
            logger.info("Base: %s, Layer: %s",
                        base_snapshot.mem,
                        snapshot.mem)
            snapshot.rebase_snapshot(base_snapshot)
            # Update the base for next iteration.
            base_snapshot = snapshot

        microvm.kill()


def _test_compare_mem_files(context):
    """Check that a full and a following diff snapshot have equal memory.

    Both snapshots are taken back to back from the same microvm, built
    with `diff_snapshots=True`, and their memory files are compared with
    `filecmp.cmp`.
    """
    logger = context.custom['logger']
    vm_builder = context.custom['builder']

    # Create a rw copy artifact.
    root_disk = context.disk.copy()
    # Get ssh key from read-only artifact.
    ssh_key = context.disk.ssh_key()
    # Create a fresh microvm from artifacts.
    vm_instance = vm_builder.build(kernel=context.kernel,
                                   disks=[root_disk],
                                   ssh_key=ssh_key,
                                   config=context.microvm,
                                   diff_snapshots=True)
    basevm = vm_instance.vm
    basevm.start()
    ssh_connection = net_tools.SSHConnection(basevm.ssh_config)

    # Verify if guest can run commands.
    exit_code, _, _ = ssh_connection.execute_command("sync")
    assert exit_code == 0

    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    logger.info("Create full snapshot.")
    # Create full snapshot.
    full_snapshot = snapshot_builder.create([root_disk.local_path()],
                                            ssh_key,
                                            SnapshotType.FULL)

    logger.info("Create diff snapshot.")
    # Create diff snapshot.
    diff_snapshot = snapshot_builder.create([root_disk.local_path()],
                                            ssh_key,
                                            SnapshotType.DIFF,
                                            mem_file_name="diff_vm.mem",
                                            snapshot_name="diff_vm.vmstate")
    assert filecmp.cmp(full_snapshot.mem, diff_snapshot.mem)

    basevm.kill()


def test_patch_drive_snapshot(bin_cloner_path):
    """Test scenario: a drive patched before snapshot is seen after load."""
    logger = logging.getLogger("snapshot_sequence")

    vm_builder = MicrovmBuilder(bin_cloner_path)
    snapshot_type = SnapshotType.FULL
    diff_snapshots = False

    # Use a predefined vm instance.
    vm_instance = vm_builder.build_vm_nano()
    basevm = vm_instance.vm
    root_disk = vm_instance.disks[0]
    ssh_key = vm_instance.ssh_key

    # Add a scratch 128MB RW non-root block device.
    scratchdisk1 = drive_tools.FilesystemFile(_new_scratch_path(), size=128)
    basevm.add_drive('scratch', scratchdisk1.path)

    basevm.start()
    ssh_connection = net_tools.SSHConnection(basevm.ssh_config)

    # Verify if guest can run commands.
    exit_code, _, _ = ssh_connection.execute_command("sync")
    assert exit_code == 0

    # Update drive to have another backing file, double in size.
    new_file_size_mb = 2 * int(scratchdisk1.size()/(1024*1024))
    logger.info("Patch drive, new file: size %sMB.", new_file_size_mb)
    scratchdisk1 = drive_tools.FilesystemFile(_new_scratch_path(),
                                              new_file_size_mb)
    basevm.patch_drive('scratch', scratchdisk1)

    logger.info("Create %s #0.", snapshot_type)
    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    disks = [root_disk.local_path(), scratchdisk1.path]
    # Create base snapshot.
    snapshot = snapshot_builder.create(disks,
                                       ssh_key,
                                       snapshot_type)

    basevm.kill()

    # Load snapshot in a new Firecracker microVM.
    logger.info("Load snapshot, mem %s", snapshot.mem)
    microvm, _ = vm_builder.build_from_snapshot(snapshot,
                                                True,
                                                diff_snapshots)
    # Attempt to connect to resumed microvm.
    ssh_connection = net_tools.SSHConnection(microvm.ssh_config)

    # Verify the new microVM has the right scratch drive.
    guest_drive_size = _get_guest_drive_size(ssh_connection)
    assert guest_drive_size == str(scratchdisk1.size())

    microvm.kill()


def test_5_full_snapshots(network_config,
                          bin_cloner_path,
                          bin_vsock_path,
                          test_fc_session_root_path):
    """Test scenario: 5 full sequential snapshots."""
    logger = logging.getLogger("snapshot_sequence")

    # Testing matrix:
    # - Guest kernel: Linux 4.14
    # - Rootfs: Ubuntu
    # - Microvm: 2vCPU with 256 MB RAM
    # TODO: Multiple microvm sizes must be tested in the async pipeline.
    _run_snapshot_matrix(
        _test_seq_snapshots,
        "2vcpu_256mb",
        bin_cloner_path,
        network_config,
        logger,
        snapshot_type=SnapshotType.FULL,
        seq_len=5,
        bin_vsock_path=bin_vsock_path,
        test_fc_session_root_path=test_fc_session_root_path
    )


def test_5_inc_snapshots(network_config,
                         bin_cloner_path,
                         bin_vsock_path,
                         test_fc_session_root_path):
    """Test scenario: 5 incremental snapshots with disk intensive workload."""
    logger = logging.getLogger("snapshot_sequence")

    # Testing matrix:
    # - Guest kernel: Linux 4.14
    # - Rootfs: Ubuntu
    # - Microvm: 2vCPU with 4096 MB RAM
    # TODO: Multiple microvm sizes must be tested in the async pipeline.
    _run_snapshot_matrix(
        _test_seq_snapshots,
        "2vcpu_4096mb",
        bin_cloner_path,
        network_config,
        logger,
        snapshot_type=SnapshotType.DIFF,
        seq_len=5,
        bin_vsock_path=bin_vsock_path,
        test_fc_session_root_path=test_fc_session_root_path
    )


def test_load_snapshot_failure_handling(test_microvm_with_api):
    """
    Test scenario.

    1. Create two empty files representing snapshot memory and
    microvm state
    2. Try to load a VM snapshot out of the empty files.
    3. Verify that an error was shown and the FC process is terminated.
    """
    logger = logging.getLogger("snapshot_load_failure")
    vm = test_microvm_with_api
    vm.spawn(log_level='Info')

    # Create two empty files for snapshot state and snapshot memory
    chroot_path = vm.jailer.chroot_path()
    snapshot_dir = os.path.join(chroot_path, "snapshot")
    Path(snapshot_dir).mkdir(parents=True, exist_ok=True)

    snapshot_mem = os.path.join(snapshot_dir, "snapshot_mem")
    open(snapshot_mem, "w+").close()
    snapshot_vmstate = os.path.join(snapshot_dir, "snapshot_vmstate")
    open(snapshot_vmstate, "w+").close()

    # Hardlink the snapshot files into the microvm jail.
    jailed_mem = vm.create_jailed_resource(snapshot_mem)
    jailed_vmstate = vm.create_jailed_resource(snapshot_vmstate)

    # Load the snapshot
    response = vm.snapshot.load(mem_file_path=jailed_mem,
                                snapshot_path=jailed_vmstate)

    logger.info("Response status code %d, content: %s.",
                response.status_code,
                response.text)
    assert vm.api_session.is_status_bad_request(response.status_code)
    assert "Cannot deserialize the microVM state" in response.text

    # Check if FC process is closed
    wait_process_termination(vm.jailer_clone_pid)


def test_cmp_full_and_first_diff_mem(network_config,
                                     bin_cloner_path):
    """Test scenario: cmp memory of 2 consecutive full and diff snapshots."""
    logger = logging.getLogger("snapshot_sequence")

    # Testing matrix:
    # - Guest kernel: Linux 4.14
    # - Rootfs: Ubuntu
    # - Microvm: 2vCPU with 512 MB RAM
    _run_snapshot_matrix(
        _test_compare_mem_files,
        "2vcpu_512mb",
        bin_cloner_path,
        network_config,
        logger
    )


def test_negative_postload_api(bin_cloner_path):
    """Test APIs fail after loading from snapshot."""
    logger = logging.getLogger("snapshot_api_fail")

    vm_builder = MicrovmBuilder(bin_cloner_path)
    vm_instance = vm_builder.build_vm_nano(diff_snapshots=True)
    basevm = vm_instance.vm
    root_disk = vm_instance.disks[0]
    ssh_key = vm_instance.ssh_key

    basevm.start()
    ssh_connection = net_tools.SSHConnection(basevm.ssh_config)

    # Verify if guest can run commands.
    exit_code, _, _ = ssh_connection.execute_command("sync")
    assert exit_code == 0

    logger.info("Create snapshot")
    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    # Create base snapshot.
    snapshot = snapshot_builder.create([root_disk.local_path()],
                                       ssh_key,
                                       SnapshotType.DIFF)

    basevm.kill()

    logger.info("Load snapshot, mem %s", snapshot.mem)
    # Do not resume, just load, so we can still call APIs that work.
    microvm, _ = vm_builder.build_from_snapshot(snapshot,
                                                False,
                                                True)
    fail_msg = "The requested operation is not supported after starting " \
        "the microVM"

    response = microvm.actions.put(action_type='InstanceStart')
    assert fail_msg in response.text

    try:
        microvm.basic_config()
    except AssertionError as error:
        assert fail_msg in str(error)
    else:
        assert False, "Negative test failed"

    microvm.kill()


def test_negative_snapshot_permissions(bin_cloner_path):
    """Test missing permission error scenarios."""
    logger = logging.getLogger("snapshot_negative")
    vm_builder = MicrovmBuilder(bin_cloner_path)

    # Use a predefined vm instance.
    vm_instance = vm_builder.build_vm_nano()
    basevm = vm_instance.vm
    root_disk = vm_instance.disks[0]
    ssh_key = vm_instance.ssh_key

    basevm.start()

    logger.info("Create snapshot")
    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)

    disks = [root_disk.local_path()]

    # Remove write permissions.
    chroot_path = basevm.jailer.chroot_path()
    os.chmod(chroot_path, 0o444)

    try:
        _ = snapshot_builder.create(disks,
                                    ssh_key,
                                    SnapshotType.FULL)
    except AssertionError as error:
        # Check if proper error is returned.
        assert "Permission denied" in str(error)
    else:
        assert False, "Negative test failed"
    finally:
        # Restore proper permissions, even when the check above fails, so
        # the chroot does not stay read-only.
        os.chmod(chroot_path, 0o744)

    # Create base snapshot.
    snapshot = snapshot_builder.create(disks,
                                       ssh_key,
                                       SnapshotType.FULL)

    logger.info("Load snapshot, mem %s", snapshot.mem)

    basevm.kill()

    # Remove permissions for mem file.
    os.chmod(snapshot.mem, 0o000)
    try:
        _assert_snapshot_load_fails(
            vm_builder,
            snapshot,
            "Cannot open the memory file: Permission denied"
        )

        # Remove permissions for state file. The mem file is still
        # unreadable at this point, as in the first check.
        os.chmod(snapshot.vmstate, 0o000)
        _assert_snapshot_load_fails(
            vm_builder,
            snapshot,
            "Cannot perform open on the snapshot backing file:"
            " Permission denied"
        )
    finally:
        # Restore permissions for state and mem files.
        os.chmod(snapshot.vmstate, 0o744)
        os.chmod(snapshot.mem, 0o744)

    # Remove permissions for block file, remembering the current
    # permission bits so they can be put back afterwards.
    disk_path = snapshot.disks[0]
    disk_mode = os.stat(disk_path).st_mode & 0o7777
    os.chmod(disk_path, 0o000)
    try:
        _assert_snapshot_load_fails(
            vm_builder,
            snapshot,
            "Block(Os { code: 13, kind: PermissionDenied"
        )
    finally:
        os.chmod(disk_path, disk_mode)


def test_negative_snapshot_create(bin_cloner_path):
    """Test create snapshot before pause."""
    vm_builder = MicrovmBuilder(bin_cloner_path)
    vm_instance = vm_builder.build_vm_nano()
    vm = vm_instance.vm

    vm.start()

    response = vm.snapshot.create(mem_file_path='memfile',
                                  snapshot_path='statefile',
                                  diff=False)

    assert vm.api_session.is_status_bad_request(response.status_code)
    assert "save/restore unavailable while running" in response.text

    response = vm.vm.patch(state='Paused')
    assert vm.api_session.is_status_no_content(response.status_code)

    # Try diff with dirty pages tracking disabled.
    response = vm.snapshot.create(mem_file_path='memfile',
                                  snapshot_path='statefile',
                                  diff=True)
    msg = "Diff snapshots are not allowed on uVMs with dirty page" \
        " tracking disabled"
    assert msg in response.text
    assert not os.path.exists('statefile')
    assert not os.path.exists('memfile')

    vm.kill()