Path: tests/integration_tests/performance/test_vsock_throughput.py

# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests the VSOCK throughput of Firecracker uVMs."""

import os
import json
import logging
import time
import concurrent.futures

import pytest
from conftest import _test_images_s3_bucket
from framework.artifacts import ArtifactCollection, ArtifactSet
from framework.matrix import TestMatrix, TestContext
from framework.builder import MicrovmBuilder
from framework.stats import core, consumer, producer
from framework.stats.baseline import Provider as BaselineProvider
from framework.stats.metadata import DictProvider as DictMetadataProvider
from framework.utils import CpuMap, CmdBuilder, run_cmd, get_cpu_percent, \
    DictQuery
from framework.utils_cpuid import get_cpu_model_name
import host_tools.network as net_tools
from integration_tests.performance.configs import defs
from integration_tests.performance.utils import handle_failure, \
    dump_test_result

with open(defs.CFG_LOCATION /
          "vsock_throughput_test_config.json") as cfg_file:
    CONFIG = json.load(cfg_file)

SERVER_STARTUP_TIME = CONFIG["server_startup_time"]
VSOCK_UDS_PATH = "v.sock"
IPERF3 = "iperf3-vsock"
THROUGHPUT = "throughput"
DURATION = "duration"
BASE_PORT = 5201
CPU_UTILIZATION_VMM = "cpu_utilization_vmm"
CPU_UTILIZATION_VCPUS_TOTAL = "cpu_utilization_vcpus_total"
IPERF3_CPU_UTILIZATION_PERCENT_OUT_TAG = "cpu_utilization_percent"
IPERF3_END_RESULTS_TAG = "end"
TARGET_TAG = "target"
DELTA_PERCENTAGE_TAG = "delta_percentage"
THROUGHPUT_UNIT = "Mbps"
DURATION_UNIT = "seconds"
CPU_UTILIZATION_UNIT = "percentage"


# pylint: disable=R0903
class VsockThroughputBaselineProvider(BaselineProvider):
    """Baseline provider for the vsock throughput performance test."""

    def __init__(self, env_id, iperf_id):
        """Vsock throughput baseline provider initialization."""
        cpu_model_name = get_cpu_model_name()
        baselines = list(filter(
            lambda cpu_baseline: cpu_baseline["model"] == cpu_model_name,
            CONFIG["hosts"]["instances"]["m5d.metal"]["cpus"]))
        # Use the first matching baseline, or an empty one if this CPU model
        # is not covered by the config.
        super().__init__(DictQuery(baselines[0] if baselines else dict()))

        self._tag = "baselines/{}/" + env_id + "/{}/" + iperf_id

    def get(self, ms_name: str, st_name: str) -> dict:
        """Return the baseline corresponding to the key."""
        key = self._tag.format(ms_name, st_name)
        baseline = self._baselines.get(key)
        if baseline:
            target = baseline.get(TARGET_TAG)
            delta_percentage = baseline.get(DELTA_PERCENTAGE_TAG)
            return {
                "target": target,
                "delta": delta_percentage * target / 100,
            }
        return None
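

# Illustrative example of how a baseline is resolved (values below are made
# up; the real numbers live in `vsock_throughput_test_config.json`):
#
#   provider = VsockThroughputBaselineProvider(env_id, "vsock-pDEFAULT-bd")
#   provider.get("throughput", "Avg")
#   # -> looks up "baselines/throughput/<env_id>/Avg/vsock-pDEFAULT-bd"
#   # -> {"target": 5000, "delta": 300}  # 300 = 6% delta_percentage of 5000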


def produce_iperf_output(basevm,
                         guest_cmd_builder,
                         current_avail_cpu,
                         runtime,
                         omit,
                         load_factor,
                         modes):
    """Produce iperf raw output from server-client connection."""
    # Check if we have enough CPUs to pin the servers on the host.
    # The available CPUs are the total, minus the vCPUs, the VMM and the
    # API threads.
    assert load_factor * basevm.vcpus_count < CpuMap.len() - \
        basevm.vcpus_count - 2

    host_uds_path = os.path.join(
        basevm.path,
        VSOCK_UDS_PATH
    )

    # Start the servers, each one pinned to its own host CPU.
    for server_idx in range(load_factor * basevm.vcpus_count):
        assigned_cpu = CpuMap(current_avail_cpu)
        iperf_server = \
            CmdBuilder(f"taskset --cpu-list {assigned_cpu}") \
            .with_arg(IPERF3) \
            .with_arg("-sD") \
            .with_arg("--vsock") \
            .with_arg("-B", host_uds_path) \
            .with_arg("-p", f"{BASE_PORT + server_idx}") \
            .with_arg("-1") \
            .build()

        run_cmd(iperf_server)
        current_avail_cpu += 1

    # Wait for the iperf3 servers to start.
    time.sleep(SERVER_STARTUP_TIME)

    # Start one iperf3 client per server. We cannot use iperf3 parallel
    # streams due to non-deterministic results and lack of scaling.
    def spawn_iperf_client(conn, client_idx, mode):
        # Add the port where the iperf3 client is going to send/receive.
        cmd = guest_cmd_builder.with_arg(
            "-p", BASE_PORT + client_idx).with_arg(mode).build()

        # Bind the UDS in the jailer's root.
        basevm.create_jailed_resource(os.path.join(
            basevm.path,
            _make_host_port_path(VSOCK_UDS_PATH, BASE_PORT + client_idx)))

        # Pin the client to one of the guest's vCPUs.
        pinned_cmd = f"taskset --cpu-list {client_idx % basevm.vcpus_count}" \
                     f" {cmd}"
        rc, stdout, _ = conn.execute_command(pinned_cmd)

        assert rc == 0

        return stdout.read()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        cpu_load_future = executor.submit(get_cpu_percent,
                                          basevm.jailer_clone_pid,
                                          runtime - SERVER_STARTUP_TIME,
                                          omit)

        modes_len = len(modes)
        ssh_connection = net_tools.SSHConnection(basevm.ssh_config)
        for client_idx in range(load_factor * basevm.vcpus_count):
            futures.append(executor.submit(spawn_iperf_client,
                                           ssh_connection,
                                           client_idx,
                                           # Distribute the modes evenly.
                                           modes[client_idx % modes_len]))

        cpu_load = cpu_load_future.result()
        for future in futures[:-1]:
            res = json.loads(future.result())
            res[IPERF3_END_RESULTS_TAG][
                IPERF3_CPU_UTILIZATION_PERCENT_OUT_TAG] = None
            yield res

        # Attach the real VMM/vCPU CPU utilization to the measurements of
        # the last iperf3 server-client pair.
        res = json.loads(futures[-1].result())

        # We expect a single emulation thread tagged with the `firecracker`
        # name.
        tag = "firecracker"
        assert tag in cpu_load and len(cpu_load[tag]) == 1
        thread_id = list(cpu_load[tag])[0]
        data = cpu_load[tag][thread_id]
        vmm_util = sum(data) / len(data)
        cpu_util_perc = res[IPERF3_END_RESULTS_TAG][
            IPERF3_CPU_UTILIZATION_PERCENT_OUT_TAG] = dict()
        cpu_util_perc[CPU_UTILIZATION_VMM] = vmm_util

        vcpus_util = 0
        for vcpu in range(basevm.vcpus_count):
            # We expect a single thread tagged `fc_vcpu <N>` for each vCPU.
            tag = f"fc_vcpu {vcpu}"
            assert tag in cpu_load and len(cpu_load[tag]) == 1
            thread_id = list(cpu_load[tag])[0]
            data = cpu_load[tag][thread_id]
            vcpus_util += sum(data) / len(data)

        cpu_util_perc[CPU_UTILIZATION_VCPUS_TOTAL] = vcpus_util

        yield res


def consume_iperf_output(cons, result):
    """Consume the iperf3 JSON output of one client run."""
    total_received = result[IPERF3_END_RESULTS_TAG]['sum_received']
    duration = float(total_received['seconds'])
    cons.consume_data(DURATION, duration)

    # Throughput is computed at the receiving end.
    total_recv_bytes = int(total_received['bytes'])
    tput = round((total_recv_bytes * 8) / (1024 * 1024 * duration), 2)
    cons.consume_data(THROUGHPUT, tput)

    cpu_util = result[IPERF3_END_RESULTS_TAG][
        IPERF3_CPU_UTILIZATION_PERCENT_OUT_TAG]
    if cpu_util:
        cpu_util_host = cpu_util[CPU_UTILIZATION_VMM]
        cpu_util_guest = cpu_util[CPU_UTILIZATION_VCPUS_TOTAL]

        cons.consume_stat("Avg", CPU_UTILIZATION_VMM, cpu_util_host)
        cons.consume_stat("Avg", CPU_UTILIZATION_VCPUS_TOTAL, cpu_util_guest)
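

# Shape of the iperf3 `--json` subtree consumed above (abridged, with
# illustrative values; note that `produce_iperf_output` rewrites the
# "cpu_utilization_percent" entry with host-side measurements):
#
#   {
#       "end": {
#           "sum_received": {"seconds": 10.0, "bytes": 1310720000},
#           "cpu_utilization_percent": {
#               "cpu_utilization_vmm": 35.2,
#               "cpu_utilization_vcpus_total": 97.8
#           }
#       }
#   }
#
# With these numbers, the receiver-side throughput would come out as
# (1310720000 * 8) / (1024 * 1024 * 10.0) = 1000.0 Mbps.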


def pipes(basevm, current_avail_cpu, env_id):
    """Producer/Consumer pipes generator."""
    for mode in CONFIG["modes"]:
        # We run the bi-directional test only on uVMs with at least 2 vCPUs:
        # each direction gets its own iperf3 client pinned to a vCPU, and
        # there are two directions.
        if mode == "bd" and basevm.vcpus_count < 2:
            continue

        for protocol in CONFIG["protocols"]:
            for payload_length in protocol["payload_length"]:
                iperf_guest_cmd_builder = CmdBuilder(IPERF3) \
                    .with_arg("--vsock") \
                    .with_arg("-c", 2) \
                    .with_arg("--json") \
                    .with_arg("--omit", protocol["omit"]) \
                    .with_arg("--time", CONFIG["time"])

                if payload_length != "DEFAULT":
                    iperf_guest_cmd_builder = iperf_guest_cmd_builder \
                        .with_arg("--len", f"{payload_length}")

                iperf3_id = f"vsock-p{payload_length}-{mode}"

                cons = consumer.LambdaConsumer(
                    metadata_provider=DictMetadataProvider(
                        CONFIG["measurements"],
                        VsockThroughputBaselineProvider(env_id, iperf3_id)),
                    func=consume_iperf_output
                )

                prod_kwargs = {
                    "guest_cmd_builder": iperf_guest_cmd_builder,
                    "basevm": basevm,
                    "current_avail_cpu": current_avail_cpu,
                    "runtime": CONFIG["time"],
                    "omit": protocol["omit"],
                    "load_factor": CONFIG["load_factor"],
                    "modes": CONFIG["modes"][mode],
                }
                prod = producer.LambdaProducer(produce_iperf_output,
                                               prod_kwargs)
                yield cons, prod, f"{env_id}/{iperf3_id}"
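

# For example, with payload_length=1024, omit=5, time=20 and a "-R" mode
# entry, the guest command assembled by `pipes` plus the per-client arguments
# added in `spawn_iperf_client` would look roughly like (illustrative values;
# the real ones come from the config):
#
#   iperf3-vsock --vsock -c 2 --json --omit 5 --time 20 --len 1024 -p 5201 -R
#
# `-c 2` targets the host, which always has the well-known vsock CID 2.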


@pytest.mark.nonci
@pytest.mark.timeout(600)
def test_vsock_throughput(bin_cloner_path, results_file_dumper):
    """Test vsock throughput driver for multiple artifacts."""
    logger = logging.getLogger("vsock_throughput")
    artifacts = ArtifactCollection(_test_images_s3_bucket())
    microvm_artifacts = ArtifactSet(artifacts.microvms(keyword="1vcpu_1024mb"))
    microvm_artifacts.insert(artifacts.microvms(keyword="2vcpu_1024mb"))
    kernel_artifacts = ArtifactSet(
        artifacts.kernels(keyword="vmlinux-4.14.bin"))
    disk_artifacts = ArtifactSet(artifacts.disks(keyword="ubuntu"))

    # Create a test context and add the builder, logger and results dumper.
    test_context = TestContext()
    test_context.custom = {
        'builder': MicrovmBuilder(bin_cloner_path),
        'logger': logger,
        'name': 'vsock_throughput',
        'results_file_dumper': results_file_dumper
    }

    test_matrix = TestMatrix(context=test_context,
                             artifact_sets=[
                                 microvm_artifacts,
                                 kernel_artifacts,
                                 disk_artifacts
                             ])
    test_matrix.run_test(iperf_workload)


def iperf_workload(context):
    """Run a statistics exercise."""
    vm_builder = context.custom['builder']
    logger = context.custom['logger']
    file_dumper = context.custom['results_file_dumper']

    # Create a rw copy artifact.
    rw_disk = context.disk.copy()
    # Get the ssh key from the read-only artifact.
    ssh_key = context.disk.ssh_key()
    # Create a fresh microvm from artifacts.
    vm_instance = vm_builder.build(kernel=context.kernel,
                                   disks=[rw_disk],
                                   ssh_key=ssh_key,
                                   config=context.microvm)
    basevm = vm_instance.vm
    # Create a vsock device.
    basevm.vsock.put(
        vsock_id="vsock0",
        guest_cid=3,
        uds_path="/" + VSOCK_UDS_PATH
    )

    basevm.start()

    st_core = core.Core(name="vsock_throughput",
                        iterations=1,
                        custom={'cpu_model_name': get_cpu_model_name()})

    # Check that the needed CPU cores are available. We have the API thread,
    # the VMM thread, and then one thread for each configured vCPU.
    assert CpuMap.len() >= 2 + basevm.vcpus_count

    # Pin uVM threads to physical cores.
    current_avail_cpu = 0
    assert basevm.pin_vmm(current_avail_cpu), \
        "Failed to pin firecracker thread."
    current_avail_cpu += 1
    assert basevm.pin_api(current_avail_cpu), \
        "Failed to pin fc_api thread."
    for i in range(basevm.vcpus_count):
        current_avail_cpu += 1
        assert basevm.pin_vcpu(i, current_avail_cpu), \
            f"Failed to pin fc_vcpu {i} thread."

    logger.info("Testing with microvm: \"{}\", kernel {}, disk {}"
                .format(context.microvm.name(),
                        context.kernel.name(),
                        context.disk.name()))

    for cons, prod, tag in \
            pipes(basevm,
                  current_avail_cpu + 1,
                  f"{context.kernel.name()}/{context.disk.name()}/"
                  f"{context.microvm.name()}"):
        st_core.add_pipe(prod, cons, tag)

    # Start running the commands on the guest, gather the results and verify
    # the pass criteria. On failure, `handle_failure` takes over the
    # reporting; otherwise dump the results here.
    try:
        result = st_core.run_exercise()
    except core.CoreException as err:
        handle_failure(file_dumper, err)
    else:
        dump_test_result(file_dumper, result)


def _make_host_port_path(uds_path, port):
    """Build the path for a Unix socket, mapped to host vsock port `port`."""
    return "{}_{}".format(uds_path, port)
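

# With the default BASE_PORT, the host-side socket layout produced by the
# helpers above looks like this (illustrative, for two ports):
#
#   <basevm.path>/v.sock       - the vsock device UDS (the guest has CID 3)
#   <basevm.path>/v.sock_5201  - host listener mapped to guest port 5201
#   <basevm.path>/v.sock_5202  - host listener mapped to guest port 5202
#
# Firecracker's vsock backend forwards a guest connection to CID 2, port P,
# to the host Unix socket named "<uds_path>_P".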