Path: blob/main/tests/integration_tests/functional/test_rate_limiter.py
1958 views
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests that fail if network throughput does not obey rate limits."""
import re
import time

import framework.utils as utils
import host_tools.network as net_tools  # pylint: disable=import-error

# The iperf version to run these tests with
IPERF_BINARY = 'iperf3'

# Interval used by iperf to get maximum bandwidth
IPERF_TRANSMIT_TIME = 4

# Use a fixed-size TCP window so we get constant flow
IPERF_TCP_WINDOW = '1000K'

# The rate limiting value
RATE_LIMIT_BYTES = 10485760

# The initial token bucket size
BURST_SIZE = 104857600

# The refill time for the token bucket
REFILL_TIME_MS = 100

# Deltas that are accepted between expected values and achieved
# values throughout the tests
MAX_BYTES_DIFF_PERCENTAGE = 10
MAX_TIME_DIFF = 25

# Patterns used to pull the numbers out of an iperf3 summary line.
# Matching on content instead of on column position keeps the parsing
# independent of how iperf3 pads its output.
_IPERF_INTERVAL_RE = re.compile(r'(\d+(?:\.\d+)?)-(\d+(?:\.\d+)?)\s+sec')
_IPERF_BANDWIDTH_RE = re.compile(r'(\d+(?:\.\d+)?)\s+\w+/sec')


def test_tx_rate_limiting(test_microvm_with_ssh, network_config):
    """Run iperf tx with and without rate limiting; check limiting effect."""
    test_microvm = test_microvm_with_ssh
    test_microvm.spawn()

    test_microvm.basic_config()

    # For this test we will be adding three interfaces:
    # 1. No rate limiting
    # 2. Rate limiting without burst
    # 3. Rate limiting with burst
    host_ips = ['', '', '']
    guest_ips = ['', '', '']

    iface_id = '1'
    # Create tap before configuring interface.
    _tap1, host_ip, guest_ip = test_microvm.ssh_network_config(
        network_config,
        iface_id
    )
    guest_ips[0] = guest_ip
    host_ips[0] = host_ip

    iface_id = '2'
    tx_rate_limiter_no_burst = {
        'bandwidth': {
            'size': RATE_LIMIT_BYTES,
            'refill_time': REFILL_TIME_MS
        }
    }
    _tap2, host_ip, guest_ip = test_microvm.ssh_network_config(
        network_config,
        iface_id,
        tx_rate_limiter=tx_rate_limiter_no_burst
    )
    guest_ips[1] = guest_ip
    host_ips[1] = host_ip

    iface_id = '3'
    tx_rate_limiter_with_burst = {
        'bandwidth': {
            'size': RATE_LIMIT_BYTES,
            'one_time_burst': BURST_SIZE,
            'refill_time': REFILL_TIME_MS
        }
    }
    _tap3, host_ip, guest_ip = test_microvm.ssh_network_config(
        network_config,
        iface_id,
        tx_rate_limiter=tx_rate_limiter_with_burst
    )
    guest_ips[2] = guest_ip
    host_ips[2] = host_ip

    test_microvm.start()

    _check_tx_rate_limiting(test_microvm, guest_ips, host_ips)
    _check_tx_rate_limit_patch(test_microvm, guest_ips, host_ips)


def test_rx_rate_limiting(test_microvm_with_ssh, network_config):
    """Run iperf rx with and without rate limiting; check limiting effect."""
    test_microvm = test_microvm_with_ssh
    test_microvm.spawn()

    test_microvm.basic_config()

    # For this test we will be adding three interfaces:
    # 1. No rate limiting
    # 2. Rate limiting without burst
    # 3. Rate limiting with burst
    host_ips = ['', '', '']
    guest_ips = ['', '', '']

    iface_id = '1'
    # Create tap before configuring interface.
    _tap1, host_ip, guest_ip = test_microvm.ssh_network_config(
        network_config,
        iface_id
    )
    guest_ips[0] = guest_ip
    host_ips[0] = host_ip

    iface_id = '2'
    rx_rate_limiter_no_burst = {
        'bandwidth': {
            'size': RATE_LIMIT_BYTES,
            'refill_time': REFILL_TIME_MS
        }
    }
    _tap2, host_ip, guest_ip = test_microvm.ssh_network_config(
        network_config,
        iface_id,
        rx_rate_limiter=rx_rate_limiter_no_burst
    )
    guest_ips[1] = guest_ip
    host_ips[1] = host_ip

    iface_id = '3'
    rx_rate_limiter_with_burst = {
        'bandwidth': {
            'size': RATE_LIMIT_BYTES,
            'one_time_burst': BURST_SIZE,
            'refill_time': REFILL_TIME_MS
        }
    }
    _tap3, host_ip, guest_ip = test_microvm.ssh_network_config(
        network_config,
        iface_id,
        rx_rate_limiter=rx_rate_limiter_with_burst
    )
    guest_ips[2] = guest_ip
    host_ips[2] = host_ip

    # Start the microvm.
    test_microvm.start()

    _check_rx_rate_limiting(test_microvm, guest_ips)
    _check_rx_rate_limit_patch(test_microvm, guest_ips)


def test_rx_rate_limiting_cpu_load(test_microvm_with_ssh, network_config):
    """Run iperf rx with rate limiting; verify cpu load is below threshold."""
    test_microvm = test_microvm_with_ssh
    test_microvm.spawn()

    test_microvm.basic_config()

    # Enable monitor that checks if the cpu load is over the threshold.
    # After multiple runs, the average value for the cpu load
    # seems to be around 10%. Setting the threshold a little
    # higher to skip false positives.
    threshold = 20
    test_microvm.enable_cpu_load_monitor(threshold)

    # Create interface with aggressive rate limiting enabled.
    rx_rate_limiter_no_burst = {
        'bandwidth': {
            'size': 65536,  # 64KBytes
            'refill_time': 1000  # 1s
        }
    }
    _tap, _host_ip, guest_ip = test_microvm.ssh_network_config(
        network_config,
        '1',
        rx_rate_limiter=rx_rate_limiter_no_burst
    )

    test_microvm.start()

    # Start iperf server on guest.
    _start_iperf_on_guest(test_microvm, guest_ip)

    # Run iperf client sending UDP traffic. The output is not inspected:
    # this test only relies on the cpu load monitor enabled above.
    iperf_cmd = '{} {} -u -c {} -b 1000000000 -t{} -f KBytes'.format(
        test_microvm.jailer.netns_cmd_prefix(),
        IPERF_BINARY,
        guest_ip,
        IPERF_TRANSMIT_TIME * 5
    )
    _iperf_out = _run_local_iperf(iperf_cmd)


def _check_tx_rate_limiting(test_microvm, guest_ips, host_ips):
    """Check that the transmit rate is within expectations.

    `guest_ips` and `host_ips` are indexed per interface: 0 is the
    unlimited one, 1 is rate limited without burst, 2 is rate limited
    with an initial burst.
    """
    # Start iperf on the host as this is the tx rate limiting test.
    _start_local_iperf(test_microvm.jailer.netns_cmd_prefix())

    # First step: get the transfer rate when no rate limiting is enabled.
    # We are receiving the result in KBytes from iperf.
    print("Run guest TX iperf with no rate-limit")
    rate_no_limit_kbps = _get_tx_bandwidth_with_duration(
        test_microvm,
        guest_ips[0],
        host_ips[0],
        IPERF_TRANSMIT_TIME
    )
    print("TX rate_no_limit_kbps: {}".format(rate_no_limit_kbps))

    # Calculate the number of KBytes that are expected to be sent
    # in each second once the rate limiting is enabled.
    expected_kbps = int(RATE_LIMIT_BYTES / (REFILL_TIME_MS / 1000.0) / 1024)
    print("Rate-Limit TX expected_kbps: {}".format(expected_kbps))

    # Sanity check that bandwidth with no rate limiting is at least double
    # than the one expected when rate limiting is in place.
    assert _get_percentage_difference(rate_no_limit_kbps, expected_kbps) > 100

    # Second step: check bandwidth when rate limiting is on.
    _check_tx_bandwidth(test_microvm, guest_ips[1], host_ips[1], expected_kbps)

    # Third step: get the number of bytes when rate limiting is on and there is
    # an initial burst size from where to consume.
    print("Run guest TX iperf with exact burst size")
    # Use iperf to obtain the bandwidth when there is burst to consume from,
    # send exactly BURST_SIZE bytes.
    iperf_cmd = '{} -c {} -n {} -f KBytes -w {} -N'.format(
        IPERF_BINARY,
        host_ips[2],
        BURST_SIZE,
        IPERF_TCP_WINDOW
    )
    iperf_out = _run_iperf_on_guest(test_microvm, iperf_cmd, guest_ips[2])
    print(iperf_out)
    _, burst_kbps = _process_iperf_output(iperf_out)
    print("TX burst_kbps: {}".format(burst_kbps))
    # Test that the burst bandwidth is at least as two times the rate limit.
    assert _get_percentage_difference(burst_kbps, expected_kbps) > 100

    # Since the burst should be consumed, check rate limit is in place.
    _check_tx_bandwidth(test_microvm, guest_ips[2], host_ips[2], expected_kbps)


def _check_rx_rate_limiting(test_microvm, guest_ips):
    """Check that the receiving rate is within expectations.

    `guest_ips` is indexed per interface: 0 is the unlimited one, 1 is
    rate limited without burst, 2 is rate limited with an initial burst.
    """
    # Start iperf on guest.
    _start_iperf_on_guest(test_microvm, guest_ips[0])

    # First step: get the transfer rate when no rate limiting is enabled.
    # We are receiving the result in KBytes from iperf.
    print("Run guest RX iperf with no rate-limit")
    rate_no_limit_kbps = _get_rx_bandwidth_with_duration(
        test_microvm,
        guest_ips[0],
        IPERF_TRANSMIT_TIME
    )
    print("RX rate_no_limit_kbps: {}".format(rate_no_limit_kbps))

    # Calculate the number of KBytes that are expected to be received
    # in each second once the rate limiting is enabled.
    expected_kbps = int(RATE_LIMIT_BYTES / (REFILL_TIME_MS / 1000.0) / 1024)
    print("Rate-Limit RX expected_kbps: {}".format(expected_kbps))

    # Sanity check that bandwidth with no rate limiting is at least double
    # than the one expected when rate limiting is in place.
    assert _get_percentage_difference(rate_no_limit_kbps, expected_kbps) > 100

    # Second step: check bandwidth when rate limiting is on.
    _check_rx_bandwidth(test_microvm, guest_ips[1], expected_kbps)

    # Third step: get the number of bytes when rate limiting is on and there is
    # an initial burst size from where to consume.
    print("Run guest RX iperf with exact burst size")
    # Use iperf to obtain the bandwidth when there is burst to consume from,
    # send exactly BURST_SIZE bytes.
    iperf_cmd = '{} {} -c {} -n {} -f KBytes -w {} -N'.format(
        test_microvm.jailer.netns_cmd_prefix(),
        IPERF_BINARY,
        guest_ips[2],
        BURST_SIZE,
        IPERF_TCP_WINDOW
    )
    iperf_out = _run_local_iperf(iperf_cmd)
    print(iperf_out)
    _, burst_kbps = _process_iperf_output(iperf_out)
    print("RX burst_kbps: {}".format(burst_kbps))
    # Test that the burst bandwidth is at least as two times the rate limit.
    assert _get_percentage_difference(burst_kbps, expected_kbps) > 100

    # Since the burst should be consumed, check rate limit is in place.
    _check_rx_bandwidth(test_microvm, guest_ips[2], expected_kbps)


def _check_tx_rate_limit_patch(test_microvm, guest_ips, host_ips):
    """Patch the TX rate limiters and check the new limits."""
    bucket_size = int(RATE_LIMIT_BYTES * 2)
    expected_kbps = int(bucket_size / (REFILL_TIME_MS / 1000.0) / 1024)

    # Check that a TX rate limiter can be applied to a previously unlimited
    # interface.
    _patch_iface_bw(test_microvm, "1", "TX", bucket_size, REFILL_TIME_MS)
    _check_tx_bandwidth(test_microvm, guest_ips[0], host_ips[0], expected_kbps)

    # Check that a TX rate limiter can be updated.
    _patch_iface_bw(test_microvm, "2", "TX", bucket_size, REFILL_TIME_MS)
    _check_tx_bandwidth(test_microvm, guest_ips[1], host_ips[1], expected_kbps)

    # Check that a TX rate limiter can be removed.
    _patch_iface_bw(test_microvm, "1", "TX", 0, 0)
    rate_no_limit_kbps = _get_tx_bandwidth_with_duration(
        test_microvm,
        guest_ips[0],
        host_ips[0],
        IPERF_TRANSMIT_TIME
    )
    # Check that bandwidth when rate-limit disabled is at least 1.5x larger
    # than the one when rate limiting was enabled. The percentage helper
    # works on the absolute difference, so the direction is asserted
    # separately: a rate far *below* the old limit must not pass.
    assert rate_no_limit_kbps > expected_kbps
    assert _get_percentage_difference(rate_no_limit_kbps, expected_kbps) > 50


def _check_rx_rate_limit_patch(test_microvm, guest_ips):
    """Patch the RX rate limiters and check the new limits."""
    bucket_size = int(RATE_LIMIT_BYTES * 2)
    expected_kbps = int(bucket_size / (REFILL_TIME_MS / 1000.0) / 1024)

    # Check that an RX rate limiter can be applied to a previously unlimited
    # interface.
    _patch_iface_bw(test_microvm, "1", "RX", bucket_size, REFILL_TIME_MS)
    _check_rx_bandwidth(test_microvm, guest_ips[0], expected_kbps)

    # Check that an RX rate limiter can be updated.
    _patch_iface_bw(test_microvm, "2", "RX", bucket_size, REFILL_TIME_MS)
    _check_rx_bandwidth(test_microvm, guest_ips[1], expected_kbps)

    # Check that an RX rate limiter can be removed.
    _patch_iface_bw(test_microvm, "1", "RX", 0, 0)
    rate_no_limit_kbps = _get_rx_bandwidth_with_duration(
        test_microvm,
        guest_ips[0],
        IPERF_TRANSMIT_TIME
    )
    # Check that bandwidth when rate-limit disabled is at least 1.5x larger
    # than the one when rate limiting was enabled. The percentage helper
    # works on the absolute difference, so the direction is asserted
    # separately: a rate far *below* the old limit must not pass.
    assert rate_no_limit_kbps > expected_kbps
    assert _get_percentage_difference(rate_no_limit_kbps, expected_kbps) > 50


def _check_tx_bandwidth(
        test_microvm,
        guest_ip,
        host_ip,
        expected_kbps
):
    """Check that the rate-limited TX bandwidth is close to what we expect.

    At this point, a daemonized iperf3 server is expected to be running on
    the host.

    A first measurement is taken over `IPERF_TRANSMIT_TIME` seconds. If it
    deviates from `expected_kbps` by `MAX_BYTES_DIFF_PERCENTAGE` or more,
    a second, 10x longer measurement is taken and only that one is
    asserted on.
    """
    print("Check guest TX rate-limit; expected kbps {}".format(expected_kbps))
    observed_kbps = _get_tx_bandwidth_with_duration(
        test_microvm,
        guest_ip,
        host_ip,
        IPERF_TRANSMIT_TIME
    )

    diff_pc = _get_percentage_difference(observed_kbps, expected_kbps)
    print("TX calculated diff percentage: {}\n".format(diff_pc))

    if diff_pc >= MAX_BYTES_DIFF_PERCENTAGE:
        print("Short duration test failed. Try another run with 10x duration.")

        observed_kbps = _get_tx_bandwidth_with_duration(
            test_microvm,
            guest_ip,
            host_ip,
            10 * IPERF_TRANSMIT_TIME
        )
        diff_pc = _get_percentage_difference(observed_kbps, expected_kbps)
        print("TX calculated diff percentage: {}\n".format(diff_pc))

        assert diff_pc < MAX_BYTES_DIFF_PERCENTAGE


def _get_tx_bandwidth_with_duration(
        test_microvm,
        guest_ip,
        host_ip,
        duration
):
    """Measure the guest TX bandwidth over `duration` seconds.

    Runs an iperf client on the guest (reached over SSH at `guest_ip`)
    against `host_ip` and returns the bandwidth parsed from its output,
    in the KBytes unit requested from iperf.
    """
    iperf_cmd = '{} -c {} -t {} -f KBytes -w {} -N'.format(
        IPERF_BINARY,
        host_ip,
        duration,
        IPERF_TCP_WINDOW
    )

    iperf_out = _run_iperf_on_guest(test_microvm, iperf_cmd, guest_ip)
    print(iperf_out)

    _, observed_kbps = _process_iperf_output(iperf_out)
    print("TX observed_kbps: {}".format(observed_kbps))
    return observed_kbps


def _check_rx_bandwidth(
        test_microvm,
        guest_ip,
        expected_kbps
):
    """Check that the rate-limited RX bandwidth is close to what we expect.

    At this point, a daemonized iperf3 server is expected to be running on
    the guest.

    A first measurement is taken over `IPERF_TRANSMIT_TIME` seconds. If it
    deviates from `expected_kbps` by `MAX_BYTES_DIFF_PERCENTAGE` or more,
    a second, 10x longer measurement is taken and only that one is
    asserted on.
    """
    print("Check guest RX rate-limit; expected kbps {}".format(expected_kbps))
    observed_kbps = _get_rx_bandwidth_with_duration(
        test_microvm,
        guest_ip,
        IPERF_TRANSMIT_TIME
    )

    diff_pc = _get_percentage_difference(observed_kbps, expected_kbps)
    print("RX calculated diff percentage: {}\n".format(diff_pc))

    if diff_pc >= MAX_BYTES_DIFF_PERCENTAGE:
        print("Short duration test failed. Try another run with 10x duration.")

        observed_kbps = _get_rx_bandwidth_with_duration(
            test_microvm,
            guest_ip,
            10 * IPERF_TRANSMIT_TIME
        )
        diff_pc = _get_percentage_difference(observed_kbps, expected_kbps)
        print("RX calculated diff percentage: {}\n".format(diff_pc))

        assert diff_pc < MAX_BYTES_DIFF_PERCENTAGE


def _get_rx_bandwidth_with_duration(
        test_microvm,
        guest_ip,
        duration
):
    """Measure the guest RX bandwidth over `duration` seconds.

    Runs an iperf client locally, prefixed with the jailer's netns
    command prefix, against `guest_ip` and returns the bandwidth parsed
    from its output, in the KBytes unit requested from iperf.
    """
    iperf_cmd = "{} {} -c {} -t {} -f KBytes -w {} -N".format(
        test_microvm.jailer.netns_cmd_prefix(),
        IPERF_BINARY,
        guest_ip,
        duration,
        IPERF_TCP_WINDOW
    )
    iperf_out = _run_local_iperf(iperf_cmd)
    print(iperf_out)

    _, observed_kbps = _process_iperf_output(iperf_out)
    print("RX observed_kbps: {}".format(observed_kbps))
    return observed_kbps


def _patch_iface_bw(
        test_microvm,
        iface_id,
        rx_or_tx,
        new_bucket_size,
        new_refill_time
):
    """Update the bandwidth rate limiter for a given interface.

    Update the `rx_or_tx` ('RX' or 'TX') rate limiter on interface
    `iface_id` so that its bandwidth bucket has size `new_bucket_size`
    and refill time `new_refill_time`. The tests in this file pass 0 for
    both values when they want the limiter removed.
    """
    assert rx_or_tx in ['RX', 'TX']
    args = {
        'iface_id': iface_id,
        "{}_rate_limiter".format(rx_or_tx.lower()): {
            'bandwidth': {
                'size': new_bucket_size,
                'refill_time': new_refill_time
            }
        }
    }
    resp = test_microvm.network.patch(**args)
    assert test_microvm.api_session.is_status_no_content(resp.status_code)


def _start_iperf_on_guest(test_microvm, hostname):
    """Start iperf in server mode through an SSH connection."""
    test_microvm.ssh_config['hostname'] = hostname
    ssh_connection = net_tools.SSHConnection(test_microvm.ssh_config)

    iperf_cmd = '{} -sD -f KBytes\n'.format(IPERF_BINARY)
    ssh_connection.execute_command(iperf_cmd)

    # Wait for the iperf daemon to start.
    time.sleep(1)


def _run_iperf_on_guest(test_microvm, iperf_cmd, hostname):
    """Run a client related iperf command through an SSH connection.

    Return whatever the command wrote to stdout; fail if it wrote
    anything to stderr.
    """
    test_microvm.ssh_config['hostname'] = hostname
    ssh_connection = net_tools.SSHConnection(test_microvm.ssh_config)
    _, stdout, stderr = ssh_connection.execute_command(iperf_cmd)
    assert stderr.read() == ''

    out = stdout.read()
    return out


def _start_local_iperf(netns_cmd_prefix):
    """Start iperf in server mode after killing any leftover iperf daemon."""
    iperf_cmd = 'pkill {}\n'.format(IPERF_BINARY)

    # Don't check the result of this command because it can fail if no iperf
    # is running.
    utils.run_cmd(iperf_cmd, ignore_return_code=True)

    iperf_cmd = '{} {} -sD -f KBytes\n'.format(netns_cmd_prefix, IPERF_BINARY)

    utils.run_cmd(iperf_cmd)

    # Wait for the iperf daemon to start.
    time.sleep(1)


def _run_local_iperf(iperf_cmd):
    """Execute a client related iperf command locally; return its stdout."""
    process = utils.run_cmd(iperf_cmd)
    return process.stdout


def _get_percentage_difference(measured, base):
    """Return the percentage delta between the arguments.

    The result is the absolute difference expressed as a percentage of
    `base`, so it does not tell whether `measured` is above or below it.
    """
    if measured == base:
        return 0
    try:
        return (abs(measured - base) / base) * 100.0
    except ZeroDivisionError:
        # It means base and only base is 0.
        return 100.0


def _process_iperf_line(line):
    """Parse iperf3 summary line and return test time and bandwidth.

    The test time is the end of the `<start>-<end> sec` interval and the
    bandwidth is the number in front of the `<unit>/sec` field. Raises
    `ValueError` if either field cannot be found in `line`.
    """
    interval = _IPERF_INTERVAL_RE.search(line)
    bandwidth = _IPERF_BANDWIDTH_RE.search(line)
    if interval is None or bandwidth is None:
        raise ValueError(
            "Unexpected iperf3 summary line: {!r}".format(line)
        )
    return float(interval.group(2)), float(bandwidth.group(1))


def _process_iperf_output(iperf_out):
    """Parse iperf3 output and return average test time and bandwidth.

    The averages are taken over the sender and the receiver summary
    lines. Raises `ValueError` if either summary line is missing, which
    typically means the iperf run itself did not complete.
    """
    sender = None
    receiver = None
    for line in iperf_out.splitlines():
        # When a marker shows up more than once, the last line wins.
        if line.find('sender') != -1:
            sender = _process_iperf_line(line)
        if line.find('receiver') != -1:
            receiver = _process_iperf_line(line)

    if sender is None or receiver is None:
        raise ValueError(
            "iperf3 output has no sender/receiver summary: {!r}".format(
                iperf_out
            )
        )

    send_time, send_bw = sender
    rcv_time, rcv_bw = receiver
    iperf_out_time = (send_time + rcv_time) / 2.0
    iperf_out_bw = (send_bw + rcv_bw) / 2.0
    return float(iperf_out_time), float(iperf_out_bw)