Path: tools/testing/selftests/kvm/dirty_log_perf_test.c
// SPDX-License-Identifier: GPL-2.0
/*
 * KVM dirty page logging performance test
 *
 * Based on dirty_log_test.c
 *
 * Copyright (C) 2018, Red Hat, Inc.
 * Copyright (C) 2020, Google, Inc.
 */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <linux/bitmap.h>

#include "kvm_util.h"
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
#include "ucall_common.h"

/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N		2UL

static int nr_vcpus = 1;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static bool run_vcpus_while_disabling_dirty_logging;

/* Host variables */
static u64 dirty_log_manual_caps;
static bool host_quit;
static int iteration;
static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];

static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
	int vcpu_idx = vcpu_args->vcpu_idx;
	uint64_t pages_count = 0;
	struct kvm_run *run;
	struct timespec start;
	struct timespec ts_diff;
	struct timespec total = (struct timespec){0};
	struct timespec avg;
	int ret;

	run = vcpu->run;

	while (!READ_ONCE(host_quit)) {
		int current_iteration = READ_ONCE(iteration);

		clock_gettime(CLOCK_MONOTONIC, &start);
		ret = _vcpu_run(vcpu);
		ts_diff = timespec_elapsed(start);

		TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
		TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
			    "Invalid guest sync status: exit_reason=%s",
			    exit_reason_str(run->exit_reason));

		pr_debug("Got sync event from vCPU %d\n", vcpu_idx);
		vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
		pr_debug("vCPU %d updated last completed iteration to %d\n",
			 vcpu_idx, vcpu_last_completed_iteration[vcpu_idx]);

		if (current_iteration) {
			pages_count += vcpu_args->pages;
			total = timespec_add(total, ts_diff);
			pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n",
				 vcpu_idx, current_iteration, ts_diff.tv_sec,
				 ts_diff.tv_nsec);
		} else {
			pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n",
				 vcpu_idx, current_iteration, ts_diff.tv_sec,
				 ts_diff.tv_nsec);
		}

		/*
		 * Keep running the guest while dirty logging is being disabled
		 * (iteration is negative) so that vCPUs are accessing memory
		 * for the entire duration of zapping collapsible SPTEs.
		 */
		while (current_iteration == READ_ONCE(iteration) &&
		       READ_ONCE(iteration) >= 0 && !READ_ONCE(host_quit)) {}
	}

	avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_idx]);
	pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		 vcpu_idx, pages_count, vcpu_last_completed_iteration[vcpu_idx],
		 total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}

struct test_params {
	unsigned long iterations;
	uint64_t phys_offset;
	bool partition_vcpu_memory_access;
	enum vm_mem_backing_src_type backing_src;
	int slots;
	uint32_t write_percent;
	bool random_access;
};

static void run_test(enum vm_guest_mode mode, void *arg)
{
	struct test_params *p = arg;
	struct kvm_vm *vm;
	unsigned long **bitmaps;
	uint64_t guest_num_pages;
	uint64_t host_num_pages;
	uint64_t pages_per_slot;
	struct timespec start;
	struct timespec ts_diff;
	struct timespec get_dirty_log_total = (struct timespec){0};
	struct timespec vcpu_dirty_total = (struct timespec){0};
	struct timespec avg;
	struct timespec clear_dirty_log_total = (struct timespec){0};
	int i;

	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
				 p->slots, p->backing_src,
				 p->partition_vcpu_memory_access);

	memstress_set_write_percent(vm, p->write_percent);

	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
	pages_per_slot = host_num_pages / p->slots;

	bitmaps = memstress_alloc_bitmaps(p->slots, pages_per_slot);

	if (dirty_log_manual_caps)
		vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
			      dirty_log_manual_caps);

	/* Start the iterations */
	iteration = 0;
	host_quit = false;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < nr_vcpus; i++)
		vcpu_last_completed_iteration[i] = -1;

	/*
	 * Use 100% writes during the population phase to ensure all
	 * memory is actually populated and not just mapped to the zero
	 * page. This prevents expensive copy-on-write faults from
	 * occurring during the dirty memory iterations below, which
	 * would pollute the performance results.
	 */
	memstress_set_write_percent(vm, 100);
	memstress_set_random_access(vm, false);
	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);

	/* Allow the vCPUs to populate memory */
	pr_debug("Starting iteration %d - Populating\n", iteration);
	for (i = 0; i < nr_vcpus; i++) {
		while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
		       iteration)
			;
	}

	ts_diff = timespec_elapsed(start);
	pr_info("Populate memory time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	/* Enable dirty logging */
	clock_gettime(CLOCK_MONOTONIC, &start);
	memstress_enable_dirty_logging(vm, p->slots);
	ts_diff = timespec_elapsed(start);
	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	memstress_set_write_percent(vm, p->write_percent);
	memstress_set_random_access(vm, p->random_access);

	while (iteration < p->iterations) {
		/*
		 * Incrementing the iteration number will start the vCPUs
		 * dirtying memory again.
		 */
		clock_gettime(CLOCK_MONOTONIC, &start);
		iteration++;

		pr_debug("Starting iteration %d\n", iteration);
		for (i = 0; i < nr_vcpus; i++) {
			while (READ_ONCE(vcpu_last_completed_iteration[i])
			       != iteration)
				;
		}

		ts_diff = timespec_elapsed(start);
		vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
		pr_info("Iteration %d dirty memory time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);

		clock_gettime(CLOCK_MONOTONIC, &start);
		memstress_get_dirty_log(vm, bitmaps, p->slots);
		ts_diff = timespec_elapsed(start);
		get_dirty_log_total = timespec_add(get_dirty_log_total,
						   ts_diff);
		pr_info("Iteration %d get dirty log time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);

		if (dirty_log_manual_caps) {
			clock_gettime(CLOCK_MONOTONIC, &start);
			memstress_clear_dirty_log(vm, bitmaps, p->slots,
						  pages_per_slot);
			ts_diff = timespec_elapsed(start);
			clear_dirty_log_total = timespec_add(clear_dirty_log_total,
							     ts_diff);
			pr_info("Iteration %d clear dirty log time: %ld.%.9lds\n",
				iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
		}
	}

	/*
	 * Run vCPUs while dirty logging is being disabled to stress disabling
	 * in terms of both performance and correctness. Opt-in via command
	 * line as this significantly increases time to disable dirty logging.
	 */
	if (run_vcpus_while_disabling_dirty_logging)
		WRITE_ONCE(iteration, -1);

	/* Disable dirty logging */
	clock_gettime(CLOCK_MONOTONIC, &start);
	memstress_disable_dirty_logging(vm, p->slots);
	ts_diff = timespec_elapsed(start);
	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	/*
	 * Tell the vCPU threads to quit. No need to manually check that vCPUs
	 * have stopped running after disabling dirty logging, the join will
	 * wait for them to exit.
	 */
	host_quit = true;
	memstress_join_vcpu_threads(nr_vcpus);

	avg = timespec_div(get_dirty_log_total, p->iterations);
	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		p->iterations, get_dirty_log_total.tv_sec,
		get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);

	if (dirty_log_manual_caps) {
		avg = timespec_div(clear_dirty_log_total, p->iterations);
		pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
			p->iterations, clear_dirty_log_total.tv_sec,
			clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
	}

	memstress_free_bitmaps(bitmaps, p->slots);
	memstress_destroy_vm(vm);
}

static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] "
	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed] [-s mem type] "
	       "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name);
	puts("");
	printf(" -a: access memory randomly rather than in order.\n");
	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
	       TEST_HOST_LOOP_N);
	printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n"
	       "     makes KVM_GET_DIRTY_LOG clear the dirty log (i.e.\n"
	       "     KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is not enabled)\n"
	       "     and writes will be tracked as soon as dirty logging is\n"
	       "     enabled on the memslot (i.e. KVM_DIRTY_LOG_INITIALLY_SET\n"
	       "     is not enabled).\n");
	printf(" -p: specify guest physical test memory offset\n"
	       "     Warning: a low offset can conflict with the loaded test code.\n");
	guest_modes_help();
	printf(" -n: Run the vCPUs in nested mode (L2)\n");
	printf(" -e: Run vCPUs while dirty logging is being disabled. This\n"
	       "     can significantly increase runtime, especially if there\n"
	       "     isn't a dedicated pCPU for the main thread.\n");
	printf(" -b: specify the size of the memory region which should be\n"
	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
	       "     (default: 1G)\n");
	printf(" -v: specify the number of vCPUs to run.\n");
	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
	       "     them into a separate region of memory for each vCPU.\n");
	printf(" -r: specify the starting random seed.\n");
	backing_src_help("-s");
	printf(" -x: Split the memory region into this number of memslots.\n"
	       "     (default: 1)\n");
	printf(" -w: specify the percentage of pages which should be written to\n"
	       "     as an integer from 0-100 inclusive. This is probabilistic,\n"
	       "     so -w X means each page has an X%% chance of writing\n"
	       "     and a (100-X)%% chance of reading.\n"
	       "     (default: 100 i.e. all pages are written to.)\n");
	kvm_print_vcpu_pinning_help();
	puts("");
	exit(0);
}

int main(int argc, char *argv[])
{
	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
	const char *pcpu_list = NULL;
	struct test_params p = {
		.iterations = TEST_HOST_LOOP_N,
		.partition_vcpu_memory_access = true,
		.backing_src = DEFAULT_VM_MEM_SRC,
		.slots = 1,
		.write_percent = 100,
	};
	int opt;

	/* Override the seed to be deterministic by default. */
	guest_random_seed = 1;

	dirty_log_manual_caps =
		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
				  KVM_DIRTY_LOG_INITIALLY_SET);

	guest_modes_append_default();

	while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) {
		switch (opt) {
		case 'a':
			p.random_access = true;
			break;
		case 'b':
			guest_percpu_mem_size = parse_size(optarg);
			break;
		case 'c':
			pcpu_list = optarg;
			break;
		case 'e':
			/* 'e' is for evil. */
			run_vcpus_while_disabling_dirty_logging = true;
			break;
		case 'g':
			dirty_log_manual_caps = 0;
			break;
		case 'h':
			help(argv[0]);
			break;
		case 'i':
			p.iterations = atoi_positive("Number of iterations", optarg);
			break;
		case 'm':
			guest_modes_cmdline(optarg);
			break;
		case 'n':
			memstress_args.nested = true;
			break;
		case 'o':
			p.partition_vcpu_memory_access = false;
			break;
		case 'p':
			p.phys_offset = strtoull(optarg, NULL, 0);
			break;
		case 'r':
			guest_random_seed = atoi_positive("Random seed", optarg);
			break;
		case 's':
			p.backing_src = parse_backing_src_type(optarg);
			break;
		case 'v':
			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
			TEST_ASSERT(nr_vcpus <= max_vcpus,
				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
			break;
		case 'w':
			p.write_percent = atoi_non_negative("Write percentage", optarg);
			TEST_ASSERT(p.write_percent <= 100,
				    "Write percentage must be between 0 and 100");
			break;
		case 'x':
			p.slots = atoi_positive("Number of slots", optarg);
			break;
		default:
			help(argv[0]);
			break;
		}
	}

	if (pcpu_list) {
		kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu,
				       nr_vcpus);
		memstress_args.pin_vcpus = true;
	}

	TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");

	pr_info("Test iterations: %"PRIu64"\n", p.iterations);

	for_each_guest_mode(run_test, &p);

	return 0;
}