Raghavendra Rao Ananta d8d78398e5 KVM: arm64: selftests: Introduce and use hardware-definition macros
The kvm selftest library for arm64 currently configures the hardware
fields, such as shift and mask in the page-table entries and registers,
directly with numbers. While it adds comments in places, it's better to
rewrite them with appropriate macros to improve the readability and
reduce the risk of errors. Hence, introduce macros to define the
hardware fields and use them in the arm64 processor library.

Most of the definitions are primarily copied from Linux's header,
arch/arm64/include/asm/pgtable-hwdef.h.

No functional change intended.

Suggested-by: Oliver Upton <oupton@google.com>
Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
Link: https://lore.kernel.org/r/20250405001042.1470552-2-rananta@google.com
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
2025-04-06 11:13:41 -07:00

1136 lines
34 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* page_fault_test.c - Test stage 2 faults.
*
* This test tries different combinations of guest accesses (e.g., write,
* S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
* hugetlbfs with a hole). It checks that the expected handling method is
* called (e.g., uffd faults with the right address and write/read flag).
*/
#include <linux/bitmap.h>
#include <fcntl.h>
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
#include <asm/sysreg.h>
#include <linux/bitfield.h>
#include "guest_modes.h"
#include "userfaultfd_util.h"
/* Guest virtual addresses that point to the test page and its PTE. */
#define TEST_GVA 0xc0000000
/* Guest address called by guest_exec(); 8 bytes into the test page. */
#define TEST_EXEC_GVA (TEST_GVA + 0x8)
#define TEST_PTE_GVA 0xb0000000
/* Pattern stored by the write-style guest accesses and checked on readback. */
#define TEST_DATA 0x0123456789ABCDEF
/* Guest pointer through which the guest accesses reach the test page. */
static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
/*
 * Commands the guest hands to the host with GUEST_SYNC(). handle_cmd()
 * compares the command against CMD_SKIP_TEST for equality and tests the
 * HOLE and CHECK values as independent bit flags.
 */
#define CMD_NONE (0)
#define CMD_SKIP_TEST (1ULL << 1)
#define CMD_HOLE_PT (1ULL << 2)
#define CMD_HOLE_DATA (1ULL << 3)
#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4)
#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5)
#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6)
#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7)
#define CMD_SET_PTE_AF (1ULL << 8)
/* Capacity of guest_prepare[] and guest_test_check[] in struct test_desc. */
#define PREPARE_FN_NR 10
#define CHECK_FN_NR 10
/*
 * Counters of host-side events seen while a test runs. The same struct type
 * is also embedded in struct test_desc to hold the expected values, which
 * check_event_counts() compares against the global 'events' instance.
 */
static struct event_cnt {
/* Incremented by mmio_on_test_gpa_handler(). */
int mmio_exits;
/* Incremented by fail_vcpu_run_mmio_no_syndrome_handler(). */
int fail_vcpu_runs;
/* Incremented by uffd_generic_handler() after a successful UFFDIO_COPY. */
int uffd_faults;
/* uffd_faults is incremented from multiple threads. */
pthread_mutex_t uffd_faults_mutex;
} events;
/* Description of one test case; instances are built by the TEST_* macros. */
struct test_desc {
/* Printed by print_test_banner(); a NULL name terminates the tests[] table. */
const char *name;
/* CMD_* value the guest sends with GUEST_SYNC() before running guest_test. */
uint64_t mem_mark_cmd;
/* Skip the test if any prepare function returns false */
bool (*guest_prepare[PREPARE_FN_NR])(void);
/* The guest access under test; guest_code() skips it when NULL. */
void (*guest_test)(void);
/* Guest-side checks run after guest_test; NULL entries are ignored. */
void (*guest_test_check[CHECK_FN_NR])(void);
/* When non-NULL, setup_uffd() registers userfaultfd on the matching region. */
uffd_handler_t uffd_pt_handler;
uffd_handler_t uffd_data_handler;
/* NOTE(review): not read anywhere in the code visible here - confirm use. */
void (*dabt_handler)(struct ex_regs *regs);
void (*iabt_handler)(struct ex_regs *regs);
/* Called by vcpu_run_loop() on a KVM_EXIT_MMIO exit. */
void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
/* Called by vcpu_run_loop() when _vcpu_run() returns non-zero. */
void (*fail_vcpu_run_handler)(int ret);
/* Flags passed to vm_userspace_mem_region_add() for the PT/data memslots. */
uint32_t pt_memslot_flags;
uint32_t data_memslot_flags;
/* Set by vcpu_run_loop() when handle_cmd() asks for the test to be skipped. */
bool skip;
/* Values check_event_counts() expects to find in the global 'events'. */
struct event_cnt expected_events;
};
/* Argument bundle handed to run_test() through for_each_guest_mode(). */
struct test_params {
/* Backing source type used for the page-table and test-data memslots. */
enum vm_mem_backing_src_type src_type;
/* The test to run. */
struct test_desc *test_desc;
};
/*
 * Issue a "tlbi vaae1is" for @vaddr, preceded by dsb(ishst) and followed by
 * dsb(ish) and isb().
 */
static inline void flush_tlb_page(uint64_t vaddr)
{
	dsb(ishst);
	/* The TLBI operand is the address with its low 12 bits shifted out. */
	asm volatile("tlbi vaae1is, %0" :: "r" (vaddr >> 12));
	dsb(ish);
	isb();
}
/* Guest access: store TEST_DATA to the test page and check the readback. */
static void guest_write64(void)
{
	uint64_t val;

	WRITE_ONCE(*guest_test_memory, TEST_DATA);

	val = READ_ONCE(*guest_test_memory);
	GUEST_ASSERT_EQ(val, TEST_DATA);
}
/*
 * Prepare function: check the system for atomic instructions.
 * Returns true when the ATOMIC field of ID_AA64ISAR0_EL1 is at least 2.
 */
static bool guest_check_lse(void)
{
	uint64_t id_reg = read_sysreg(id_aa64isar0_el1);
	uint64_t atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), id_reg);

	return atomic >= 2;
}
/*
 * Prepare function for guest_dc_zva(): returns true when the DZP field of
 * DCZID_EL0 reads as zero.
 */
static bool guest_check_dc_zva(void)
{
	uint64_t dczid, dzp;

	dczid = read_sysreg(dczid_el0);
	dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);

	return !dzp;
}
/*
 * Compare and swap instruction.
 *
 * Swaps TEST_DATA into the test page if it currently holds 0, then checks
 * that the page reads back as TEST_DATA.
 */
static void guest_cas(void)
{
	uint64_t val;
	/* Value the test page is expected to hold before the swap. */
	uint64_t expected = 0;

	GUEST_ASSERT(guest_check_lse());
	/*
	 * CASAL loads the old memory value into its first register, so that
	 * operand must be read-write ("+r") rather than an input. The
	 * instruction also writes memory, hence the "memory" clobber.
	 */
	asm volatile(".arch_extension lse\n"
		     "casal %0, %1, [%2]\n"
		     : "+r" (expected)
		     : "r" (TEST_DATA), "r" (guest_test_memory)
		     : "memory");
	val = READ_ONCE(*guest_test_memory);
	GUEST_ASSERT_EQ(val, TEST_DATA);
}
/* Guest access: load 64 bits from the test page and expect zero. */
static void guest_read64(void)
{
	uint64_t val = READ_ONCE(*guest_test_memory);

	GUEST_ASSERT_EQ(val, 0);
}
/*
 * Address translation instruction: run "at s1e1r" on the test page address
 * and check the result reported in PAR_EL1.
 */
static void guest_at(void)
{
	uint64_t par;

	asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
	isb();

	par = read_sysreg(par_el1);

	/* Bit 0 (PAR_EL1.F) must be clear for the AT to have succeeded. */
	GUEST_ASSERT_EQ(par & 1, 0);
}
/*
 * The size of the block written by "dc zva" is guaranteed to be between (2 <<
 * 0) and (2 << 9), which is safe in our case as we need the write to happen
 * for at least a word, and not more than a page.
 */
static void guest_dc_zva(void)
{
	/*
	 * Full 64-bit readback: a narrower type would silently truncate the
	 * loaded value and leave the upper bits unchecked.
	 */
	uint64_t val;

	asm volatile("dc zva, %0" :: "r" (guest_test_memory));
	dsb(ish);
	val = READ_ONCE(*guest_test_memory);
	GUEST_ASSERT_EQ(val, 0);
}
/*
 * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
 * KVM must take special care with them: they should still count as accesses
 * for dirty logging or user-faulting, but should be handled differently on
 * mmio.
 */
static void guest_ld_preidx(void)
{
	uint64_t addr = TEST_GVA - 8;
	uint64_t val;

	/*
	 * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
	 * in a gap between memslots not backed by anything.
	 */
	asm volatile("ldr %0, [%1, #8]!"
		     : "=r" (val), "+r" (addr));

	GUEST_ASSERT_EQ(val, 0);
	GUEST_ASSERT_EQ(addr, TEST_GVA);
}
/*
 * Pre-indexed store of TEST_DATA: the base register starts at TEST_GVA - 8
 * and is written back as TEST_GVA, so the store lands on the test page.
 */
static void guest_st_preidx(void)
{
	uint64_t val = TEST_DATA;
	uint64_t addr = TEST_GVA - 8;

	/* "memory": the store changes the test page behind the compiler's back. */
	asm volatile("str %0, [%1, #8]!"
		     : "+r" (val), "+r" (addr)
		     :
		     : "memory");

	GUEST_ASSERT_EQ(addr, TEST_GVA);

	/* Read the page back and verify the store landed, as guest_write64() does. */
	val = READ_ONCE(*guest_test_memory);
	GUEST_ASSERT_EQ(val, TEST_DATA);
}
/*
 * Prepare function: set TCR_HA in TCR_EL1.
 * Returns false (without touching TCR_EL1) when the HAFDBS field of
 * ID_AA64MMFR1_EL1 is zero, true otherwise.
 */
static bool guest_set_ha(void)
{
	uint64_t mmfr1, hafdbs, tcr;

	mmfr1 = read_sysreg(id_aa64mmfr1_el1);
	hafdbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);

	/* Skip if HA is not supported. */
	if (!hafdbs)
		return false;

	tcr = read_sysreg(tcr_el1);
	tcr |= TCR_HA;
	write_sysreg(tcr, tcr_el1);
	isb();

	return true;
}
/*
 * Prepare function: clear PTE_AF in the PTE mapped at TEST_PTE_GVA and flush
 * the TLB for TEST_GVA. Always returns true.
 */
static bool guest_clear_pte_af(void)
{
	uint64_t *pte = (uint64_t *)TEST_PTE_GVA;

	*pte &= ~PTE_AF;
	flush_tlb_page(TEST_GVA);

	return true;
}
/* Check function: assert that PTE_AF is set in the PTE at TEST_PTE_GVA. */
static void guest_check_pte_af(void)
{
	dsb(ish);

	GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
}
/* Check function: ask the host to verify the data write is in the dirty log. */
static void guest_check_write_in_dirty_log(void)
{
	GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
}
/* Check function: ask the host to verify no data write is in the dirty log. */
static void guest_check_no_write_in_dirty_log(void)
{
	GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
}
/* Check function: ask the host to verify the PTE page is in the dirty log. */
static void guest_check_s1ptw_wr_in_dirty_log(void)
{
	GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
}
/* Check function: ask the host to verify the PTE page is not in the dirty log. */
static void guest_check_no_s1ptw_wr_in_dirty_log(void)
{
	GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
}
/* Guest access: call the code at TEST_EXEC_GVA and expect it to return 0x77. */
static void guest_exec(void)
{
	int (*entry)(void) = (int (*)(void))TEST_EXEC_GVA;
	int ret = entry();

	GUEST_ASSERT_EQ(ret, 0x77);
}
/*
 * Run the non-NULL entries of test->guest_prepare[] in order.
 * Returns false as soon as one of them returns false, true otherwise.
 */
static bool guest_prepare(struct test_desc *test)
{
	int idx = 0;

	while (idx < PREPARE_FN_NR) {
		bool (*fn)(void) = test->guest_prepare[idx++];

		if (!fn)
			continue;
		if (!fn())
			return false;
	}

	return true;
}
/* Run the non-NULL entries of test->guest_test_check[] in order. */
static void guest_test_check(struct test_desc *test)
{
	int idx = 0;

	while (idx < CHECK_FN_NR) {
		void (*fn)(void) = test->guest_test_check[idx++];

		if (fn)
			fn();
	}
}
/*
 * Guest entry point: run the prepare functions (syncing CMD_SKIP_TEST to the
 * host when one fails), sync the test's mem_mark_cmd, perform the access under
 * test if there is one, run the check functions and signal completion.
 */
static void guest_code(struct test_desc *test)
{
	bool prepared = guest_prepare(test);

	if (!prepared)
		GUEST_SYNC(CMD_SKIP_TEST);

	GUEST_SYNC(test->mem_mark_cmd);

	if (test->guest_test)
		test->guest_test();

	guest_test_check(test);
	GUEST_DONE();
}
/* Data abort handler: fail the guest, reporting FAR_EL1. @regs is unused. */
static void no_dabt_handler(struct ex_regs *regs)
{
	GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
}
/* Instruction abort handler: fail the guest, reporting the faulting pc. */
static void no_iabt_handler(struct ex_regs *regs)
{
	GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
}
/*
 * Per-region state for the userfaultfd handlers, filled in by
 * setup_uffd_args(): pt_args for the page-table region and data_args for the
 * test-data region.
 */
static struct uffd_args {
/* malloc()ed snapshot of the region contents; source of the UFFDIO_COPY. */
char *copy;
/* Host virtual address of the start of the region. */
void *hva;
/* Size of the region in bytes; used as the UFFDIO_COPY length. */
uint64_t paging_size;
} pt_args, data_args;
/*
 * Common userfaultfd handler: resolve a MISSING fault at args->hva by copying
 * the saved region contents back with UFFDIO_COPY, then count the fault.
 *
 * Returns 0 on success, or -1 if the UFFDIO_COPY ioctl failed (in which case
 * the fault is not counted).
 */
static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
				struct uffd_args *args)
{
	uint64_t addr = msg->arg.pagefault.address;
	uint64_t flags = msg->arg.pagefault.flags;
	struct uffdio_copy copy;

	TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
		    "The only expected UFFD mode is MISSING");
	TEST_ASSERT_EQ(addr, (uint64_t)args->hva);

	pr_debug("uffd fault: addr=%p write=%d\n",
		 (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));

	copy.src = (uint64_t)args->copy;
	copy.dst = addr;
	copy.len = args->paging_size;
	copy.mode = 0;

	if (ioctl(uffd, UFFDIO_COPY, &copy) == -1) {
		pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
			addr, errno);
		return -1;
	}

	/* The counter is shared with the other uffd handler thread. */
	pthread_mutex_lock(&events.uffd_faults_mutex);
	events.uffd_faults += 1;
	pthread_mutex_unlock(&events.uffd_faults_mutex);

	return 0;
}
static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
{
return uffd_generic_handler(mode, uffd, msg, &pt_args);
}
/* userfaultfd handler for the test-data region; see uffd_generic_handler(). */
static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
{
	struct uffd_args *args = &data_args;

	return uffd_generic_handler(mode, uffd, msg, args);
}
/*
 * Record @region's host address and size in @args and take a malloc()ed
 * snapshot of its current contents. The snapshot is released by free_uffd().
 */
static void setup_uffd_args(struct userspace_mem_region *region,
			    struct uffd_args *args)
{
	args->paging_size = region->region.memory_size;
	args->hva = (void *)region->region.userspace_addr;

	args->copy = malloc(args->paging_size);
	TEST_ASSERT(args->copy, "Failed to allocate data copy.");

	memcpy(args->copy, args->hva, args->paging_size);
}
/*
 * Snapshot the page-table and test-data regions, then start userfaultfd
 * demand paging (MISSING mode) on each region for which the test provides a
 * handler. *pt_uffd / *data_uffd are set to NULL when no handler is given.
 */
static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
		       struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
{
	struct test_desc *test = p->test_desc;
	int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;

	setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
	setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);

	*pt_uffd = test->uffd_pt_handler ?
		uffd_setup_demand_paging(uffd_mode, 0,
					 pt_args.hva,
					 pt_args.paging_size,
					 1, test->uffd_pt_handler) :
		NULL;

	*data_uffd = test->uffd_data_handler ?
		uffd_setup_demand_paging(uffd_mode, 0,
					 data_args.hva,
					 data_args.paging_size,
					 1, test->uffd_data_handler) :
		NULL;
}
/*
 * Stop demand paging on the regions that had a handler registered and release
 * the region snapshots taken by setup_uffd_args().
 */
static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
		      struct uffd_desc *data_uffd)
{
	if (test->uffd_pt_handler)
		uffd_stop_demand_paging(pt_uffd);

	if (test->uffd_data_handler)
		uffd_stop_demand_paging(data_uffd);

	free(pt_args.copy);
	free(data_args.copy);
}
/* userfaultfd handler for regions where any fault is a test failure. */
static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
{
	TEST_FAIL("There was no UFFD fault expected.");

	return -1;
}
/*
 * Drop the backing pages of the whole @region: madvise(MADV_DONTNEED) when
 * the region has no fd, fallocate(FALLOC_FL_PUNCH_HOLE) on the fd otherwise.
 *
 * Returns false if the test should be skipped; as written it always returns
 * true. @vm is unused.
 */
static bool punch_hole_in_backing_store(struct kvm_vm *vm,
					struct userspace_mem_region *region)
{
	uint64_t paging_size = region->region.memory_size;
	void *hva = (void *)region->region.userspace_addr;
	int fd = region->fd;
	int ret;

	if (fd == -1) {
		ret = madvise(hva, paging_size, MADV_DONTNEED);
		TEST_ASSERT(ret == 0, "madvise failed");
	} else {
		ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				0, paging_size);
		TEST_ASSERT(ret == 0, "fallocate failed");
	}

	return true;
}
/*
 * MMIO handler for writes to the test page: check that the exit targets the
 * start of the test-data region, copy the MMIO payload into the region's host
 * mapping and count the exit.
 */
static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
{
	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
	void *hva = (void *)region->region.userspace_addr;

	TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);

	memcpy(hva, run->mmio.data, run->mmio.len);
	events.mmio_exits += 1;
}
/* Default MMIO handler: log the exit details and fail the test. */
static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
{
	uint64_t data;

	memcpy(&data, run->mmio.data, sizeof(data));

	pr_debug("addr=%lld len=%d w=%d data=%lx\n",
		 run->mmio.phys_addr, run->mmio.len,
		 run->mmio.is_write, data);

	TEST_FAIL("There was no MMIO exit expected.");
}
/*
 * Fetch the dirty log of @region's memslot and return whether the bit for
 * host page number @host_pg_nr is set.
 */
static bool check_write_in_dirty_log(struct kvm_vm *vm,
				     struct userspace_mem_region *region,
				     uint64_t host_pg_nr)
{
	uint64_t size = region->region.memory_size;
	unsigned long *bitmap;
	bool dirty;

	/* getpagesize() is not always equal to vm->page_size */
	bitmap = bitmap_zalloc(size / getpagesize());
	kvm_vm_get_dirty_log(vm, region->region.slot, bitmap);

	dirty = test_bit(host_pg_nr, bitmap);
	free(bitmap);

	return dirty;
}
/*
 * Host-side handling of a command received from the guest via GUEST_SYNC():
 * punch holes in the PT/data backing stores and/or check the dirty logs, as
 * selected by the CMD_* bits in @cmd.
 *
 * Returns true to continue the test, and false if it should be skipped.
 */
static bool handle_cmd(struct kvm_vm *vm, int cmd)
{
	struct userspace_mem_region *data_region, *pt_region;
	uint64_t pte_gpa, pte_pg;
	bool continue_test;

	data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
	pt_region = vm_get_mem_region(vm, MEM_REGION_PT);

	/* Host page index, within the PT region, of the PTE that maps TEST_GVA. */
	pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
	pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();

	continue_test = !(cmd == CMD_SKIP_TEST);

	if (cmd & CMD_HOLE_PT)
		continue_test = punch_hole_in_backing_store(vm, pt_region);

	if (cmd & CMD_HOLE_DATA)
		continue_test = punch_hole_in_backing_store(vm, data_region);

	if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
		TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
			    "Missing write in dirty log");

	if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
		TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
			    "Missing s1ptw write in dirty log");

	if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
		TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
			    "Unexpected write in dirty log");

	if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
		TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
			    "Unexpected s1ptw write in dirty log");

	return continue_test;
}
/* Default vcpu-run failure handler: any failure is a test failure. */
void fail_vcpu_run_no_handler(int ret)
{
	TEST_FAIL("Unexpected vcpu run failure");
}
/*
 * vcpu-run failure handler for accesses without a valid syndrome: the run is
 * expected to have failed with ENOSYS; count the failure.
 */
void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
{
	TEST_ASSERT(errno == ENOSYS,
		    "The mmio handler should have returned not implemented.");

	events.fail_vcpu_runs += 1;
}
/* One 32-bit AArch64 instruction word. */
typedef uint32_t aarch64_insn_t;
/*
 * The two instructions ("mov x0, #0x77; ret") emitted at the __exec_test
 * label inside __return_0x77(); load_exec_code_for_test() copies them into
 * the test page.
 */
extern aarch64_insn_t __exec_test[2];
/*
 * Container for the __exec_test code. It is noinline, presumably so the asm
 * (and therefore the label) is emitted exactly once.
 */
noinline void __return_0x77(void)
{
asm volatile("__exec_test: mov x0, #0x77\n"
"ret\n");
}
/*
 * Copy the __exec_test instructions into the test-data region at the offset
 * matching TEST_EXEC_GVA, through the region's host mapping.
 *
 * Note that this function runs on the host before the test VM starts: there's
 * no need to sync the D$ and I$ caches.
 */
static void load_exec_code_for_test(struct kvm_vm *vm)
{
	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
	void *hva = (void *)region->region.userspace_addr;
	uint64_t *code;

	assert(TEST_EXEC_GVA > TEST_GVA);

	code = hva + TEST_EXEC_GVA - TEST_GVA;
	memcpy(code, __exec_test, sizeof(__exec_test));
}
/*
 * Initialize the descriptor tables and install no_dabt_handler() and
 * no_iabt_handler() for current-EL data and instruction aborts.
 * @test is unused.
 */
static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
				 struct test_desc *test)
{
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);

	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
				ESR_ELx_EC_DABT_CUR, no_dabt_handler);
	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
				ESR_ELx_EC_IABT_CUR, no_iabt_handler);
}
/*
 * Map TEST_GVA to the start of the test-data region, and TEST_PTE_GVA to the
 * guest physical address of the PTE that maps TEST_GVA.
 */
static void setup_gva_maps(struct kvm_vm *vm)
{
	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
	uint64_t pte_gpa;

	/* Map TEST_GVA first. This will install a new PTE. */
	virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);

	/* Then map TEST_PTE_GVA to the above PTE. */
	pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
	virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
}
/* Memslot IDs used by setup_memslots(). */
enum pf_test_memslots {
/* Backs both MEM_REGION_CODE and MEM_REGION_DATA. */
CODE_AND_DATA_MEMSLOT,
/* Backs MEM_REGION_PT. */
PAGE_TABLE_MEMSLOT,
/* Backs MEM_REGION_TEST_DATA. */
TEST_DATA_MEMSLOT,
};
/*
 * Create a memslot for code and data at pfn=0, and test-data and PT ones
 * at max_gfn.
 *
 * The code/data memslot is always anonymous memory; the PT and test-data
 * memslots use the backing source and memslot flags selected by @p.
 */
static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
{
	uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
	uint64_t guest_page_size = vm->page_size;
	uint64_t max_gfn = vm_compute_max_gfn(vm);
	/* Enough for 2M of code when using 4K guest pages. */
	uint64_t code_npages = 512;
	uint64_t pt_size, data_size, data_gpa;

	/*
	 * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
	 * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13
	 * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
	 * twice that just in case.
	 *
	 * memslot sizes and gpa's must be aligned to the backing page size
	 */
	pt_size = align_up(26 * guest_page_size, backing_src_pagesz);
	data_size = align_up(guest_page_size, backing_src_pagesz);
	data_gpa = align_down((max_gfn * guest_page_size) - data_size,
			      backing_src_pagesz);

	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
				    CODE_AND_DATA_MEMSLOT, code_npages, 0);
	vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
	vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;

	/* The PT memslot sits immediately below the test-data memslot. */
	vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
				    PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
				    p->test_desc->pt_memslot_flags);
	vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;

	vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
				    data_size / guest_page_size,
				    p->test_desc->data_memslot_flags);
	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
}
/*
 * Initialize ucall with the guest physical address that immediately follows
 * the test-data region.
 */
static void setup_ucall(struct kvm_vm *vm)
{
	struct userspace_mem_region *region;

	region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
	ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
}
/*
 * Fill in the failing default handlers for the MMIO and vcpu-run-failure
 * callbacks the test did not set.
 */
static void setup_default_handlers(struct test_desc *test)
{
	if (test->mmio_handler == NULL)
		test->mmio_handler = mmio_no_handler;

	if (test->fail_vcpu_run_handler == NULL)
		test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
}
/* Assert that the counted events match the counts the test expects. */
static void check_event_counts(struct test_desc *test)
{
	TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);

	TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);

	TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
}
/* Log the test name, guest mode and backing source type at debug level. */
static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
{
	pr_debug("Test: %s\n", p->test_desc->name);
	pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
	pr_debug("Testing memory backing src type: %s\n",
		 vm_mem_backing_src_alias(p->src_type)->name);
}
/*
 * Zero the event counters before a test runs.
 *
 * Only the counters are reset: events also embeds uffd_faults_mutex, and
 * overwriting a pthread_mutex_t with raw bytes is not a defined way to
 * (re)initialize it, so the mutex is deliberately left untouched.
 */
static void reset_event_counts(void)
{
	events.mmio_exits = 0;
	events.fail_vcpu_runs = 0;
	events.uffd_faults = 0;
}
/*
 * Run the vCPU until the guest signals completion, a vcpu run fails, or the
 * test asks to be skipped, dispatching ucalls and MMIO exits on the way.
 *
 * This function either succeeds, skips the test (after setting test->skip), or
 * fails with a TEST_FAIL that aborts all tests.
 */
static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
			  struct test_desc *test)
{
	struct kvm_run *run;
	struct ucall uc;
	int ret;

	run = vcpu->run;

	for (;;) {
		ret = _vcpu_run(vcpu);
		if (ret) {
			/* A failed run ends the test; the handler judges it. */
			test->fail_vcpu_run_handler(ret);
			goto done;
		}

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			/* uc.args[1] carries the CMD_* value from the guest. */
			if (!handle_cmd(vm, uc.args[1])) {
				test->skip = true;
				goto done;
			}
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_DONE:
			goto done;
		case UCALL_NONE:
			if (run->exit_reason == KVM_EXIT_MMIO)
				test->mmio_handler(vm, run);
			break;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}
	}

done:
	/* Keep the format string a literal; the message is passed as data. */
	pr_debug("%s\n", test->skip ? "Skipped." : "Done.");
}
/*
 * Run one test in one guest mode: create the VM and its memslots, load the
 * guest code, set up the mappings, userfaultfd and handlers, run the vCPU
 * and finally check the event counters.
 *
 * @arg points to the struct test_params describing the test.
 */
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = (struct test_params *)arg;
struct test_desc *test = p->test_desc;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
struct uffd_desc *pt_uffd, *data_uffd;
print_test_banner(mode, p);
vm = ____vm_create(VM_SHAPE(mode));
/* Memslots are added by hand so the test controls their type and flags. */
setup_memslots(vm, p);
kvm_vm_elf_load(vm, program_invocation_name);
setup_ucall(vm);
vcpu = vm_vcpu_add(vm, 0, guest_code);
setup_gva_maps(vm);
reset_event_counts();
/*
* Set some code in the data memslot for the guest to execute (only
* applicable to the EXEC tests). This has to be done before
* setup_uffd() as that function copies the memslot data for the uffd
* handler.
*/
load_exec_code_for_test(vm);
setup_uffd(vm, p, &pt_uffd, &data_uffd);
setup_abort_handlers(vm, vcpu, test);
setup_default_handlers(test);
/* The test descriptor is the single argument of guest_code(). */
vcpu_args_set(vcpu, 1, test);
vcpu_run_loop(vm, vcpu, test);
kvm_vm_free(vm);
free_uffd(test, pt_uffd, data_uffd);
/*
* Make sure we check the events after the uffd threads have exited,
* which means they updated their respective event counters.
*/
if (!test->skip)
check_event_counts(test);
}
/*
 * Print the usage text for the program named @name, followed by the help for
 * the guest-mode (-m) and backing-source (-s) options.
 */
static void help(char *name)
{
	puts("");
	/* List every option accepted by main()'s getopt() string "hm:s:". */
	printf("usage: %s [-h] [-m mode] [-s mem-type]\n", name);
	puts("");
	guest_modes_help();
	backing_src_help("-s");
	puts("");
}
/* Stringify @s as written, without macro-expanding it first. */
#define SNAME(s) #s
/*
 * Build test-name strings by pasting the arguments together with '_'.
 * NOTE(review): operands of ## are not macro-expanded, so the nested SCAT2()
 * inside SCAT3/SCAT4 looks like it is pasted and stringified literally rather
 * than expanded - confirm the resulting names are what is intended.
 */
#define SCAT2(a, b) SNAME(a ## _ ## b)
#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c))
#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d))
/*
 * _PREPARE(x) and _CHECK(x) map an access function name or an AF mode
 * (with_af / no_af) to the matching prepare and check functions below.
 */
#define _CHECK(_test) _CHECK_##_test
#define _PREPARE(_test) _PREPARE_##_test
/* Accesses with no prerequisite. */
#define _PREPARE_guest_read64 NULL
#define _PREPARE_guest_ld_preidx NULL
#define _PREPARE_guest_write64 NULL
#define _PREPARE_guest_st_preidx NULL
#define _PREPARE_guest_exec NULL
#define _PREPARE_guest_at NULL
/* Accesses that are skipped when the feature check returns false. */
#define _PREPARE_guest_dc_zva guest_check_dc_zva
#define _PREPARE_guest_cas guest_check_lse
/* With or without access flag checks */
/* Note: with_af expands to two guest_prepare[] entries. */
#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af
#define _PREPARE_no_af NULL
#define _CHECK_with_af guest_check_pte_af
#define _CHECK_no_af NULL
/*
 * Performs an access and checks that no faults were triggered.
 *
 * @_access:   guest access function to run.
 * @_with_af:  with_af or no_af; selects the AF prepare/check functions.
 * @_mark_cmd: CMD_* value the guest syncs to the host before the access.
 * All expected event counts are zero.
 */
#define TEST_ACCESS(_access, _with_af, _mark_cmd) \
{ \
.name = SCAT3(_access, _with_af, #_mark_cmd), \
.guest_prepare = { _PREPARE(_with_af), \
_PREPARE(_access) }, \
.mem_mark_cmd = _mark_cmd, \
.guest_test = _access, \
.guest_test_check = { _CHECK(_with_af) }, \
.expected_events = { 0 }, \
}
/*
 * Performs an access with userfaultfd handlers registered on the data and PT
 * regions, and expects @_uffd_faults userfaultfd faults in total.
 *
 * @_mark_cmd is the CMD_* value the guest syncs to the host before the access.
 */
#define TEST_UFFD(_access, _with_af, _mark_cmd, \
_uffd_data_handler, _uffd_pt_handler, _uffd_faults) \
{ \
.name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \
.guest_prepare = { _PREPARE(_with_af), \
_PREPARE(_access) }, \
.guest_test = _access, \
.mem_mark_cmd = _mark_cmd, \
.guest_test_check = { _CHECK(_with_af) }, \
.uffd_data_handler = _uffd_data_handler, \
.uffd_pt_handler = _uffd_pt_handler, \
.expected_events = { .uffd_faults = _uffd_faults, }, \
}
/*
 * Performs an access with dirty logging enabled on both the data and PT
 * memslots. @_test_check and @_pt_check are the guest check functions run
 * after the AF check. All expected event counts are zero.
 */
#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \
{ \
.name = SCAT3(dirty_log, _access, _with_af), \
.data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
.pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
.guest_prepare = { _PREPARE(_with_af), \
_PREPARE(_access) }, \
.guest_test = _access, \
.guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
.expected_events = { 0 }, \
}
/*
 * Combines dirty logging on both memslots with holes punched in both backing
 * stores (CMD_HOLE_DATA | CMD_HOLE_PT) and userfaultfd handling. The PT
 * handler is always uffd_pt_handler; @_uffd_faults is the expected number of
 * userfaultfd faults.
 */
#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \
_uffd_faults, _test_check, _pt_check) \
{ \
.name = SCAT3(uffd_and_dirty_log, _access, _with_af), \
.data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
.pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
.guest_prepare = { _PREPARE(_with_af), \
_PREPARE(_access) }, \
.guest_test = _access, \
.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
.guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
.uffd_data_handler = _uffd_data_handler, \
.uffd_pt_handler = uffd_pt_handler, \
.expected_events = { .uffd_faults = _uffd_faults, }, \
}
/*
 * Performs an access with both the data and PT memslots marked
 * KVM_MEM_READONLY. @_mmio_handler handles MMIO exits (0 selects the failing
 * default) and @_mmio_exits is the expected number of them.
 */
#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \
{ \
.name = SCAT2(ro_memslot, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
.pt_memslot_flags = KVM_MEM_READONLY, \
.guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.mmio_handler = _mmio_handler, \
.expected_events = { .mmio_exits = _mmio_exits }, \
}
/*
 * Performs an access on read-only data and PT memslots and expects exactly
 * one failed vcpu run, checked by fail_vcpu_run_mmio_no_syndrome_handler().
 */
#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \
{ \
.name = SCAT2(ro_memslot_no_syndrome, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
.pt_memslot_flags = KVM_MEM_READONLY, \
.guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
.expected_events = { .fail_vcpu_runs = 1 }, \
}
/*
 * Performs an access with both the data and PT memslots marked read-only and
 * tracked for dirty logging. @_mmio_handler handles MMIO exits (0 selects the
 * failing default), @_mmio_exits is the expected number of them and
 * @_test_check is the guest check function run after the access.
 *
 * The name carries an "and_dlog" tag so it is distinct from the name
 * TEST_RO_MEMSLOT() generates for the same access.
 */
#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits,	\
				      _test_check)				\
{										\
	.name			= SCAT2(ro_memslot_and_dlog, _access),		\
	.data_memslot_flags	= KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,	\
	.pt_memslot_flags	= KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,	\
	.guest_prepare		= { _PREPARE(_access) },			\
	.guest_test		= _access,					\
	.guest_test_check	= { _test_check },				\
	.mmio_handler		= _mmio_handler,				\
	.expected_events	= { .mmio_exits = _mmio_exits},			\
}
/*
 * Performs an access on memslots that are both read-only and tracked for
 * dirty logging, and expects exactly one failed vcpu run. @_test_check is the
 * guest check function listed for the test.
 */
#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \
{ \
.name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
.data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
.pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
.guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.guest_test_check = { _test_check }, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
.expected_events = { .fail_vcpu_runs = 1 }, \
}
/*
 * Performs an access on read-only data and PT memslots whose backing stores
 * have holes punched (CMD_HOLE_DATA | CMD_HOLE_PT) and are handled by
 * userfaultfd. The PT handler is always uffd_pt_handler. Expects
 * @_mmio_exits MMIO exits and @_uffd_faults userfaultfd faults.
 */
#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \
_uffd_data_handler, _uffd_faults) \
{ \
.name = SCAT2(ro_memslot_uffd, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
.pt_memslot_flags = KVM_MEM_READONLY, \
.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
.guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.uffd_data_handler = _uffd_data_handler, \
.uffd_pt_handler = uffd_pt_handler, \
.mmio_handler = _mmio_handler, \
.expected_events = { .mmio_exits = _mmio_exits, \
.uffd_faults = _uffd_faults }, \
}
/*
 * Performs an access on read-only data and PT memslots whose backing stores
 * have holes punched (CMD_HOLE_DATA | CMD_HOLE_PT) and are handled by
 * userfaultfd. The PT handler is always uffd_pt_handler. Expects exactly one
 * failed vcpu run and @_uffd_faults userfaultfd faults.
 *
 * The name carries a "uffd" tag so it is distinct from the name
 * TEST_RO_MEMSLOT_NO_SYNDROME() generates for the same access.
 */
#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler,	\
					     _uffd_faults)			\
{										\
	.name			= SCAT2(ro_memslot_no_syndrome_uffd, _access),	\
	.data_memslot_flags	= KVM_MEM_READONLY,				\
	.pt_memslot_flags	= KVM_MEM_READONLY,				\
	.mem_mark_cmd		= CMD_HOLE_DATA | CMD_HOLE_PT,			\
	.guest_prepare		= { _PREPARE(_access) },			\
	.guest_test		= _access,					\
	.uffd_data_handler	= _uffd_data_handler,				\
	.uffd_pt_handler	= uffd_pt_handler,				\
	.fail_vcpu_run_handler	= fail_vcpu_run_mmio_no_syndrome_handler,	\
	.expected_events	= { .fail_vcpu_runs = 1,			\
				    .uffd_faults = _uffd_faults },		\
}
/*
 * Table of all test cases, walked by for_each_test_and_guest_mode(). The
 * zeroed last entry (NULL name) terminates the table.
 */
static struct test_desc tests[] = {
/* Check that HW is setting the Access Flag (AF) (sanity checks). */
TEST_ACCESS(guest_read64, with_af, CMD_NONE),
TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
TEST_ACCESS(guest_cas, with_af, CMD_NONE),
TEST_ACCESS(guest_write64, with_af, CMD_NONE),
TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
TEST_ACCESS(guest_exec, with_af, CMD_NONE),
/*
* Punch a hole in the data backing store, and then try multiple
* accesses: reads should return zeroes, and writes should
* re-populate the page. Moreover, the test also checks that no
* exception was generated in the guest. Note that this
* reading/writing behavior is the same as reading/writing a
* punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
* userspace.
*/
TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
/*
* Punch holes in the data and PT backing stores and mark them for
* userfaultfd handling. This should result in 2 faults: the access
* on the data backing store, and its respective S1 page table walk
* (S1PTW).
*/
TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
/*
* Can't test guest_at with_af as it's IMPDEF whether the AF is set.
* The S1PTW fault should still be marked as a write.
*/
TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_no_handler, uffd_pt_handler, 1),
TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
/*
* Try accesses when the data and PT memory regions are both
* tracked for dirty logging.
*/
TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
guest_check_no_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_ld_preidx, with_af,
guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
guest_check_no_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
/*
* Access when the data and PT memory regions are both marked for
* dirty logging and UFFD at the same time. The expected result is
* that writes should mark the dirty log and trigger a userfaultfd
* write fault. Reads/execs should result in a read userfaultfd
* fault, and nothing in the dirty log. Any S1PTW should result in
* a write in the dirty log and a userfaultfd write.
*/
TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
uffd_data_handler, 2,
guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
uffd_data_handler, 2,
guest_check_no_write_in_dirty_log,
guest_check_no_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
uffd_data_handler,
2, guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
uffd_data_handler, 2,
guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
uffd_data_handler,
2, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
uffd_data_handler, 2,
guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
uffd_data_handler,
2, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
uffd_data_handler, 2,
guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
/*
* Access when both the PT and data regions are marked read-only
* (with KVM_MEM_READONLY). Writes with a syndrome result in an
* MMIO exit, writes with no syndrome (e.g., CAS) result in a
* failed vcpu run, and reads/execs with and without syndromes do
* not fault.
*/
TEST_RO_MEMSLOT(guest_read64, 0, 0),
TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
TEST_RO_MEMSLOT(guest_at, 0, 0),
TEST_RO_MEMSLOT(guest_exec, 0, 0),
TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
/*
* The PT and data regions are both read-only and marked
* for dirty logging at the same time. The expected result is that
* for writes there should be no write in the dirty log. The
* readonly handling is the same as if the memslot was not marked
* for dirty logging: writes with a syndrome result in an MMIO
* exit, and writes with no syndrome result in a failed vcpu run.
*/
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
1, guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
guest_check_no_write_in_dirty_log),
/*
* The PT and data regions are both read-only and punched with
* holes tracked with userfaultfd. The expected result is the
* union of both userfaultfd and read-only behaviors. For example,
* write accesses result in a userfaultfd write fault and an MMIO
* exit. Writes with no syndrome result in a failed vcpu run and
* no userfaultfd write fault. Reads result in userfaultfd getting
* triggered.
*/
TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
uffd_data_handler, 2),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
/* Terminator: a NULL name ends the walk over the table. */
{ 0 }
};
/*
 * Run every entry of tests[] that is not marked as skipped, once per guest
 * mode, using @src_type as the backing source of the test memslots.
 */
static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
{
	struct test_desc *t = &tests[0];

	/* The table ends at the first entry without a name. */
	while (t->name) {
		if (!t->skip) {
			struct test_params p = {
				.src_type = src_type,
				.test_desc = t,
			};

			for_each_guest_mode(run_test, &p);
		}
		t++;
	}
}
int main(int argc, char *argv[])
{
enum vm_mem_backing_src_type src_type;
int opt;
src_type = DEFAULT_VM_MEM_SRC;
while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
break;
case 's':
src_type = parse_backing_src_type(optarg);
break;
case 'h':
default:
help(argv[0]);
exit(0);
}
}
for_each_test_and_guest_mode(src_type);
return 0;
}