mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/
hyperv-next for 6.15
-----BEGIN PGP SIGNATURE-----

iQFHBAABCgAxFiEEIbPD0id6easf0xsudhRwX5BBoF4FAmfhlLATHHdlaS5saXVA
a2VybmVsLm9yZwAKCRB2FHBfkEGgXgchCADOz33rSm4G4w4r0qT05dTDi/lZkEdK
64dQq322XXP/C9FfR66d30243gsAmuM5a0SvzFHLXAOu6yqM270Xehd/Rud+Um2s
lSVnc0Ux0AWBgksqFd0t577aN7zmJEukosEYO5lBNop+zOcadrm3S6Th/AoL2h/D
yphPkhH13bsCK+Wll/eBOQLIhC9iA0konYbBLuEQ5MqvUbrzc6Rmb5gxsHHZKOqg
vLjkrYR/d3s2gIpKxiFp0RwvzGyffZEHxvU/YF3hTenPMlTlnXWbyspBSTVmWggP
13IFLzqxDdW9RgUnGB4xRc424AC1LKqEr42QPQE7zGvl2jdJriA2Q1LT
=BXqj
-----END PGP SIGNATURE-----

Merge tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv updates from Wei Liu:

 - Add support for running as the root partition in Hyper-V (Microsoft
   Hypervisor) by exposing /dev/mshv (Nuno and various people)

 - Add support for CPU offlining in Hyper-V (Hamza Mahfooz)

 - Misc fixes and cleanups (Roman Kisel, Tianyu Lan, Wei Liu, Michael
   Kelley, Thorsten Blum)

* tag 'hyperv-next-signed-20250324' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (24 commits)
  x86/hyperv: fix an indentation issue in mshyperv.h
  x86/hyperv: Add comments about hv_vpset and var size hypercall input args
  Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs
  hyperv: Add definitions for root partition driver to hv headers
  x86: hyperv: Add mshv_handler() irq handler and setup function
  Drivers: hv: Introduce per-cpu event ring tail
  Drivers: hv: Export some functions for use by root partition module
  acpi: numa: Export node_to_pxm()
  hyperv: Introduce hv_recommend_using_aeoi()
  arm64/hyperv: Add some missing functions to arm64
  x86/mshyperv: Add support for extended Hyper-V features
  hyperv: Log hypercall status codes as strings
  x86/hyperv: Fix check of return value from snp_set_vmsa()
  x86/hyperv: Add VTL mode callback for restarting the system
  x86/hyperv: Add VTL mode emergency restart callback
  hyperv: Remove unused union and structs
  hyperv: Add CONFIG_MSHV_ROOT to gate root partition support
  hyperv: Change hv_root_partition into a function
  hyperv: Convert hypercall statuses to linux error codes
  drivers/hv: add CPU offlining support
  ...
This commit is contained in:
commit a5b3d8660b
@@ -370,6 +370,8 @@ Code  Seq#    Include File                                           Comments
 0xB7  all    uapi/linux/remoteproc_cdev.h                            <mailto:linux-remoteproc@vger.kernel.org>
 0xB7  all    uapi/linux/nsfs.h                                       <mailto:Andrei Vagin <avagin@openvz.org>>
 0xB8  01-02  uapi/misc/mrvl_cn10k_dpi.h                              Marvell CN10K DPI driver
+0xB8  all    uapi/linux/mshv.h                                       Microsoft Hyper-V /dev/mshv driver
+                                                                     <mailto:linux-hyperv@vger.kernel.org>
 0xC0  00-0F  linux/usb/iowarrior.h
 0xCA  00-0F  uapi/misc/cxl.h
 0xCA  10-2F  uapi/misc/ocxl.h
@@ -53,6 +53,23 @@ u64 hv_do_fast_hypercall8(u16 code, u64 input)
 }
 EXPORT_SYMBOL_GPL(hv_do_fast_hypercall8);
 
+/*
+ * hv_do_fast_hypercall16 -- Invoke the specified hypercall
+ * with arguments in registers instead of physical memory.
+ * Avoids the overhead of virt_to_phys for simple hypercalls.
+ */
+u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+	struct arm_smccc_res res;
+	u64 control;
+
+	control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+	arm_smccc_1_1_hvc(HV_FUNC_ID, control, input1, input2, &res);
+	return res.a0;
+}
+EXPORT_SYMBOL_GPL(hv_do_fast_hypercall16);
+
 /*
  * Set a single VP register to a 64-bit value.
  */
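For context, the fast-hypercall path passes the code and both arguments in registers, so no per-cpu hypercall input page is needed. A minimal sketch of a caller, assuming a made-up hypercall code (0x4e and the inputs below are placeholders, not values from this diff):

/* Sketch only: invoke the new two-argument fast hypercall path. */
static u64 example_fast_hypercall(void)
{
	/* 0x4e, 0x1234 and 0x5678 are placeholder values, not real codes */
	u64 status = hv_do_fast_hypercall16(0x4e, 0x1234, 0x5678);

	if (!hv_result_success(status))
		pr_err("fast hypercall failed: 0x%llx\n", status);

	return status;
}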
@@ -26,6 +26,7 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(hv_get_hypervisor_version);
 
 static int __init hyperv_init(void)
 {
@@ -61,6 +62,8 @@ static int __init hyperv_init(void)
 		ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
 		ms_hyperv.misc_features);
 
+	hv_identify_partition_type();
+
 	ret = hv_common_init();
 	if (ret)
 		return ret;
@@ -72,6 +75,9 @@ static int __init hyperv_init(void)
 		return ret;
 	}
 
+	if (ms_hyperv.priv_high & HV_ACCESS_PARTITION_ID)
+		hv_get_partition_id();
+
 	ms_hyperv_late_init();
 
 	hyperv_initialized = true;
@@ -40,6 +40,19 @@ static inline u64 hv_get_msr(unsigned int reg)
 	return hv_get_vpreg(reg);
 }
 
+/*
+ * Nested is not supported on arm64
+ */
+static inline void hv_set_non_nested_msr(unsigned int reg, u64 value)
+{
+	hv_set_msr(reg, value);
+}
+
+static inline u64 hv_get_non_nested_msr(unsigned int reg)
+{
+	return hv_get_msr(reg);
+}
+
 /* SMCCC hypercall parameters */
 #define HV_SMCCC_FUNC_NUMBER	1
 #define HV_FUNC_ID	ARM_SMCCC_CALL_VAL(	\
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y			:= hv_init.o mmu.o nested.o irqdomain.o ivm.o
-obj-$(CONFIG_X86_64)	+= hv_apic.o hv_proc.o
+obj-$(CONFIG_X86_64)	+= hv_apic.o
 obj-$(CONFIG_HYPERV_VTL_MODE)	+= hv_vtl.o
 
 ifdef CONFIG_X86_64
@@ -145,6 +145,11 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
 		ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
 	}
 
+	/*
+	 * For this hypercall, Hyper-V treats the valid_bank_mask field
+	 * of ipi_arg->vp_set as part of the fixed size input header.
+	 * So the variable input header size is equal to nr_bank.
+	 */
 	status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
 				     ipi_arg, NULL);
 
@@ -34,9 +34,6 @@
 #include <clocksource/hyperv_timer.h>
 #include <linux/highmem.h>
 
-u64 hv_current_partition_id = ~0ull;
-EXPORT_SYMBOL_GPL(hv_current_partition_id);
-
 void *hv_hypercall_pg;
 EXPORT_SYMBOL_GPL(hv_hypercall_pg);
 
@@ -93,7 +90,7 @@ static int hv_cpu_init(unsigned int cpu)
 		return 0;
 
 	hvp = &hv_vp_assist_page[cpu];
-	if (hv_root_partition) {
+	if (hv_root_partition()) {
 		/*
 		 * For root partition we get the hypervisor provided VP assist
 		 * page, instead of allocating a new page.
@@ -245,7 +242,7 @@ static int hv_cpu_die(unsigned int cpu)
 
 	if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
 		union hv_vp_assist_msr_contents msr = { 0 };
-		if (hv_root_partition) {
+		if (hv_root_partition()) {
 			/*
 			 * For root partition the VP assist page is mapped to
 			 * hypervisor provided page, and thus we unmap the
@@ -320,7 +317,7 @@ static int hv_suspend(void)
 	union hv_x64_msr_hypercall_contents hypercall_msr;
 	int ret;
 
-	if (hv_root_partition)
+	if (hv_root_partition())
 		return -EPERM;
 
 	/*
@@ -393,24 +390,6 @@ static void __init hv_stimer_setup_percpu_clockev(void)
 	old_setup_percpu_clockev();
 }
 
-static void __init hv_get_partition_id(void)
-{
-	struct hv_get_partition_id *output_page;
-	u64 status;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
-	status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);
-	if (!hv_result_success(status)) {
-		/* No point in proceeding if this failed */
-		pr_err("Failed to get partition ID: %lld\n", status);
-		BUG();
-	}
-	hv_current_partition_id = output_page->partition_id;
-	local_irq_restore(flags);
-}
-
 #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
 static u8 __init get_vtl(void)
 {
@@ -539,7 +518,7 @@ void __init hyperv_init(void)
 	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 	hypercall_msr.enable = 1;
 
-	if (hv_root_partition) {
+	if (hv_root_partition()) {
 		struct page *pg;
 		void *src;
 
@@ -605,17 +584,15 @@ skip_hypercall_pg_init:
 
 	register_syscore_ops(&hv_syscore_ops);
 
-	if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID)
+	if (ms_hyperv.priv_high & HV_ACCESS_PARTITION_ID)
 		hv_get_partition_id();
 
-	BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull);
-
 #ifdef CONFIG_PCI_MSI
 	/*
	 * If we're running as root, we want to create our own PCI MSI domain.
	 * We can't set this in hv_pci_init because that would be too late.
	 */
-	if (hv_root_partition)
+	if (hv_root_partition())
 		x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain;
 #endif
 
@@ -12,6 +12,7 @@
 #include <asm/i8259.h>
 #include <asm/mshyperv.h>
 #include <asm/realmode.h>
+#include <asm/reboot.h>
 #include <../kernel/smpboot.h>
 
 extern struct boot_params boot_params;
@@ -22,6 +23,36 @@ static bool __init hv_vtl_msi_ext_dest_id(void)
 	return true;
 }
 
+/*
+ * The `native_machine_emergency_restart` function from `reboot.c` writes
+ * to the physical address 0x472 to indicate the type of reboot for the
+ * firmware. We cannot have that in VSM as the memory composition might
+ * be more generic, and such write effectively corrupts the memory thus
+ * making diagnostics harder at the very least.
+ */
+static void __noreturn hv_vtl_emergency_restart(void)
+{
+	/*
+	 * Cause a triple fault and the immediate reset. Here the code does not run
+	 * on the top of any firmware, whereby cannot reach out to its services.
+	 * The inifinite loop is for the improbable case that the triple fault does
+	 * not work and have to preserve the state intact for debugging.
+	 */
+	for (;;) {
+		idt_invalidate();
+		__asm__ __volatile__("int3");
+	}
+}
+
+/*
+ * The only way to restart in the VTL mode is to triple fault as the kernel runs
+ * as firmware.
+ */
+static void __noreturn hv_vtl_restart(char __maybe_unused *cmd)
+{
+	hv_vtl_emergency_restart();
+}
+
 void __init hv_vtl_init_platform(void)
 {
 	pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
@@ -236,6 +267,9 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip)
 
 int __init hv_vtl_early_init(void)
 {
+	machine_ops.emergency_restart = hv_vtl_emergency_restart;
+	machine_ops.restart = hv_vtl_restart;
+
 	/*
	 * `boot_cpu_has` returns the runtime feature support,
	 * and here is the earliest it can be used.
@@ -64,7 +64,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
 	local_irq_restore(flags);
 
 	if (!hv_result_success(status))
-		pr_err("%s: hypercall failed, status %lld\n", __func__, status);
+		hv_status_err(status, "\n");
 
 	return hv_result(status);
 }
@@ -224,7 +224,7 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		kfree(stored_entry);
 
 		if (status != HV_STATUS_SUCCESS) {
-			pr_debug("%s: failed to unmap, status %lld", __func__, status);
+			hv_status_debug(status, "failed to unmap\n");
 			return;
 		}
 	}
@@ -273,7 +273,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
 	status = hv_unmap_msi_interrupt(dev, &old_entry);
 
 	if (status != HV_STATUS_SUCCESS)
-		pr_err("%s: hypercall failed, status %lld\n", __func__, status);
+		hv_status_err(status, "\n");
 }
 
 static void hv_msi_free_irq(struct irq_domain *domain,
@@ -338,7 +338,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
 	vmsa->sev_features = sev_status >> 2;
 
 	ret = snp_set_vmsa(vmsa, true);
-	if (!ret) {
+	if (ret) {
 		pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
 		free_page((u64)vmsa);
 		return ret;
@@ -205,6 +205,10 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 	/*
 	 * We can flush not more than max_gvas with one hypercall. Flush the
 	 * whole address space if we were asked to do more.
+	 *
+	 * For these hypercalls, Hyper-V treats the valid_bank_mask field
+	 * of flush->hv_vp_set as part of the fixed size input header.
+	 * So the variable input header size is equal to nr_bank.
 	 */
 	max_gvas =
 		(PAGE_SIZE - sizeof(*flush) - nr_bank *
@@ -43,8 +43,6 @@ extern bool hyperv_paravisor_present;
 
 extern void *hv_hypercall_pg;
 
-extern u64 hv_current_partition_id;
-
 extern union hv_ghcb * __percpu *hv_ghcb_pg;
 
 bool hv_isolation_type_snp(void);
@@ -58,10 +56,6 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
 #define HV_AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
 #define HV_AP_SEGMENT_LIMIT		0xffffffff
 
-int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
-int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
-int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
-
 /*
  * If the hypercall involves no input or output parameters, the hypervisor
  * ignores the corresponding GPA pointer.
@@ -160,7 +154,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
 				     : "cc", "edi", "esi");
 	}
 #endif
-		return hv_status;
+	return hv_status;
 }
 
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
@@ -33,8 +33,6 @@
 #include <asm/numa.h>
 #include <asm/svm.h>
 
-/* Is Linux running as the root partition? */
-bool hv_root_partition;
 /* Is Linux running on nested Microsoft Hypervisor */
 bool hv_nested;
 struct ms_hyperv_info ms_hyperv;
@@ -109,6 +107,7 @@ void hv_set_msr(unsigned int reg, u64 value)
 }
 EXPORT_SYMBOL_GPL(hv_set_msr);
 
+static void (*mshv_handler)(void);
 static void (*vmbus_handler)(void);
 static void (*hv_stimer0_handler)(void);
 static void (*hv_kexec_handler)(void);
@@ -119,6 +118,9 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	inc_irq_stat(irq_hv_callback_count);
+	if (mshv_handler)
+		mshv_handler();
+
 	if (vmbus_handler)
 		vmbus_handler();
 
@@ -128,6 +130,11 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
 	set_irq_regs(old_regs);
 }
 
+void hv_setup_mshv_handler(void (*handler)(void))
+{
+	mshv_handler = handler;
+}
+
 void hv_setup_vmbus_handler(void (*handler)(void))
 {
 	vmbus_handler = handler;
@@ -422,6 +429,7 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(hv_get_hypervisor_version);
 
 static void __init ms_hyperv_init_platform(void)
 {
@@ -436,13 +444,15 @@ static void __init ms_hyperv_init_platform(void)
 	 */
 	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
 	ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES);
+	ms_hyperv.ext_features = cpuid_ecx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
 	hv_max_functions_eax = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);
 
-	pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
-		ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
+	pr_info("Hyper-V: privilege flags low %#x, high %#x, ext %#x, hints %#x, misc %#x\n",
+		ms_hyperv.features, ms_hyperv.priv_high,
+		ms_hyperv.ext_features, ms_hyperv.hints,
 		ms_hyperv.misc_features);
 
 	ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
@@ -451,25 +461,7 @@
 	pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
 		 ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
 
-	/*
-	 * Check CPU management privilege.
-	 *
-	 * To mirror what Windows does we should extract CPU management
-	 * features and use the ReservedIdentityBit to detect if Linux is the
-	 * root partition. But that requires negotiating CPU management
-	 * interface (a process to be finalized). For now, use the privilege
-	 * flag as the indicator for running as root.
-	 *
-	 * Hyper-V should never specify running as root and as a Confidential
-	 * VM. But to protect against a compromised/malicious Hyper-V trying
-	 * to exploit root behavior to expose Confidential VM memory, ignore
-	 * the root partition setting if also a Confidential VM.
-	 */
-	if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
-	    !(ms_hyperv.priv_high & HV_ISOLATION)) {
-		hv_root_partition = true;
-		pr_info("Hyper-V: running as root partition\n");
-	}
+	hv_identify_partition_type();
 
 	if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
 		hv_nested = true;
@@ -618,7 +610,7 @@
 
 # ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
-	if (hv_root_partition ||
+	if (hv_root_partition() ||
 	    (!ms_hyperv.paravisor_present && hv_isolation_type_snp()))
 		smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
 # endif
@@ -51,6 +51,7 @@ int node_to_pxm(int node)
 		return PXM_INVAL;
 	return node_to_pxm_map[node];
 }
+EXPORT_SYMBOL_GPL(node_to_pxm);
 
 static void __acpi_map_pxm_to_node(int pxm, int node)
 {
@@ -582,7 +582,7 @@
 	 * mapped.
 	 */
 	tsc_msr.as_uint64 = hv_get_msr(HV_MSR_REFERENCE_TSC);
-	if (hv_root_partition)
+	if (hv_root_partition())
 		tsc_pfn = tsc_msr.pfn;
 	else
 		tsc_pfn = HVPFN_DOWN(virt_to_phys(tsc_page));
@@ -627,7 +627,7 @@ void __init hv_remap_tsc_clocksource(void)
 	if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
 		return;
 
-	if (!hv_root_partition) {
+	if (!hv_root_partition()) {
 		WARN(1, "%s: attempt to remap TSC page in guest partition\n",
 		     __func__);
 		return;
@@ -55,4 +55,21 @@ config HYPERV_BALLOON
 	help
 	  Select this option to enable Hyper-V Balloon driver.
 
+config MSHV_ROOT
+	tristate "Microsoft Hyper-V root partition support"
+	depends on HYPERV && (X86_64 || ARM64)
+	depends on !HYPERV_VTL_MODE
+	# The hypervisor interface operates on 4k pages. Enforcing it here
+	# simplifies many assumptions in the root partition code.
+	# e.g. When withdrawing memory, the hypervisor gives back 4k pages in
+	# no particular order, making it impossible to reassemble larger pages
+	depends on PAGE_SIZE_4KB
+	select EVENTFD
+	default n
+	help
+	  Select this option to enable support for booting and running as root
+	  partition on Microsoft Hyper-V.
+
+	  If unsure, say N.
+
 endmenu
@@ -2,6 +2,7 @@
 obj-$(CONFIG_HYPERV)		+= hv_vmbus.o
 obj-$(CONFIG_HYPERV_UTILS)	+= hv_utils.o
 obj-$(CONFIG_HYPERV_BALLOON)	+= hv_balloon.o
+obj-$(CONFIG_MSHV_ROOT)		+= mshv_root.o
 
 CFLAGS_hv_trace.o = -I$(src)
 CFLAGS_hv_balloon.o = -I$(src)
@@ -11,6 +12,9 @@ hv_vmbus-y := vmbus_drv.o \
 		 channel_mgmt.o ring_buffer.o hv_trace.o
 hv_vmbus-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o
 hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
+mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
+	       mshv_root_hv_call.o mshv_portid_table.o
 
 # Code that must be built-in
 obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o
+obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o mshv_common.o
@@ -144,7 +144,7 @@ int hv_synic_alloc(void)
 		 * Synic message and event pages are allocated by paravisor.
 		 * Skip these pages allocation here.
 		 */
-		if (!ms_hyperv.paravisor_present && !hv_root_partition) {
+		if (!ms_hyperv.paravisor_present && !hv_root_partition()) {
 			hv_cpu->synic_message_page =
 				(void *)get_zeroed_page(GFP_ATOMIC);
 			if (!hv_cpu->synic_message_page) {
@@ -272,7 +272,7 @@ void hv_synic_enable_regs(unsigned int cpu)
 	simp.as_uint64 = hv_get_msr(HV_MSR_SIMP);
 	simp.simp_enabled = 1;
 
-	if (ms_hyperv.paravisor_present || hv_root_partition) {
+	if (ms_hyperv.paravisor_present || hv_root_partition()) {
 		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
 		u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
 				~ms_hyperv.shared_gpa_boundary;
@@ -291,7 +291,7 @@ void hv_synic_enable_regs(unsigned int cpu)
 	siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
 	siefp.siefp_enabled = 1;
 
-	if (ms_hyperv.paravisor_present || hv_root_partition) {
+	if (ms_hyperv.paravisor_present || hv_root_partition()) {
 		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
 		u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
 				~ms_hyperv.shared_gpa_boundary;
@@ -313,17 +313,7 @@ void hv_synic_enable_regs(unsigned int cpu)
 
 	shared_sint.vector = vmbus_interrupt;
 	shared_sint.masked = false;
-
-	/*
-	 * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
-	 * it doesn't provide a recommendation flag and AEOI must be disabled.
-	 */
-#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
-	shared_sint.auto_eoi =
-			!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
-#else
-	shared_sint.auto_eoi = 0;
-#endif
+	shared_sint.auto_eoi = hv_recommend_using_aeoi();
 	hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 
 	/* Enable the global synic bit */
@@ -367,7 +357,7 @@ void hv_synic_disable_regs(unsigned int cpu)
 	 * addresses.
 	 */
 	simp.simp_enabled = 0;
-	if (ms_hyperv.paravisor_present || hv_root_partition) {
+	if (ms_hyperv.paravisor_present || hv_root_partition()) {
 		iounmap(hv_cpu->synic_message_page);
 		hv_cpu->synic_message_page = NULL;
 	} else {
@@ -379,7 +369,7 @@ void hv_synic_disable_regs(unsigned int cpu)
 	siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
 	siefp.siefp_enabled = 0;
 
-	if (ms_hyperv.paravisor_present || hv_root_partition) {
+	if (ms_hyperv.paravisor_present || hv_root_partition()) {
 		iounmap(hv_cpu->synic_event_page);
 		hv_cpu->synic_event_page = NULL;
 	} else {
@@ -433,13 +423,47 @@ retry:
 	return pending;
 }
 
+static int hv_pick_new_cpu(struct vmbus_channel *channel)
+{
+	int ret = -EBUSY;
+	int start;
+	int cpu;
+
+	lockdep_assert_cpus_held();
+	lockdep_assert_held(&vmbus_connection.channel_mutex);
+
+	/*
+	 * We can't assume that the relevant interrupts will be sent before
+	 * the cpu is offlined on older versions of hyperv.
+	 */
+	if (vmbus_proto_version < VERSION_WIN10_V5_3)
+		return -EBUSY;
+
+	start = get_random_u32_below(nr_cpu_ids);
+
+	for_each_cpu_wrap(cpu, cpu_online_mask, start) {
+		if (channel->target_cpu == cpu ||
+		    channel->target_cpu == VMBUS_CONNECT_CPU)
+			continue;
+
+		ret = vmbus_channel_set_cpu(channel, cpu);
+		if (!ret)
+			break;
+	}
+
+	if (ret)
+		ret = vmbus_channel_set_cpu(channel, VMBUS_CONNECT_CPU);
+
+	return ret;
+}
+
 /*
  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
  */
 int hv_synic_cleanup(unsigned int cpu)
 {
 	struct vmbus_channel *channel, *sc;
-	bool channel_found = false;
+	int ret = 0;
 
 	if (vmbus_connection.conn_state != CONNECTED)
 		goto always_cleanup;
@@ -456,38 +480,34 @@ int hv_synic_cleanup(unsigned int cpu)
 
 	/*
 	 * Search for channels which are bound to the CPU we're about to
-	 * cleanup. In case we find one and vmbus is still connected, we
-	 * fail; this will effectively prevent CPU offlining.
-	 *
-	 * TODO: Re-bind the channels to different CPUs.
+	 * cleanup.
 	 */
 	mutex_lock(&vmbus_connection.channel_mutex);
 	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 		if (channel->target_cpu == cpu) {
-			channel_found = true;
-			break;
+			ret = hv_pick_new_cpu(channel);
+			if (ret) {
+				mutex_unlock(&vmbus_connection.channel_mutex);
+				return ret;
+			}
 		}
 		list_for_each_entry(sc, &channel->sc_list, sc_list) {
 			if (sc->target_cpu == cpu) {
-				channel_found = true;
-				break;
+				ret = hv_pick_new_cpu(sc);
+				if (ret) {
+					mutex_unlock(&vmbus_connection.channel_mutex);
+					return ret;
+				}
 			}
 		}
-		if (channel_found)
-			break;
 	}
 	mutex_unlock(&vmbus_connection.channel_mutex);
 
-	if (channel_found)
-		return -EBUSY;
-
 	/*
-	 * channel_found == false means that any channels that were previously
-	 * assigned to the CPU have been reassigned elsewhere with a call of
-	 * vmbus_send_modifychannel(). Scan the event flags page looking for
-	 * bits that are set and waiting with a timeout for vmbus_chan_sched()
-	 * to process such bits. If bits are still set after this operation
-	 * and VMBus is connected, fail the CPU offlining operation.
+	 * Scan the event flags page looking for bits that are set and waiting
+	 * with a timeout for vmbus_chan_sched() to process such bits. If bits
+	 * are still set after this operation and VMBus is connected, fail the
+	 * CPU offlining operation.
 	 */
 	if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
 		return -EBUSY;
@@ -497,5 +517,5 @@ always_cleanup:
 
 	hv_synic_disable_regs(cpu);
 
-	return 0;
+	return ret;
 }
@@ -31,8 +31,14 @@
 #include <hyperv/hvhdk.h>
 #include <asm/mshyperv.h>
 
+u64 hv_current_partition_id = HV_PARTITION_ID_SELF;
+EXPORT_SYMBOL_GPL(hv_current_partition_id);
+
+enum hv_partition_type hv_curr_partition_type;
+EXPORT_SYMBOL_GPL(hv_curr_partition_type);
+
 /*
- * hv_root_partition, ms_hyperv and hv_nested are defined here with other
+ * ms_hyperv and hv_nested are defined here with other
  * Hyper-V specific globals so they are shared across all architectures and are
  * built only when CONFIG_HYPERV is defined. But on x86,
  * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
@@ -40,9 +46,6 @@
  * here, allowing for an overriding definition in the module containing
  * ms_hyperv_init_platform().
  */
-bool __weak hv_root_partition;
-EXPORT_SYMBOL_GPL(hv_root_partition);
-
 bool __weak hv_nested;
 EXPORT_SYMBOL_GPL(hv_nested);
 
@@ -65,6 +68,16 @@ static void hv_kmsg_dump_unregister(void);
 
 static struct ctl_table_header *hv_ctl_table_hdr;
 
+/*
+ * Per-cpu array holding the tail pointer for the SynIC event ring buffer
+ * for each SINT.
+ *
+ * We cannot maintain this in mshv driver because the tail pointer should
+ * persist even if the mshv driver is unloaded.
+ */
+u8 * __percpu *hv_synic_eventring_tail;
+EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
+
 /*
  * Hyper-V specific initialization and shutdown code that is
  * common across all architectures. Called from architecture
@@ -87,6 +100,9 @@ void __init hv_common_free(void)
 
 	free_percpu(hyperv_pcpu_input_arg);
 	hyperv_pcpu_input_arg = NULL;
+
+	free_percpu(hv_synic_eventring_tail);
+	hv_synic_eventring_tail = NULL;
 }
 
 /*
@@ -280,7 +296,26 @@ static void hv_kmsg_dump_register(void)
 
 static inline bool hv_output_page_exists(void)
 {
-	return hv_root_partition || IS_ENABLED(CONFIG_HYPERV_VTL_MODE);
+	return hv_root_partition() || IS_ENABLED(CONFIG_HYPERV_VTL_MODE);
 }
 
+void __init hv_get_partition_id(void)
+{
+	struct hv_output_get_partition_id *output;
+	unsigned long flags;
+	u64 status, pt_id;
+
+	local_irq_save(flags);
+	output = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, &output);
+	pt_id = output->partition_id;
+	local_irq_restore(flags);
+
+	if (hv_result_success(status))
+		hv_current_partition_id = pt_id;
+	else
+		pr_err("Hyper-V: failed to get partition ID: %#x\n",
+		       hv_result(status));
+}
+
 int __init hv_common_init(void)
@@ -350,6 +385,11 @@ int __init hv_common_init(void)
 		BUG_ON(!hyperv_pcpu_output_arg);
 	}
 
+	if (hv_root_partition()) {
+		hv_synic_eventring_tail = alloc_percpu(u8 *);
+		BUG_ON(!hv_synic_eventring_tail);
+	}
+
 	hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index),
 				    GFP_KERNEL);
 	if (!hv_vp_index) {
@@ -438,11 +478,12 @@ error:
 int hv_common_cpu_init(unsigned int cpu)
 {
 	void **inputarg, **outputarg;
+	u8 **synic_eventring_tail;
 	u64 msr_vp_index;
 	gfp_t flags;
 	const int pgcount = hv_output_page_exists() ? 2 : 1;
 	void *mem;
-	int ret;
+	int ret = 0;
 
 	/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
 	flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
@@ -450,8 +491,8 @@ int hv_common_cpu_init(unsigned int cpu)
 	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
 
 	/*
-	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
-	 * allocated if this CPU was previously online and then taken offline
+	 * The per-cpu memory is already allocated if this CPU was previously
+	 * online and then taken offline
 	 */
 	if (!*inputarg) {
 		mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
@@ -498,11 +539,21 @@ int hv_common_cpu_init(unsigned int cpu)
 	if (msr_vp_index > hv_max_vp_index)
 		hv_max_vp_index = msr_vp_index;
 
-	return 0;
+	if (hv_root_partition()) {
+		synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
+		*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT,
+						sizeof(u8), flags);
+		/* No need to unwind any of the above on failure here */
+		if (unlikely(!*synic_eventring_tail))
+			ret = -ENOMEM;
+	}
+
+	return ret;
 }
 
 int hv_common_cpu_die(unsigned int cpu)
 {
+	u8 **synic_eventring_tail;
 	/*
 	 * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory
 	 * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg
@@ -515,6 +566,10 @@ int hv_common_cpu_die(unsigned int cpu)
 	 * originally allocated memory is reused in hv_common_cpu_init().
 	 */
 
+	synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail);
+	kfree(*synic_eventring_tail);
+	*synic_eventring_tail = NULL;
+
 	return 0;
 }
 
@@ -572,7 +627,7 @@ EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
 
 bool hv_is_hibernation_supported(void)
 {
-	return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
+	return !hv_root_partition() && acpi_sleep_state_supported(ACPI_STATE_S4);
 }
 EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
 
@@ -625,6 +680,11 @@ void __weak hv_remove_vmbus_handler(void)
 }
 EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
 
+void __weak hv_setup_mshv_handler(void (*handler)(void))
+{
+}
+EXPORT_SYMBOL_GPL(hv_setup_mshv_handler);
+
 void __weak hv_setup_kexec_handler(void (*handler)(void))
 {
 }
@@ -661,3 +721,121 @@ u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
 	return HV_STATUS_INVALID_PARAMETER;
 }
 EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
+
+void hv_identify_partition_type(void)
+{
+	/* Assume guest role */
+	hv_curr_partition_type = HV_PARTITION_TYPE_GUEST;
+	/*
+	 * Check partition creation and cpu management privileges
+	 *
+	 * Hyper-V should never specify running as root and as a Confidential
+	 * VM. But to protect against a compromised/malicious Hyper-V trying
+	 * to exploit root behavior to expose Confidential VM memory, ignore
+	 * the root partition setting if also a Confidential VM.
+	 */
+	if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
+	    (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
+	    !(ms_hyperv.priv_high & HV_ISOLATION)) {
+		pr_info("Hyper-V: running as root partition\n");
+		if (IS_ENABLED(CONFIG_MSHV_ROOT))
+			hv_curr_partition_type = HV_PARTITION_TYPE_ROOT;
+		else
+			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
+	}
+}
+
+struct hv_status_info {
+	char *string;
+	int errno;
+	u16 code;
+};
+
+/*
+ * Note on the errno mappings:
+ * A failed hypercall is usually only recoverable (or loggable) near
+ * the call site where the HV_STATUS_* code is known. So the errno
+ * it gets converted to is not too useful further up the stack.
+ * Provide a few mappings that could be useful, and revert to -EIO
+ * as a fallback.
+ */
+static const struct hv_status_info hv_status_infos[] = {
+#define _STATUS_INFO(status, errno) { #status, (errno), (status) }
+	_STATUS_INFO(HV_STATUS_SUCCESS,				0),
+	_STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_CODE,		-EINVAL),
+	_STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_INPUT,		-EINVAL),
+	_STATUS_INFO(HV_STATUS_INVALID_ALIGNMENT,		-EIO),
+	_STATUS_INFO(HV_STATUS_INVALID_PARAMETER,		-EINVAL),
+	_STATUS_INFO(HV_STATUS_ACCESS_DENIED,			-EIO),
+	_STATUS_INFO(HV_STATUS_INVALID_PARTITION_STATE,		-EIO),
+	_STATUS_INFO(HV_STATUS_OPERATION_DENIED,		-EIO),
+	_STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY,		-EIO),
+	_STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE,	-EIO),
+	_STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY,		-ENOMEM),
+	_STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID,		-EINVAL),
+	_STATUS_INFO(HV_STATUS_INVALID_VP_INDEX,		-EINVAL),
+	_STATUS_INFO(HV_STATUS_NOT_FOUND,			-EIO),
+	_STATUS_INFO(HV_STATUS_INVALID_PORT_ID,			-EINVAL),
+	_STATUS_INFO(HV_STATUS_INVALID_CONNECTION_ID,		-EINVAL),
+	_STATUS_INFO(HV_STATUS_INSUFFICIENT_BUFFERS,		-EIO),
+	_STATUS_INFO(HV_STATUS_NOT_ACKNOWLEDGED,		-EIO),
+	_STATUS_INFO(HV_STATUS_INVALID_VP_STATE,		-EIO),
+	_STATUS_INFO(HV_STATUS_NO_RESOURCES,			-EIO),
+	_STATUS_INFO(HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED,	-EIO),
+	_STATUS_INFO(HV_STATUS_INVALID_LP_INDEX,		-EINVAL),
+	_STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE,		-EINVAL),
+	_STATUS_INFO(HV_STATUS_OPERATION_FAILED,		-EIO),
+	_STATUS_INFO(HV_STATUS_TIME_OUT,			-EIO),
+	_STATUS_INFO(HV_STATUS_CALL_PENDING,			-EIO),
+	_STATUS_INFO(HV_STATUS_VTL_ALREADY_ENABLED,		-EIO),
+#undef _STATUS_INFO
+};
+
+static inline const struct hv_status_info *find_hv_status_info(u64 hv_status)
+{
+	int i;
+	u16 code = hv_result(hv_status);
+
+	for (i = 0; i < ARRAY_SIZE(hv_status_infos); ++i) {
+		const struct hv_status_info *info = &hv_status_infos[i];
+
+		if (info->code == code)
+			return info;
+	}
+
+	return NULL;
+}
+
+/* Convert a hypercall result into a linux-friendly error code. */
+int hv_result_to_errno(u64 status)
+{
+	const struct hv_status_info *info;
+
+	/* hv_do_hypercall() may return U64_MAX, hypercalls aren't possible */
+	if (unlikely(status == U64_MAX))
+		return -EOPNOTSUPP;
+
+	info = find_hv_status_info(status);
+	if (info)
+		return info->errno;
+
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(hv_result_to_errno);
+
+const char *hv_result_to_string(u64 status)
+{
+	const struct hv_status_info *info;
+
+	if (unlikely(status == U64_MAX))
+		return "Hypercall page missing!";
+
+	info = find_hv_status_info(status);
+	if (info)
+		return info->string;
+
+	return "Unknown";
+}
+EXPORT_SYMBOL_GPL(hv_result_to_string);
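Taken together, hv_result_to_string() and hv_result_to_errno() give call sites a pattern like the sketch below; the hypercall code and arguments are illustrative only, not part of this series:

/* Illustrative wrapper: log the symbolic status, return a Linux errno. */
static int example_hypercall(void *input, void *output)
{
	u64 status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, input, output);

	if (!hv_result_success(status)) {
		pr_err("hypercall failed: %s\n", hv_result_to_string(status));
		return hv_result_to_errno(status);
	}

	return 0;
}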
@@ -6,11 +6,7 @@
 #include <linux/slab.h>
 #include <linux/cpuhotplug.h>
 #include <linux/minmax.h>
-#include <asm/hypervisor.h>
 #include <asm/mshyperv.h>
-#include <asm/apic.h>
-
-#include <asm/trace/hyperv.h>
 
 /*
  * See struct hv_deposit_memory. The first u64 is partition ID, the rest
@@ -91,8 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
 			 page_count, 0, input_page, NULL);
 	local_irq_restore(flags);
 	if (!hv_result_success(status)) {
-		pr_err("Failed to deposit pages: %lld\n", status);
-		ret = hv_result(status);
+		hv_status_err(status, "\n");
+		ret = hv_result_to_errno(status);
 		goto err_free_allocations;
 	}
 
@@ -111,6 +107,7 @@ free_buf:
 	kfree(counts);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(hv_call_deposit_pages);
 
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 {
@@ -118,7 +115,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 	struct hv_output_add_logical_processor *output;
 	u64 status;
 	unsigned long flags;
-	int ret = HV_STATUS_SUCCESS;
+	int ret = 0;
 
 	/*
	 * When adding a logical processor, the hypervisor may return
@@ -141,9 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 
 		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
 			if (!hv_result_success(status)) {
-				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
-				       lp_index, apic_id, status);
-				ret = hv_result(status);
+				hv_status_err(status, "cpu %u apic ID: %u\n",
+					      lp_index, apic_id);
+				ret = hv_result_to_errno(status);
 			}
 			break;
 		}
@@ -158,7 +155,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
 	struct hv_create_vp *input;
 	u64 status;
 	unsigned long irq_flags;
-	int ret = HV_STATUS_SUCCESS;
+	int ret = 0;
 
 	/* Root VPs don't seem to need pages deposited */
 	if (partition_id != hv_current_partition_id) {
@@ -183,9 +180,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
 
 		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
 			if (!hv_result_success(status)) {
-				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
-				       vp_index, flags, status);
-				ret = hv_result(status);
+				hv_status_err(status, "vcpu: %u, lp: %u\n",
+					      vp_index, flags);
+				ret = hv_result_to_errno(status);
 			}
 			break;
 		}
@@ -195,4 +192,4 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
 
 	return ret;
 }
-
+EXPORT_SYMBOL_GPL(hv_call_create_vp);
drivers/hv/mshv.h (new file, 30 lines)
@@ -0,0 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2023, Microsoft Corporation.
 */

#ifndef _MSHV_H_
#define _MSHV_H_

#include <linux/stddef.h>
#include <linux/string.h>
#include <hyperv/hvhdk.h>

#define mshv_field_nonzero(STRUCT, MEMBER) \
	memchr_inv(&((STRUCT).MEMBER), \
		   0, sizeof_field(typeof(STRUCT), MEMBER))

int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers);

int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers);

int hv_call_get_partition_property(u64 partition_id, u64 property_code,
				   u64 *property_value);

int mshv_do_pre_guest_mode_work(ulong th_flags);

#endif /* _MSHV_H */
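The mshv_field_nonzero() macro reports whether any byte of a struct member is set, which suits validating that reserved fields passed in from a VMM are zero. A hedged usage sketch (the struct and field names below are hypothetical, not real UAPI types):

/* Hypothetical ioctl argument with reserved padding that must be zero. */
struct example_args {
	__u64 flags;
	__u8 rsvd[16];
};

static int example_validate(const struct example_args *args)
{
	if (mshv_field_nonzero(*args, rsvd))
		return -EINVAL;	/* reject non-zero reserved bytes */

	return 0;
}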
drivers/hv/mshv_common.c (new file, 161 lines)
@@ -0,0 +1,161 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2024, Microsoft Corporation.
 *
 * This file contains functions that will be called from one or more modules.
 * If any of these modules are configured to build, this file is built and just
 * statically linked in.
 *
 * Authors: Microsoft Linux virtualization team
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/mshyperv.h>
#include <linux/resume_user_mode.h>

#include "mshv.h"

#define HV_GET_REGISTER_BATCH_SIZE	\
	(HV_HYP_PAGE_SIZE / sizeof(union hv_register_value))
#define HV_SET_REGISTER_BATCH_SIZE	\
	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_set_vp_registers)) \
		/ sizeof(struct hv_register_assoc))

int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers)
{
	struct hv_input_get_vp_registers *input_page;
	union hv_register_value *output_page;
	u16 completed = 0;
	unsigned long remaining = count;
	int rep_count, i;
	u64 status = HV_STATUS_SUCCESS;
	unsigned long flags;

	local_irq_save(flags);

	input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);

	input_page->partition_id = partition_id;
	input_page->vp_index = vp_index;
	input_page->input_vtl.as_uint8 = input_vtl.as_uint8;
	input_page->rsvd_z8 = 0;
	input_page->rsvd_z16 = 0;

	while (remaining) {
		rep_count = min(remaining, HV_GET_REGISTER_BATCH_SIZE);
		for (i = 0; i < rep_count; ++i)
			input_page->names[i] = registers[i].name;

		status = hv_do_rep_hypercall(HVCALL_GET_VP_REGISTERS, rep_count,
					     0, input_page, output_page);
		if (!hv_result_success(status))
			break;

		completed = hv_repcomp(status);
		for (i = 0; i < completed; ++i)
			registers[i].value = output_page[i];

		registers += completed;
		remaining -= completed;
	}
	local_irq_restore(flags);

	return hv_result_to_errno(status);
}
EXPORT_SYMBOL_GPL(hv_call_get_vp_registers);

int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers)
{
	struct hv_input_set_vp_registers *input_page;
	u16 completed = 0;
	unsigned long remaining = count;
	int rep_count;
	u64 status = HV_STATUS_SUCCESS;
	unsigned long flags;

	local_irq_save(flags);
	input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);

	input_page->partition_id = partition_id;
	input_page->vp_index = vp_index;
	input_page->input_vtl.as_uint8 = input_vtl.as_uint8;
	input_page->rsvd_z8 = 0;
	input_page->rsvd_z16 = 0;

	while (remaining) {
		rep_count = min(remaining, HV_SET_REGISTER_BATCH_SIZE);
		memcpy(input_page->elements, registers,
		       sizeof(struct hv_register_assoc) * rep_count);

		status = hv_do_rep_hypercall(HVCALL_SET_VP_REGISTERS, rep_count,
					     0, input_page, NULL);
		if (!hv_result_success(status))
			break;

		completed = hv_repcomp(status);
		registers += completed;
		remaining -= completed;
	}

	local_irq_restore(flags);

	return hv_result_to_errno(status);
}
EXPORT_SYMBOL_GPL(hv_call_set_vp_registers);
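Both register helpers batch requests through the per-cpu hypercall pages; a single-register read reduces to one rep. A usage sketch, assuming an x86 caller (the register choice and zeroed VTL are illustrative):

/* Sketch: read one VP register through the batched helper. */
static int example_read_rip(u32 vp_index, u64 partition_id, u64 *rip)
{
	struct hv_register_assoc reg = { .name = HV_X64_REGISTER_RIP };
	union hv_input_vtl vtl = { .as_uint8 = 0 };	/* current VTL */
	int ret;

	ret = hv_call_get_vp_registers(vp_index, partition_id, 1, vtl, &reg);
	if (!ret)
		*rip = reg.value.reg64;

	return ret;
}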

int hv_call_get_partition_property(u64 partition_id,
				   u64 property_code,
				   u64 *property_value)
{
	u64 status;
	unsigned long flags;
	struct hv_input_get_partition_property *input;
	struct hv_output_get_partition_property *output;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output = *this_cpu_ptr(hyperv_pcpu_output_arg);
	memset(input, 0, sizeof(*input));
	input->partition_id = partition_id;
	input->property_code = property_code;
	status = hv_do_hypercall(HVCALL_GET_PARTITION_PROPERTY, input, output);

	if (!hv_result_success(status)) {
		local_irq_restore(flags);
		return hv_result_to_errno(status);
	}
	*property_value = output->property_value;

	local_irq_restore(flags);

	return 0;
}
EXPORT_SYMBOL_GPL(hv_call_get_partition_property);

/*
 * Handle any pre-processing before going into the guest mode on this cpu, most
 * notably call schedule(). Must be invoked with both preemption and
 * interrupts enabled.
 *
 * Returns: 0 on success, -errno on error.
 */
int mshv_do_pre_guest_mode_work(ulong th_flags)
{
	if (th_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
		return -EINTR;

	if (th_flags & _TIF_NEED_RESCHED)
		schedule();

	if (th_flags & _TIF_NOTIFY_RESUME)
		resume_user_mode_work(NULL);

	return 0;
}
EXPORT_SYMBOL_GPL(mshv_do_pre_guest_mode_work);
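mshv_do_pre_guest_mode_work() is intended to run on the vCPU run path just before re-entering the guest, with preemption and interrupts enabled. A hedged sketch of a caller (the surrounding loop structure is hypothetical):

/* Hypothetical run-loop fragment: check thread flags before guest entry. */
static int example_pre_entry(void)
{
	ulong flags = READ_ONCE(current_thread_info()->flags);
	int ret = mshv_do_pre_guest_mode_work(flags);

	if (ret)
		return ret;	/* e.g. -EINTR: let the signal be handled first */

	/* ... enter guest mode here ... */
	return 0;
}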
drivers/hv/mshv_eventfd.c (new file, 833 lines)
@ -0,0 +1,833 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* eventfd support for mshv
|
||||
*
|
||||
* Heavily inspired from KVM implementation of irqfd/ioeventfd. The basic
|
||||
* framework code is taken from the kvm implementation.
|
||||
*
|
||||
* All credits to kvm developers.
|
||||
*/
|
||||
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/eventfd.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_X86_64)
|
||||
#include <asm/apic.h>
|
||||
#endif
|
||||
#include <asm/mshyperv.h>
|
||||
|
||||
#include "mshv_eventfd.h"
|
||||
#include "mshv.h"
|
||||
#include "mshv_root.h"
|
||||
|
||||
static struct workqueue_struct *irqfd_cleanup_wq;
|
||||
|
||||
void mshv_register_irq_ack_notifier(struct mshv_partition *partition,
|
||||
struct mshv_irq_ack_notifier *mian)
|
||||
{
|
||||
mutex_lock(&partition->pt_irq_lock);
|
||||
hlist_add_head_rcu(&mian->link, &partition->irq_ack_notifier_list);
|
||||
mutex_unlock(&partition->pt_irq_lock);
|
||||
}
|
||||
|
||||
void mshv_unregister_irq_ack_notifier(struct mshv_partition *partition,
|
||||
struct mshv_irq_ack_notifier *mian)
|
||||
{
|
||||
mutex_lock(&partition->pt_irq_lock);
|
||||
hlist_del_init_rcu(&mian->link);
|
||||
mutex_unlock(&partition->pt_irq_lock);
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
bool mshv_notify_acked_gsi(struct mshv_partition *partition, int gsi)
|
||||
{
|
||||
struct mshv_irq_ack_notifier *mian;
|
||||
bool acked = false;
|
||||
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(mian, &partition->irq_ack_notifier_list,
|
||||
link) {
|
||||
if (mian->irq_ack_gsi == gsi) {
|
||||
mian->irq_acked(mian);
|
||||
acked = true;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return acked;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_ARM64)
|
||||
static inline bool hv_should_clear_interrupt(enum hv_interrupt_type type)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#elif IS_ENABLED(CONFIG_X86_64)
|
||||
static inline bool hv_should_clear_interrupt(enum hv_interrupt_type type)
|
||||
{
|
||||
return type == HV_X64_INTERRUPT_TYPE_EXTINT;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)
|
||||
{
|
||||
struct mshv_irqfd_resampler *resampler;
|
||||
struct mshv_partition *partition;
|
||||
struct mshv_irqfd *irqfd;
|
||||
int idx;
|
||||
|
||||
resampler = container_of(mian, struct mshv_irqfd_resampler,
|
||||
rsmplr_notifier);
|
||||
partition = resampler->rsmplr_partn;
|
||||
|
||||
idx = srcu_read_lock(&partition->pt_irq_srcu);
|
||||
|
||||
hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list,
|
||||
irqfd_resampler_hnode) {
|
||||
if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
|
||||
hv_call_clear_virtual_interrupt(partition->pt_id);
|
||||
|
||||
eventfd_signal(irqfd->irqfd_resamplefd);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&partition->pt_irq_srcu, idx);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_X86_64)
|
||||
static bool
|
||||
mshv_vp_irq_vector_injected(union hv_vp_register_page_interrupt_vectors iv,
|
||||
u32 vector)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < iv.vector_count; i++) {
|
||||
if (iv.vector[i] == vector)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int mshv_vp_irq_try_set_vector(struct mshv_vp *vp, u32 vector)
|
||||
{
|
||||
union hv_vp_register_page_interrupt_vectors iv, new_iv;
|
||||
|
||||
iv = vp->vp_register_page->interrupt_vectors;
|
||||
new_iv = iv;
|
||||
|
||||
if (mshv_vp_irq_vector_injected(iv, vector))
|
||||
return 0;
|
||||
|
||||
if (iv.vector_count >= HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT)
|
||||
return -ENOSPC;
|
||||
|
||||
new_iv.vector[new_iv.vector_count++] = vector;
|
||||
|
||||
if (cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64,
|
||||
iv.as_uint64, new_iv.as_uint64) != iv.as_uint64)
|
||||
return -EAGAIN;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mshv_vp_irq_set_vector(struct mshv_vp *vp, u32 vector)
|
||||
{
|
||||
int ret;
|
||||
|
||||
do {
|
||||
ret = mshv_vp_irq_try_set_vector(vp, vector);
|
||||
} while (ret == -EAGAIN && !need_resched());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to raise irq for guest via shared vector array. hyp does the actual
|
||||
* inject of the interrupt.
|
||||
*/
|
||||
static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
|
||||
{
|
||||
struct mshv_partition *partition = irqfd->irqfd_partn;
|
||||
struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
|
||||
struct mshv_vp *vp;
|
||||
|
||||
if (!(ms_hyperv.ext_features &
|
||||
HV_VP_DISPATCH_INTERRUPT_INJECTION_AVAILABLE))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (hv_scheduler_type != HV_SCHEDULER_TYPE_ROOT)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (irq->lapic_control.logical_dest_mode)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
vp = partition->pt_vp_array[irq->lapic_apic_id];
|
||||
|
||||
if (!vp->vp_register_page)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (mshv_vp_irq_set_vector(vp, irq->lapic_vector))
|
||||
return -EINVAL;
|
||||
|
||||
if (vp->run.flags.root_sched_dispatched &&
|
||||
vp->vp_register_page->interrupt_vectors.as_uint64)
|
||||
return -EBUSY;
|
||||
|
||||
wake_up(&vp->run.vp_suspend_queue);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else /* CONFIG_X86_64 */
|
||||
static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd)
|
||||
{
|
||||
struct mshv_partition *partition = irqfd->irqfd_partn;
|
||||
struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
|
||||
unsigned int seq;
|
||||
int idx;
|
||||
|
||||
WARN_ON(irqfd->irqfd_resampler &&
|
||||
!irq->lapic_control.level_triggered);
|
||||
|
||||
idx = srcu_read_lock(&partition->pt_irq_srcu);
|
||||
if (irqfd->irqfd_girq_ent.guest_irq_num) {
|
||||
if (!irqfd->irqfd_girq_ent.girq_entry_valid) {
|
||||
srcu_read_unlock(&partition->pt_irq_srcu, idx);
|
||||
return;
|
||||
}
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
|
||||
} while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
|
||||
}
|
||||
|
||||
		hv_call_assert_virtual_interrupt(irqfd->irqfd_partn->pt_id,
						 irq->lapic_vector, irq->lapic_apic_id,
						 irq->lapic_control);
	srcu_read_unlock(&partition->pt_irq_srcu, idx);
}

static void mshv_irqfd_resampler_shutdown(struct mshv_irqfd *irqfd)
{
	struct mshv_irqfd_resampler *rp = irqfd->irqfd_resampler;
	struct mshv_partition *pt = rp->rsmplr_partn;

	mutex_lock(&pt->irqfds_resampler_lock);

	hlist_del_rcu(&irqfd->irqfd_resampler_hnode);
	synchronize_srcu(&pt->pt_irq_srcu);

	if (hlist_empty(&rp->rsmplr_irqfd_list)) {
		hlist_del(&rp->rsmplr_hnode);
		mshv_unregister_irq_ack_notifier(pt, &rp->rsmplr_notifier);
		kfree(rp);
	}

	mutex_unlock(&pt->irqfds_resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void mshv_irqfd_shutdown(struct work_struct *work)
{
	struct mshv_irqfd *irqfd =
			container_of(work, struct mshv_irqfd, irqfd_shutdown);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	remove_wait_queue(irqfd->irqfd_wqh, &irqfd->irqfd_wait);

	if (irqfd->irqfd_resampler) {
		mshv_irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->irqfd_resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
	eventfd_ctx_put(irqfd->irqfd_eventfd_ctx);
	kfree(irqfd);
}

/* assumes partition->pt_irqfds_lock is held */
static bool mshv_irqfd_is_active(struct mshv_irqfd *irqfd)
{
	return !hlist_unhashed(&irqfd->irqfd_hnode);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes partition->pt_irqfds_lock is held
 */
static void mshv_irqfd_deactivate(struct mshv_irqfd *irqfd)
{
	if (!mshv_irqfd_is_active(irqfd))
		return;

	hlist_del(&irqfd->irqfd_hnode);

	queue_work(irqfd_cleanup_wq, &irqfd->irqfd_shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
			     int sync, void *key)
{
	struct mshv_irqfd *irqfd = container_of(wait, struct mshv_irqfd,
						irqfd_wait);
	unsigned long flags = (unsigned long)key;
	int idx;
	unsigned int seq;
	struct mshv_partition *pt = irqfd->irqfd_partn;
	int ret = 0;

	if (flags & POLLIN) {
		u64 cnt;

		eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
		idx = srcu_read_lock(&pt->pt_irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
		} while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));

		/* An event has been signaled, raise an interrupt */
		ret = mshv_try_assert_irq_fast(irqfd);
		if (ret)
			mshv_assert_irq_slow(irqfd);

		srcu_read_unlock(&pt->pt_irq_srcu, idx);

		ret = 1;
	}

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from the partition */
		unsigned long flags;

		spin_lock_irqsave(&pt->pt_irqfds_lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the pt_irqfds_lock since the item is
		 * deactivated from the mshv side before it is unhooked from
		 * the wait-queue. If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (mshv_irqfd_is_active(irqfd))
			mshv_irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&pt->pt_irqfds_lock, flags);
	}

	return ret;
}
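
The wakeup handler above takes no sleeping locks on the hot path: it spins on irqfd_irqe_sc until it observes a stable routing entry, then asserts the interrupt. For reference, a stand-alone C sketch of that seqcount reader protocol (the names and the C11-atomics implementation are illustrative only, not the kernel's seqcount_t):

#include <stdatomic.h>
#include <stdio.h>

/* Even sequence value = data stable; odd = writer in progress. */
static _Atomic unsigned int seq;
static int route_vector;	/* the data the sequence count protects */

static unsigned int read_begin(void)
{
	unsigned int s;

	while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
		;	/* writer active: wait for it to finish */
	return s;
}

static int read_retry(unsigned int s)
{
	atomic_thread_fence(memory_order_acquire);
	return atomic_load_explicit(&seq, memory_order_relaxed) != s;
}

int main(void)
{
	unsigned int s;
	int v;

	do {	/* same shape as the loop in mshv_irqfd_wakeup() */
		s = read_begin();
		v = route_vector;
	} while (read_retry(s));

	printf("stable vector: %d\n", v);
	return 0;
}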

/* Must be called under pt_irqfds_lock */
static void mshv_irqfd_update(struct mshv_partition *pt,
			      struct mshv_irqfd *irqfd)
{
	write_seqcount_begin(&irqfd->irqfd_irqe_sc);
	irqfd->irqfd_girq_ent = mshv_ret_girq_entry(pt,
						    irqfd->irqfd_irqnum);
	mshv_copy_girq_info(&irqfd->irqfd_girq_ent, &irqfd->irqfd_lapic_irq);
	write_seqcount_end(&irqfd->irqfd_irqe_sc);
}

void mshv_irqfd_routing_update(struct mshv_partition *pt)
{
	struct mshv_irqfd *irqfd;

	spin_lock_irq(&pt->pt_irqfds_lock);
	hlist_for_each_entry(irqfd, &pt->pt_irqfds_list, irqfd_hnode)
		mshv_irqfd_update(pt, irqfd);
	spin_unlock_irq(&pt->pt_irqfds_lock);
}

static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh,
				  poll_table *polltbl)
{
	struct mshv_irqfd *irqfd =
			container_of(polltbl, struct mshv_irqfd, irqfd_polltbl);

	irqfd->irqfd_wqh = wqh;
	add_wait_queue_priority(wqh, &irqfd->irqfd_wait);
}

static int mshv_irqfd_assign(struct mshv_partition *pt,
			     struct mshv_user_irqfd *args)
{
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	struct mshv_irqfd *irqfd, *tmp;
	unsigned int events;
	struct fd f;
	int ret;
	int idx;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->irqfd_partn = pt;
	irqfd->irqfd_irqnum = args->gsi;
	INIT_WORK(&irqfd->irqfd_shutdown, mshv_irqfd_shutdown);
	seqcount_spinlock_init(&irqfd->irqfd_irqe_sc, &pt->pt_irqfds_lock);

	f = fdget(args->fd);
	if (!fd_file(f)) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(fd_file(f));
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->irqfd_eventfd_ctx = eventfd;

	if (args->flags & BIT(MSHV_IRQFD_BIT_RESAMPLE)) {
		struct mshv_irqfd_resampler *rp;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->irqfd_resamplefd = resamplefd;

		mutex_lock(&pt->irqfds_resampler_lock);

		hlist_for_each_entry(rp, &pt->irqfds_resampler_list,
				     rsmplr_hnode) {
			if (rp->rsmplr_notifier.irq_ack_gsi ==
							 irqfd->irqfd_irqnum) {
				irqfd->irqfd_resampler = rp;
				break;
			}
		}

		if (!irqfd->irqfd_resampler) {
			rp = kzalloc(sizeof(*rp), GFP_KERNEL_ACCOUNT);
			if (!rp) {
				ret = -ENOMEM;
				mutex_unlock(&pt->irqfds_resampler_lock);
				goto fail;
			}

			rp->rsmplr_partn = pt;
			INIT_HLIST_HEAD(&rp->rsmplr_irqfd_list);
			rp->rsmplr_notifier.irq_ack_gsi = irqfd->irqfd_irqnum;
			rp->rsmplr_notifier.irq_acked =
						      mshv_irqfd_resampler_ack;

			hlist_add_head(&rp->rsmplr_hnode,
				       &pt->irqfds_resampler_list);
			mshv_register_irq_ack_notifier(pt,
						       &rp->rsmplr_notifier);
			irqfd->irqfd_resampler = rp;
		}

		hlist_add_head_rcu(&irqfd->irqfd_resampler_hnode,
				   &irqfd->irqfd_resampler->rsmplr_irqfd_list);

		mutex_unlock(&pt->irqfds_resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->irqfd_wait, mshv_irqfd_wakeup);
	init_poll_funcptr(&irqfd->irqfd_polltbl, mshv_irqfd_queue_proc);

	spin_lock_irq(&pt->pt_irqfds_lock);
	if (args->flags & BIT(MSHV_IRQFD_BIT_RESAMPLE) &&
	    !irqfd->irqfd_lapic_irq.lapic_control.level_triggered) {
		/*
		 * Resample Fd must be for level triggered interrupt
		 * Otherwise return with failure
		 */
		spin_unlock_irq(&pt->pt_irqfds_lock);
		ret = -EINVAL;
		goto fail;
	}
	ret = 0;
	hlist_for_each_entry(tmp, &pt->pt_irqfds_list, irqfd_hnode) {
		if (irqfd->irqfd_eventfd_ctx != tmp->irqfd_eventfd_ctx)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&pt->pt_irqfds_lock);
		goto fail;
	}

	idx = srcu_read_lock(&pt->pt_irq_srcu);
	mshv_irqfd_update(pt, irqfd);
	hlist_add_head(&irqfd->irqfd_hnode, &pt->pt_irqfds_list);
	spin_unlock_irq(&pt->pt_irqfds_lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl);

	if (events & POLLIN)
		mshv_assert_irq_slow(irqfd);

	srcu_read_unlock(&pt->pt_irq_srcu, idx);
	/*
	 * do not drop the file until the irqfd is fully initialized, otherwise
	 * we might race against the POLLHUP
	 */
	fdput(f);

	return 0;

fail:
	if (irqfd->irqfd_resampler)
		mshv_irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

/*
 * shutdown any irqfd's that match fd+gsi
 */
static int mshv_irqfd_deassign(struct mshv_partition *pt,
			       struct mshv_user_irqfd *args)
{
	struct mshv_irqfd *irqfd;
	struct hlist_node *n;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	hlist_for_each_entry_safe(irqfd, n, &pt->pt_irqfds_list,
				  irqfd_hnode) {
		if (irqfd->irqfd_eventfd_ctx == eventfd &&
		    irqfd->irqfd_irqnum == args->gsi)

			mshv_irqfd_deactivate(irqfd);
	}

	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int mshv_set_unset_irqfd(struct mshv_partition *pt,
			 struct mshv_user_irqfd *args)
{
	if (args->flags & ~MSHV_IRQFD_FLAGS_MASK)
		return -EINVAL;

	if (args->flags & BIT(MSHV_IRQFD_BIT_DEASSIGN))
		return mshv_irqfd_deassign(pt, args);

	return mshv_irqfd_assign(pt, args);
}

/*
 * This function is called as the mshv VM fd is being released.
 * Shutdown all irqfds that still remain open
 */
static void mshv_irqfd_release(struct mshv_partition *pt)
{
	struct mshv_irqfd *irqfd;
	struct hlist_node *n;

	spin_lock_irq(&pt->pt_irqfds_lock);

	hlist_for_each_entry_safe(irqfd, n, &pt->pt_irqfds_list, irqfd_hnode)
		mshv_irqfd_deactivate(irqfd);

	spin_unlock_irq(&pt->pt_irqfds_lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a mshv_partition* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

int mshv_irqfd_wq_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("mshv-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void mshv_irqfd_wq_cleanup(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a MMIO memory write to an eventfd signal.
 *
 * userspace can register a MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

static void ioeventfd_release(struct mshv_ioeventfd *p, u64 partition_id)
{
	if (p->iovntfd_doorbell_id > 0)
		mshv_unregister_doorbell(partition_id, p->iovntfd_doorbell_id);
	eventfd_ctx_put(p->iovntfd_eventfd);
	kfree(p);
}

/* MMIO writes trigger an event if the addr/val match */
static void ioeventfd_mmio_write(int doorbell_id, void *data)
{
	struct mshv_partition *partition = (struct mshv_partition *)data;
	struct mshv_ioeventfd *p;

	rcu_read_lock();
	hlist_for_each_entry_rcu(p, &partition->ioeventfds_list, iovntfd_hnode)
		if (p->iovntfd_doorbell_id == doorbell_id) {
			eventfd_signal(p->iovntfd_eventfd);
			break;
		}

	rcu_read_unlock();
}

static bool ioeventfd_check_collision(struct mshv_partition *pt,
				      struct mshv_ioeventfd *p)
	__must_hold(&pt->mutex)
{
	struct mshv_ioeventfd *_p;

	hlist_for_each_entry(_p, &pt->ioeventfds_list, iovntfd_hnode)
		if (_p->iovntfd_addr == p->iovntfd_addr &&
		    _p->iovntfd_length == p->iovntfd_length &&
		    (_p->iovntfd_wildcard || p->iovntfd_wildcard ||
		     _p->iovntfd_datamatch == p->iovntfd_datamatch))
			return true;

	return false;
}
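
Note how the collision test treats a wildcard registration as conflicting with every data value at the same address and length. A stand-alone model of the same predicate (the types and values below are hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct reg {
	uint64_t addr;
	int len;
	bool wildcard;
	uint64_t datamatch;
};

/* Two registrations clash when addr/len match and either side is a
 * wildcard or both match on the same data value. */
static bool collides(const struct reg *a, const struct reg *b)
{
	return a->addr == b->addr && a->len == b->len &&
	       (a->wildcard || b->wildcard || a->datamatch == b->datamatch);
}

int main(void)
{
	struct reg a = { 0xfe001000, 4, false, 1 };
	struct reg b = { 0xfe001000, 4, false, 2 };
	struct reg c = { 0xfe001000, 4, true, 0 };

	printf("a vs b: %d\n", collides(&a, &b));	/* 0: different datamatch */
	printf("a vs c: %d\n", collides(&a, &c));	/* 1: wildcard matches all */
	return 0;
}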

static int mshv_assign_ioeventfd(struct mshv_partition *pt,
				 struct mshv_user_ioeventfd *args)
	__must_hold(&pt->mutex)
{
	struct mshv_ioeventfd *p;
	struct eventfd_ctx *eventfd;
	u64 doorbell_flags = 0;
	int ret;

	/* This mutex is currently protecting ioeventfd.items list */
	WARN_ON_ONCE(!mutex_is_locked(&pt->pt_mutex));

	if (args->flags & BIT(MSHV_IOEVENTFD_BIT_PIO))
		return -EOPNOTSUPP;

	/* must be natural-word sized */
	switch (args->len) {
	case 0:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY;
		break;
	case 1:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE;
		break;
	case 2:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD;
		break;
	case 4:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD;
		break;
	case 8:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD;
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~MSHV_IOEVENTFD_FLAGS_MASK)
		return -EINVAL;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	p->iovntfd_addr = args->addr;
	p->iovntfd_length = args->len;
	p->iovntfd_eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & BIT(MSHV_IOEVENTFD_BIT_DATAMATCH)) {
		p->iovntfd_datamatch = args->datamatch;
	} else {
		p->iovntfd_wildcard = true;
		doorbell_flags |= HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE;
	}

	if (ioeventfd_check_collision(pt, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	ret = mshv_register_doorbell(pt->pt_id, ioeventfd_mmio_write,
				     (void *)pt, p->iovntfd_addr,
				     p->iovntfd_datamatch, doorbell_flags);
	if (ret < 0)
		goto unlock_fail;

	p->iovntfd_doorbell_id = ret;

	hlist_add_head_rcu(&p->iovntfd_hnode, &pt->ioeventfds_list);

	return 0;

unlock_fail:
	kfree(p);

fail:
	eventfd_ctx_put(eventfd);

	return ret;
}

static int mshv_deassign_ioeventfd(struct mshv_partition *pt,
				   struct mshv_user_ioeventfd *args)
	__must_hold(&pt->mutex)
{
	struct mshv_ioeventfd *p;
	struct eventfd_ctx *eventfd;
	struct hlist_node *n;
	int ret = -ENOENT;

	/* This mutex is currently protecting ioeventfd.items list */
	WARN_ON_ONCE(!mutex_is_locked(&pt->pt_mutex));

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	hlist_for_each_entry_safe(p, n, &pt->ioeventfds_list, iovntfd_hnode) {
		bool wildcard = !(args->flags & BIT(MSHV_IOEVENTFD_BIT_DATAMATCH));

		if (p->iovntfd_eventfd != eventfd ||
		    p->iovntfd_addr != args->addr ||
		    p->iovntfd_length != args->len ||
		    p->iovntfd_wildcard != wildcard)
			continue;

		if (!p->iovntfd_wildcard &&
		    p->iovntfd_datamatch != args->datamatch)
			continue;

		hlist_del_rcu(&p->iovntfd_hnode);
		synchronize_rcu();
		ioeventfd_release(p, pt->pt_id);
		ret = 0;
		break;
	}

	eventfd_ctx_put(eventfd);

	return ret;
}

int mshv_set_unset_ioeventfd(struct mshv_partition *pt,
			     struct mshv_user_ioeventfd *args)
	__must_hold(&pt->mutex)
{
	if ((args->flags & ~MSHV_IOEVENTFD_FLAGS_MASK) ||
	    mshv_field_nonzero(*args, rsvd))
		return -EINVAL;

	/* PIO not yet implemented */
	if (args->flags & BIT(MSHV_IOEVENTFD_BIT_PIO))
		return -EOPNOTSUPP;

	if (args->flags & BIT(MSHV_IOEVENTFD_BIT_DEASSIGN))
		return mshv_deassign_ioeventfd(pt, args);

	return mshv_assign_ioeventfd(pt, args);
}

void mshv_eventfd_init(struct mshv_partition *pt)
{
	spin_lock_init(&pt->pt_irqfds_lock);
	INIT_HLIST_HEAD(&pt->pt_irqfds_list);

	INIT_HLIST_HEAD(&pt->irqfds_resampler_list);
	mutex_init(&pt->irqfds_resampler_lock);

	INIT_HLIST_HEAD(&pt->ioeventfds_list);
}

void mshv_eventfd_release(struct mshv_partition *pt)
{
	struct hlist_head items;
	struct hlist_node *n;
	struct mshv_ioeventfd *p;

	hlist_move_list(&pt->ioeventfds_list, &items);
	synchronize_rcu();

	hlist_for_each_entry_safe(p, n, &items, iovntfd_hnode) {
		hlist_del(&p->iovntfd_hnode);
		ioeventfd_release(p, pt->pt_id);
	}

	mshv_irqfd_release(pt);
}

drivers/hv/mshv_eventfd.h (new file, 71 lines)
@ -0,0 +1,71 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * irqfd: Allows an fd to be used to inject an interrupt to the guest.
 * ioeventfd: Allows an fd to be used to receive a signal from the guest.
 * All credit goes to kvm developers.
 */

#ifndef __LINUX_MSHV_EVENTFD_H
#define __LINUX_MSHV_EVENTFD_H

#include <linux/poll.h>

#include "mshv.h"
#include "mshv_root.h"

/* struct to contain list of irqfds sharing an irq. Updates are protected by
 * partition.irqfds.resampler_lock
 */
struct mshv_irqfd_resampler {
	struct mshv_partition *rsmplr_partn;
	struct hlist_head rsmplr_irqfd_list;
	struct mshv_irq_ack_notifier rsmplr_notifier;
	struct hlist_node rsmplr_hnode;
};

struct mshv_irqfd {
	struct mshv_partition *irqfd_partn;
	struct eventfd_ctx *irqfd_eventfd_ctx;
	struct mshv_guest_irq_ent irqfd_girq_ent;
	seqcount_spinlock_t irqfd_irqe_sc;
	u32 irqfd_irqnum;
	struct mshv_lapic_irq irqfd_lapic_irq;
	struct hlist_node irqfd_hnode;
	poll_table irqfd_polltbl;
	wait_queue_head_t *irqfd_wqh;
	wait_queue_entry_t irqfd_wait;
	struct work_struct irqfd_shutdown;
	struct mshv_irqfd_resampler *irqfd_resampler;
	struct eventfd_ctx *irqfd_resamplefd;
	struct hlist_node irqfd_resampler_hnode;
};

void mshv_eventfd_init(struct mshv_partition *partition);
void mshv_eventfd_release(struct mshv_partition *partition);

void mshv_register_irq_ack_notifier(struct mshv_partition *partition,
				    struct mshv_irq_ack_notifier *mian);
void mshv_unregister_irq_ack_notifier(struct mshv_partition *partition,
				      struct mshv_irq_ack_notifier *mian);
bool mshv_notify_acked_gsi(struct mshv_partition *partition, int gsi);

int mshv_set_unset_irqfd(struct mshv_partition *partition,
			 struct mshv_user_irqfd *args);

int mshv_irqfd_wq_init(void);
void mshv_irqfd_wq_cleanup(void);

struct mshv_ioeventfd {
	struct hlist_node iovntfd_hnode;
	u64 iovntfd_addr;
	int iovntfd_length;
	struct eventfd_ctx *iovntfd_eventfd;
	u64 iovntfd_datamatch;
	int iovntfd_doorbell_id;
	bool iovntfd_wildcard;
};

int mshv_set_unset_ioeventfd(struct mshv_partition *pt,
			     struct mshv_user_ioeventfd *args);

#endif /* __LINUX_MSHV_EVENTFD_H */

drivers/hv/mshv_irq.c (new file, 124 lines)
@ -0,0 +1,124 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * Authors: Microsoft Linux virtualization team
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/mshyperv.h>

#include "mshv_eventfd.h"
#include "mshv.h"
#include "mshv_root.h"

/* called from the ioctl code, user wants to update the guest irq table */
int mshv_update_routing_table(struct mshv_partition *partition,
			      const struct mshv_user_irq_entry *ue,
			      unsigned int numents)
{
	struct mshv_girq_routing_table *new = NULL, *old;
	u32 i, nr_rt_entries = 0;
	int r = 0;

	if (numents == 0)
		goto swap_routes;

	for (i = 0; i < numents; i++) {
		if (ue[i].gsi >= MSHV_MAX_GUEST_IRQS)
			return -EINVAL;

		if (ue[i].address_hi)
			return -EINVAL;

		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
	}
	nr_rt_entries += 1;

	new = kzalloc(struct_size(new, mshv_girq_info_tbl, nr_rt_entries),
		      GFP_KERNEL_ACCOUNT);
	if (!new)
		return -ENOMEM;

	new->num_rt_entries = nr_rt_entries;
	for (i = 0; i < numents; i++) {
		struct mshv_guest_irq_ent *girq;

		girq = &new->mshv_girq_info_tbl[ue[i].gsi];

		/*
		 * Allow only one to one mapping between GSI and MSI routing.
		 */
		if (girq->guest_irq_num != 0) {
			r = -EINVAL;
			goto out;
		}

		girq->guest_irq_num = ue[i].gsi;
		girq->girq_addr_lo = ue[i].address_lo;
		girq->girq_addr_hi = ue[i].address_hi;
		girq->girq_irq_data = ue[i].data;
		girq->girq_entry_valid = true;
	}

swap_routes:
	mutex_lock(&partition->pt_irq_lock);
	old = rcu_dereference_protected(partition->pt_girq_tbl, 1);
	rcu_assign_pointer(partition->pt_girq_tbl, new);
	mshv_irqfd_routing_update(partition);
	mutex_unlock(&partition->pt_irq_lock);

	synchronize_srcu_expedited(&partition->pt_irq_srcu);
	new = old;

out:
	kfree(new);

	return r;
}

/* vm is going away, kfree the irq routing table */
void mshv_free_routing_table(struct mshv_partition *partition)
{
	struct mshv_girq_routing_table *rt =
				rcu_access_pointer(partition->pt_girq_tbl);

	kfree(rt);
}

struct mshv_guest_irq_ent
mshv_ret_girq_entry(struct mshv_partition *partition, u32 irqnum)
{
	struct mshv_guest_irq_ent entry = { 0 };
	struct mshv_girq_routing_table *girq_tbl;

	girq_tbl = srcu_dereference_check(partition->pt_girq_tbl,
					  &partition->pt_irq_srcu,
					  lockdep_is_held(&partition->pt_irq_lock));
	if (!girq_tbl || irqnum >= girq_tbl->num_rt_entries) {
		/*
		 * Premature register_irqfd, setting valid_entry = 0
		 * would ignore this entry anyway
		 */
		entry.guest_irq_num = irqnum;
		return entry;
	}

	return girq_tbl->mshv_girq_info_tbl[irqnum];
}

void mshv_copy_girq_info(struct mshv_guest_irq_ent *ent,
			 struct mshv_lapic_irq *lirq)
{
	memset(lirq, 0, sizeof(*lirq));
	if (!ent || !ent->girq_entry_valid)
		return;

	lirq->lapic_vector = ent->girq_irq_data & 0xFF;
	lirq->lapic_apic_id = (ent->girq_addr_lo >> 12) & 0xFF;
	lirq->lapic_control.interrupt_type = (ent->girq_irq_data & 0x700) >> 8;
	lirq->lapic_control.level_triggered = (ent->girq_irq_data >> 15) & 0x1;
	lirq->lapic_control.logical_dest_mode = (ent->girq_addr_lo >> 2) & 0x1;
}
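
The bit extractions in mshv_copy_girq_info() follow the classic x86 MSI address/data layout. A small stand-alone program applying the same shifts and masks to example values (the values are for illustration only):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t addr_lo = 0xfee0b00c;	/* example MSI address_lo */
	uint32_t data = 0x4031;		/* example MSI data */

	printf("vector            = 0x%x\n", (unsigned int)(data & 0xFF));
	printf("apic id           = 0x%x\n", (unsigned int)((addr_lo >> 12) & 0xFF));
	printf("interrupt type    = %u\n", (unsigned int)((data & 0x700) >> 8));
	printf("level triggered   = %u\n", (unsigned int)((data >> 15) & 0x1));
	printf("logical dest mode = %u\n", (unsigned int)((addr_lo >> 2) & 0x1));
	return 0;
}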

drivers/hv/mshv_portid_table.c (new file, 83 lines)
@ -0,0 +1,83 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <asm/mshyperv.h>

#include "mshv.h"
#include "mshv_root.h"

/*
 * Ports and connections are hypervisor structs used for inter-partition
 * communication. Port represents the source and connection represents
 * the destination. Partitions are responsible for managing the port and
 * connection ids.
 */

#define PORTID_MIN	1
#define PORTID_MAX	INT_MAX

static DEFINE_IDR(port_table_idr);

void
mshv_port_table_fini(void)
{
	struct port_table_info *port_info;
	unsigned long i, tmp;

	idr_lock(&port_table_idr);
	if (!idr_is_empty(&port_table_idr)) {
		idr_for_each_entry_ul(&port_table_idr, port_info, tmp, i) {
			port_info = idr_remove(&port_table_idr, i);
			kfree_rcu(port_info, portbl_rcu);
		}
	}
	idr_unlock(&port_table_idr);
}

int
mshv_portid_alloc(struct port_table_info *info)
{
	int ret = 0;

	idr_lock(&port_table_idr);
	ret = idr_alloc(&port_table_idr, info, PORTID_MIN,
			PORTID_MAX, GFP_KERNEL);
	idr_unlock(&port_table_idr);

	return ret;
}

void
mshv_portid_free(int port_id)
{
	struct port_table_info *info;

	idr_lock(&port_table_idr);
	info = idr_remove(&port_table_idr, port_id);
	WARN_ON(!info);
	idr_unlock(&port_table_idr);

	synchronize_rcu();
	kfree(info);
}

int
mshv_portid_lookup(int port_id, struct port_table_info *info)
{
	struct port_table_info *_info;
	int ret = -ENOENT;

	rcu_read_lock();
	_info = idr_find(&port_table_idr, port_id);
	rcu_read_unlock();

	if (_info) {
		*info = *_info;
		ret = 0;
	}

	return ret;
}
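
Note that mshv_portid_lookup() copies the entry out instead of returning a pointer, so a racing mshv_portid_free() cannot leave the caller with a dangling reference. A flat-array toy model of those semantics (the kernel version uses an IDR plus RCU; everything below is illustrative):

#include <stdio.h>

struct port_info {
	int type;
	void *data;
};

#define MAX_PORTS 8
static struct port_info *table[MAX_PORTS];

static int portid_alloc(struct port_info *info)
{
	int id;

	for (id = 1; id < MAX_PORTS; id++)	/* ids start at 1, as above */
		if (!table[id]) {
			table[id] = info;
			return id;
		}
	return -1;
}

static int portid_lookup(int id, struct port_info *out)
{
	if (id < 1 || id >= MAX_PORTS || !table[id])
		return -1;
	*out = *table[id];	/* copy out, never hand back the pointer */
	return 0;
}

int main(void)
{
	struct port_info p = { 1, NULL }, snap;
	int id = portid_alloc(&p);

	if (portid_lookup(id, &snap) == 0)
		printf("port %d has type %d\n", id, snap.type);
	return 0;
}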

drivers/hv/mshv_root.h (new file, 311 lines)
@ -0,0 +1,311 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2023, Microsoft Corporation.
 */

#ifndef _MSHV_ROOT_H_
#define _MSHV_ROOT_H_

#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/semaphore.h>
#include <linux/sched.h>
#include <linux/srcu.h>
#include <linux/wait.h>
#include <linux/hashtable.h>
#include <linux/dev_printk.h>
#include <linux/build_bug.h>
#include <uapi/linux/mshv.h>

/*
 * Hypervisor must be between these version numbers (inclusive)
 * to guarantee compatibility
 */
#define MSHV_HV_MIN_VERSION		(27744)
#define MSHV_HV_MAX_VERSION		(27751)

static_assert(HV_HYP_PAGE_SIZE == MSHV_HV_PAGE_SIZE);

#define MSHV_MAX_VPS			256

#define MSHV_PARTITIONS_HASH_BITS	9

#define MSHV_PIN_PAGES_BATCH_SIZE	(0x10000000ULL / HV_HYP_PAGE_SIZE)

struct mshv_vp {
	u32 vp_index;
	struct mshv_partition *vp_partition;
	struct mutex vp_mutex;
	struct hv_vp_register_page *vp_register_page;
	struct hv_message *vp_intercept_msg_page;
	void *vp_ghcb_page;
	struct hv_stats_page *vp_stats_pages[2];
	struct {
		atomic64_t vp_signaled_count;
		struct {
			u64 intercept_suspend: 1;
			u64 root_sched_blocked: 1; /* root scheduler only */
			u64 root_sched_dispatched: 1; /* root scheduler only */
			u64 reserved: 61;
		} flags;
		unsigned int kicked_by_hv;
		wait_queue_head_t vp_suspend_queue;
	} run;
};

#define vp_fmt(fmt) "p%lluvp%u: " fmt
#define vp_devprintk(level, v, fmt, ...) \
do { \
	const struct mshv_vp *__vp = (v); \
	const struct mshv_partition *__pt = __vp->vp_partition; \
	dev_##level(__pt->pt_module_dev, vp_fmt(fmt), __pt->pt_id, \
		    __vp->vp_index, ##__VA_ARGS__); \
} while (0)
#define vp_emerg(v, fmt, ...)	vp_devprintk(emerg, v, fmt, ##__VA_ARGS__)
#define vp_crit(v, fmt, ...)	vp_devprintk(crit, v, fmt, ##__VA_ARGS__)
#define vp_alert(v, fmt, ...)	vp_devprintk(alert, v, fmt, ##__VA_ARGS__)
#define vp_err(v, fmt, ...)	vp_devprintk(err, v, fmt, ##__VA_ARGS__)
#define vp_warn(v, fmt, ...)	vp_devprintk(warn, v, fmt, ##__VA_ARGS__)
#define vp_notice(v, fmt, ...)	vp_devprintk(notice, v, fmt, ##__VA_ARGS__)
#define vp_info(v, fmt, ...)	vp_devprintk(info, v, fmt, ##__VA_ARGS__)
#define vp_dbg(v, fmt, ...)	vp_devprintk(dbg, v, fmt, ##__VA_ARGS__)

struct mshv_mem_region {
	struct hlist_node hnode;
	u64 nr_pages;
	u64 start_gfn;
	u64 start_uaddr;
	u32 hv_map_flags;
	struct {
		u64 large_pages: 1; /* 2MiB */
		u64 range_pinned: 1;
		u64 reserved: 62;
	} flags;
	struct mshv_partition *partition;
	struct page *pages[];
};

struct mshv_irq_ack_notifier {
	struct hlist_node link;
	unsigned int irq_ack_gsi;
	void (*irq_acked)(struct mshv_irq_ack_notifier *mian);
};

struct mshv_partition {
	struct device *pt_module_dev;

	struct hlist_node pt_hnode;
	u64 pt_id;
	refcount_t pt_ref_count;
	struct mutex pt_mutex;
	struct hlist_head pt_mem_regions; // not ordered

	u32 pt_vp_count;
	struct mshv_vp *pt_vp_array[MSHV_MAX_VPS];

	struct mutex pt_irq_lock;
	struct srcu_struct pt_irq_srcu;
	struct hlist_head irq_ack_notifier_list;

	struct hlist_head pt_devices;

	/*
	 * MSHV does not support more than one async hypercall in flight
	 * for a single partition. Thus, it is okay to define per partition
	 * async hypercall status.
	 */
	struct completion async_hypercall;
	u64 async_hypercall_status;

	spinlock_t	pt_irqfds_lock;
	struct hlist_head pt_irqfds_list;
	struct mutex	irqfds_resampler_lock;
	struct hlist_head irqfds_resampler_list;

	struct hlist_head ioeventfds_list;

	struct mshv_girq_routing_table __rcu *pt_girq_tbl;
	u64 isolation_type;
	bool import_completed;
	bool pt_initialized;
};

#define pt_fmt(fmt) "p%llu: " fmt
#define pt_devprintk(level, p, fmt, ...) \
do { \
	const struct mshv_partition *__pt = (p); \
	dev_##level(__pt->pt_module_dev, pt_fmt(fmt), __pt->pt_id, \
		    ##__VA_ARGS__); \
} while (0)
#define pt_emerg(p, fmt, ...)	pt_devprintk(emerg, p, fmt, ##__VA_ARGS__)
#define pt_crit(p, fmt, ...)	pt_devprintk(crit, p, fmt, ##__VA_ARGS__)
#define pt_alert(p, fmt, ...)	pt_devprintk(alert, p, fmt, ##__VA_ARGS__)
#define pt_err(p, fmt, ...)	pt_devprintk(err, p, fmt, ##__VA_ARGS__)
#define pt_warn(p, fmt, ...)	pt_devprintk(warn, p, fmt, ##__VA_ARGS__)
#define pt_notice(p, fmt, ...)	pt_devprintk(notice, p, fmt, ##__VA_ARGS__)
#define pt_info(p, fmt, ...)	pt_devprintk(info, p, fmt, ##__VA_ARGS__)
#define pt_dbg(p, fmt, ...)	pt_devprintk(dbg, p, fmt, ##__VA_ARGS__)

struct mshv_lapic_irq {
	u32 lapic_vector;
	u64 lapic_apic_id;
	union hv_interrupt_control lapic_control;
};

#define MSHV_MAX_GUEST_IRQS		4096

/* representation of one guest irq entry, either msi or legacy */
struct mshv_guest_irq_ent {
	u32 girq_entry_valid; /* vfio looks at this */
	u32 guest_irq_num;    /* a unique number for each irq */
	u32 girq_addr_lo;     /* guest irq msi address info */
	u32 girq_addr_hi;
	u32 girq_irq_data;    /* idt vector in some cases */
};

struct mshv_girq_routing_table {
	u32 num_rt_entries;
	struct mshv_guest_irq_ent mshv_girq_info_tbl[];
};

struct hv_synic_pages {
	struct hv_message_page *synic_message_page;
	struct hv_synic_event_flags_page *synic_event_flags_page;
	struct hv_synic_event_ring_page *synic_event_ring_page;
};

struct mshv_root {
	struct hv_synic_pages __percpu *synic_pages;
	spinlock_t pt_ht_lock;
	DECLARE_HASHTABLE(pt_htable, MSHV_PARTITIONS_HASH_BITS);
};

/*
 * Callback for doorbell events.
 * NOTE: This is called in interrupt context. Callback
 * should defer slow and sleeping logic to later.
 */
typedef void (*doorbell_cb_t) (int doorbell_id, void *);

/*
 * port table information
 */
struct port_table_info {
	struct rcu_head portbl_rcu;
	enum hv_port_type hv_port_type;
	union {
		struct {
			u64 reserved[2];
		} hv_port_message;
		struct {
			u64 reserved[2];
		} hv_port_event;
		struct {
			u64 reserved[2];
		} hv_port_monitor;
		struct {
			doorbell_cb_t doorbell_cb;
			void *data;
		} hv_port_doorbell;
	};
};

int mshv_update_routing_table(struct mshv_partition *partition,
			      const struct mshv_user_irq_entry *entries,
			      unsigned int numents);
void mshv_free_routing_table(struct mshv_partition *partition);

struct mshv_guest_irq_ent mshv_ret_girq_entry(struct mshv_partition *partition,
					      u32 irq_num);

void mshv_copy_girq_info(struct mshv_guest_irq_ent *src_irq,
			 struct mshv_lapic_irq *dest_irq);

void mshv_irqfd_routing_update(struct mshv_partition *partition);

void mshv_port_table_fini(void);
int mshv_portid_alloc(struct port_table_info *info);
int mshv_portid_lookup(int port_id, struct port_table_info *info);
void mshv_portid_free(int port_id);

int mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb,
			   void *data, u64 gpa, u64 val, u64 flags);
void mshv_unregister_doorbell(u64 partition_id, int doorbell_portid);

void mshv_isr(void);
int mshv_synic_init(unsigned int cpu);
int mshv_synic_cleanup(unsigned int cpu);

static inline bool mshv_partition_encrypted(struct mshv_partition *partition)
{
	return partition->isolation_type == HV_PARTITION_ISOLATION_TYPE_SNP;
}

struct mshv_partition *mshv_partition_get(struct mshv_partition *partition);
void mshv_partition_put(struct mshv_partition *partition);
struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU);

/* hypercalls */

int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
int hv_call_create_partition(u64 flags,
			     struct hv_partition_creation_properties creation_properties,
			     union hv_partition_isolation_properties isolation_properties,
			     u64 *partition_id);
int hv_call_initialize_partition(u64 partition_id);
int hv_call_finalize_partition(u64 partition_id);
int hv_call_delete_partition(u64 partition_id);
int hv_call_map_mmio_pages(u64 partition_id, u64 gfn, u64 mmio_spa, u64 numpgs);
int hv_call_map_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
			  u32 flags, struct page **pages);
int hv_call_unmap_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
			    u32 flags);
int hv_call_delete_vp(u64 partition_id, u32 vp_index);
int hv_call_assert_virtual_interrupt(u64 partition_id, u32 vector,
				     u64 dest_addr,
				     union hv_interrupt_control control);
int hv_call_clear_virtual_interrupt(u64 partition_id);
int hv_call_get_gpa_access_states(u64 partition_id, u32 count, u64 gpa_base_pfn,
				  union hv_gpa_page_access_state_flags state_flags,
				  int *written_total,
				  union hv_gpa_page_access_state *states);
int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
			 struct hv_vp_state_data state_data,
			 /* Choose between pages and ret_output */
			 u64 page_count, struct page **pages,
			 union hv_output_get_vp_state *ret_output);
int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
			 /* Choose between pages and bytes */
			 struct hv_vp_state_data state_data, u64 page_count,
			 struct page **pages, u32 num_bytes, u8 *bytes);
int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
			      union hv_input_vtl input_vtl,
			      struct page **state_page);
int hv_call_unmap_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
				union hv_input_vtl input_vtl);
int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
			u64 connection_partition_id, struct hv_port_info *port_info,
			u8 port_vtl, u8 min_connection_vtl, int node);
int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id);
int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
			 u64 connection_partition_id,
			 union hv_connection_id connection_id,
			 struct hv_connection_info *connection_info,
			 u8 connection_vtl, int node);
int hv_call_disconnect_port(u64 connection_partition_id,
			    union hv_connection_id connection_id);
int hv_call_notify_port_ring_empty(u32 sint_index);
int hv_call_map_stat_page(enum hv_stats_object_type type,
			  const union hv_stats_object_identity *identity,
			  void **addr);
int hv_call_unmap_stat_page(enum hv_stats_object_type type,
			    const union hv_stats_object_identity *identity);
int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
				   u64 page_struct_count, u32 host_access,
				   u32 flags, u8 acquire);

extern struct mshv_root mshv_root;
extern enum hv_scheduler_type hv_scheduler_type;
extern u8 * __percpu *hv_synic_eventring_tail;

#endif /* _MSHV_ROOT_H_ */

drivers/hv/mshv_root_hv_call.c (new file, 849 lines)
@ -0,0 +1,849 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * Hypercall helper functions used by the mshv_root module.
 *
 * Authors: Microsoft Linux virtualization team
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/mshyperv.h>

#include "mshv_root.h"

/* Determined empirically */
#define HV_INIT_PARTITION_DEPOSIT_PAGES 208
#define HV_MAP_GPA_DEPOSIT_PAGES	256
#define HV_UMAP_GPA_PAGES		512

#define HV_PAGE_COUNT_2M_ALIGNED(pg_count) (!((pg_count) & (0x200 - 1)))

#define HV_WITHDRAW_BATCH_SIZE	(HV_HYP_PAGE_SIZE / sizeof(u64))
#define HV_MAP_GPA_BATCH_SIZE	\
	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_map_gpa_pages)) \
		/ sizeof(u64))
#define HV_GET_VP_STATE_BATCH_SIZE	\
	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_get_vp_state)) \
		/ sizeof(u64))
#define HV_SET_VP_STATE_BATCH_SIZE	\
	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_set_vp_state)) \
		/ sizeof(u64))
#define HV_GET_GPA_ACCESS_STATES_BATCH_SIZE	\
	((HV_HYP_PAGE_SIZE - sizeof(union hv_gpa_page_access_state)) \
		/ sizeof(union hv_gpa_page_access_state))
#define HV_MODIFY_SPARSE_SPA_PAGE_HOST_ACCESS_MAX_PAGE_COUNT		       \
	((HV_HYP_PAGE_SIZE -						       \
	  sizeof(struct hv_input_modify_sparse_spa_page_host_access)) /       \
	 sizeof(u64))
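
The 2 MiB alignment test works because 0x200 (512) hypervisor pages of 4 KiB make up one large page. A stand-alone check of the macro, assuming the usual 4 KiB HV_HYP_PAGE_SIZE:

#include <stdio.h>

/* Same expression as above: a count is "2M aligned" when it is a
 * multiple of 512 four-KiB pages. */
#define HV_PAGE_COUNT_2M_ALIGNED(pg_count) (!((pg_count) & (0x200 - 1)))

int main(void)
{
	unsigned long counts[] = { 512, 1024, 513, 100 };
	int i;

	for (i = 0; i < 4; i++)
		printf("%4lu pages -> %s\n", counts[i],
		       HV_PAGE_COUNT_2M_ALIGNED(counts[i]) ?
		       "2M aligned" : "not aligned");
	return 0;
}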

int hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
{
	struct hv_input_withdraw_memory *input_page;
	struct hv_output_withdraw_memory *output_page;
	struct page *page;
	u16 completed;
	unsigned long remaining = count;
	u64 status;
	int i;
	unsigned long flags;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;
	output_page = page_address(page);

	while (remaining) {
		local_irq_save(flags);

		input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);

		memset(input_page, 0, sizeof(*input_page));
		input_page->partition_id = partition_id;
		status = hv_do_rep_hypercall(HVCALL_WITHDRAW_MEMORY,
					     min(remaining, HV_WITHDRAW_BATCH_SIZE),
					     0, input_page, output_page);

		local_irq_restore(flags);

		completed = hv_repcomp(status);

		for (i = 0; i < completed; i++)
			__free_page(pfn_to_page(output_page->gpa_page_list[i]));

		if (!hv_result_success(status)) {
			if (hv_result(status) == HV_STATUS_NO_RESOURCES)
				status = HV_STATUS_SUCCESS;
			break;
		}

		remaining -= completed;
	}
	free_page((unsigned long)output_page);

	return hv_result_to_errno(status);
}

int hv_call_create_partition(u64 flags,
			     struct hv_partition_creation_properties creation_properties,
			     union hv_partition_isolation_properties isolation_properties,
			     u64 *partition_id)
{
	struct hv_input_create_partition *input;
	struct hv_output_create_partition *output;
	u64 status;
	int ret;
	unsigned long irq_flags;

	do {
		local_irq_save(irq_flags);
		input = *this_cpu_ptr(hyperv_pcpu_input_arg);
		output = *this_cpu_ptr(hyperv_pcpu_output_arg);

		memset(input, 0, sizeof(*input));
		input->flags = flags;
		input->compatibility_version = HV_COMPATIBILITY_21_H2;

		memcpy(&input->partition_creation_properties, &creation_properties,
		       sizeof(creation_properties));

		memcpy(&input->isolation_properties, &isolation_properties,
		       sizeof(isolation_properties));

		status = hv_do_hypercall(HVCALL_CREATE_PARTITION,
					 input, output);

		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
			if (hv_result_success(status))
				*partition_id = output->partition_id;
			local_irq_restore(irq_flags);
			ret = hv_result_to_errno(status);
			break;
		}
		local_irq_restore(irq_flags);
		ret = hv_call_deposit_pages(NUMA_NO_NODE,
					    hv_current_partition_id, 1);
	} while (!ret);

	return ret;
}
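
Several wrappers in this file share the same control flow: issue the hypercall, and on HV_STATUS_INSUFFICIENT_MEMORY deposit more pages into the target partition and retry, until either the call or the deposit fails. A stand-alone sketch of that loop (all names and return codes below are stand-ins, not the Hyper-V ABI):

#include <stdio.h>

enum { OK, NOMEM, OTHER };

static int tries;

static int do_call(void)		/* stand-in for the hypercall */
{
	return ++tries < 3 ? NOMEM : OK;
}

static int deposit_pages(int n)		/* stand-in for hv_call_deposit_pages() */
{
	printf("depositing %d page(s)\n", n);
	return 0;			/* 0 = deposit succeeded */
}

int main(void)
{
	int ret, status;

	do {
		status = do_call();
		if (status != NOMEM) {
			ret = status == OK ? 0 : -1;
			break;		/* success, or a non-retryable error */
		}
		ret = deposit_pages(1);
	} while (!ret);			/* stop if the deposit itself fails */

	return ret;
}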

int hv_call_initialize_partition(u64 partition_id)
{
	struct hv_input_initialize_partition input;
	u64 status;
	int ret;

	input.partition_id = partition_id;

	ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id,
				    HV_INIT_PARTITION_DEPOSIT_PAGES);
	if (ret)
		return ret;

	do {
		status = hv_do_fast_hypercall8(HVCALL_INITIALIZE_PARTITION,
					       *(u64 *)&input);

		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
			ret = hv_result_to_errno(status);
			break;
		}
		ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
	} while (!ret);

	return ret;
}

int hv_call_finalize_partition(u64 partition_id)
{
	struct hv_input_finalize_partition input;
	u64 status;

	input.partition_id = partition_id;
	status = hv_do_fast_hypercall8(HVCALL_FINALIZE_PARTITION,
				       *(u64 *)&input);

	return hv_result_to_errno(status);
}

int hv_call_delete_partition(u64 partition_id)
{
	struct hv_input_delete_partition input;
	u64 status;

	input.partition_id = partition_id;
	status = hv_do_fast_hypercall8(HVCALL_DELETE_PARTITION, *(u64 *)&input);

	return hv_result_to_errno(status);
}

/* Ask the hypervisor to map guest ram pages or the guest mmio space */
static int hv_do_map_gpa_hcall(u64 partition_id, u64 gfn, u64 page_struct_count,
			       u32 flags, struct page **pages, u64 mmio_spa)
{
	struct hv_input_map_gpa_pages *input_page;
	u64 status, *pfnlist;
	unsigned long irq_flags, large_shift = 0;
	int ret = 0, done = 0;
	u64 page_count = page_struct_count;

	if (page_count == 0 || (pages && mmio_spa))
		return -EINVAL;

	if (flags & HV_MAP_GPA_LARGE_PAGE) {
		if (mmio_spa)
			return -EINVAL;

		if (!HV_PAGE_COUNT_2M_ALIGNED(page_count))
			return -EINVAL;

		large_shift = HV_HYP_LARGE_PAGE_SHIFT - HV_HYP_PAGE_SHIFT;
		page_count >>= large_shift;
	}

	while (done < page_count) {
		ulong i, completed, remain = page_count - done;
		int rep_count = min(remain, HV_MAP_GPA_BATCH_SIZE);

		local_irq_save(irq_flags);
		input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);

		input_page->target_partition_id = partition_id;
		input_page->target_gpa_base = gfn + (done << large_shift);
		input_page->map_flags = flags;
		pfnlist = input_page->source_gpa_page_list;

		for (i = 0; i < rep_count; i++)
			if (flags & HV_MAP_GPA_NO_ACCESS) {
				pfnlist[i] = 0;
			} else if (pages) {
				u64 index = (done + i) << large_shift;

				if (index >= page_struct_count) {
					ret = -EINVAL;
					break;
				}
				pfnlist[i] = page_to_pfn(pages[index]);
			} else {
				pfnlist[i] = mmio_spa + done + i;
			}
		if (ret)
			break;

		status = hv_do_rep_hypercall(HVCALL_MAP_GPA_PAGES, rep_count, 0,
					     input_page, NULL);
		local_irq_restore(irq_flags);

		completed = hv_repcomp(status);

		if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) {
			ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id,
						    HV_MAP_GPA_DEPOSIT_PAGES);
			if (ret)
				break;

		} else if (!hv_result_success(status)) {
			ret = hv_result_to_errno(status);
			break;
		}

		done += completed;
	}

	if (ret && done) {
		u32 unmap_flags = 0;

		if (flags & HV_MAP_GPA_LARGE_PAGE)
			unmap_flags |= HV_UNMAP_GPA_LARGE_PAGE;
		hv_call_unmap_gpa_pages(partition_id, gfn, done, unmap_flags);
	}

	return ret;
}
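
hv_do_map_gpa_hcall() processes the PFN list in bounded batches and, on a partial failure, unmaps whatever was already mapped so the caller never observes a half-mapped range. A toy model of that batch-and-rollback structure (the stand-in functions below are not the hypercall ABI):

#include <stdio.h>

#define BATCH 4

static int map_batch(int base, int n)	/* pretend mapping fails past page 10 */
{
	return base + n > 10 ? -1 : n;
}

static void unmap(int base, int n)
{
	printf("rollback: unmapping %d pages at %d\n", n, base);
}

int main(void)
{
	int done = 0, total = 12;

	while (done < total) {
		int want = total - done > BATCH ? BATCH : total - done;
		int got = map_batch(done, want);

		if (got < 0) {
			if (done)	/* undo the partially mapped range */
				unmap(0, done);
			return 1;
		}
		done += got;
	}
	return 0;
}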
|
||||
|
||||
/* Ask the hypervisor to map guest ram pages */
|
||||
int hv_call_map_gpa_pages(u64 partition_id, u64 gpa_target, u64 page_count,
|
||||
u32 flags, struct page **pages)
|
||||
{
|
||||
return hv_do_map_gpa_hcall(partition_id, gpa_target, page_count,
|
||||
flags, pages, 0);
|
||||
}
|
||||
|
||||
/* Ask the hypervisor to map guest mmio space */
|
||||
int hv_call_map_mmio_pages(u64 partition_id, u64 gfn, u64 mmio_spa, u64 numpgs)
|
||||
{
|
||||
int i;
|
||||
u32 flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE |
|
||||
HV_MAP_GPA_NOT_CACHED;
|
||||
|
||||
for (i = 0; i < numpgs; i++)
|
||||
if (page_is_ram(mmio_spa + i))
|
||||
return -EINVAL;
|
||||
|
||||
return hv_do_map_gpa_hcall(partition_id, gfn, numpgs, flags, NULL,
|
||||
mmio_spa);
|
||||
}
|
||||
|
||||
int hv_call_unmap_gpa_pages(u64 partition_id, u64 gfn, u64 page_count_4k,
|
||||
u32 flags)
|
||||
{
|
||||
struct hv_input_unmap_gpa_pages *input_page;
|
||||
u64 status, page_count = page_count_4k;
|
||||
unsigned long irq_flags, large_shift = 0;
|
||||
int ret = 0, done = 0;
|
||||
|
||||
if (page_count == 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (flags & HV_UNMAP_GPA_LARGE_PAGE) {
|
||||
if (!HV_PAGE_COUNT_2M_ALIGNED(page_count))
|
||||
return -EINVAL;
|
||||
|
||||
large_shift = HV_HYP_LARGE_PAGE_SHIFT - HV_HYP_PAGE_SHIFT;
|
||||
page_count >>= large_shift;
|
||||
}
|
||||
|
||||
while (done < page_count) {
|
||||
ulong completed, remain = page_count - done;
|
||||
int rep_count = min(remain, HV_UMAP_GPA_PAGES);
|
||||
|
||||
local_irq_save(irq_flags);
|
||||
input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
||||
|
||||
input_page->target_partition_id = partition_id;
|
||||
input_page->target_gpa_base = gfn + (done << large_shift);
|
||||
input_page->unmap_flags = flags;
|
||||
status = hv_do_rep_hypercall(HVCALL_UNMAP_GPA_PAGES, rep_count,
|
||||
0, input_page, NULL);
|
||||
local_irq_restore(irq_flags);
|
||||
|
||||
completed = hv_repcomp(status);
|
||||
if (!hv_result_success(status)) {
|
||||
ret = hv_result_to_errno(status);
|
||||
break;
|
||||
}
|
||||
|
||||
done += completed;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int hv_call_get_gpa_access_states(u64 partition_id, u32 count, u64 gpa_base_pfn,
|
||||
union hv_gpa_page_access_state_flags state_flags,
|
||||
int *written_total,
|
||||
union hv_gpa_page_access_state *states)
|
||||
{
|
||||
struct hv_input_get_gpa_pages_access_state *input_page;
|
||||
union hv_gpa_page_access_state *output_page;
|
||||
int completed = 0;
|
||||
unsigned long remaining = count;
|
||||
int rep_count, i;
|
||||
u64 status = 0;
|
||||
unsigned long flags;
|
||||
|
||||
*written_total = 0;
|
||||
while (remaining) {
|
||||
local_irq_save(flags);
|
||||
input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
||||
output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
|
||||
|
||||
input_page->partition_id = partition_id;
|
||||
input_page->hv_gpa_page_number = gpa_base_pfn + *written_total;
|
||||
input_page->flags = state_flags;
|
||||
rep_count = min(remaining, HV_GET_GPA_ACCESS_STATES_BATCH_SIZE);
|
||||
|
||||
status = hv_do_rep_hypercall(HVCALL_GET_GPA_PAGES_ACCESS_STATES, rep_count,
|
||||
0, input_page, output_page);
|
||||
if (!hv_result_success(status)) {
|
||||
local_irq_restore(flags);
|
||||
break;
|
||||
}
|
||||
completed = hv_repcomp(status);
|
||||
for (i = 0; i < completed; ++i)
|
||||
states[i].as_uint8 = output_page[i].as_uint8;
|
||||
|
||||
local_irq_restore(flags);
|
||||
states += completed;
|
||||
*written_total += completed;
|
||||
remaining -= completed;
|
||||
}
|
||||
|
||||
return hv_result_to_errno(status);
|
||||
}
|
||||
|
||||
int hv_call_assert_virtual_interrupt(u64 partition_id, u32 vector,
|
||||
u64 dest_addr,
|
||||
union hv_interrupt_control control)
|
||||
{
|
||||
struct hv_input_assert_virtual_interrupt *input;
|
||||
unsigned long flags;
|
||||
u64 status;
|
||||
|
||||
local_irq_save(flags);
|
||||
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
||||
memset(input, 0, sizeof(*input));
|
||||
input->partition_id = partition_id;
|
||||
input->vector = vector;
|
||||
input->dest_addr = dest_addr;
|
||||
input->control = control;
|
||||
status = hv_do_hypercall(HVCALL_ASSERT_VIRTUAL_INTERRUPT, input, NULL);
|
||||
local_irq_restore(flags);
|
||||
|
||||
return hv_result_to_errno(status);
|
||||
}
|
||||
|
||||
int hv_call_delete_vp(u64 partition_id, u32 vp_index)
|
||||
{
|
||||
union hv_input_delete_vp input = {};
|
||||
u64 status;
|
||||
|
||||
input.partition_id = partition_id;
|
||||
input.vp_index = vp_index;
|
||||
|
||||
status = hv_do_fast_hypercall16(HVCALL_DELETE_VP,
|
||||
input.as_uint64[0], input.as_uint64[1]);
|
||||
|
||||
return hv_result_to_errno(status);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hv_call_delete_vp);
|
||||
|
||||
int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
|
||||
struct hv_vp_state_data state_data,
|
||||
/* Choose between pages and ret_output */
|
||||
u64 page_count, struct page **pages,
|
||||
union hv_output_get_vp_state *ret_output)
|
||||
{
|
||||
struct hv_input_get_vp_state *input;
|
||||
union hv_output_get_vp_state *output;
|
||||
u64 status;
|
||||
int i;
|
||||
u64 control;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
if (page_count > HV_GET_VP_STATE_BATCH_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (!page_count && !ret_output)
|
||||
return -EINVAL;
|
||||
|
||||
do {
|
||||
local_irq_save(flags);
|
||||
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
||||
output = *this_cpu_ptr(hyperv_pcpu_output_arg);
|
||||
memset(input, 0, sizeof(*input));
|
||||
memset(output, 0, sizeof(*output));
|
||||
|
||||
input->partition_id = partition_id;
|
||||
input->vp_index = vp_index;
|
||||
input->state_data = state_data;
|
||||
for (i = 0; i < page_count; i++)
|
||||
input->output_data_pfns[i] = page_to_pfn(pages[i]);
|
||||
|
||||
control = (HVCALL_GET_VP_STATE) |
|
||||
(page_count << HV_HYPERCALL_VARHEAD_OFFSET);
|
||||
|
||||
status = hv_do_hypercall(control, input, output);
|
||||
|
||||
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
|
||||
if (hv_result_success(status) && ret_output)
|
||||
memcpy(ret_output, output, sizeof(*output));
|
||||
|
||||
local_irq_restore(flags);
|
||||
ret = hv_result_to_errno(status);
|
||||
break;
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
|
||||
ret = hv_call_deposit_pages(NUMA_NO_NODE,
|
||||
partition_id, 1);
|
||||
} while (!ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
|
||||
/* Choose between pages and bytes */
|
||||
struct hv_vp_state_data state_data, u64 page_count,
|
||||
struct page **pages, u32 num_bytes, u8 *bytes)
|
||||
{
|
||||
struct hv_input_set_vp_state *input;
|
||||
u64 status;
|
||||
int i;
|
||||
u64 control;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
u16 varhead_sz;
|
||||
|
||||
if (page_count > HV_SET_VP_STATE_BATCH_SIZE)
|
||||
return -EINVAL;
|
||||
if (sizeof(*input) + num_bytes > HV_HYP_PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (num_bytes)
|
||||
/* round up to 8 and divide by 8 */
|
||||
varhead_sz = (num_bytes + 7) >> 3;
|
||||
else if (page_count)
|
||||
varhead_sz = page_count;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
do {
|
||||
local_irq_save(flags);
|
||||
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
||||
memset(input, 0, sizeof(*input));
|
||||
|
||||
input->partition_id = partition_id;
|
||||
input->vp_index = vp_index;
|
||||
input->state_data = state_data;
|
||||
if (num_bytes) {
|
||||
memcpy((u8 *)input->data, bytes, num_bytes);
|
||||
} else {
|
||||
for (i = 0; i < page_count; i++)
|
||||
input->data[i].pfns = page_to_pfn(pages[i]);
|
||||
}
|
||||
|
||||
control = (HVCALL_SET_VP_STATE) |
|
||||
(varhead_sz << HV_HYPERCALL_VARHEAD_OFFSET);
|
||||
|
||||
status = hv_do_hypercall(control, input, NULL);
|
||||
|
||||
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
|
||||
local_irq_restore(flags);
|
||||
ret = hv_result_to_errno(status);
|
||||
break;
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
|
||||
ret = hv_call_deposit_pages(NUMA_NO_NODE,
|
||||
partition_id, 1);
|
||||
} while (!ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
|
||||
union hv_input_vtl input_vtl,
|
||||
struct page **state_page)
|
||||
{
|
||||
struct hv_input_map_vp_state_page *input;
|
||||
struct hv_output_map_vp_state_page *output;
|
||||
u64 status;
|
||||
int ret;
|
||||
unsigned long flags;
|
||||
|
||||
do {
|
||||
local_irq_save(flags);
|
||||
|
||||
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
||||
output = *this_cpu_ptr(hyperv_pcpu_output_arg);
|
||||
|
||||
input->partition_id = partition_id;
|
||||
input->vp_index = vp_index;
|
||||
input->type = type;
|
||||
input->input_vtl = input_vtl;
|
||||
|
||||
status = hv_do_hypercall(HVCALL_MAP_VP_STATE_PAGE, input, output);
|
||||
|
||||
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
|
||||
if (hv_result_success(status))
|
||||
*state_page = pfn_to_page(output->map_location);
|
||||
local_irq_restore(flags);
|
||||
ret = hv_result_to_errno(status);
|
||||
break;
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
|
||||
} while (!ret);
|
||||
|
||||
return ret;
|
||||
}

int hv_call_unmap_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
				union hv_input_vtl input_vtl)
{
	unsigned long flags;
	u64 status;
	struct hv_input_unmap_vp_state_page *input;

	local_irq_save(flags);

	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	memset(input, 0, sizeof(*input));

	input->partition_id = partition_id;
	input->vp_index = vp_index;
	input->type = type;
	input->input_vtl = input_vtl;

	status = hv_do_hypercall(HVCALL_UNMAP_VP_STATE_PAGE, input, NULL);

	local_irq_restore(flags);

	return hv_result_to_errno(status);
}

int
hv_call_clear_virtual_interrupt(u64 partition_id)
{
	int status;

	status = hv_do_fast_hypercall8(HVCALL_CLEAR_VIRTUAL_INTERRUPT,
				       partition_id);

	return hv_result_to_errno(status);
}

int
hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
		    u64 connection_partition_id,
		    struct hv_port_info *port_info,
		    u8 port_vtl, u8 min_connection_vtl, int node)
{
	struct hv_input_create_port *input;
	unsigned long flags;
	int ret = 0;
	int status;

	do {
		local_irq_save(flags);
		input = *this_cpu_ptr(hyperv_pcpu_input_arg);
		memset(input, 0, sizeof(*input));

		input->port_partition_id = port_partition_id;
		input->port_id = port_id;
		input->connection_partition_id = connection_partition_id;
		input->port_info = *port_info;
		input->port_vtl = port_vtl;
		input->min_connection_vtl = min_connection_vtl;
		input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
		status = hv_do_hypercall(HVCALL_CREATE_PORT, input, NULL);
		local_irq_restore(flags);
		if (hv_result_success(status))
			break;

		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
			ret = hv_result_to_errno(status);
			break;
		}
		ret = hv_call_deposit_pages(NUMA_NO_NODE, port_partition_id, 1);

	} while (!ret);

	return ret;
}

int
hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id)
{
	union hv_input_delete_port input = { 0 };
	int status;

	input.port_partition_id = port_partition_id;
	input.port_id = port_id;
	status = hv_do_fast_hypercall16(HVCALL_DELETE_PORT,
					input.as_uint64[0],
					input.as_uint64[1]);

	return hv_result_to_errno(status);
}

int
hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
		     u64 connection_partition_id,
		     union hv_connection_id connection_id,
		     struct hv_connection_info *connection_info,
		     u8 connection_vtl, int node)
{
	struct hv_input_connect_port *input;
	unsigned long flags;
	int ret = 0, status;

	do {
		local_irq_save(flags);
		input = *this_cpu_ptr(hyperv_pcpu_input_arg);
		memset(input, 0, sizeof(*input));
		input->port_partition_id = port_partition_id;
		input->port_id = port_id;
		input->connection_partition_id = connection_partition_id;
		input->connection_id = connection_id;
		input->connection_info = *connection_info;
		input->connection_vtl = connection_vtl;
		input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
		status = hv_do_hypercall(HVCALL_CONNECT_PORT, input, NULL);

		local_irq_restore(flags);
		if (hv_result_success(status))
			break;

		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
			ret = hv_result_to_errno(status);
			break;
		}
		ret = hv_call_deposit_pages(NUMA_NO_NODE,
					    connection_partition_id, 1);
	} while (!ret);

	return ret;
}

int
hv_call_disconnect_port(u64 connection_partition_id,
			union hv_connection_id connection_id)
{
	union hv_input_disconnect_port input = { 0 };
	int status;

	input.connection_partition_id = connection_partition_id;
	input.connection_id = connection_id;
	input.is_doorbell = 1;
	status = hv_do_fast_hypercall16(HVCALL_DISCONNECT_PORT,
					input.as_uint64[0],
					input.as_uint64[1]);

	return hv_result_to_errno(status);
}

int
hv_call_notify_port_ring_empty(u32 sint_index)
{
	union hv_input_notify_port_ring_empty input = { 0 };
	int status;

	input.sint_index = sint_index;
	status = hv_do_fast_hypercall8(HVCALL_NOTIFY_PORT_RING_EMPTY,
				       input.as_uint64);

	return hv_result_to_errno(status);
}

int hv_call_map_stat_page(enum hv_stats_object_type type,
			  const union hv_stats_object_identity *identity,
			  void **addr)
{
	unsigned long flags;
	struct hv_input_map_stats_page *input;
	struct hv_output_map_stats_page *output;
	u64 status, pfn;
	int ret = 0;

	do {
		local_irq_save(flags);
		input = *this_cpu_ptr(hyperv_pcpu_input_arg);
		output = *this_cpu_ptr(hyperv_pcpu_output_arg);

		memset(input, 0, sizeof(*input));
		input->type = type;
		input->identity = *identity;

		status = hv_do_hypercall(HVCALL_MAP_STATS_PAGE, input, output);
		pfn = output->map_location;

		local_irq_restore(flags);
		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
			ret = hv_result_to_errno(status);
			if (hv_result_success(status))
				break;
			return ret;
		}

		ret = hv_call_deposit_pages(NUMA_NO_NODE,
					    hv_current_partition_id, 1);
		if (ret)
			return ret;
	} while (!ret);

	*addr = page_address(pfn_to_page(pfn));

	return ret;
}

int hv_call_unmap_stat_page(enum hv_stats_object_type type,
			    const union hv_stats_object_identity *identity)
{
	unsigned long flags;
	struct hv_input_unmap_stats_page *input;
	u64 status;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	memset(input, 0, sizeof(*input));
	input->type = type;
	input->identity = *identity;

	status = hv_do_hypercall(HVCALL_UNMAP_STATS_PAGE, input, NULL);
	local_irq_restore(flags);

	return hv_result_to_errno(status);
}

int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
				   u64 page_struct_count, u32 host_access,
				   u32 flags, u8 acquire)
{
	struct hv_input_modify_sparse_spa_page_host_access *input_page;
	u64 status;
	int done = 0;
	unsigned long irq_flags, large_shift = 0;
	u64 page_count = page_struct_count;
	u16 code = acquire ? HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS :
			     HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS;

	if (page_count == 0)
		return -EINVAL;

	if (flags & HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE) {
		if (!HV_PAGE_COUNT_2M_ALIGNED(page_count))
			return -EINVAL;
		large_shift = HV_HYP_LARGE_PAGE_SHIFT - HV_HYP_PAGE_SHIFT;
		page_count >>= large_shift;
	}

	while (done < page_count) {
		ulong i, completed, remain = page_count - done;
		int rep_count = min(remain,
				    HV_MODIFY_SPARSE_SPA_PAGE_HOST_ACCESS_MAX_PAGE_COUNT);

		local_irq_save(irq_flags);
		input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);

		memset(input_page, 0, sizeof(*input_page));
		/*
		 * Only set the partition id if you are making the pages
		 * exclusive.
		 */
		if (flags & HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE)
			input_page->partition_id = partition_id;
		input_page->flags = flags;
		input_page->host_access = host_access;

		for (i = 0; i < rep_count; i++) {
			u64 index = (done + i) << large_shift;

			if (index >= page_struct_count)
				return -EINVAL;

			input_page->spa_page_list[i] =
				page_to_pfn(pages[index]);
		}

		status = hv_do_rep_hypercall(code, rep_count, 0, input_page,
					     NULL);
		local_irq_restore(irq_flags);

		completed = hv_repcomp(status);

		if (!hv_result_success(status))
			return hv_result_to_errno(status);

		done += completed;
	}

	return 0;
}
2307	drivers/hv/mshv_root_main.c (new file; diff suppressed because it is too large)
665	drivers/hv/mshv_synic.c (new file)
@ -0,0 +1,665 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * mshv_root module's main interrupt handler and associated functionality.
 *
 * Authors: Microsoft Linux virtualization team
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/random.h>
#include <asm/mshyperv.h>

#include "mshv_eventfd.h"
#include "mshv.h"

static u32 synic_event_ring_get_queued_port(u32 sint_index)
{
	struct hv_synic_event_ring_page **event_ring_page;
	volatile struct hv_synic_event_ring *ring;
	struct hv_synic_pages *spages;
	u8 **synic_eventring_tail;
	u32 message;
	u8 tail;

	spages = this_cpu_ptr(mshv_root.synic_pages);
	event_ring_page = &spages->synic_event_ring_page;
	synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);

	if (unlikely(!*synic_eventring_tail)) {
		pr_debug("Missing synic event ring tail!\n");
		return 0;
	}
	tail = (*synic_eventring_tail)[sint_index];

	if (unlikely(!*event_ring_page)) {
		pr_debug("Missing synic event ring page!\n");
		return 0;
	}

	ring = &(*event_ring_page)->sint_event_ring[sint_index];

	/*
	 * Get the message.
	 */
	message = ring->data[tail];

	if (!message) {
		if (ring->ring_full) {
			/*
			 * Ring is marked full, but we would have consumed all
			 * the messages. Notify the hypervisor that ring is now
			 * empty and check again.
			 */
			ring->ring_full = 0;
			hv_call_notify_port_ring_empty(sint_index);
			message = ring->data[tail];
		}

		if (!message) {
			ring->signal_masked = 0;
			/*
			 * Unmask the signal and sync with hypervisor
			 * before one last check for any message.
			 */
			mb();
			message = ring->data[tail];

			/*
			 * Ok, lets bail out.
			 */
			if (!message)
				return 0;
		}

		ring->signal_masked = 1;
	}

	/*
	 * Clear the message in the ring buffer.
	 */
	ring->data[tail] = 0;

	if (++tail == HV_SYNIC_EVENT_RING_MESSAGE_COUNT)
		tail = 0;

	(*synic_eventring_tail)[sint_index] = tail;

	return message;
}

static bool
mshv_doorbell_isr(struct hv_message *msg)
{
	struct hv_notification_message_payload *notification;
	u32 port;

	if (msg->header.message_type != HVMSG_SYNIC_SINT_INTERCEPT)
		return false;

	notification = (struct hv_notification_message_payload *)msg->u.payload;
	if (notification->sint_index != HV_SYNIC_DOORBELL_SINT_INDEX)
		return false;

	while ((port = synic_event_ring_get_queued_port(HV_SYNIC_DOORBELL_SINT_INDEX))) {
		struct port_table_info ptinfo = { 0 };

		if (mshv_portid_lookup(port, &ptinfo)) {
			pr_debug("Failed to get port info from port_table!\n");
			continue;
		}

		if (ptinfo.hv_port_type != HV_PORT_TYPE_DOORBELL) {
			pr_debug("Not a doorbell port!, port: %d, port_type: %d\n",
				 port, ptinfo.hv_port_type);
			continue;
		}

		/* Invoke the callback */
		ptinfo.hv_port_doorbell.doorbell_cb(port,
						    ptinfo.hv_port_doorbell.data);
	}

	return true;
}

static bool mshv_async_call_completion_isr(struct hv_message *msg)
{
	bool handled = false;
	struct hv_async_completion_message_payload *async_msg;
	struct mshv_partition *partition;
	u64 partition_id;

	if (msg->header.message_type != HVMSG_ASYNC_CALL_COMPLETION)
		goto out;

	async_msg =
		(struct hv_async_completion_message_payload *)msg->u.payload;

	partition_id = async_msg->partition_id;

	/*
	 * Hold this lock for the rest of the isr, because the partition could
	 * be released anytime.
	 * e.g. the MSHV_RUN_VP thread could wake on another cpu; it could
	 * release the partition unless we hold this!
	 */
	rcu_read_lock();

	partition = mshv_partition_find(partition_id);

	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n", partition_id);
		goto unlock_out;
	}

	partition->async_hypercall_status = async_msg->status;
	complete(&partition->async_hypercall);

	handled = true;

unlock_out:
	rcu_read_unlock();
out:
	return handled;
}

static void kick_vp(struct mshv_vp *vp)
{
	atomic64_inc(&vp->run.vp_signaled_count);
	vp->run.kicked_by_hv = 1;
	wake_up(&vp->run.vp_suspend_queue);
}

static void
handle_bitset_message(const struct hv_vp_signal_bitset_scheduler_message *msg)
{
	int bank_idx, vps_signaled = 0, bank_mask_size;
	struct mshv_partition *partition;
	const struct hv_vpset *vpset;
	const u64 *bank_contents;
	u64 partition_id = msg->partition_id;

	if (msg->vp_bitset.bitset.format != HV_GENERIC_SET_SPARSE_4K) {
		pr_debug("scheduler message format is not HV_GENERIC_SET_SPARSE_4K");
		return;
	}

	if (msg->vp_count == 0) {
		pr_debug("scheduler message with no VP specified");
		return;
	}

	rcu_read_lock();

	partition = mshv_partition_find(partition_id);
	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n", partition_id);
		goto unlock_out;
	}

	vpset = &msg->vp_bitset.bitset;

	bank_idx = -1;
	bank_contents = vpset->bank_contents;
	bank_mask_size = sizeof(vpset->valid_bank_mask) * BITS_PER_BYTE;

	while (true) {
		int vp_bank_idx = -1;
		int vp_bank_size = sizeof(*bank_contents) * BITS_PER_BYTE;
		int vp_index;

		bank_idx = find_next_bit((unsigned long *)&vpset->valid_bank_mask,
					 bank_mask_size, bank_idx + 1);
		if (bank_idx == bank_mask_size)
			break;

		while (true) {
			struct mshv_vp *vp;

			vp_bank_idx = find_next_bit((unsigned long *)bank_contents,
						    vp_bank_size, vp_bank_idx + 1);
			if (vp_bank_idx == vp_bank_size)
				break;

			vp_index = (bank_idx * vp_bank_size) + vp_bank_idx;

			/* This shouldn't happen, but just in case. */
			if (unlikely(vp_index >= MSHV_MAX_VPS)) {
				pr_debug("VP index %u out of bounds\n",
					 vp_index);
				goto unlock_out;
			}

			vp = partition->pt_vp_array[vp_index];
			if (unlikely(!vp)) {
				pr_debug("failed to find VP %u\n", vp_index);
				goto unlock_out;
			}

			kick_vp(vp);
			vps_signaled++;
		}

		bank_contents++;
	}

unlock_out:
	rcu_read_unlock();

	if (vps_signaled != msg->vp_count)
		pr_debug("asked to signal %u VPs but only did %u\n",
			 msg->vp_count, vps_signaled);
}

static void
handle_pair_message(const struct hv_vp_signal_pair_scheduler_message *msg)
{
	struct mshv_partition *partition = NULL;
	struct mshv_vp *vp;
	int idx;

	rcu_read_lock();

	for (idx = 0; idx < msg->vp_count; idx++) {
		u64 partition_id = msg->partition_ids[idx];
		u32 vp_index = msg->vp_indexes[idx];

		if (idx == 0 || partition->pt_id != partition_id) {
			partition = mshv_partition_find(partition_id);
			if (unlikely(!partition)) {
				pr_debug("failed to find partition %llu\n",
					 partition_id);
				break;
			}
		}

		/* This shouldn't happen, but just in case. */
		if (unlikely(vp_index >= MSHV_MAX_VPS)) {
			pr_debug("VP index %u out of bounds\n", vp_index);
			break;
		}

		vp = partition->pt_vp_array[vp_index];
		if (!vp) {
			pr_debug("failed to find VP %u\n", vp_index);
			break;
		}

		kick_vp(vp);
	}

	rcu_read_unlock();
}

static bool
mshv_scheduler_isr(struct hv_message *msg)
{
	if (msg->header.message_type != HVMSG_SCHEDULER_VP_SIGNAL_BITSET &&
	    msg->header.message_type != HVMSG_SCHEDULER_VP_SIGNAL_PAIR)
		return false;

	if (msg->header.message_type == HVMSG_SCHEDULER_VP_SIGNAL_BITSET)
		handle_bitset_message((struct hv_vp_signal_bitset_scheduler_message *)
				      msg->u.payload);
	else
		handle_pair_message((struct hv_vp_signal_pair_scheduler_message *)
				    msg->u.payload);

	return true;
}

static bool
mshv_intercept_isr(struct hv_message *msg)
{
	struct mshv_partition *partition;
	bool handled = false;
	struct mshv_vp *vp;
	u64 partition_id;
	u32 vp_index;

	partition_id = msg->header.sender;

	rcu_read_lock();

	partition = mshv_partition_find(partition_id);
	if (unlikely(!partition)) {
		pr_debug("failed to find partition %llu\n",
			 partition_id);
		goto unlock_out;
	}

	if (msg->header.message_type == HVMSG_X64_APIC_EOI) {
		/*
		 * Check if this gsi is registered in the
		 * ack_notifier list and invoke the callback
		 * if registered.
		 */

		/*
		 * If there is a notifier, the ack callback is supposed
		 * to handle the VMEXIT. So we need not pass this message
		 * to vcpu thread.
		 */
		struct hv_x64_apic_eoi_message *eoi_msg =
			(struct hv_x64_apic_eoi_message *)&msg->u.payload[0];

		if (mshv_notify_acked_gsi(partition, eoi_msg->interrupt_vector)) {
			handled = true;
			goto unlock_out;
		}
	}

	/*
	 * We should get an opaque intercept message here for all intercept
	 * messages, since we're using the mapped VP intercept message page.
	 *
	 * The intercept message will have been placed in intercept message
	 * page at this point.
	 *
	 * Make sure the message type matches our expectation.
	 */
	if (msg->header.message_type != HVMSG_OPAQUE_INTERCEPT) {
		pr_debug("wrong message type %d", msg->header.message_type);
		goto unlock_out;
	}

	/*
	 * Since we directly index the vp, and it has to exist for us to be here
	 * (because the vp is only deleted when the partition is), no additional
	 * locking is needed here
	 */
	vp_index =
		((struct hv_opaque_intercept_message *)msg->u.payload)->vp_index;
	vp = partition->pt_vp_array[vp_index];
	if (unlikely(!vp)) {
		pr_debug("failed to find VP %u\n", vp_index);
		goto unlock_out;
	}

	kick_vp(vp);

	handled = true;

unlock_out:
	rcu_read_unlock();

	return handled;
}

void mshv_isr(void)
{
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_message *msg;
	bool handled;

	if (unlikely(!(*msg_page))) {
		pr_debug("Missing synic page!\n");
		return;
	}

	msg = &((*msg_page)->sint_message[HV_SYNIC_INTERCEPTION_SINT_INDEX]);

	/*
	 * If the type isn't set, there isn't really a message;
	 * it may be some other hyperv interrupt
	 */
	if (msg->header.message_type == HVMSG_NONE)
		return;

	handled = mshv_doorbell_isr(msg);

	if (!handled)
		handled = mshv_scheduler_isr(msg);

	if (!handled)
		handled = mshv_async_call_completion_isr(msg);

	if (!handled)
		handled = mshv_intercept_isr(msg);

	if (handled) {
		/*
		 * Acknowledge message with hypervisor if another message is
		 * pending.
		 */
		msg->header.message_type = HVMSG_NONE;
		/*
		 * Ensure the write is complete so the hypervisor will deliver
		 * the next message if available.
		 */
		mb();
		if (msg->header.message_flags.msg_pending)
			hv_set_non_nested_msr(HV_MSR_EOM, 0);

#ifdef HYPERVISOR_CALLBACK_VECTOR
		add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR);
#endif
	} else {
		pr_warn_once("%s: unknown message type 0x%x\n", __func__,
			     msg->header.message_type);
	}
}

int mshv_synic_init(unsigned int cpu)
{
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sirbp sirbp;
#ifdef HYPERVISOR_CALLBACK_VECTOR
	union hv_synic_sint sint;
#endif
	union hv_synic_scontrol sctrl;
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_synic_event_flags_page **event_flags_page =
		&spages->synic_event_flags_page;
	struct hv_synic_event_ring_page **event_ring_page =
		&spages->synic_event_ring_page;

	/* Setup the Synic's message page */
	simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
	simp.simp_enabled = true;
	*msg_page = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
			     HV_HYP_PAGE_SIZE,
			     MEMREMAP_WB);

	if (!(*msg_page))
		return -EFAULT;

	hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);

	/* Setup the Synic's event flags page */
	siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
	siefp.siefp_enabled = true;
	*event_flags_page = memremap(siefp.base_siefp_gpa << PAGE_SHIFT,
				     PAGE_SIZE, MEMREMAP_WB);

	if (!(*event_flags_page))
		goto cleanup;

	hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);

	/* Setup the Synic's event ring page */
	sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
	sirbp.sirbp_enabled = true;
	*event_ring_page = memremap(sirbp.base_sirbp_gpa << PAGE_SHIFT,
				    PAGE_SIZE, MEMREMAP_WB);

	if (!(*event_ring_page))
		goto cleanup;

	hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);

#ifdef HYPERVISOR_CALLBACK_VECTOR
	/* Enable intercepts */
	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.auto_eoi = hv_recommend_using_aeoi();
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			      sint.as_uint64);

	/* Doorbell SINT */
	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.as_intercept = 1;
	sint.auto_eoi = hv_recommend_using_aeoi();
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
			      sint.as_uint64);
#endif

	/* Enable global synic bit */
	sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
	sctrl.enable = 1;
	hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

	return 0;

cleanup:
	if (*event_ring_page) {
		sirbp.sirbp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
		memunmap(*event_ring_page);
	}
	if (*event_flags_page) {
		siefp.siefp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
		memunmap(*event_flags_page);
	}
	if (*msg_page) {
		simp.simp_enabled = false;
		hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
		memunmap(*msg_page);
	}

	return -EFAULT;
}

int mshv_synic_cleanup(unsigned int cpu)
{
	union hv_synic_sint sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sirbp sirbp;
	union hv_synic_scontrol sctrl;
	struct hv_synic_pages *spages = this_cpu_ptr(mshv_root.synic_pages);
	struct hv_message_page **msg_page = &spages->synic_message_page;
	struct hv_synic_event_flags_page **event_flags_page =
		&spages->synic_event_flags_page;
	struct hv_synic_event_ring_page **event_ring_page =
		&spages->synic_event_ring_page;

	/* Disable the interrupt */
	sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX);
	sint.masked = true;
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			      sint.as_uint64);

	/* Disable Doorbell SINT */
	sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX);
	sint.masked = true;
	hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
			      sint.as_uint64);

	/* Disable Synic's event ring page */
	sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
	sirbp.sirbp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
	memunmap(*event_ring_page);

	/* Disable Synic's event flags page */
	siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
	siefp.siefp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
	memunmap(*event_flags_page);

	/* Disable Synic's message page */
	simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
	simp.simp_enabled = false;
	hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
	memunmap(*msg_page);

	/* Disable global synic bit */
	sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
	sctrl.enable = 0;
	hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

	return 0;
}

int
mshv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb, void *data,
		       u64 gpa, u64 val, u64 flags)
{
	struct hv_connection_info connection_info = { 0 };
	union hv_connection_id connection_id = { 0 };
	struct port_table_info *port_table_info;
	struct hv_port_info port_info = { 0 };
	union hv_port_id port_id = { 0 };
	int ret;

	port_table_info = kmalloc(sizeof(*port_table_info), GFP_KERNEL);
	if (!port_table_info)
		return -ENOMEM;

	port_table_info->hv_port_type = HV_PORT_TYPE_DOORBELL;
	port_table_info->hv_port_doorbell.doorbell_cb = doorbell_cb;
	port_table_info->hv_port_doorbell.data = data;
	ret = mshv_portid_alloc(port_table_info);
	if (ret < 0) {
		kfree(port_table_info);
		return ret;
	}

	port_id.u.id = ret;
	port_info.port_type = HV_PORT_TYPE_DOORBELL;
	port_info.doorbell_port_info.target_sint = HV_SYNIC_DOORBELL_SINT_INDEX;
	port_info.doorbell_port_info.target_vp = HV_ANY_VP;
	ret = hv_call_create_port(hv_current_partition_id, port_id, partition_id,
				  &port_info,
				  0, 0, NUMA_NO_NODE);

	if (ret < 0) {
		mshv_portid_free(port_id.u.id);
		return ret;
	}

	connection_id.u.id = port_id.u.id;
	connection_info.port_type = HV_PORT_TYPE_DOORBELL;
	connection_info.doorbell_connection_info.gpa = gpa;
	connection_info.doorbell_connection_info.trigger_value = val;
	connection_info.doorbell_connection_info.flags = flags;

	ret = hv_call_connect_port(hv_current_partition_id, port_id, partition_id,
				   connection_id, &connection_info, 0, NUMA_NO_NODE);
	if (ret < 0) {
		hv_call_delete_port(hv_current_partition_id, port_id);
		mshv_portid_free(port_id.u.id);
		return ret;
	}

	// lets use the port_id as the doorbell_id
	return port_id.u.id;
}
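
Editor's note: a minimal usage sketch of the register/unregister pair above. The callback, GPA and trigger value are hypothetical and not part of this commit; the exact trigger semantics depend on the doorbell connection flags, and the callback signature is assumed to match the (port id, cookie) invocation in mshv_doorbell_isr() above.

	/* Hypothetical illustration only -- not part of this commit. */
	static void example_doorbell_cb(int doorbell_id, void *data)
	{
		pr_debug("doorbell %d rang\n", doorbell_id);
	}

	static int example_wire_doorbell(u64 partition_id, u64 guest_gpa)
	{
		int id;

		/* Guest writes of value 1 to guest_gpa invoke example_doorbell_cb(). */
		id = mshv_register_doorbell(partition_id, example_doorbell_cb,
					    NULL, guest_gpa, 1, 0);
		if (id < 0)
			return id;

		/* ... use the doorbell, then tear it down ... */
		mshv_unregister_doorbell(partition_id, id);
		return 0;
	}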

void
mshv_unregister_doorbell(u64 partition_id, int doorbell_portid)
{
	union hv_port_id port_id = { 0 };
	union hv_connection_id connection_id = { 0 };

	connection_id.u.id = doorbell_portid;
	hv_call_disconnect_port(partition_id, connection_id);

	port_id.u.id = doorbell_portid;
	hv_call_delete_port(hv_current_partition_id, port_id);

	mshv_portid_free(doorbell_portid);
}

@ -1611,18 +1611,18 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%u\n", channel->target_cpu);
}
static ssize_t target_cpu_store(struct vmbus_channel *channel,
				const char *buf, size_t count)

int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu)
{
	u32 target_cpu, origin_cpu;
	ssize_t ret = count;
	u32 origin_cpu;
	int ret = 0;

	lockdep_assert_cpus_held();
	lockdep_assert_held(&vmbus_connection.channel_mutex);

	if (vmbus_proto_version < VERSION_WIN10_V4_1)
		return -EIO;

	if (sscanf(buf, "%uu", &target_cpu) != 1)
		return -EIO;

	/* Validate target_cpu for the cpumask_test_cpu() operation below. */
	if (target_cpu >= nr_cpumask_bits)
		return -EINVAL;
@ -1630,22 +1630,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
	if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
		return -EINVAL;

	/* No CPUs should come up or down during this. */
	cpus_read_lock();

	if (!cpu_online(target_cpu)) {
		cpus_read_unlock();
	if (!cpu_online(target_cpu))
		return -EINVAL;
	}

	/*
	 * Synchronizes target_cpu_store() and channel closure:
	 * Synchronizes vmbus_channel_set_cpu() and channel closure:
	 *
	 * { Initially: state = CHANNEL_OPENED }
	 *
	 * CPU1				CPU2
	 *
	 * [target_cpu_store()]		[vmbus_disconnect_ring()]
	 * [vmbus_channel_set_cpu()]	[vmbus_disconnect_ring()]
	 *
	 * LOCK channel_mutex		LOCK channel_mutex
	 * LOAD r1 = state		LOAD r2 = state
@ -1660,7 +1655,6 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
	 * Note. The host processes the channel messages "sequentially", in
	 * the order in which they are received on a per-partition basis.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
@ -1668,17 +1662,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
	 */
	if (channel->state != CHANNEL_OPENED_STATE) {
		ret = -EIO;
		goto cpu_store_unlock;
		goto end;
	}

	origin_cpu = channel->target_cpu;
	if (target_cpu == origin_cpu)
		goto cpu_store_unlock;
		goto end;

	if (vmbus_send_modifychannel(channel,
				     hv_cpu_number_to_vp_number(target_cpu))) {
		ret = -EIO;
		goto cpu_store_unlock;
		goto end;
	}

	/*
@ -1708,10 +1702,26 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
			    origin_cpu, target_cpu);
	}

cpu_store_unlock:
end:
	return ret;
}

static ssize_t target_cpu_store(struct vmbus_channel *channel,
				const char *buf, size_t count)
{
	u32 target_cpu;
	ssize_t ret;

	if (sscanf(buf, "%uu", &target_cpu) != 1)
		return -EIO;

	cpus_read_lock();
	mutex_lock(&vmbus_connection.channel_mutex);
	ret = vmbus_channel_set_cpu(channel, target_cpu);
	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();
	return ret;

	return ret ?: count;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);

@ -2659,7 +2669,7 @@ static int __init hv_acpi_init(void)
	if (!hv_is_hyperv_initialized())
		return -ENODEV;

	if (hv_root_partition && !hv_nested)
	if (hv_root_partition() && !hv_nested)
		return 0;

	/*

@ -130,7 +130,7 @@ static int __init hyperv_prepare_irq_remapping(void)
	    x86_init.hyper.msi_ext_dest_id())
		return -ENODEV;

	if (hv_root_partition) {
	if (hv_root_partition()) {
		name = "HYPERV-ROOT-IR";
		ops = &hyperv_root_ir_domain_ops;
	} else {
@ -151,7 +151,7 @@ static int __init hyperv_prepare_irq_remapping(void)
		return -ENOMEM;
	}

	if (hv_root_partition)
	if (hv_root_partition())
		return 0; /* The rest is only relevant to guests */

	/*
@ -217,7 +217,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
	status = hv_unmap_ioapic_interrupt(ioapic_id, &entry);

	if (status != HV_STATUS_SUCCESS)
		pr_debug("%s: unexpected unmap status %lld\n", __func__, status);
		hv_status_debug(status, "failed to unmap\n");

	data->entry.ioapic_rte.as_uint64 = 0;
	data->entry.source = 0; /* Invalid source */
@ -228,7 +228,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
					    vector, &entry);

	if (status != HV_STATUS_SUCCESS) {
		pr_err("%s: map hypercall failed, status %lld\n", __func__, status);
		hv_status_err(status, "map failed\n");
		return;
	}

@ -28,9 +28,15 @@

#define VTPM_BASE_ADDRESS 0xfed40000

enum hv_partition_type {
	HV_PARTITION_TYPE_GUEST,
	HV_PARTITION_TYPE_ROOT,
};

struct ms_hyperv_info {
	u32 features;
	u32 priv_high;
	u32 ext_features;
	u32 misc_features;
	u32 hints;
	u32 nested_features;
@ -58,15 +64,32 @@ struct ms_hyperv_info {
};
extern struct ms_hyperv_info ms_hyperv;
extern bool hv_nested;
extern u64 hv_current_partition_id;
extern enum hv_partition_type hv_curr_partition_type;

extern void * __percpu *hyperv_pcpu_input_arg;
extern void * __percpu *hyperv_pcpu_output_arg;

extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
u64 hv_do_fast_hypercall8(u16 control, u64 input8);
u64 hv_do_fast_hypercall16(u16 control, u64 input1, u64 input2);

bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);

/*
 * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
 * it doesn't provide a recommendation flag and AEOI must be disabled.
 */
static inline bool hv_recommend_using_aeoi(void)
{
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
	return !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
	return false;
#endif
}

static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
{
	struct hv_proximity_domain_info pxm_info = {};
@ -185,12 +208,11 @@ void hv_setup_kexec_handler(void (*handler)(void));
void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);
void hv_setup_mshv_handler(void (*handler)(void));

extern int vmbus_interrupt;
extern int vmbus_irq;

extern bool hv_root_partition;

#if IS_ENABLED(CONFIG_HYPERV)
/*
 * Hypervisor's notion of virtual processor ID is different from
@ -207,10 +229,12 @@ extern u64 (*hv_read_reference_counter)(void);
#define VP_INVAL	U32_MAX

int __init hv_common_init(void);
void __init hv_get_partition_id(void);
void __init hv_common_free(void);
void __init ms_hyperv_late_init(void);
int hv_common_cpu_init(unsigned int cpu);
int hv_common_cpu_die(unsigned int cpu);
void hv_identify_partition_type(void);

void *hv_alloc_hyperv_page(void);
void *hv_alloc_hyperv_zeroed_page(void);
@ -291,6 +315,20 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
	return __cpumask_to_vpset(vpset, cpus, func);
}

#define _hv_status_fmt(fmt) "%s: Hyper-V status: %#x = %s: " fmt
#define hv_status_printk(level, status, fmt, ...)			\
do {									\
	u64 __status = (status);					\
	pr_##level(_hv_status_fmt(fmt), __func__, hv_result(__status),	\
		   hv_result_to_string(__status), ##__VA_ARGS__);	\
} while (0)
#define hv_status_err(status, fmt, ...)		\
	hv_status_printk(err, status, fmt, ##__VA_ARGS__)
#define hv_status_debug(status, fmt, ...)	\
	hv_status_printk(debug, status, fmt, ##__VA_ARGS__)
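
Editor's note: these helpers centralize status logging at hypercall sites; the irq-remapping hunks earlier in this diff are converted to them. A brief hedged illustration of how a caller would use them (the hypercall and message are examples only, not part of this commit):

	/* Hypothetical illustration only -- not part of this commit. */
	u64 status = hv_do_hypercall(HVCALL_CREATE_PORT, input, NULL);

	if (!hv_result_success(status)) {
		/*
		 * Logs e.g. "caller: Hyper-V status: 0x3 =
		 * HV_STATUS_INVALID_HYPERCALL_INPUT: create port failed"
		 */
		hv_status_err(status, "create port failed\n");
		return hv_result_to_errno(status);
	}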

const char *hv_result_to_string(u64 hv_status);
int hv_result_to_errno(u64 status);
void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
@ -303,6 +341,7 @@ void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
void hv_setup_dma_ops(struct device *dev, bool coherent);
#else /* CONFIG_HYPERV */
static inline void hv_identify_partition_type(void) {}
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
static inline void hyperv_cleanup(void) {}
@ -314,4 +353,29 @@ static inline enum hv_isolation_type hv_get_isolation_type(void)
}
#endif /* CONFIG_HYPERV */

#if IS_ENABLED(CONFIG_MSHV_ROOT)
static inline bool hv_root_partition(void)
{
	return hv_curr_partition_type == HV_PARTITION_TYPE_ROOT;
}
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);

#else /* CONFIG_MSHV_ROOT */
static inline bool hv_root_partition(void) { return false; }
static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
{
	return -EOPNOTSUPP;
}
static inline int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id)
{
	return -EOPNOTSUPP;
}
static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_MSHV_ROOT */

#endif

@ -13,7 +13,7 @@ struct hv_u128 {
	u64 high_part;
} __packed;

/* NOTE: when adding below, update hv_status_to_string() */
/* NOTE: when adding below, update hv_result_to_string() */
#define HV_STATUS_SUCCESS			0x0
#define HV_STATUS_INVALID_HYPERCALL_CODE	0x2
#define HV_STATUS_INVALID_HYPERCALL_INPUT	0x3
@ -51,6 +51,7 @@ struct hv_u128 {
#define HV_HYP_PAGE_SHIFT	12
#define HV_HYP_PAGE_SIZE	BIT(HV_HYP_PAGE_SHIFT)
#define HV_HYP_PAGE_MASK	(~(HV_HYP_PAGE_SIZE - 1))
#define HV_HYP_LARGE_PAGE_SHIFT	21

#define HV_PARTITION_ID_INVALID	((u64)0)
#define HV_PARTITION_ID_SELF	((u64)-1)
@ -182,7 +183,7 @@ struct hv_tsc_emulation_control { /* HV_TSC_INVARIANT_CONTROL */

#endif /* CONFIG_X86 */

struct hv_get_partition_id { /* HV_OUTPUT_GET_PARTITION_ID */
struct hv_output_get_partition_id {
	u64 partition_id;
} __packed;

@ -204,7 +205,14 @@ union hv_reference_tsc_msr {
/* The number of vCPUs in one sparse bank */
#define HV_VCPUS_PER_SPARSE_BANK (64)

/* Some of Hyper-V structs do not use hv_vpset where linux uses them */
/*
 * Some of Hyper-V structs do not use hv_vpset where linux uses them.
 *
 * struct hv_vpset is usually used as part of hypercall input. The portion
 * that counts as "fixed size input header" vs. "variable size input header"
 * varies per hypercall. See comments at relevant hypercall call sites as to
 * how the "valid_bank_mask" field should be accounted.
 */
struct hv_vpset { /* HV_VP_SET */
	u64 format;
	u64 valid_bank_mask;
@ -374,6 +382,10 @@ union hv_hypervisor_version_info {
#define HV_SHARED_GPA_BOUNDARY_ACTIVE	BIT(5)
#define HV_SHARED_GPA_BOUNDARY_BITS	GENMASK(11, 6)

/* HYPERV_CPUID_FEATURES.ECX bits. */
#define HV_VP_DISPATCH_INTERRUPT_INJECTION_AVAILABLE	BIT(9)
#define HV_VP_GHCB_ROOT_MAPPING_AVAILABLE		BIT(10)

enum hv_isolation_type {
	HV_ISOLATION_TYPE_NONE = 0,	/* HV_PARTITION_ISOLATION_TYPE_NONE */
	HV_ISOLATION_TYPE_VBS = 1,
@ -436,10 +448,13 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_WITHDRAW_MEMORY			0x0049
#define HVCALL_MAP_GPA_PAGES			0x004b
#define HVCALL_UNMAP_GPA_PAGES			0x004c
#define HVCALL_INSTALL_INTERCEPT		0x004d
#define HVCALL_CREATE_VP			0x004e
#define HVCALL_DELETE_VP			0x004f
#define HVCALL_GET_VP_REGISTERS			0x0050
#define HVCALL_SET_VP_REGISTERS			0x0051
#define HVCALL_TRANSLATE_VIRTUAL_ADDRESS	0x0052
#define HVCALL_CLEAR_VIRTUAL_INTERRUPT		0x0056
#define HVCALL_DELETE_PORT			0x0058
#define HVCALL_DISCONNECT_PORT			0x005b
#define HVCALL_POST_MESSAGE			0x005c
@ -447,12 +462,15 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_POST_DEBUG_DATA			0x0069
#define HVCALL_RETRIEVE_DEBUG_DATA		0x006a
#define HVCALL_RESET_DEBUG_SESSION		0x006b
#define HVCALL_MAP_STATS_PAGE			0x006c
#define HVCALL_UNMAP_STATS_PAGE			0x006d
#define HVCALL_ADD_LOGICAL_PROCESSOR		0x0076
#define HVCALL_GET_SYSTEM_PROPERTY		0x007b
#define HVCALL_MAP_DEVICE_INTERRUPT		0x007c
#define HVCALL_UNMAP_DEVICE_INTERRUPT		0x007d
#define HVCALL_RETARGET_INTERRUPT		0x007e
#define HVCALL_NOTIFY_PORT_RING_EMPTY		0x008b
#define HVCALL_REGISTER_INTERCEPT_RESULT	0x0091
#define HVCALL_ASSERT_VIRTUAL_INTERRUPT		0x0094
#define HVCALL_CREATE_PORT			0x0095
#define HVCALL_CONNECT_PORT			0x0096
@ -460,12 +478,18 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_GET_VP_ID_FROM_APIC_ID		0x009a
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE	0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST	0x00b0
#define HVCALL_SIGNAL_EVENT_DIRECT		0x00c0
#define HVCALL_POST_MESSAGE_DIRECT		0x00c1
#define HVCALL_DISPATCH_VP			0x00c2
#define HVCALL_GET_GPA_PAGES_ACCESS_STATES	0x00c9
#define HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS	0x00d7
#define HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS	0x00d8
#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY	0x00db
#define HVCALL_MAP_VP_STATE_PAGE		0x00e1
#define HVCALL_UNMAP_VP_STATE_PAGE		0x00e2
#define HVCALL_GET_VP_STATE			0x00e3
#define HVCALL_SET_VP_STATE			0x00e4
#define HVCALL_GET_VP_CPUID_VALUES		0x00f4
#define HVCALL_MMIO_READ			0x0106
#define HVCALL_MMIO_WRITE			0x0107

@ -775,10 +799,10 @@ struct hv_message_page {

/* Define timer message payload structure. */
struct hv_timer_message_payload {
	__u32 timer_index;
	__u32 reserved;
	__u64 expiration_time;	/* When the timer expired */
	__u64 delivery_time;	/* When the message was delivered */
	u32 timer_index;
	u32 reserved;
	u64 expiration_time;	/* When the timer expired */
	u64 delivery_time;	/* When the message was delivered */
} __packed;

struct hv_x64_segment_register {
@ -807,6 +831,8 @@ struct hv_x64_table_register {
	u64 base;
} __packed;

#define HV_NORMAL_VTL	0

union hv_input_vtl {
	u8 as_uint8;
	struct {
@ -1325,6 +1351,49 @@ struct hv_retarget_device_interrupt { /* HV_INPUT_RETARGET_DEVICE_INTERRUPT */
	struct hv_device_interrupt_target int_target;
} __packed __aligned(8);

enum hv_intercept_type {
#if defined(CONFIG_X86)
	HV_INTERCEPT_TYPE_X64_IO_PORT	= 0x00000000,
	HV_INTERCEPT_TYPE_X64_MSR	= 0x00000001,
	HV_INTERCEPT_TYPE_X64_CPUID	= 0x00000002,
#endif
	HV_INTERCEPT_TYPE_EXCEPTION	= 0x00000003,
	/* Used to be HV_INTERCEPT_TYPE_REGISTER */
	HV_INTERCEPT_TYPE_RESERVED0	= 0x00000004,
	HV_INTERCEPT_TYPE_MMIO		= 0x00000005,
#if defined(CONFIG_X86)
	HV_INTERCEPT_TYPE_X64_GLOBAL_CPUID	= 0x00000006,
	HV_INTERCEPT_TYPE_X64_APIC_SMI		= 0x00000007,
#endif
	HV_INTERCEPT_TYPE_HYPERCALL	= 0x00000008,
#if defined(CONFIG_X86)
	HV_INTERCEPT_TYPE_X64_APIC_INIT_SIPI		= 0x00000009,
	HV_INTERCEPT_MC_UPDATE_PATCH_LEVEL_MSR_READ	= 0x0000000A,
	HV_INTERCEPT_TYPE_X64_APIC_WRITE		= 0x0000000B,
	HV_INTERCEPT_TYPE_X64_MSR_INDEX			= 0x0000000C,
#endif
	HV_INTERCEPT_TYPE_MAX,
	HV_INTERCEPT_TYPE_INVALID = 0xFFFFFFFF,
};

union hv_intercept_parameters {
	/* HV_INTERCEPT_PARAMETERS is defined to be an 8-byte field. */
	u64 as_uint64;
#if defined(CONFIG_X86)
	/* HV_INTERCEPT_TYPE_X64_IO_PORT */
	u16 io_port;
	/* HV_INTERCEPT_TYPE_X64_CPUID */
	u32 cpuid_index;
	/* HV_INTERCEPT_TYPE_X64_APIC_WRITE */
	u32 apic_write_mask;
	/* HV_INTERCEPT_TYPE_EXCEPTION */
	u16 exception_vector;
	/* HV_INTERCEPT_TYPE_X64_MSR_INDEX */
	u32 msr_index;
#endif
	/* N.B. Other intercept types do not have any parameters. */
};

/* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */
#define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64

@ -19,11 +19,24 @@

#define HV_VP_REGISTER_PAGE_VERSION_1	1u

#define HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT	7

union hv_vp_register_page_interrupt_vectors {
	u64 as_uint64;
	struct {
		u8 vector_count;
		u8 vector[HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT];
	} __packed;
};

struct hv_vp_register_page {
	u16 version;
	u8 isvalid;
	u8 rsvdz;
	u32 dirty;

#if IS_ENABLED(CONFIG_X86)

	union {
		struct {
			/* General purpose registers
@ -95,6 +108,22 @@ struct hv_vp_register_page {
	union hv_x64_pending_interruption_register pending_interruption;
	union hv_x64_interrupt_state_register interrupt_state;
	u64 instruction_emulation_hints;
	u64 xfem;

	/*
	 * Fields from this point are not included in the register page save chunk.
	 * The reserved field is intended to maintain alignment for unsaved fields.
	 */
	u8 reserved1[0x100];

	/*
	 * Interrupts injected as part of HvCallDispatchVp.
	 */
	union hv_vp_register_page_interrupt_vectors interrupt_vectors;

#elif IS_ENABLED(CONFIG_ARM64)
	/* Not yet supported in ARM */
#endif
} __packed;

#define HV_PARTITION_PROCESSOR_FEATURES_BANKS 2
@ -299,10 +328,11 @@ union hv_partition_isolation_properties {
#define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED	0x2

/* Note: Exo partition is enabled by default */
#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION	BIT(8)
#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED	BIT(13)
#define HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED	BIT(19)
#define HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE	BIT(22)
#define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED	BIT(4)
#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION		BIT(8)
#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED		BIT(13)
#define HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED	BIT(19)
#define HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE		BIT(22)

struct hv_input_create_partition {
	u64 flags;
@ -349,13 +379,23 @@ struct hv_input_set_partition_property {
enum hv_vp_state_page_type {
	HV_VP_STATE_PAGE_REGISTERS = 0,
	HV_VP_STATE_PAGE_INTERCEPT_MESSAGE = 1,
	HV_VP_STATE_PAGE_GHCB = 2,
	HV_VP_STATE_PAGE_COUNT
};

struct hv_input_map_vp_state_page {
	u64 partition_id;
	u32 vp_index;
	u32 type; /* enum hv_vp_state_page_type */
	u16 type; /* enum hv_vp_state_page_type */
	union hv_input_vtl input_vtl;
	union {
		u8 as_uint8;
		struct {
			u8 map_location_provided : 1;
			u8 reserved : 7;
		};
	} flags;
	u64 requested_map_location;
} __packed;

struct hv_output_map_vp_state_page {
@ -365,7 +405,14 @@ struct hv_output_map_vp_state_page {
struct hv_input_unmap_vp_state_page {
	u64 partition_id;
	u32 vp_index;
	u32 type; /* enum hv_vp_state_page_type */
	u16 type; /* enum hv_vp_state_page_type */
	union hv_input_vtl input_vtl;
	u8 reserved0;
} __packed;

struct hv_x64_apic_eoi_message {
	u32 vp_index;
	u32 interrupt_vector;
} __packed;

struct hv_opaque_intercept_message {
@ -515,6 +562,13 @@ struct hv_synthetic_timers_state {
	u64 reserved[5];
} __packed;

struct hv_async_completion_message_payload {
	u64 partition_id;
	u32 status;
	u32 completion_count;
	u64 sub_status;
} __packed;

union hv_input_delete_vp {
	u64 as_uint64[2];
	struct {
@ -649,6 +703,57 @@ struct hv_input_set_vp_state {
	union hv_input_set_vp_state_data data[];
} __packed;

union hv_x64_vp_execution_state {
	u16 as_uint16;
	struct {
		u16 cpl:2;
		u16 cr0_pe:1;
		u16 cr0_am:1;
		u16 efer_lma:1;
		u16 debug_active:1;
		u16 interruption_pending:1;
		u16 vtl:4;
		u16 enclave_mode:1;
		u16 interrupt_shadow:1;
		u16 virtualization_fault_active:1;
		u16 reserved:2;
	} __packed;
};

struct hv_x64_intercept_message_header {
	u32 vp_index;
	u8 instruction_length:4;
	u8 cr8:4; /* Only set for exo partitions */
	u8 intercept_access_type;
	union hv_x64_vp_execution_state execution_state;
	struct hv_x64_segment_register cs_segment;
	u64 rip;
	u64 rflags;
} __packed;

union hv_x64_memory_access_info {
	u8 as_uint8;
	struct {
		u8 gva_valid:1;
		u8 gva_gpa_valid:1;
		u8 hypercall_output_pending:1;
		u8 tlb_locked_no_overlay:1;
		u8 reserved:4;
	} __packed;
};

struct hv_x64_memory_intercept_message {
	struct hv_x64_intercept_message_header header;
	u32 cache_type; /* enum hv_cache_type */
	u8 instruction_byte_count;
	union hv_x64_memory_access_info memory_access_info;
	u8 tpr_priority;
	u8 reserved1;
	u64 guest_virtual_address;
	u64 guest_physical_address;
	u8 instruction_bytes[16];
} __packed;

/*
 * Dispatch state for the VP communicated by the hypervisor to the
 * VP-dispatching thread in the root on return from HVCALL_DISPATCH_VP.
@ -716,6 +821,7 @@ static_assert(sizeof(struct hv_vp_signal_pair_scheduler_message) ==
#define HV_DISPATCH_VP_FLAG_SKIP_VP_SPEC_FLUSH		0x8
#define HV_DISPATCH_VP_FLAG_SKIP_CALLER_SPEC_FLUSH	0x10
#define HV_DISPATCH_VP_FLAG_SKIP_CALLER_USER_SPEC_FLUSH	0x20
#define HV_DISPATCH_VP_FLAG_SCAN_INTERRUPT_INJECTION	0x40

struct hv_input_dispatch_vp {
	u64 partition_id;
@ -730,4 +836,18 @@ struct hv_output_dispatch_vp {
	u32 dispatch_event; /* enum hv_vp_dispatch_event */
} __packed;

struct hv_input_modify_sparse_spa_page_host_access {
	u32 host_access : 2;
	u32 reserved : 30;
	u32 flags;
	u64 partition_id;
	u64 spa_page_list[];
} __packed;

/* hv_input_modify_sparse_spa_page_host_access flags */
#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE	0x1
#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED	0x2
#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE	0x4
#define HV_MODIFY_SPA_PAGE_HOST_ACCESS_HUGE_PAGE	0x8

#endif /* _HV_HVHDK_H */

@ -36,6 +36,52 @@ enum hv_scheduler_type {
	HV_SCHEDULER_TYPE_MAX
};

/* HV_STATS_AREA_TYPE */
enum hv_stats_area_type {
	HV_STATS_AREA_SELF = 0,
	HV_STATS_AREA_PARENT = 1,
	HV_STATS_AREA_INTERNAL = 2,
	HV_STATS_AREA_COUNT
};

enum hv_stats_object_type {
	HV_STATS_OBJECT_HYPERVISOR		= 0x00000001,
	HV_STATS_OBJECT_LOGICAL_PROCESSOR	= 0x00000002,
	HV_STATS_OBJECT_PARTITION		= 0x00010001,
	HV_STATS_OBJECT_VP			= 0x00010002
};

union hv_stats_object_identity {
	/* hv_stats_hypervisor */
	struct {
		u8 reserved[15];
		u8 stats_area_type;
	} __packed hv;

	/* hv_stats_logical_processor */
	struct {
		u32 lp_index;
		u8 reserved[11];
		u8 stats_area_type;
	} __packed lp;

	/* hv_stats_partition */
	struct {
		u64 partition_id;
		u8 reserved[7];
		u8 stats_area_type;
	} __packed partition;

	/* hv_stats_vp */
	struct {
		u64 partition_id;
		u32 vp_index;
		u16 flags;
		u8 reserved;
		u8 stats_area_type;
	} __packed vp;
};

enum hv_partition_property_code {
	/* Privilege properties */
	HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000,
@ -47,19 +93,45 @@ enum hv_partition_property_code {

	/* Compatibility properties */
	HV_PARTITION_PROPERTY_PROCESSOR_XSAVE_FEATURES	= 0x00060002,
	HV_PARTITION_PROPERTY_XSAVE_STATES		= 0x00060007,
	HV_PARTITION_PROPERTY_MAX_XSAVE_DATA_SIZE	= 0x00060008,
	HV_PARTITION_PROPERTY_PROCESSOR_CLOCK_FREQUENCY	= 0x00060009,
};

enum hv_snp_status {
	HV_SNP_STATUS_NONE = 0,
	HV_SNP_STATUS_AVAILABLE = 1,
	HV_SNP_STATUS_INCOMPATIBLE = 2,
	HV_SNP_STATUS_PSP_UNAVAILABLE = 3,
	HV_SNP_STATUS_PSP_INIT_FAILED = 4,
	HV_SNP_STATUS_PSP_BAD_FW_VERSION = 5,
	HV_SNP_STATUS_BAD_CONFIGURATION = 6,
	HV_SNP_STATUS_PSP_FW_UPDATE_IN_PROGRESS = 7,
	HV_SNP_STATUS_PSP_RB_INIT_FAILED = 8,
	HV_SNP_STATUS_PSP_PLATFORM_STATUS_FAILED = 9,
	HV_SNP_STATUS_PSP_INIT_LATE_FAILED = 10,
};

enum hv_system_property {
	/* Add more values when needed */
	HV_SYSTEM_PROPERTY_SCHEDULER_TYPE = 15,
	HV_DYNAMIC_PROCESSOR_FEATURE_PROPERTY = 21,
};

enum hv_dynamic_processor_feature_property {
	/* Add more values when needed */
	HV_X64_DYNAMIC_PROCESSOR_FEATURE_MAX_ENCRYPTED_PARTITIONS = 13,
	HV_X64_DYNAMIC_PROCESSOR_FEATURE_SNP_STATUS = 16,
};

struct hv_input_get_system_property {
	u32 property_id; /* enum hv_system_property */
	union {
		u32 as_uint32;
#if IS_ENABLED(CONFIG_X86)
		/* enum hv_dynamic_processor_feature_property */
		u32 hv_processor_feature;
#endif
		/* More fields to be filled in when needed */
	};
} __packed;
@ -67,9 +139,28 @@ struct hv_input_get_system_property {
struct hv_output_get_system_property {
	union {
		u32 scheduler_type; /* enum hv_scheduler_type */
#if IS_ENABLED(CONFIG_X86)
		u64 hv_processor_feature_value;
#endif
	};
} __packed;

struct hv_input_map_stats_page {
	u32 type; /* enum hv_stats_object_type */
	u32 padding;
	union hv_stats_object_identity identity;
} __packed;

struct hv_output_map_stats_page {
	u64 map_location;
} __packed;

struct hv_input_unmap_stats_page {
	u32 type; /* enum hv_stats_object_type */
	u32 padding;
	union hv_stats_object_identity identity;
} __packed;

struct hv_proximity_domain_flags {
	u32 proximity_preferred : 1;
	u32 reserved : 30;

@ -371,19 +371,6 @@ struct vmtransfer_page_packet_header {
struct vmtransfer_page_range ranges[];
|
||||
} __packed;
|
||||
|
||||
struct vmgpadl_packet_header {
|
||||
struct vmpacket_descriptor d;
|
||||
u32 gpadl;
|
||||
u32 reserved;
|
||||
} __packed;
|
||||
|
||||
struct vmadd_remove_transfer_page_set {
|
||||
struct vmpacket_descriptor d;
|
||||
u32 gpadl;
|
||||
u16 xfer_pageset_id;
|
||||
u16 reserved;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* This structure defines a range in guest physical space that can be made to
|
||||
* look virtually contiguous.
|
||||
@ -394,30 +381,6 @@ struct gpa_range {
|
||||
u64 pfn_array[];
|
||||
};
|
||||
|
||||
/*
|
||||
* This is the format for an Establish Gpadl packet, which contains a handle by
|
||||
* which this GPADL will be known and a set of GPA ranges associated with it.
|
||||
* This can be converted to a MDL by the guest OS. If there are multiple GPA
|
||||
* ranges, then the resulting MDL will be "chained," representing multiple VA
|
||||
* ranges.
|
||||
*/
|
||||
struct vmestablish_gpadl {
|
||||
struct vmpacket_descriptor d;
|
||||
u32 gpadl;
|
||||
u32 range_cnt;
|
||||
struct gpa_range range[1];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* This is the format for a Teardown Gpadl packet, which indicates that the
|
||||
* GPADL handle in the Establish Gpadl packet will never be referenced again.
|
||||
*/
|
||||
struct vmteardown_gpadl {
|
||||
struct vmpacket_descriptor d;
|
||||
u32 gpadl;
|
||||
u32 reserved; /* for alignment to a 8-byte boundary */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* This is the format for a GPA-Direct packet, which contains a set of GPA
|
||||
* ranges, in addition to commands and/or data.
|
||||
@ -429,25 +392,6 @@ struct vmdata_gpa_direct {
|
||||
struct gpa_range range[1];
|
||||
} __packed;
|
||||
|
||||
/* This is the format for a Additional Data Packet. */
|
||||
struct vmadditional_data {
|
||||
struct vmpacket_descriptor d;
|
||||
u64 total_bytes;
|
||||
u32 offset;
|
||||
u32 byte_cnt;
|
||||
unsigned char data[1];
|
||||
} __packed;
|
||||
|
||||
union vmpacket_largest_possible_header {
|
||||
struct vmpacket_descriptor simple_hdr;
|
||||
struct vmtransfer_page_packet_header xfer_page_hdr;
|
||||
struct vmgpadl_packet_header gpadl_hdr;
|
||||
struct vmadd_remove_transfer_page_set add_rm_xfer_page_hdr;
|
||||
struct vmestablish_gpadl establish_gpadl_hdr;
|
||||
struct vmteardown_gpadl teardown_gpadl_hdr;
|
||||
struct vmdata_gpa_direct data_gpa_direct_hdr;
|
||||
};
|
||||
|
||||
#define VMPACKET_DATA_START_ADDRESS(__packet) \
|
||||
(void *)(((unsigned char *)__packet) + \
|
||||
((struct vmpacket_descriptor)__packet)->offset8 * 8)
|
||||
@ -1661,6 +1605,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
|
||||
const guid_t *shv_host_servie_id);
|
||||
int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp);
|
||||
void vmbus_set_event(struct vmbus_channel *channel);
|
||||
int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu);
|
||||
|
||||
/* Get the start of the ring buffer. */
|
||||
static inline void *
|
||||
|

291
include/uapi/linux/mshv.h
Normal file
@ -0,0 +1,291 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 * Userspace interfaces for /dev/mshv* devices and derived fds
 *
 * This file is divided into sections containing data structures and IOCTLs for
 * a particular set of related devices or derived file descriptors.
 *
 * The IOCTL definitions are at the end of each section. They are grouped by
 * device/fd, so that new IOCTLs can easily be added with a monotonically
 * increasing number.
 */
#ifndef _UAPI_LINUX_MSHV_H
#define _UAPI_LINUX_MSHV_H

#include <linux/types.h>

#define MSHV_IOCTL 0xB8

/*
 *******************************************
 * Entry point to main VMM APIs: /dev/mshv *
 *******************************************
 */

enum {
	MSHV_PT_BIT_LAPIC,
	MSHV_PT_BIT_X2APIC,
	MSHV_PT_BIT_GPA_SUPER_PAGES,
	MSHV_PT_BIT_COUNT,
};

#define MSHV_PT_FLAGS_MASK	((1 << MSHV_PT_BIT_COUNT) - 1)

enum {
	MSHV_PT_ISOLATION_NONE,
	MSHV_PT_ISOLATION_COUNT,
};

/**
 * struct mshv_create_partition - arguments for MSHV_CREATE_PARTITION
 * @pt_flags: Bitmask of 1 << MSHV_PT_BIT_*
 * @pt_isolation: MSHV_PT_ISOLATION_*
 *
 * Returns a file descriptor to act as a handle to a guest partition.
 * At this point the partition is not yet initialized in the hypervisor.
 * Some operations must be done with the partition in this state, e.g. setting
 * so-called "early" partition properties. The partition can then be
 * initialized with MSHV_INITIALIZE_PARTITION.
 */
struct mshv_create_partition {
	__u64 pt_flags;
	__u64 pt_isolation;
};

/* /dev/mshv */
#define MSHV_CREATE_PARTITION	_IOW(MSHV_IOCTL, 0x00, struct mshv_create_partition)

/*
 ************************
 * Child partition APIs *
 ************************
 */

struct mshv_create_vp {
	__u32 vp_index;
};

enum {
	MSHV_SET_MEM_BIT_WRITABLE,
	MSHV_SET_MEM_BIT_EXECUTABLE,
	MSHV_SET_MEM_BIT_UNMAP,
	MSHV_SET_MEM_BIT_COUNT
};

#define MSHV_SET_MEM_FLAGS_MASK	((1 << MSHV_SET_MEM_BIT_COUNT) - 1)

/* The hypervisor's "native" page size */
#define MSHV_HV_PAGE_SIZE	0x1000

/**
 * struct mshv_user_mem_region - arguments for MSHV_SET_GUEST_MEMORY
 * @size: Size of the memory region (bytes). Must be aligned to
 *        MSHV_HV_PAGE_SIZE
 * @guest_pfn: Base guest page number to map
 * @userspace_addr: Base address of userspace memory. Must be aligned to
 *                  MSHV_HV_PAGE_SIZE
 * @flags: Bitmask of 1 << MSHV_SET_MEM_BIT_*. If (1 << MSHV_SET_MEM_BIT_UNMAP)
 *         is set, ignore other bits.
 * @rsvd: MBZ
 *
 * Map or unmap a region of userspace memory to Guest Physical Addresses (GPA).
 * Mappings can't overlap in GPA space or userspace.
 * To unmap, these fields must match an existing mapping.
 */
struct mshv_user_mem_region {
	__u64 size;
	__u64 guest_pfn;
	__u64 userspace_addr;
	__u8 flags;
	__u8 rsvd[7];
};
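
Continuing the sketch above, backing the first 16 MiB of guest physical
address space with anonymous memory might look like this (mmap() returns
page-aligned memory, which satisfies the alignment rules):

	#include <sys/mman.h>

	size_t sz = 16u << 20;
	void *mem = mmap(NULL, sz, PROT_READ | PROT_WRITE,
			 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	struct mshv_user_mem_region region = {
		.size = sz,
		.guest_pfn = 0,			/* map at GPA 0 */
		.userspace_addr = (__u64)(uintptr_t)mem,
		.flags = (1 << MSHV_SET_MEM_BIT_WRITABLE) |
			 (1 << MSHV_SET_MEM_BIT_EXECUTABLE),
	};
	ioctl(pt_fd, MSHV_SET_GUEST_MEMORY, &region);	/* defined below */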

enum {
	MSHV_IRQFD_BIT_DEASSIGN,
	MSHV_IRQFD_BIT_RESAMPLE,
	MSHV_IRQFD_BIT_COUNT,
};

#define MSHV_IRQFD_FLAGS_MASK	((1 << MSHV_IRQFD_BIT_COUNT) - 1)

struct mshv_user_irqfd {
	__s32 fd;
	__s32 resamplefd;
	__u32 gsi;
	__u32 flags;
};

enum {
	MSHV_IOEVENTFD_BIT_DATAMATCH,
	MSHV_IOEVENTFD_BIT_PIO,
	MSHV_IOEVENTFD_BIT_DEASSIGN,
	MSHV_IOEVENTFD_BIT_COUNT,
};

#define MSHV_IOEVENTFD_FLAGS_MASK	((1 << MSHV_IOEVENTFD_BIT_COUNT) - 1)

struct mshv_user_ioeventfd {
	__u64 datamatch;
	__u64 addr;	/* legal pio/mmio address */
	__u32 len;	/* 1, 2, 4, or 8 bytes */
	__s32 fd;
	__u32 flags;
	__u8 rsvd[4];
};
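
As a sketch, assigning an eventfd so that a write to it injects GSI 5 into the
partition (resamplefd should only matter when MSHV_IRQFD_BIT_RESAMPLE is set;
leaving it at -1 here is a defensive assumption, not a documented requirement):

	#include <sys/eventfd.h>

	int efd = eventfd(0, EFD_CLOEXEC);
	struct mshv_user_irqfd irqfd = {
		.fd = efd,
		.resamplefd = -1,
		.gsi = 5,
		.flags = 0,		/* assign, no resampling */
	};
	ioctl(pt_fd, MSHV_IRQFD, &irqfd);	/* defined below */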

struct mshv_user_irq_entry {
	__u32 gsi;
	__u32 address_lo;
	__u32 address_hi;
	__u32 data;
};

struct mshv_user_irq_table {
	__u32 nr;
	__u32 rsvd; /* MBZ */
	struct mshv_user_irq_entry entries[];
};
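
Since entries[] is a flexible array, the whole table travels as one
variable-sized allocation. For example (the address/data values are
illustrative x86 MSI encodings):

	#include <stdlib.h>

	__u32 nr = 1;
	struct mshv_user_irq_table *tbl =
		calloc(1, sizeof(*tbl) + nr * sizeof(tbl->entries[0]));

	tbl->nr = nr;
	tbl->entries[0].gsi = 5;
	tbl->entries[0].address_lo = 0xfee00000;	/* x86 MSI window */
	tbl->entries[0].data = 0x0041;			/* vector 0x41 */
	ioctl(pt_fd, MSHV_SET_MSI_ROUTING, tbl);	/* defined below */
	free(tbl);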

enum {
	MSHV_GPAP_ACCESS_TYPE_ACCESSED,
	MSHV_GPAP_ACCESS_TYPE_DIRTY,
	MSHV_GPAP_ACCESS_TYPE_COUNT	/* Count of enum members */
};

enum {
	MSHV_GPAP_ACCESS_OP_NOOP,
	MSHV_GPAP_ACCESS_OP_CLEAR,
	MSHV_GPAP_ACCESS_OP_SET,
	MSHV_GPAP_ACCESS_OP_COUNT	/* Count of enum members */
};

/**
 * struct mshv_gpap_access_bitmap - arguments for MSHV_GET_GPAP_ACCESS_BITMAP
 * @access_type: MSHV_GPAP_ACCESS_TYPE_* - The type of access to record in the
 *               bitmap
 * @access_op: MSHV_GPAP_ACCESS_OP_* - Allows an optional clear or set of all
 *             the access states in the range, after retrieving the current
 *             states.
 * @rsvd: MBZ
 * @page_count: Number of pages
 * @gpap_base: Base gpa page number
 * @bitmap_ptr: Output buffer for bitmap, at least (page_count + 7) / 8 bytes
 *
 * Retrieve a bitmap of either ACCESSED or DIRTY bits for a given range of guest
 * memory, and optionally clear or set the bits.
 */
struct mshv_gpap_access_bitmap {
	__u8 access_type;
	__u8 access_op;
	__u8 rsvd[6];
	__u64 page_count;
	__u64 gpap_base;
	__u64 bitmap_ptr;
};
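
A dirty-logging pass over the 16 MiB region mapped earlier can then fetch and
clear the dirty bits in a single call (a sketch, continuing the examples
above):

	__u64 pages = sz / MSHV_HV_PAGE_SIZE;
	__u8 *bitmap = calloc(1, (pages + 7) / 8);
	struct mshv_gpap_access_bitmap dirty = {
		.access_type = MSHV_GPAP_ACCESS_TYPE_DIRTY,
		.access_op = MSHV_GPAP_ACCESS_OP_CLEAR,
		.page_count = pages,
		.gpap_base = 0,
		.bitmap_ptr = (__u64)(uintptr_t)bitmap,
	};
	ioctl(pt_fd, MSHV_GET_GPAP_ACCESS_BITMAP, &dirty);
	/* bit n of bitmap now reflects guest page (gpap_base + n) */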

/**
 * struct mshv_root_hvcall - arguments for MSHV_ROOT_HVCALL
 * @code: Hypercall code (HVCALL_*)
 * @reps: in: Rep count ('repcount')
 *        out: Reps completed ('repcomp'). MBZ unless rep hvcall
 * @in_sz: Size of input incl rep data. <= MSHV_HV_PAGE_SIZE
 * @out_sz: Size of output buffer. <= MSHV_HV_PAGE_SIZE. MBZ if out_ptr is 0
 * @status: in: MBZ
 *          out: HV_STATUS_* from hypercall
 * @rsvd: MBZ
 * @in_ptr: Input data buffer (struct hv_input_*). If used with partition or
 *          vp fd, partition id field is populated by kernel.
 * @out_ptr: Output data buffer (optional)
 */
struct mshv_root_hvcall {
	__u16 code;
	__u16 reps;
	__u16 in_sz;
	__u16 out_sz;
	__u16 status;
	__u8 rsvd[6];
	__u64 in_ptr;
	__u64 out_ptr;
};

/* Partition fds created with MSHV_CREATE_PARTITION */
#define MSHV_INITIALIZE_PARTITION	_IO(MSHV_IOCTL, 0x00)
#define MSHV_CREATE_VP			_IOW(MSHV_IOCTL, 0x01, struct mshv_create_vp)
#define MSHV_SET_GUEST_MEMORY		_IOW(MSHV_IOCTL, 0x02, struct mshv_user_mem_region)
#define MSHV_IRQFD			_IOW(MSHV_IOCTL, 0x03, struct mshv_user_irqfd)
#define MSHV_IOEVENTFD			_IOW(MSHV_IOCTL, 0x04, struct mshv_user_ioeventfd)
#define MSHV_SET_MSI_ROUTING		_IOW(MSHV_IOCTL, 0x05, struct mshv_user_irq_table)
#define MSHV_GET_GPAP_ACCESS_BITMAP	_IOWR(MSHV_IOCTL, 0x06, struct mshv_gpap_access_bitmap)
/* Generic hypercall */
#define MSHV_ROOT_HVCALL		_IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall)
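
MSHV_ROOT_HVCALL passes a nearly raw hypercall through to the hypervisor. A
sketch of a simple non-rep call follows; the code 0x0044 is
HvCallGetPartitionProperty in the TLFS, the input layout is an assumption
based on the hypervisor headers, and as noted above the kernel overwrites the
partition id field:

	#include <stdio.h>

	struct {
		__u64 partition_id;	/* populated by the kernel */
		__u32 property_code;	/* a HV_PARTITION_PROPERTY_* value */
		__u32 padding;
	} in = { 0 };
	__u64 value = 0;

	struct mshv_root_hvcall call = {
		.code = 0x0044,		/* HvCallGetPartitionProperty */
		.in_sz = sizeof(in),
		.in_ptr = (__u64)(uintptr_t)&in,
		.out_sz = sizeof(value),
		.out_ptr = (__u64)(uintptr_t)&value,
	};
	if (ioctl(pt_fd, MSHV_ROOT_HVCALL, &call) < 0)
		fprintf(stderr, "hvcall 0x%x: HV status %u\n",
			call.code, call.status);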

/*
 ********************************
 * VP APIs for child partitions *
 ********************************
 */

#define MSHV_RUN_VP_BUF_SZ 256

/*
 * VP state pages may be mapped to userspace via mmap().
 * To specify which state page, use MSHV_VP_MMAP_OFFSET_ values multiplied by
 * the system page size.
 * e.g.
 * long page_size = sysconf(_SC_PAGE_SIZE);
 * void *reg_page = mmap(NULL, MSHV_HV_PAGE_SIZE, PROT_READ|PROT_WRITE,
 *                       MAP_SHARED, vp_fd,
 *                       MSHV_VP_MMAP_OFFSET_REGISTERS * page_size);
 */
enum {
	MSHV_VP_MMAP_OFFSET_REGISTERS,
	MSHV_VP_MMAP_OFFSET_INTERCEPT_MESSAGE,
	MSHV_VP_MMAP_OFFSET_GHCB,
	MSHV_VP_MMAP_OFFSET_COUNT
};

/**
 * struct mshv_run_vp - argument for MSHV_RUN_VP
 * @msg_buf: On success, the intercept message is copied here. It can be
 *           interpreted using the relevant hypervisor definitions.
 */
struct mshv_run_vp {
	__u8 msg_buf[MSHV_RUN_VP_BUF_SZ];
};
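
Tying it together, a VMM's inner loop creates a VP and runs it until an
intercept needs attention (a sketch; handle_intercept() is a hypothetical VMM
function that decodes msg_buf using the hypervisor's message definitions):

	struct mshv_create_vp vp_args = { .vp_index = 0 };
	int vp_fd = ioctl(pt_fd, MSHV_CREATE_VP, &vp_args);

	struct mshv_run_vp run;
	for (;;) {
		if (ioctl(vp_fd, MSHV_RUN_VP, &run) < 0)	/* defined below */
			break;
		handle_intercept(run.msg_buf);		/* hypothetical */
	}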

enum {
	MSHV_VP_STATE_LAPIC,		/* Local interrupt controller state (either arch) */
	MSHV_VP_STATE_XSAVE,		/* XSAVE data in compacted form (x86_64) */
	MSHV_VP_STATE_SIMP,
	MSHV_VP_STATE_SIEFP,
	MSHV_VP_STATE_SYNTHETIC_TIMERS,
	MSHV_VP_STATE_COUNT,
};

/**
 * struct mshv_get_set_vp_state - arguments for MSHV_[GET,SET]_VP_STATE
 * @type: MSHV_VP_STATE_*
 * @rsvd: MBZ
 * @buf_sz: in: 4k page-aligned size of buffer
 *          out: Actual size of data (on EINVAL, check this to see if buffer
 *               was too small)
 * @buf_ptr: 4k page-aligned data buffer
 */
struct mshv_get_set_vp_state {
	__u8 type;
	__u8 rsvd[3];
	__u32 buf_sz;
	__u64 buf_ptr;
};

/* VP fds created with MSHV_CREATE_VP */
#define MSHV_RUN_VP			_IOR(MSHV_IOCTL, 0x00, struct mshv_run_vp)
#define MSHV_GET_VP_STATE		_IOWR(MSHV_IOCTL, 0x01, struct mshv_get_set_vp_state)
#define MSHV_SET_VP_STATE		_IOWR(MSHV_IOCTL, 0x02, struct mshv_get_set_vp_state)
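
The in/out buf_sz convention supports a probe-and-retry pattern, e.g. (a
sketch; aligned_alloc() provides the 4k alignment, error handling elided):

	#include <errno.h>

	__u32 want = 0x1000;
	struct mshv_get_set_vp_state st = {
		.type = MSHV_VP_STATE_LAPIC,
		.buf_sz = want,
		.buf_ptr = (__u64)(uintptr_t)aligned_alloc(0x1000, want),
	};
	if (ioctl(vp_fd, MSHV_GET_VP_STATE, &st) < 0 && errno == EINVAL) {
		/* st.buf_sz now reports the required size: grow and retry */
	}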

/*
 * Generic hypercall
 * Defined above in partition IOCTLs, avoid redefining it here
 * #define MSHV_ROOT_HVCALL _IOWR(MSHV_IOCTL, 0x07, struct mshv_root_hvcall)
 */

#endif

@ -526,6 +526,7 @@ void lockdep_assert_cpus_held(void)

	percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(lockdep_assert_cpus_held);

#ifdef CONFIG_LOCKDEP
int lockdep_is_cpus_held(void)