Updates for UML for this cycle, notably:

- proper nofault accesses and read-only rodata
- hostfs fix for host inode number reuse
- fixes for host errno handling
- various cleanups/small fixes
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEpeA8sTs3M8SN2hR410qiO8sPaAAFAmfs6R0ACgkQ10qiO8sP
 aAC6VxAAoIqPoyeOqH5DRnHQZQuZOE2lelhKCKckH3xg8peXqHdBl5Mx3kE2j851
 S+ufysSJkbZIYfPw/B59ZzpQg7d1JmrCoDCvleH/LdWhi6sosqe9F7Zwxk2p9STA
 swCifnACaIZc0iir70MrCAd/1/bBZq7PG8BWO1XFDrqNBLBGQxtSiMCmzBWsKjuj
 33uocQMi5PvVikBRxfz2uo4PJuJumhGs3sWAoFlA61ogHP4JrwKjW/HvuHJGaHKV
 YgaObr/JPhDkbGn7bXdQpLT+Qz7FwBeZFt9AUHOk+IibcwQY126ArXD11zzAAPkT
 3Q9H8eNV+MpieGtpA2+3Gwe//QsNjEOj3ACfV+S7veQ0Vxk+Bd/wMSDBKLF+z71g
 qpFqFeO0wS/XmwFI6RVN+GW6rZZ6mR3c7r/5mtAOa5+iJTnqDadyE/4oouQht2of
 IrS4LugnTB0KCgRZZDmtTOFT8lGOjey3e+AO42Qi+Z64oolI6zKUakTdBWvywmk4
 V9w9OUmEZAy64a0luvavYfxx+6WoTHURyQ/L99Ysk6ns9BrUk7U+hpfsLZBWZiyT
 3jfOlRGgt4N7iHaVqQwB6l6/Q/FtrdVK7SrTtsGzURhuCSy3SP0HZxWh9qhaL/2j
 Af8Qz5OAOEBYmTaN9lLYsHXp02NyM+4hlsR1DvEcNGuWQsyepIw=
 =MSjq
 -----END PGP SIGNATURE-----

Merge tag 'uml-for-linux-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Johannes Berg:

 - proper nofault accesses and read-only rodata

 - hostfs fix for host inode number reuse

 - fixes for host errno handling

 - various cleanups/small fixes

* tag 'uml-for-linux-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux:
  um: Rewrite the sigio workaround based on epoll and tgkill
  um: Prohibit the VM_CLONE flag in run_helper_thread()
  um: Switch to the pthread-based helper in sigio workaround
  um: ubd: Switch to the pthread-based helper
  um: Add pthread-based helper support
  um: x86: clean up elf specific definitions
  um: Store full CSGSFS and SS register from mcontext
  um: virt-pci: Refactor virtio_pcidev into its own module
  um: work around sched_yield not yielding in time-travel mode
  um/locking: Remove semicolon from "lock" prefix
  um: Update min_low_pfn to match changes in uml_reserved
  um: use str_yes_no() to remove hardcoded "yes" and "no"
  um: hostfs: avoid issues on inode number reuse by host
  um: Allocate vdso page pointer statically
  um: remove copy_from_kernel_nofault_allowed
  um: mark rodata read-only and implement _nofault accesses
  um: Pass the correct Rust target and options with gcc
This commit is contained in:
Linus Torvalds 2025-04-02 12:25:03 -07:00
commit 8a6b94032e
44 changed files with 1146 additions and 1136 deletions

View File

@ -12,6 +12,7 @@ config UML
select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
select ARCH_HAS_STRICT_KERNEL_RWX
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN

View File

@ -345,16 +345,20 @@ config UML_RTC
by providing a fake RTC clock that causes a wakeup at the right
time.
config UML_PCI_OVER_VIRTIO
bool "Enable PCI over VIRTIO device simulation"
# in theory, just VIRTIO is enough, but that causes recursion
depends on VIRTIO_UML
config UML_PCI
bool
select FORCE_PCI
select UML_IOMEM_EMULATION
select UML_DMA_EMULATION
select PCI_MSI
select PCI_LOCKLESS_CONFIG
config UML_PCI_OVER_VIRTIO
bool "Enable PCI over VIRTIO device simulation"
# in theory, just VIRTIO is enough, but that causes recursion
depends on VIRTIO_UML
select UML_PCI
config UML_PCI_OVER_VIRTIO_DEVICE_ID
int "set the virtio device ID for PCI emulation"
default -1

View File

@ -60,7 +60,8 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
obj-$(CONFIG_UML_RANDOM) += random.o
obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
obj-$(CONFIG_UML_RTC) += rtc.o
obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o
obj-$(CONFIG_UML_PCI) += virt-pci.o
obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o
# pcap_user.o must be added explicitly.
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o

View File

@ -79,7 +79,7 @@ static int __init rng_init (void)
if (err < 0)
goto err_out_cleanup_hw;
sigio_broken(random_fd);
sigio_broken();
hwrng.name = RNG_MODULE_NAME;
hwrng.read = rng_dev_read;

View File

@ -39,7 +39,7 @@ int uml_rtc_start(bool timetravel)
}
/* apparently timerfd won't send SIGIO, use workaround */
sigio_broken(uml_rtc_irq_fds[0]);
sigio_broken();
err = add_sigio_fd(uml_rtc_irq_fds[0]);
if (err < 0) {
close(uml_rtc_irq_fds[0]);

View File

@ -7,8 +7,10 @@
#ifndef __UM_UBD_USER_H
#define __UM_UBD_USER_H
extern int start_io_thread(unsigned long sp, int *fds_out);
extern int io_thread(void *arg);
#include <os.h>
int start_io_thread(struct os_helper_thread **td_out, int *fd_out);
void *io_thread(void *arg);
extern int kernel_fd;
extern int ubd_read_poll(int timeout);

View File

@ -474,12 +474,12 @@ static irqreturn_t ubd_intr(int irq, void *dev)
}
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;
static struct os_helper_thread *io_td;
static void kill_io_thread(void)
{
if(io_pid != -1)
os_kill_process(io_pid, 1);
if (io_td)
os_kill_helper_thread(io_td);
}
__uml_exitcall(kill_io_thread);
@ -1104,8 +1104,8 @@ static int __init ubd_init(void)
late_initcall(ubd_init);
static int __init ubd_driver_init(void){
unsigned long stack;
static int __init ubd_driver_init(void)
{
int err;
/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
@ -1114,13 +1114,11 @@ static int __init ubd_driver_init(void){
/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
* enough. So use anyway the io thread. */
}
stack = alloc_stack(0, 0);
io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
if(io_pid < 0){
err = start_io_thread(&io_td, &thread_fd);
if (err < 0) {
printk(KERN_ERR
"ubd : Failed to start I/O thread (errno = %d) - "
"falling back to synchronous I/O\n", -io_pid);
io_pid = -1;
"falling back to synchronous I/O\n", -err);
return 0;
}
err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
@ -1496,12 +1494,11 @@ int kernel_fd = -1;
/* Only changed by the io thread. XXX: currently unused. */
static int io_count;
int io_thread(void *arg)
void *io_thread(void *arg)
{
int n, count, written, res;
os_set_pdeathsig();
os_fix_helper_signals();
os_fix_helper_thread_signals();
while(1){
n = bulk_req_safe_read(
@ -1543,5 +1540,5 @@ int io_thread(void *arg)
} while (written < n);
}
return 0;
return NULL;
}

View File

@ -25,9 +25,9 @@
static struct pollfd kernel_pollfd;
int start_io_thread(unsigned long sp, int *fd_out)
int start_io_thread(struct os_helper_thread **td_out, int *fd_out)
{
int pid, fds[2], err;
int fds[2], err;
err = os_pipe(fds, 1, 1);
if(err < 0){
@ -47,14 +47,14 @@ int start_io_thread(unsigned long sp, int *fd_out)
goto out_close;
}
pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL);
if(pid < 0){
err = -errno;
printk("start_io_thread - clone failed : errno = %d\n", errno);
err = os_run_helper_thread(td_out, io_thread, NULL);
if (err < 0) {
printk("%s - failed to run helper thread, err = %d\n",
__func__, -err);
goto out_close;
}
return(pid);
return 0;
out_close:
os_close_file(fds[0]);

View File

@ -5,52 +5,19 @@
*/
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/logic_iomem.h>
#include <linux/of_platform.h>
#include <linux/irqdomain.h>
#include <linux/virtio_pcidev.h>
#include <linux/virtio-uml.h>
#include <linux/delay.h>
#include <linux/msi.h>
#include <linux/unaligned.h>
#include <irq_kern.h>
#include "virt-pci.h"
#define MAX_DEVICES 8
#define MAX_MSI_VECTORS 32
#define CFG_SPACE_SIZE 4096
/* for MSI-X we have a 32-bit payload */
#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
#define NUM_IRQ_MSGS 10
struct um_pci_message_buffer {
struct virtio_pcidev_msg hdr;
u8 data[8];
};
struct um_pci_device {
struct virtio_device *vdev;
/* for now just standard BARs */
u8 resptr[PCI_STD_NUM_BARS];
struct virtqueue *cmd_vq, *irq_vq;
#define UM_PCI_WRITE_BUFS 20
struct um_pci_message_buffer bufs[UM_PCI_WRITE_BUFS + 1];
void *extra_ptrs[UM_PCI_WRITE_BUFS + 1];
DECLARE_BITMAP(used_bufs, UM_PCI_WRITE_BUFS);
#define UM_PCI_STAT_WAITING 0
unsigned long status;
int irq;
bool platform;
};
struct um_pci_device_reg {
struct um_pci_device *dev;
void __iomem *iomem;
@ -65,179 +32,15 @@ static struct irq_domain *um_pci_inner_domain;
static struct irq_domain *um_pci_msi_domain;
static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
static unsigned int um_pci_max_delay_us = 40000;
module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644);
static int um_pci_get_buf(struct um_pci_device *dev, bool *posted)
{
int i;
for (i = 0; i < UM_PCI_WRITE_BUFS; i++) {
if (!test_and_set_bit(i, dev->used_bufs))
return i;
}
*posted = false;
return UM_PCI_WRITE_BUFS;
}
static void um_pci_free_buf(struct um_pci_device *dev, void *buf)
{
int i;
if (buf == &dev->bufs[UM_PCI_WRITE_BUFS]) {
kfree(dev->extra_ptrs[UM_PCI_WRITE_BUFS]);
dev->extra_ptrs[UM_PCI_WRITE_BUFS] = NULL;
return;
}
for (i = 0; i < UM_PCI_WRITE_BUFS; i++) {
if (buf == &dev->bufs[i]) {
kfree(dev->extra_ptrs[i]);
dev->extra_ptrs[i] = NULL;
WARN_ON(!test_and_clear_bit(i, dev->used_bufs));
return;
}
}
WARN_ON(1);
}
static int um_pci_send_cmd(struct um_pci_device *dev,
struct virtio_pcidev_msg *cmd,
unsigned int cmd_size,
const void *extra, unsigned int extra_size,
void *out, unsigned int out_size)
{
struct scatterlist out_sg, extra_sg, in_sg;
struct scatterlist *sgs_list[] = {
[0] = &out_sg,
[1] = extra ? &extra_sg : &in_sg,
[2] = extra ? &in_sg : NULL,
};
struct um_pci_message_buffer *buf;
int delay_count = 0;
bool bounce_out;
int ret, len;
int buf_idx;
bool posted;
if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
return -EINVAL;
switch (cmd->op) {
case VIRTIO_PCIDEV_OP_CFG_WRITE:
case VIRTIO_PCIDEV_OP_MMIO_WRITE:
case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
/* in PCI, writes are posted, so don't wait */
posted = !out;
WARN_ON(!posted);
break;
default:
posted = false;
break;
}
bounce_out = !posted && cmd_size <= sizeof(*cmd) &&
out && out_size <= sizeof(buf->data);
buf_idx = um_pci_get_buf(dev, &posted);
buf = &dev->bufs[buf_idx];
memcpy(buf, cmd, cmd_size);
if (posted && extra && extra_size > sizeof(buf) - cmd_size) {
dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size,
GFP_ATOMIC);
if (!dev->extra_ptrs[buf_idx]) {
um_pci_free_buf(dev, buf);
return -ENOMEM;
}
extra = dev->extra_ptrs[buf_idx];
} else if (extra && extra_size <= sizeof(buf) - cmd_size) {
memcpy((u8 *)buf + cmd_size, extra, extra_size);
cmd_size += extra_size;
extra_size = 0;
extra = NULL;
cmd = (void *)buf;
} else {
cmd = (void *)buf;
}
sg_init_one(&out_sg, cmd, cmd_size);
if (extra)
sg_init_one(&extra_sg, extra, extra_size);
/* allow stack for small buffers */
if (bounce_out)
sg_init_one(&in_sg, buf->data, out_size);
else if (out)
sg_init_one(&in_sg, out, out_size);
/* add to internal virtio queue */
ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
extra ? 2 : 1,
out ? 1 : 0,
cmd, GFP_ATOMIC);
if (ret) {
um_pci_free_buf(dev, buf);
return ret;
}
if (posted) {
virtqueue_kick(dev->cmd_vq);
return 0;
}
/* kick and poll for getting a response on the queue */
set_bit(UM_PCI_STAT_WAITING, &dev->status);
virtqueue_kick(dev->cmd_vq);
ret = 0;
while (1) {
void *completed = virtqueue_get_buf(dev->cmd_vq, &len);
if (completed == buf)
break;
if (completed)
um_pci_free_buf(dev, completed);
if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
++delay_count > um_pci_max_delay_us,
"um virt-pci delay: %d", delay_count)) {
ret = -EIO;
break;
}
udelay(1);
}
clear_bit(UM_PCI_STAT_WAITING, &dev->status);
if (bounce_out)
memcpy(out, buf->data, out_size);
um_pci_free_buf(dev, buf);
return ret;
}
static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
int size)
{
struct um_pci_device_reg *reg = priv;
struct um_pci_device *dev = reg->dev;
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_CFG_READ,
.size = size,
.addr = offset,
};
/* max 8, we might not use it all */
u8 data[8];
if (!dev)
return ULONG_MAX;
memset(data, 0xff, sizeof(data));
switch (size) {
case 1:
case 2:
@ -251,23 +54,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
return ULONG_MAX;
}
if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size))
return ULONG_MAX;
switch (size) {
case 1:
return data[0];
case 2:
return le16_to_cpup((void *)data);
case 4:
return le32_to_cpup((void *)data);
#ifdef CONFIG_64BIT
case 8:
return le64_to_cpup((void *)data);
#endif
default:
return ULONG_MAX;
}
return dev->ops->cfgspace_read(dev, offset, size);
}
static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
@ -275,42 +62,24 @@ static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
{
struct um_pci_device_reg *reg = priv;
struct um_pci_device *dev = reg->dev;
struct {
struct virtio_pcidev_msg hdr;
/* maximum size - we may only use parts of it */
u8 data[8];
} msg = {
.hdr = {
.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
.size = size,
.addr = offset,
},
};
if (!dev)
return;
switch (size) {
case 1:
msg.data[0] = (u8)val;
break;
case 2:
put_unaligned_le16(val, (void *)msg.data);
break;
case 4:
put_unaligned_le32(val, (void *)msg.data);
break;
#ifdef CONFIG_64BIT
case 8:
put_unaligned_le64(val, (void *)msg.data);
break;
#endif
break;
default:
WARN(1, "invalid config space write size %d\n", size);
return;
}
WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0));
dev->ops->cfgspace_write(dev, offset, size, val);
}
static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
@ -318,6 +87,56 @@ static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
.write = um_pci_cfgspace_write,
};
static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
int size)
{
u8 *resptr = priv;
struct um_pci_device *dev = container_of(resptr - *resptr,
struct um_pci_device,
resptr[0]);
u8 bar = *resptr;
switch (size) {
case 1:
case 2:
case 4:
#ifdef CONFIG_64BIT
case 8:
#endif
break;
default:
WARN(1, "invalid bar read size %d\n", size);
return ULONG_MAX;
}
return dev->ops->bar_read(dev, bar, offset, size);
}
static void um_pci_bar_write(void *priv, unsigned int offset, int size,
unsigned long val)
{
u8 *resptr = priv;
struct um_pci_device *dev = container_of(resptr - *resptr,
struct um_pci_device,
resptr[0]);
u8 bar = *resptr;
switch (size) {
case 1:
case 2:
case 4:
#ifdef CONFIG_64BIT
case 8:
#endif
break;
default:
WARN(1, "invalid bar write size %d\n", size);
return;
}
dev->ops->bar_write(dev, bar, offset, size, val);
}
static void um_pci_bar_copy_from(void *priv, void *buffer,
unsigned int offset, int size)
{
@ -325,53 +144,9 @@ static void um_pci_bar_copy_from(void *priv, void *buffer,
struct um_pci_device *dev = container_of(resptr - *resptr,
struct um_pci_device,
resptr[0]);
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_MMIO_READ,
.bar = *resptr,
.size = size,
.addr = offset,
};
u8 bar = *resptr;
memset(buffer, 0xff, size);
um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
}
static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
int size)
{
/* 8 is maximum size - we may only use parts of it */
u8 data[8];
switch (size) {
case 1:
case 2:
case 4:
#ifdef CONFIG_64BIT
case 8:
#endif
break;
default:
WARN(1, "invalid config space read size %d\n", size);
return ULONG_MAX;
}
um_pci_bar_copy_from(priv, data, offset, size);
switch (size) {
case 1:
return data[0];
case 2:
return le16_to_cpup((void *)data);
case 4:
return le32_to_cpup((void *)data);
#ifdef CONFIG_64BIT
case 8:
return le64_to_cpup((void *)data);
#endif
default:
return ULONG_MAX;
}
dev->ops->bar_copy_from(dev, bar, buffer, offset, size);
}
static void um_pci_bar_copy_to(void *priv, unsigned int offset,
@ -381,43 +156,9 @@ static void um_pci_bar_copy_to(void *priv, unsigned int offset,
struct um_pci_device *dev = container_of(resptr - *resptr,
struct um_pci_device,
resptr[0]);
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
.bar = *resptr,
.size = size,
.addr = offset,
};
u8 bar = *resptr;
um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
}
static void um_pci_bar_write(void *priv, unsigned int offset, int size,
unsigned long val)
{
/* maximum size - we may only use parts of it */
u8 data[8];
switch (size) {
case 1:
data[0] = (u8)val;
break;
case 2:
put_unaligned_le16(val, (void *)data);
break;
case 4:
put_unaligned_le32(val, (void *)data);
break;
#ifdef CONFIG_64BIT
case 8:
put_unaligned_le64(val, (void *)data);
break;
#endif
default:
WARN(1, "invalid config space write size %d\n", size);
return;
}
um_pci_bar_copy_to(priv, offset, data, size);
dev->ops->bar_copy_to(dev, bar, offset, buffer, size);
}
static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
@ -426,20 +167,9 @@ static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
struct um_pci_device *dev = container_of(resptr - *resptr,
struct um_pci_device,
resptr[0]);
struct {
struct virtio_pcidev_msg hdr;
u8 data;
} msg = {
.hdr = {
.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
.bar = *resptr,
.size = size,
.addr = offset,
},
.data = value,
};
u8 bar = *resptr;
um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
dev->ops->bar_set(dev, bar, offset, value, size);
}
static const struct logic_iomem_ops um_pci_device_bar_ops = {
@ -486,76 +216,6 @@ static void um_pci_rescan(void)
pci_unlock_rescan_remove();
}
static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
{
struct scatterlist sg[1];
sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE);
if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC))
kfree(buf);
else if (kick)
virtqueue_kick(vq);
}
static void um_pci_handle_irq_message(struct virtqueue *vq,
struct virtio_pcidev_msg *msg)
{
struct virtio_device *vdev = vq->vdev;
struct um_pci_device *dev = vdev->priv;
if (!dev->irq)
return;
/* we should properly chain interrupts, but on ARCH=um we don't care */
switch (msg->op) {
case VIRTIO_PCIDEV_OP_INT:
generic_handle_irq(dev->irq);
break;
case VIRTIO_PCIDEV_OP_MSI:
/* our MSI message is just the interrupt number */
if (msg->size == sizeof(u32))
generic_handle_irq(le32_to_cpup((void *)msg->data));
else
generic_handle_irq(le16_to_cpup((void *)msg->data));
break;
case VIRTIO_PCIDEV_OP_PME:
/* nothing to do - we already woke up due to the message */
break;
default:
dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op);
break;
}
}
static void um_pci_cmd_vq_cb(struct virtqueue *vq)
{
struct virtio_device *vdev = vq->vdev;
struct um_pci_device *dev = vdev->priv;
void *cmd;
int len;
if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
return;
while ((cmd = virtqueue_get_buf(vq, &len)))
um_pci_free_buf(dev, cmd);
}
static void um_pci_irq_vq_cb(struct virtqueue *vq)
{
struct virtio_pcidev_msg *msg;
int len;
while ((msg = virtqueue_get_buf(vq, &len))) {
if (len >= sizeof(*msg))
um_pci_handle_irq_message(vq, msg);
/* recycle the message buffer */
um_pci_irq_vq_addbuf(vq, msg, true);
}
}
#ifdef CONFIG_OF
/* Copied from arch/x86/kernel/devicetree.c */
struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
@ -577,200 +237,6 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
}
#endif
static int um_pci_init_vqs(struct um_pci_device *dev)
{
struct virtqueue_info vqs_info[] = {
{ "cmd", um_pci_cmd_vq_cb },
{ "irq", um_pci_irq_vq_cb },
};
struct virtqueue *vqs[2];
int err, i;
err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL);
if (err)
return err;
dev->cmd_vq = vqs[0];
dev->irq_vq = vqs[1];
virtio_device_ready(dev->vdev);
for (i = 0; i < NUM_IRQ_MSGS; i++) {
void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);
if (msg)
um_pci_irq_vq_addbuf(dev->irq_vq, msg, false);
}
virtqueue_kick(dev->irq_vq);
return 0;
}
static void __um_pci_virtio_platform_remove(struct virtio_device *vdev,
struct um_pci_device *dev)
{
virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
mutex_lock(&um_pci_mtx);
um_pci_platform_device = NULL;
mutex_unlock(&um_pci_mtx);
kfree(dev);
}
static int um_pci_virtio_platform_probe(struct virtio_device *vdev,
struct um_pci_device *dev)
{
int ret;
dev->platform = true;
mutex_lock(&um_pci_mtx);
if (um_pci_platform_device) {
mutex_unlock(&um_pci_mtx);
ret = -EBUSY;
goto out_free;
}
ret = um_pci_init_vqs(dev);
if (ret) {
mutex_unlock(&um_pci_mtx);
goto out_free;
}
um_pci_platform_device = dev;
mutex_unlock(&um_pci_mtx);
ret = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev);
if (ret)
__um_pci_virtio_platform_remove(vdev, dev);
return ret;
out_free:
kfree(dev);
return ret;
}
static int um_pci_virtio_probe(struct virtio_device *vdev)
{
struct um_pci_device *dev;
int i, free = -1;
int err = -ENOSPC;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
dev->vdev = vdev;
vdev->priv = dev;
if (of_device_is_compatible(vdev->dev.of_node, "simple-bus"))
return um_pci_virtio_platform_probe(vdev, dev);
mutex_lock(&um_pci_mtx);
for (i = 0; i < MAX_DEVICES; i++) {
if (um_pci_devices[i].dev)
continue;
free = i;
break;
}
if (free < 0)
goto error;
err = um_pci_init_vqs(dev);
if (err)
goto error;
dev->irq = irq_alloc_desc(numa_node_id());
if (dev->irq < 0) {
err = dev->irq;
goto err_reset;
}
um_pci_devices[free].dev = dev;
vdev->priv = dev;
mutex_unlock(&um_pci_mtx);
device_set_wakeup_enable(&vdev->dev, true);
/*
* In order to do suspend-resume properly, don't allow VQs
* to be suspended.
*/
virtio_uml_set_no_vq_suspend(vdev, true);
um_pci_rescan();
return 0;
err_reset:
virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
error:
mutex_unlock(&um_pci_mtx);
kfree(dev);
return err;
}
static void um_pci_virtio_remove(struct virtio_device *vdev)
{
struct um_pci_device *dev = vdev->priv;
int i;
if (dev->platform) {
of_platform_depopulate(&vdev->dev);
__um_pci_virtio_platform_remove(vdev, dev);
return;
}
device_set_wakeup_enable(&vdev->dev, false);
mutex_lock(&um_pci_mtx);
for (i = 0; i < MAX_DEVICES; i++) {
if (um_pci_devices[i].dev != dev)
continue;
um_pci_devices[i].dev = NULL;
irq_free_desc(dev->irq);
break;
}
mutex_unlock(&um_pci_mtx);
if (i < MAX_DEVICES) {
struct pci_dev *pci_dev;
pci_dev = pci_get_slot(bridge->bus, i);
if (pci_dev)
pci_stop_and_remove_bus_device_locked(pci_dev);
}
/* Stop all virtqueues */
virtio_reset_device(vdev);
dev->cmd_vq = NULL;
dev->irq_vq = NULL;
vdev->config->del_vqs(vdev);
kfree(dev);
}
static struct virtio_device_id id_table[] = {
{ CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
{ 0 },
};
MODULE_DEVICE_TABLE(virtio, id_table);
static struct virtio_driver um_pci_virtio_driver = {
.driver.name = "virtio-pci",
.id_table = id_table,
.probe = um_pci_virtio_probe,
.remove = um_pci_virtio_remove,
};
static struct resource virt_cfgspace_resource = {
.name = "PCI config space",
.start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE,
@ -889,7 +355,7 @@ static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
}
static struct irq_chip um_pci_msi_bottom_irq_chip = {
.name = "UM virtio MSI",
.name = "UM virtual MSI",
.irq_compose_msi_msg = um_pci_compose_msi_msg,
};
@ -939,7 +405,7 @@ static const struct irq_domain_ops um_pci_inner_domain_ops = {
};
static struct irq_chip um_pci_msi_irq_chip = {
.name = "UM virtio PCIe MSI",
.name = "UM virtual PCIe MSI",
.irq_mask = pci_msi_mask_irq,
.irq_unmask = pci_msi_unmask_irq,
};
@ -998,6 +464,78 @@ static struct resource virt_platform_resource = {
.flags = IORESOURCE_MEM,
};
int um_pci_device_register(struct um_pci_device *dev)
{
int i, free = -1;
int err = 0;
mutex_lock(&um_pci_mtx);
for (i = 0; i < MAX_DEVICES; i++) {
if (um_pci_devices[i].dev)
continue;
free = i;
break;
}
if (free < 0) {
err = -ENOSPC;
goto out;
}
dev->irq = irq_alloc_desc(numa_node_id());
if (dev->irq < 0) {
err = dev->irq;
goto out;
}
um_pci_devices[free].dev = dev;
out:
mutex_unlock(&um_pci_mtx);
if (!err)
um_pci_rescan();
return err;
}
void um_pci_device_unregister(struct um_pci_device *dev)
{
int i;
mutex_lock(&um_pci_mtx);
for (i = 0; i < MAX_DEVICES; i++) {
if (um_pci_devices[i].dev != dev)
continue;
um_pci_devices[i].dev = NULL;
irq_free_desc(dev->irq);
break;
}
mutex_unlock(&um_pci_mtx);
if (i < MAX_DEVICES) {
struct pci_dev *pci_dev;
pci_dev = pci_get_slot(bridge->bus, i);
if (pci_dev)
pci_stop_and_remove_bus_device_locked(pci_dev);
}
}
int um_pci_platform_device_register(struct um_pci_device *dev)
{
guard(mutex)(&um_pci_mtx);
if (um_pci_platform_device)
return -EBUSY;
um_pci_platform_device = dev;
return 0;
}
void um_pci_platform_device_unregister(struct um_pci_device *dev)
{
guard(mutex)(&um_pci_mtx);
if (um_pci_platform_device == dev)
um_pci_platform_device = NULL;
}
static int __init um_pci_init(void)
{
struct irq_domain_info inner_domain_info = {
@ -1014,10 +552,6 @@ static int __init um_pci_init(void)
WARN_ON(logic_iomem_add_region(&virt_platform_resource,
&um_pci_platform_ops));
if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
"No virtio device ID configured for PCI - no PCI support\n"))
return 0;
bridge = pci_alloc_host_bridge(0);
if (!bridge) {
err = -ENOMEM;
@ -1065,10 +599,8 @@ static int __init um_pci_init(void)
if (err)
goto free;
err = register_virtio_driver(&um_pci_virtio_driver);
if (err)
goto free;
return 0;
free:
if (!IS_ERR_OR_NULL(um_pci_inner_domain))
irq_domain_remove(um_pci_inner_domain);
@ -1080,11 +612,10 @@ free:
}
return err;
}
module_init(um_pci_init);
device_initcall(um_pci_init);
static void __exit um_pci_exit(void)
{
unregister_virtio_driver(&um_pci_virtio_driver);
irq_domain_remove(um_pci_msi_domain);
irq_domain_remove(um_pci_inner_domain);
pci_free_resource_list(&bridge->windows);

View File

@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __UM_VIRT_PCI_H
#define __UM_VIRT_PCI_H
#include <linux/pci.h>
/*
 * One emulated PCI device as seen by the UML virt-pci core.
 * Backends (e.g. virtio_pcidev) embed this and fill in ->ops.
 */
struct um_pci_device {
/* backend callbacks that service config-space/BAR accesses */
const struct um_pci_ops *ops;
/* for now just standard BARs */
u8 resptr[PCI_STD_NUM_BARS];
/* IRQ descriptor allocated for this device by the core on register */
int irq;
};
/*
 * Backend operations; offsets/sizes are pre-validated by the core
 * before these are invoked (based on the callers visible in virt-pci.c).
 */
struct um_pci_ops {
/* read @size bytes at @offset of config space; ULONG_MAX on error */
unsigned long (*cfgspace_read)(struct um_pci_device *dev,
unsigned int offset, int size);
/* write @val (@size bytes) at @offset of config space */
void (*cfgspace_write)(struct um_pci_device *dev, unsigned int offset,
int size, unsigned long val);
/* scalar read from BAR @bar */
unsigned long (*bar_read)(struct um_pci_device *dev, int bar,
unsigned int offset, int size);
/* scalar write to BAR @bar */
void (*bar_write)(struct um_pci_device *dev, int bar,
unsigned int offset, int size, unsigned long val);
/* bulk read from BAR @bar into @buffer */
void (*bar_copy_from)(struct um_pci_device *dev, int bar, void *buffer,
unsigned int offset, int size);
/* bulk write from @buffer into BAR @bar */
void (*bar_copy_to)(struct um_pci_device *dev, int bar,
unsigned int offset, const void *buffer, int size);
/* memset-like fill of BAR @bar */
void (*bar_set)(struct um_pci_device *dev, int bar,
unsigned int offset, u8 value, int size);
};
/* register/unregister a regular PCI device; register returns 0 or -errno */
int um_pci_device_register(struct um_pci_device *dev);
void um_pci_device_unregister(struct um_pci_device *dev);
/* register/unregister the (single) platform device; -EBUSY if one exists */
int um_pci_platform_device_register(struct um_pci_device *dev);
void um_pci_platform_device_unregister(struct um_pci_device *dev);
#endif /* __UM_VIRT_PCI_H */

View File

@ -0,0 +1,628 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/logic_iomem.h>
#include <linux/of_platform.h>
#include <linux/irqdomain.h>
#include <linux/virtio_pcidev.h>
#include <linux/virtio-uml.h>
#include <linux/delay.h>
#include <linux/msi.h>
#include <linux/unaligned.h>
#include <irq_kern.h>
#include "virt-pci.h"
#define to_virtio_pcidev(_pdev) \
container_of(_pdev, struct virtio_pcidev_device, pdev)
/* for MSI-X we have a 32-bit payload */
#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
#define NUM_IRQ_MSGS 10
/* one command/response message: header plus up to 8 bytes of payload */
struct virtio_pcidev_message_buffer {
struct virtio_pcidev_msg hdr;
u8 data[8];
};
/* virtio transport state wrapped around the generic um_pci_device */
struct virtio_pcidev_device {
/* must be first so to_virtio_pcidev() container_of works */
struct um_pci_device pdev;
struct virtio_device *vdev;
/* command queue (cfg/BAR ops) and interrupt-delivery queue */
struct virtqueue *cmd_vq, *irq_vq;
#define VIRTIO_PCIDEV_WRITE_BUFS 20
/* slot [VIRTIO_PCIDEV_WRITE_BUFS] is the overflow/fallback buffer */
struct virtio_pcidev_message_buffer bufs[VIRTIO_PCIDEV_WRITE_BUFS + 1];
/* kmemdup'd extra payloads, freed when the matching buf is released */
void *extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS + 1];
DECLARE_BITMAP(used_bufs, VIRTIO_PCIDEV_WRITE_BUFS);
#define UM_PCI_STAT_WAITING 0
/* bit UM_PCI_STAT_WAITING set while send_cmd polls for a response */
unsigned long status;
/* true when this device backs the platform bus instead of PCI */
bool platform;
};
static unsigned int virtio_pcidev_max_delay_us = 40000;
module_param_named(max_delay_us, virtio_pcidev_max_delay_us, uint, 0644);
/*
 * Claim a free message buffer slot (index into dev->bufs).
 * If all tracked slots are busy, fall back to the extra slot at index
 * VIRTIO_PCIDEV_WRITE_BUFS and clear *posted so the caller waits for
 * completion instead of treating the write as posted.
 */
static int virtio_pcidev_get_buf(struct virtio_pcidev_device *dev, bool *posted)
{
int i;
for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) {
/* atomically claim the first free slot */
if (!test_and_set_bit(i, dev->used_bufs))
return i;
}
*posted = false;
return VIRTIO_PCIDEV_WRITE_BUFS;
}
/*
 * Release a buffer previously handed out by virtio_pcidev_get_buf(),
 * freeing any kmemdup'd extra payload attached to the same slot.
 * WARNs if @buf does not belong to dev->bufs at all.
 */
static void virtio_pcidev_free_buf(struct virtio_pcidev_device *dev, void *buf)
{
int i;
/* the overflow slot is not tracked in used_bufs; just free its extra */
if (buf == &dev->bufs[VIRTIO_PCIDEV_WRITE_BUFS]) {
kfree(dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS]);
dev->extra_ptrs[VIRTIO_PCIDEV_WRITE_BUFS] = NULL;
return;
}
for (i = 0; i < VIRTIO_PCIDEV_WRITE_BUFS; i++) {
if (buf == &dev->bufs[i]) {
kfree(dev->extra_ptrs[i]);
dev->extra_ptrs[i] = NULL;
/* the slot must have been marked in-use; WARN on double free */
WARN_ON(!test_and_clear_bit(i, dev->used_bufs));
return;
}
}
WARN_ON(1);
}
/*
 * Send a command to the virtio device over the command virtqueue.
 *
 * @cmd/@cmd_size:   message header (+ optional inline payload)
 * @extra/@extra_size: additional out-payload, sent as a second SG entry
 *                   or copied/kmemdup'd into the message buffer
 * @out/@out_size:   response buffer; if small enough it is bounced
 *                   through the message buffer so stack memory is never
 *                   handed to the virtqueue
 *
 * Writes (CFG/MMIO write, memset) with no response are "posted": the
 * queue is kicked and we return immediately.  Everything else polls
 * (up to virtio_pcidev_max_delay_us iterations of udelay(1)) until the
 * matching buffer completes.  Returns 0 or a -errno.
 */
static int virtio_pcidev_send_cmd(struct virtio_pcidev_device *dev,
struct virtio_pcidev_msg *cmd,
unsigned int cmd_size,
const void *extra, unsigned int extra_size,
void *out, unsigned int out_size)
{
struct scatterlist out_sg, extra_sg, in_sg;
struct scatterlist *sgs_list[] = {
[0] = &out_sg,
[1] = extra ? &extra_sg : &in_sg,
[2] = extra ? &in_sg : NULL,
};
struct virtio_pcidev_message_buffer *buf;
int delay_count = 0;
bool bounce_out;
int ret, len;
int buf_idx;
bool posted;
if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
return -EINVAL;
switch (cmd->op) {
case VIRTIO_PCIDEV_OP_CFG_WRITE:
case VIRTIO_PCIDEV_OP_MMIO_WRITE:
case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
/* in PCI, writes are posted, so don't wait */
posted = !out;
WARN_ON(!posted);
break;
default:
posted = false;
break;
}
/* small responses are bounced via buf->data to keep @out off the SG */
bounce_out = !posted && cmd_size <= sizeof(*cmd) &&
out && out_size <= sizeof(buf->data);
/* may clear posted if we had to take the shared overflow slot */
buf_idx = virtio_pcidev_get_buf(dev, &posted);
buf = &dev->bufs[buf_idx];
memcpy(buf, cmd, cmd_size);
/*
 * NOTE(review): sizeof(buf) here is the size of the pointer, not of
 * the buffer; sizeof(*buf) may have been intended - verify.
 */
if (posted && extra && extra_size > sizeof(buf) - cmd_size) {
/* posted write with big payload: duplicate so caller's stack can go */
dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size,
GFP_ATOMIC);
if (!dev->extra_ptrs[buf_idx]) {
virtio_pcidev_free_buf(dev, buf);
return -ENOMEM;
}
extra = dev->extra_ptrs[buf_idx];
} else if (extra && extra_size <= sizeof(buf) - cmd_size) {
/* payload fits inline after the header: merge into one SG entry */
memcpy((u8 *)buf + cmd_size, extra, extra_size);
cmd_size += extra_size;
extra_size = 0;
extra = NULL;
cmd = (void *)buf;
} else {
cmd = (void *)buf;
}
sg_init_one(&out_sg, cmd, cmd_size);
if (extra)
sg_init_one(&extra_sg, extra, extra_size);
/* allow stack for small buffers */
if (bounce_out)
sg_init_one(&in_sg, buf->data, out_size);
else if (out)
sg_init_one(&in_sg, out, out_size);
/* add to internal virtio queue */
ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
extra ? 2 : 1,
out ? 1 : 0,
cmd, GFP_ATOMIC);
if (ret) {
virtio_pcidev_free_buf(dev, buf);
return ret;
}
if (posted) {
/* posted write: fire and forget, buffer reclaimed via vq callback */
virtqueue_kick(dev->cmd_vq);
return 0;
}
/* kick and poll for getting a response on the queue */
set_bit(UM_PCI_STAT_WAITING, &dev->status);
virtqueue_kick(dev->cmd_vq);
ret = 0;
while (1) {
void *completed = virtqueue_get_buf(dev->cmd_vq, &len);
if (completed == buf)
break;
/* reclaim completions of earlier posted writes while we spin */
if (completed)
virtio_pcidev_free_buf(dev, completed);
if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
++delay_count > virtio_pcidev_max_delay_us,
"um virt-pci delay: %d", delay_count)) {
ret = -EIO;
break;
}
udelay(1);
}
clear_bit(UM_PCI_STAT_WAITING, &dev->status);
if (bounce_out)
memcpy(out, buf->data, out_size);
virtio_pcidev_free_buf(dev, buf);
return ret;
}
/*
 * um_pci_ops::cfgspace_read - read @size bytes of PCI config space at
 * @offset via a VIRTIO_PCIDEV_OP_CFG_READ command.  Returns the value
 * in host byte order, or ULONG_MAX on any failure (matching the
 * all-ones response of a real PCI master abort).
 */
static unsigned long virtio_pcidev_cfgspace_read(struct um_pci_device *pdev,
unsigned int offset, int size)
{
struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_CFG_READ,
.size = size,
.addr = offset,
};
/* max 8, we might not use it all */
u8 data[8];
/* pre-fill with all-ones so short/failed reads look like no device */
memset(data, 0xff, sizeof(data));
/* size has been checked in um_pci_cfgspace_read() */
if (virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size))
return ULONG_MAX;
/* response payload is little-endian on the wire */
switch (size) {
case 1:
return data[0];
case 2:
return le16_to_cpup((void *)data);
case 4:
return le32_to_cpup((void *)data);
#ifdef CONFIG_64BIT
case 8:
return le64_to_cpup((void *)data);
#endif
default:
return ULONG_MAX;
}
}
/*
 * Write @size bytes of @val (little-endian) to PCI config space at
 * @offset by sending a VIRTIO_PCIDEV_OP_CFG_WRITE command.
 */
static void virtio_pcidev_cfgspace_write(struct um_pci_device *pdev,
					 unsigned int offset, int size,
					 unsigned long val)
{
	struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
	struct {
		struct virtio_pcidev_msg hdr;
		/* maximum size - we may only use parts of it */
		u8 data[8];
	} msg = {
		.hdr = {
			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
			.size = size,
			.addr = offset,
		},
	};

	/* size has been checked in um_pci_cfgspace_write() */
	if (size == 1)
		msg.data[0] = (u8)val;
	else if (size == 2)
		put_unaligned_le16(val, (void *)msg.data);
	else if (size == 4)
		put_unaligned_le32(val, (void *)msg.data);
#ifdef CONFIG_64BIT
	else if (size == 8)
		put_unaligned_le64(val, (void *)msg.data);
#endif

	WARN_ON(virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0));
}
static void virtio_pcidev_bar_copy_from(struct um_pci_device *pdev,
int bar, void *buffer,
unsigned int offset, int size)
{
struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_MMIO_READ,
.bar = bar,
.size = size,
.addr = offset,
};
memset(buffer, 0xff, size);
virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
}
/*
 * Read a @size-byte little-endian value from BAR @bar at @offset.
 * Returns ULONG_MAX for unsupported sizes.
 */
static unsigned long virtio_pcidev_bar_read(struct um_pci_device *pdev, int bar,
					    unsigned int offset, int size)
{
	/* 8 is maximum size - we may only use parts of it */
	u8 raw[8];

	/* size has been checked in um_pci_bar_read() */
	virtio_pcidev_bar_copy_from(pdev, bar, raw, offset, size);

	if (size == 1)
		return raw[0];
	if (size == 2)
		return le16_to_cpup((void *)raw);
	if (size == 4)
		return le32_to_cpup((void *)raw);
#ifdef CONFIG_64BIT
	if (size == 8)
		return le64_to_cpup((void *)raw);
#endif
	return ULONG_MAX;
}
static void virtio_pcidev_bar_copy_to(struct um_pci_device *pdev,
int bar, unsigned int offset,
const void *buffer, int size)
{
struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
struct virtio_pcidev_msg hdr = {
.op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
.bar = bar,
.size = size,
.addr = offset,
};
virtio_pcidev_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
}
/*
 * Write a @size-byte little-endian value @val to BAR @bar at @offset.
 */
static void virtio_pcidev_bar_write(struct um_pci_device *pdev, int bar,
				    unsigned int offset, int size,
				    unsigned long val)
{
	/* maximum size - we may only use parts of it */
	u8 raw[8];

	/* size has been checked in um_pci_bar_write() */
	if (size == 1)
		raw[0] = (u8)val;
	else if (size == 2)
		put_unaligned_le16(val, (void *)raw);
	else if (size == 4)
		put_unaligned_le32(val, (void *)raw);
#ifdef CONFIG_64BIT
	else if (size == 8)
		put_unaligned_le64(val, (void *)raw);
#endif

	virtio_pcidev_bar_copy_to(pdev, bar, offset, raw, size);
}
/*
 * Set @size bytes at @offset of BAR @bar to the single byte @value by
 * sending one command with a 1-byte payload.
 *
 * NOTE(review): the op is VIRTIO_PCIDEV_OP_CFG_WRITE even though a BAR
 * index is filled in, while the other BAR accessors use the MMIO ops —
 * presumably the device interprets CFG_WRITE-with-bar as a memset-style
 * operation; confirm against the virtio-pcidev protocol definition.
 */
static void virtio_pcidev_bar_set(struct um_pci_device *pdev, int bar,
				  unsigned int offset, u8 value, int size)
{
	struct virtio_pcidev_device *dev = to_virtio_pcidev(pdev);
	struct {
		struct virtio_pcidev_msg hdr;
		u8 data;
	} msg = {
		.hdr = {
			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
			.bar = bar,
			.size = size,
			.addr = offset,
		},
		.data = value,
	};

	/* fire-and-forget: the return value is intentionally not checked */
	virtio_pcidev_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
}
/* Accessor callbacks handed to the generic UML PCI layer. */
static const struct um_pci_ops virtio_pcidev_um_pci_ops = {
	.cfgspace_read = virtio_pcidev_cfgspace_read,
	.cfgspace_write = virtio_pcidev_cfgspace_write,
	.bar_read = virtio_pcidev_bar_read,
	.bar_write = virtio_pcidev_bar_write,
	.bar_copy_from = virtio_pcidev_bar_copy_from,
	.bar_copy_to = virtio_pcidev_bar_copy_to,
	.bar_set = virtio_pcidev_bar_set,
};
/*
 * Post one MAX_IRQ_MSG_SIZE receive buffer on the irq virtqueue.
 * If the queue refuses the buffer it is freed; otherwise the queue is
 * kicked when @kick is set.
 */
static void virtio_pcidev_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
{
	struct scatterlist sg[1];
	int rc;

	sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE);

	rc = virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC);
	if (rc)
		kfree(buf);
	else if (kick)
		virtqueue_kick(vq);
}
/*
 * Dispatch one interrupt message received on the irq virtqueue to the
 * generic IRQ layer.
 *
 * @vq: the irq virtqueue the message arrived on
 * @msg: the message; the caller has checked it is at least sizeof(*msg)
 */
static void virtio_pcidev_handle_irq_message(struct virtqueue *vq,
					     struct virtio_pcidev_msg *msg)
{
	struct virtio_device *vdev = vq->vdev;
	struct virtio_pcidev_device *dev = vdev->priv;

	/* nothing to deliver before the PCI device has an IRQ assigned */
	if (!dev->pdev.irq)
		return;

	/* we should properly chain interrupts, but on ARCH=um we don't care */

	switch (msg->op) {
	case VIRTIO_PCIDEV_OP_INT:
		generic_handle_irq(dev->pdev.irq);
		break;
	case VIRTIO_PCIDEV_OP_MSI:
		/* our MSI message is just the interrupt number */
		if (msg->size == sizeof(u32))
			generic_handle_irq(le32_to_cpup((void *)msg->data));
		else
			/* otherwise treated as a 16-bit payload — presumably
			 * the only other size the device sends; TODO confirm */
			generic_handle_irq(le16_to_cpup((void *)msg->data));
		break;
	case VIRTIO_PCIDEV_OP_PME:
		/* nothing to do - we already woke up due to the message */
		break;
	default:
		dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op);
		break;
	}
}
static void virtio_pcidev_cmd_vq_cb(struct virtqueue *vq)
{
struct virtio_device *vdev = vq->vdev;
struct virtio_pcidev_device *dev = vdev->priv;
void *cmd;
int len;
if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
return;
while ((cmd = virtqueue_get_buf(vq, &len)))
virtio_pcidev_free_buf(dev, cmd);
}
static void virtio_pcidev_irq_vq_cb(struct virtqueue *vq)
{
struct virtio_pcidev_msg *msg;
int len;
while ((msg = virtqueue_get_buf(vq, &len))) {
if (len >= sizeof(*msg))
virtio_pcidev_handle_irq_message(vq, msg);
/* recycle the message buffer */
virtio_pcidev_irq_vq_addbuf(vq, msg, true);
}
}
/*
 * Find the "cmd" and "irq" virtqueues, mark the device ready, and
 * pre-fill the irq queue with NUM_IRQ_MSGS receive buffers.
 *
 * Returns 0 on success or a negative errno from virtio_find_vqs().
 */
static int virtio_pcidev_init_vqs(struct virtio_pcidev_device *dev)
{
	struct virtqueue_info vqs_info[] = {
		{ "cmd", virtio_pcidev_cmd_vq_cb },
		{ "irq", virtio_pcidev_irq_vq_cb },
	};
	struct virtqueue *vqs[2];
	int err, i;

	err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL);
	if (err)
		return err;

	dev->cmd_vq = vqs[0];
	dev->irq_vq = vqs[1];

	virtio_device_ready(dev->vdev);

	for (i = 0; i < NUM_IRQ_MSGS; i++) {
		void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);

		/* best-effort: a failed allocation just means one fewer buffer */
		if (msg)
			virtio_pcidev_irq_vq_addbuf(dev->irq_vq, msg, false);
	}

	virtqueue_kick(dev->irq_vq);

	return 0;
}
/*
 * Common teardown for the platform flavor: unregister the UML platform
 * device, then reset the virtio device and delete its virtqueues before
 * freeing the driver state.  The ordering mirrors the unwind path in
 * virtio_pcidev_virtio_platform_probe().
 */
static void __virtio_pcidev_virtio_platform_remove(struct virtio_device *vdev,
						   struct virtio_pcidev_device *dev)
{
	um_pci_platform_device_unregister(&dev->pdev);

	virtio_reset_device(vdev);
	vdev->config->del_vqs(vdev);

	kfree(dev);
}
/*
 * Probe path for a device whose OF node is "simple-bus" compatible:
 * register it as a UML platform device and populate its OF children.
 *
 * On any failure, everything set up so far is unwound in reverse order
 * and @dev is freed; returns 0 or a negative errno.
 */
static int virtio_pcidev_virtio_platform_probe(struct virtio_device *vdev,
					       struct virtio_pcidev_device *dev)
{
	int err;

	dev->platform = true;

	err = virtio_pcidev_init_vqs(dev);
	if (err)
		goto err_free;

	err = um_pci_platform_device_register(&dev->pdev);
	if (err)
		goto err_reset;

	err = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev);
	if (err)
		goto err_unregister;

	return 0;

err_unregister:
	um_pci_platform_device_unregister(&dev->pdev);
err_reset:
	virtio_reset_device(vdev);
	vdev->config->del_vqs(vdev);
err_free:
	kfree(dev);

	return err;
}
/*
 * virtio driver probe: allocate per-device state, then register either
 * as a platform device (when the OF node is "simple-bus" compatible) or
 * as a regular UML PCI device.
 *
 * Returns 0 on success; on failure all partial setup is unwound and a
 * negative errno is returned.
 */
static int virtio_pcidev_virtio_probe(struct virtio_device *vdev)
{
	struct virtio_pcidev_device *dev;
	int err;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	dev->vdev = vdev;
	vdev->priv = dev;
	dev->pdev.ops = &virtio_pcidev_um_pci_ops;

	/* the platform flavor takes over; it frees dev on its own errors */
	if (of_device_is_compatible(vdev->dev.of_node, "simple-bus"))
		return virtio_pcidev_virtio_platform_probe(vdev, dev);

	err = virtio_pcidev_init_vqs(dev);
	if (err)
		goto err_free;

	err = um_pci_device_register(&dev->pdev);
	if (err)
		goto err_reset;

	device_set_wakeup_enable(&vdev->dev, true);

	/*
	 * In order to do suspend-resume properly, don't allow VQs
	 * to be suspended.
	 */
	virtio_uml_set_no_vq_suspend(vdev, true);

	return 0;

err_reset:
	virtio_reset_device(vdev);
	vdev->config->del_vqs(vdev);
err_free:
	kfree(dev);
	return err;
}
/*
 * virtio driver remove: undo probe in reverse order.  The platform
 * flavor depopulates its OF children first and then shares the common
 * platform teardown helper.
 */
static void virtio_pcidev_virtio_remove(struct virtio_device *vdev)
{
	struct virtio_pcidev_device *dev = vdev->priv;

	if (dev->platform) {
		of_platform_depopulate(&vdev->dev);
		__virtio_pcidev_virtio_platform_remove(vdev, dev);
		return;
	}

	device_set_wakeup_enable(&vdev->dev, false);

	um_pci_device_unregister(&dev->pdev);

	/* Stop all virtqueues */
	virtio_reset_device(vdev);
	/* clear the queue pointers before they are deleted below */
	dev->cmd_vq = NULL;
	dev->irq_vq = NULL;
	vdev->config->del_vqs(vdev);

	kfree(dev);
}
/* Match the (Kconfig-chosen) UML virtio PCI device ID, any vendor. */
static struct virtio_device_id id_table[] = {
	{ CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
MODULE_DEVICE_TABLE(virtio, id_table);
/* virtio transport driver providing PCI over virtio for UML. */
static struct virtio_driver virtio_pcidev_virtio_driver = {
	.driver.name = "virtio-pci",
	.id_table = id_table,
	.probe = virtio_pcidev_virtio_probe,
	.remove = virtio_pcidev_virtio_remove,
};
/*
 * Module init: register the virtio driver, unless no device ID was
 * configured, in which case warn once and succeed without PCI support.
 */
static int __init virtio_pcidev_init(void)
{
	if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
		 "No virtio device ID configured for PCI - no PCI support\n"))
		return 0;

	return register_virtio_driver(&virtio_pcidev_virtio_driver);
}
late_initcall(virtio_pcidev_init);
/* Module exit: unregister the virtio driver registered at init. */
static void __exit virtio_pcidev_exit(void)
{
	unregister_virtio_driver(&virtio_pcidev_virtio_driver);
}
module_exit(virtio_pcidev_exit);

View File

@ -13,6 +13,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += mcs_spinlock.h
generic-y += mmiowb.h
generic-y += module.h
generic-y += module.lds.h
generic-y += param.h
generic-y += parport.h

View File

@ -31,6 +31,8 @@ struct thread_struct {
} thread;
} request;
void *segv_continue;
/* Contains variable sized FP registers */
struct pt_regs regs;
};

View File

@ -9,6 +9,7 @@
#include <asm/elf.h>
#include <linux/unaligned.h>
#include <sysdep/faultinfo.h>
#define __under_task_size(addr, size) \
(((unsigned long) (addr) < TASK_SIZE) && \
@ -44,19 +45,28 @@ static inline int __access_ok(const void __user *ptr, unsigned long size)
__access_ok_vsyscall(addr, size));
}
/* no pagefaults for kernel addresses in um */
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
*((type *)dst) = get_unaligned((type *)(src)); \
if (0) /* make sure the label looks used to the compiler */ \
int __faulted; \
\
___backtrack_faulted(__faulted); \
if (__faulted) { \
*((type *)dst) = (type) 0; \
goto err_label; \
} \
*((type *)dst) = get_unaligned((type *)(src)); \
current->thread.segv_continue = NULL; \
} while (0)
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
put_unaligned(*((type *)src), (type *)(dst)); \
if (0) /* make sure the label looks used to the compiler */ \
int __faulted; \
\
___backtrack_faulted(__faulted); \
if (__faulted) \
goto err_label; \
put_unaligned(*((type *)src), (type *)(dst)); \
current->thread.segv_continue = NULL; \
} while (0)
#endif

View File

@ -83,6 +83,8 @@ extern void time_travel_not_configured(void);
#define time_travel_del_event(...) time_travel_not_configured()
#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
extern unsigned long tt_extra_sched_jiffies;
/*
* Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used,
* which is intentional since we really shouldn't link it in that case.

View File

@ -12,4 +12,6 @@ extern void arch_check_bugs(void);
extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs);
extern void arch_examine_signal(int sig, struct uml_pt_regs *regs);
void mc_set_rip(void *_mc, void *target);
#endif

View File

@ -50,7 +50,7 @@ extern int linux_main(int argc, char **argv, char **envp);
extern void uml_finishsetup(void);
struct siginfo;
extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *);
extern void (*sig_info[])(int, struct siginfo *si, struct uml_pt_regs *, void *);
#endif

View File

@ -15,7 +15,8 @@ enum um_irq_type {
};
struct siginfo;
extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
extern void sigio_handler(int sig, struct siginfo *unused_si,
struct uml_pt_regs *regs, void *mc);
void sigio_run_timetravel_handlers(void);
extern void free_irq_by_fd(int fd);
extern void deactivate_fd(int fd, int irqnum);

View File

@ -24,10 +24,12 @@ extern void free_stack(unsigned long stack, int order);
struct pt_regs;
extern void do_signal(struct pt_regs *regs);
extern void interrupt_end(void);
extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs);
extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs,
void *mc);
extern unsigned long segv(struct faultinfo fi, unsigned long ip,
int is_user, struct uml_pt_regs *regs);
int is_user, struct uml_pt_regs *regs,
void *mc);
extern int handle_page_fault(unsigned long address, unsigned long ip,
int is_write, int is_user, int *code_out);
@ -59,8 +61,10 @@ extern unsigned long from_irq_stack(int nested);
extern int singlestepping(void);
extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
void *mc);
extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
void *mc);
extern void fatal_sigsegv(void) __attribute__ ((noreturn));
void um_idle_sleep(void);

View File

@ -213,7 +213,6 @@ extern int os_protect_memory(void *addr, unsigned long len,
extern int os_unmap_memory(void *addr, int len);
extern int os_drop_memory(void *addr, int length);
extern int can_drop_memory(void);
extern int os_mincore(void *addr, unsigned long len);
void os_set_pdeathsig(void);
@ -225,6 +224,11 @@ extern int run_helper_thread(int (*proc)(void *), void *arg,
unsigned int flags, unsigned long *stack_out);
extern int helper_wait(int pid);
struct os_helper_thread;
int os_run_helper_thread(struct os_helper_thread **td_out,
void *(*routine)(void *), void *arg);
void os_kill_helper_thread(struct os_helper_thread *td);
void os_fix_helper_thread_signals(void);
/* umid.c */
extern int umid_file_name(char *name, char *buf, int len);
@ -310,7 +314,7 @@ extern void um_irqs_resume(void);
extern int add_sigio_fd(int fd);
extern int ignore_sigio_fd(int fd);
extern void maybe_sigio_broken(int fd);
extern void sigio_broken(int fd);
extern void sigio_broken(void);
/*
* unlocked versions for IRQ controller code.
*

View File

@ -6,7 +6,6 @@
#ifndef __SIGIO_H__
#define __SIGIO_H__
extern int write_sigio_irq(int fd);
extern void sigio_lock(void);
extern void sigio_unlock(void);

View File

@ -17,7 +17,7 @@ extra-y := vmlinux.lds
obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
physmem.o process.o ptrace.o reboot.o sigio.o \
signal.o sysrq.o time.o tlb.o trap.o \
um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
um_arch.o umid.o kmsg_dump.o capflags.o skas/
obj-y += load_file.o
obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o

View File

@ -236,7 +236,8 @@ static void _sigio_handler(struct uml_pt_regs *regs,
free_irqs();
}
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
void *mc)
{
preempt_disable();
_sigio_handler(regs, irqs_suspended);

View File

@ -1,19 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2013 Richard Weinberger <richrd@nod.at>
*/
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <os.h>
bool copy_from_kernel_nofault_allowed(const void *src, size_t size)
{
void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
if ((unsigned long)src < PAGE_SIZE || size <= 0)
return false;
if (os_mincore(psrc, size + src - psrc) <= 0)
return false;
return true;
}

View File

@ -9,6 +9,8 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <asm/sections.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <as-layout.h>
@ -66,6 +68,7 @@ void __init arch_mm_preinit(void)
map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
memblock_free((void *)brk_end, uml_reserved - brk_end);
uml_reserved = brk_end;
min_low_pfn = PFN_UP(__pa(uml_reserved));
max_pfn = max_low_pfn;
}
@ -242,3 +245,11 @@ static const pgprot_t protection_map[16] = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED
};
DECLARE_VM_GET_PAGE_PROT
void mark_rodata_ro(void)
{
unsigned long rodata_start = PFN_ALIGN(__start_rodata);
unsigned long rodata_end = PFN_ALIGN(__end_rodata);
os_protect_memory((void *)rodata_start, rodata_end - rodata_start, 1, 0, 0);
}

View File

@ -8,32 +8,6 @@
#include <os.h>
#include <sigio.h>
/* Protected by sigio_lock() called from write_sigio_workaround */
static int sigio_irq_fd = -1;
static irqreturn_t sigio_interrupt(int irq, void *data)
{
char c;
os_read_file(sigio_irq_fd, &c, sizeof(c));
return IRQ_HANDLED;
}
int write_sigio_irq(int fd)
{
int err;
err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt,
0, "write sigio", NULL);
if (err < 0) {
printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
"err = %d\n", err);
return -1;
}
sigio_irq_fd = fd;
return 0;
}
/* These are called from os-Linux/sigio.c to protect its pollfds arrays. */
static DEFINE_MUTEX(sigio_mutex);

View File

@ -31,6 +31,17 @@ void handle_syscall(struct uml_pt_regs *r)
goto out;
syscall = UPT_SYSCALL_NR(r);
/*
* If no time passes, then sched_yield may not actually yield, causing
* broken spinlock implementations in userspace (ASAN) to hang for long
* periods of time.
*/
if ((time_travel_mode == TT_MODE_INFCPU ||
time_travel_mode == TT_MODE_EXTERNAL) &&
syscall == __NR_sched_yield)
tt_extra_sched_jiffies += 1;
if (syscall >= 0 && syscall < __NR_syscalls) {
unsigned long ret = EXECUTE_SYSCALL(syscall, regs);

View File

@ -16,6 +16,7 @@
#include <kern_util.h>
#include <os.h>
#include <skas.h>
#include <arch.h>
/*
* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
@ -175,12 +176,14 @@ void fatal_sigsegv(void)
* @sig: the signal number
* @unused_si: the signal info struct; unused in this handler
* @regs: the ptrace register information
* @mc: the mcontext of the signal
*
* The handler first extracts the faultinfo from the UML ptrace regs struct.
* If the userfault did not happen in an UML userspace process, bad_segv is called.
* Otherwise the signal did happen in a cloned userspace process, handle it.
*/
void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
void *mc)
{
struct faultinfo * fi = UPT_FAULTINFO(regs);
@ -189,7 +192,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
bad_segv(*fi, UPT_IP(regs));
return;
}
segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs, mc);
}
/*
@ -199,7 +202,7 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
* give us bad data!
*/
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
struct uml_pt_regs *regs)
struct uml_pt_regs *regs, void *mc)
{
int si_code;
int err;
@ -223,6 +226,19 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
goto out;
}
else if (current->mm == NULL) {
if (current->pagefault_disabled) {
if (!mc) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault with pagefaults disabled but no mcontext");
}
if (!current->thread.segv_continue) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault without recovery target");
}
mc_set_rip(mc, current->thread.segv_continue);
current->thread.segv_continue = NULL;
goto out;
}
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault with no mm");
}
@ -274,7 +290,8 @@ out:
return 0;
}
void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs,
void *mc)
{
int code, err;
if (!UPT_IS_USER(regs)) {
@ -302,7 +319,8 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
}
}
void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs,
void *mc)
{
do_IRQ(WINCH_IRQ, regs);
}

View File

@ -12,6 +12,7 @@
#include <linux/panic_notifier.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/string_choices.h>
#include <linux/utsname.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
@ -78,7 +79,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "model name\t: UML\n");
seq_printf(m, "mode\t\t: skas\n");
seq_printf(m, "host\t\t: %s\n", host_info);
seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no");
seq_printf(m, "fpu\t\t: %s\n", str_yes_no(cpu_has(&boot_cpu_data, X86_FEATURE_FPU)));
seq_printf(m, "flags\t\t:");
for (i = 0; i < 32*NCAPINTS; i++)
if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL))

View File

@ -8,6 +8,7 @@
#include <unistd.h>
#include <errno.h>
#include <sched.h>
#include <pthread.h>
#include <linux/limits.h>
#include <sys/socket.h>
#include <sys/wait.h>
@ -121,6 +122,10 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
unsigned long stack, sp;
int pid, status, err;
/* To share memory space, use os_run_helper_thread() instead. */
if (flags & CLONE_VM)
return -EINVAL;
stack = alloc_stack(0, __uml_cant_sleep());
if (stack == 0)
return -ENOMEM;
@ -167,3 +172,65 @@ int helper_wait(int pid)
} else
return 0;
}
struct os_helper_thread {
pthread_t handle;
};
int os_run_helper_thread(struct os_helper_thread **td_out,
void *(*routine)(void *), void *arg)
{
struct os_helper_thread *td;
sigset_t sigset, oset;
int err, flags;
flags = __uml_cant_sleep() ? UM_GFP_ATOMIC : UM_GFP_KERNEL;
td = uml_kmalloc(sizeof(*td), flags);
if (!td)
return -ENOMEM;
sigfillset(&sigset);
if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) {
err = -errno;
kfree(td);
return err;
}
err = pthread_create(&td->handle, NULL, routine, arg);
if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0)
panic("Failed to restore the signal mask: %d", errno);
if (err != 0)
kfree(td);
else
*td_out = td;
return -err;
}
void os_kill_helper_thread(struct os_helper_thread *td)
{
pthread_cancel(td->handle);
pthread_join(td->handle, NULL);
kfree(td);
}
void os_fix_helper_thread_signals(void)
{
sigset_t sigset;
sigemptyset(&sigset);
sigaddset(&sigset, SIGWINCH);
sigaddset(&sigset, SIGPIPE);
sigaddset(&sigset, SIGPROF);
sigaddset(&sigset, SIGINT);
sigaddset(&sigset, SIGTERM);
sigaddset(&sigset, SIGCHLD);
sigaddset(&sigset, SIGALRM);
sigaddset(&sigset, SIGIO);
sigaddset(&sigset, SIGUSR1);
pthread_sigmask(SIG_SETMASK, &sigset, NULL);
}

View File

@ -142,57 +142,6 @@ out:
return ok;
}
static int os_page_mincore(void *addr)
{
char vec[2];
int ret;
ret = mincore(addr, UM_KERN_PAGE_SIZE, vec);
if (ret < 0) {
if (errno == ENOMEM || errno == EINVAL)
return 0;
else
return -errno;
}
return vec[0] & 1;
}
int os_mincore(void *addr, unsigned long len)
{
char *vec;
int ret, i;
if (len <= UM_KERN_PAGE_SIZE)
return os_page_mincore(addr);
vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE);
if (!vec)
return -ENOMEM;
ret = mincore(addr, UM_KERN_PAGE_SIZE, vec);
if (ret < 0) {
if (errno == ENOMEM || errno == EINVAL)
ret = 0;
else
ret = -errno;
goto out;
}
for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) {
if (!(vec[i] & 1)) {
ret = 0;
goto out;
}
}
ret = 1;
out:
free(vec);
return ret;
}
void init_new_thread_signals(void)
{
set_handler(SIGSEGV);

View File

@ -11,6 +11,7 @@
#include <sched.h>
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <kern_util.h>
#include <init.h>
#include <os.h>
@ -21,184 +22,51 @@
* Protected by sigio_lock(), also used by sigio_cleanup, which is an
* exitcall.
*/
static int write_sigio_pid = -1;
static unsigned long write_sigio_stack;
static struct os_helper_thread *write_sigio_td;
/*
* These arrays are initialized before the sigio thread is started, and
* the descriptors closed after it is killed. So, it can't see them change.
* On the UML side, they are changed under the sigio_lock.
*/
#define SIGIO_FDS_INIT {-1, -1}
static int epollfd = -1;
static int write_sigio_fds[2] = SIGIO_FDS_INIT;
static int sigio_private[2] = SIGIO_FDS_INIT;
#define MAX_EPOLL_EVENTS 64
struct pollfds {
struct pollfd *poll;
int size;
int used;
};
static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
/*
* Protected by sigio_lock(). Used by the sigio thread, but the UML thread
* synchronizes with it.
*/
static struct pollfds current_poll;
static struct pollfds next_poll;
static struct pollfds all_sigio_fds;
static int write_sigio_thread(void *unused)
static void *write_sigio_thread(void *unused)
{
struct pollfds *fds, tmp;
struct pollfd *p;
int i, n, respond_fd;
char c;
int pid = getpid();
int r;
os_fix_helper_thread_signals();
os_set_pdeathsig();
os_fix_helper_signals();
fds = &current_poll;
while (1) {
n = poll(fds->poll, fds->used, -1);
if (n < 0) {
r = epoll_wait(epollfd, epoll_events, MAX_EPOLL_EVENTS, -1);
if (r < 0) {
if (errno == EINTR)
continue;
printk(UM_KERN_ERR "write_sigio_thread : poll returned "
"%d, errno = %d\n", n, errno);
printk(UM_KERN_ERR "%s: epoll_wait failed, errno = %d\n",
__func__, errno);
}
for (i = 0; i < fds->used; i++) {
p = &fds->poll[i];
if (p->revents == 0)
continue;
if (p->fd == sigio_private[1]) {
CATCH_EINTR(n = read(sigio_private[1], &c,
sizeof(c)));
if (n != sizeof(c))
printk(UM_KERN_ERR
"write_sigio_thread : "
"read on socket failed, "
"err = %d\n", errno);
tmp = current_poll;
current_poll = next_poll;
next_poll = tmp;
respond_fd = sigio_private[1];
}
else {
respond_fd = write_sigio_fds[1];
fds->used--;
memmove(&fds->poll[i], &fds->poll[i + 1],
(fds->used - i) * sizeof(*fds->poll));
}
CATCH_EINTR(n = write(respond_fd, &c, sizeof(c)));
if (n != sizeof(c))
printk(UM_KERN_ERR "write_sigio_thread : "
"write on socket failed, err = %d\n",
errno);
}
CATCH_EINTR(r = tgkill(pid, pid, SIGIO));
if (r < 0)
printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n",
__func__, errno);
}
return 0;
}
static int need_poll(struct pollfds *polls, int n)
{
struct pollfd *new;
if (n <= polls->size)
return 0;
new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC);
if (new == NULL) {
printk(UM_KERN_ERR "need_poll : failed to allocate new "
"pollfds\n");
return -ENOMEM;
}
memcpy(new, polls->poll, polls->used * sizeof(struct pollfd));
kfree(polls->poll);
polls->poll = new;
polls->size = n;
return 0;
}
/*
* Must be called with sigio_lock held, because it's needed by the marked
* critical section.
*/
static void update_thread(void)
{
unsigned long flags;
int n;
char c;
flags = um_set_signals_trace(0);
CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c)));
if (n != sizeof(c)) {
printk(UM_KERN_ERR "update_thread : write failed, err = %d\n",
errno);
goto fail;
}
CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c)));
if (n != sizeof(c)) {
printk(UM_KERN_ERR "update_thread : read failed, err = %d\n",
errno);
goto fail;
}
um_set_signals_trace(flags);
return;
fail:
/* Critical section start */
if (write_sigio_pid != -1) {
os_kill_process(write_sigio_pid, 1);
free_stack(write_sigio_stack, 0);
}
write_sigio_pid = -1;
close(sigio_private[0]);
close(sigio_private[1]);
close(write_sigio_fds[0]);
close(write_sigio_fds[1]);
/* Critical section end */
um_set_signals_trace(flags);
return NULL;
}
int __add_sigio_fd(int fd)
{
struct pollfd *p;
int err, i, n;
struct epoll_event event = {
.data.fd = fd,
.events = EPOLLIN | EPOLLET,
};
int r;
for (i = 0; i < all_sigio_fds.used; i++) {
if (all_sigio_fds.poll[i].fd == fd)
break;
}
if (i == all_sigio_fds.used)
return -ENOSPC;
p = &all_sigio_fds.poll[i];
for (i = 0; i < current_poll.used; i++) {
if (current_poll.poll[i].fd == fd)
return 0;
}
n = current_poll.used;
err = need_poll(&next_poll, n + 1);
if (err)
return err;
memcpy(next_poll.poll, current_poll.poll,
current_poll.used * sizeof(struct pollfd));
next_poll.poll[n] = *p;
next_poll.used = n + 1;
update_thread();
return 0;
CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event));
return r < 0 ? -errno : 0;
}
int add_sigio_fd(int fd)
{
int err;
@ -212,38 +80,11 @@ int add_sigio_fd(int fd)
int __ignore_sigio_fd(int fd)
{
struct pollfd *p;
int err, i, n = 0;
struct epoll_event event;
int r;
/*
* This is called from exitcalls elsewhere in UML - if
* sigio_cleanup has already run, then update_thread will hang
* or fail because the thread is no longer running.
*/
if (write_sigio_pid == -1)
return -EIO;
for (i = 0; i < current_poll.used; i++) {
if (current_poll.poll[i].fd == fd)
break;
}
if (i == current_poll.used)
return -ENOENT;
err = need_poll(&next_poll, current_poll.used - 1);
if (err)
return err;
for (i = 0; i < current_poll.used; i++) {
p = &current_poll.poll[i];
if (p->fd != fd)
next_poll.poll[n++] = *p;
}
next_poll.used = current_poll.used - 1;
update_thread();
return 0;
CATCH_EINTR(r = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event));
return r < 0 ? -errno : 0;
}
int ignore_sigio_fd(int fd)
@ -257,127 +98,39 @@ int ignore_sigio_fd(int fd)
return err;
}
static struct pollfd *setup_initial_poll(int fd)
{
struct pollfd *p;
p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL);
if (p == NULL) {
printk(UM_KERN_ERR "setup_initial_poll : failed to allocate "
"poll\n");
return NULL;
}
*p = ((struct pollfd) { .fd = fd,
.events = POLLIN,
.revents = 0 });
return p;
}
static void write_sigio_workaround(void)
{
struct pollfd *p;
int err;
int l_write_sigio_fds[2];
int l_sigio_private[2];
int l_write_sigio_pid;
/* We call this *tons* of times - and most ones we must just fail. */
sigio_lock();
l_write_sigio_pid = write_sigio_pid;
sigio_unlock();
if (l_write_sigio_pid != -1)
return;
err = os_pipe(l_write_sigio_fds, 1, 1);
if (err < 0) {
printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, "
"err = %d\n", -err);
return;
}
err = os_pipe(l_sigio_private, 1, 1);
if (err < 0) {
printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, "
"err = %d\n", -err);
goto out_close1;
}
p = setup_initial_poll(l_sigio_private[1]);
if (!p)
goto out_close2;
sigio_lock();
/*
* Did we race? Don't try to optimize this, please, it's not so likely
* to happen, and no more than once at the boot.
*/
if (write_sigio_pid != -1)
goto out_free;
current_poll = ((struct pollfds) { .poll = p,
.used = 1,
.size = 1 });
if (write_sigio_irq(l_write_sigio_fds[0]))
goto out_clear_poll;
memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds));
memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private));
write_sigio_pid = run_helper_thread(write_sigio_thread, NULL,
CLONE_FILES | CLONE_VM,
&write_sigio_stack);
if (write_sigio_pid < 0)
goto out_clear;
sigio_unlock();
return;
out_clear:
write_sigio_pid = -1;
write_sigio_fds[0] = -1;
write_sigio_fds[1] = -1;
sigio_private[0] = -1;
sigio_private[1] = -1;
out_clear_poll:
current_poll = ((struct pollfds) { .poll = NULL,
.size = 0,
.used = 0 });
out_free:
sigio_unlock();
kfree(p);
out_close2:
close(l_sigio_private[0]);
close(l_sigio_private[1]);
out_close1:
close(l_write_sigio_fds[0]);
close(l_write_sigio_fds[1]);
}
void sigio_broken(int fd)
{
int err;
write_sigio_workaround();
sigio_lock();
err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1);
if (err) {
printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd "
"for descriptor %d\n", fd);
if (write_sigio_td)
goto out;
epollfd = epoll_create(MAX_EPOLL_EVENTS);
if (epollfd < 0) {
printk(UM_KERN_ERR "%s: epoll_create failed, errno = %d\n",
__func__, errno);
goto out;
}
err = os_run_helper_thread(&write_sigio_td, write_sigio_thread, NULL);
if (err < 0) {
printk(UM_KERN_ERR "%s: os_run_helper_thread failed, errno = %d\n",
__func__, -err);
close(epollfd);
epollfd = -1;
goto out;
}
all_sigio_fds.poll[all_sigio_fds.used++] =
((struct pollfd) { .fd = fd,
.events = POLLIN,
.revents = 0 });
out:
sigio_unlock();
}
void sigio_broken(void)
{
write_sigio_workaround();
}
/* Changed during early boot */
static int pty_output_sigio;
@ -389,17 +142,16 @@ void maybe_sigio_broken(int fd)
if (pty_output_sigio)
return;
sigio_broken(fd);
sigio_broken();
}
static void sigio_cleanup(void)
{
if (write_sigio_pid == -1)
if (!write_sigio_td)
return;
os_kill_process(write_sigio_pid, 1);
free_stack(write_sigio_stack, 0);
write_sigio_pid = -1;
os_kill_helper_thread(write_sigio_td);
write_sigio_td = NULL;
}
__uml_exitcall(sigio_cleanup);

View File

@ -21,7 +21,7 @@
#include <sys/ucontext.h>
#include <timetravel.h>
void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = {
[SIGTRAP] = relay_signal,
[SIGFPE] = relay_signal,
[SIGILL] = relay_signal,
@ -47,7 +47,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
if ((sig != SIGIO) && (sig != SIGWINCH))
unblock_signals_trace();
(*sig_info[sig])(sig, si, &r);
(*sig_info[sig])(sig, si, &r, mc);
errno = save_errno;
}

View File

@ -166,7 +166,7 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi)
static void handle_segv(int pid, struct uml_pt_regs *regs)
{
get_skas_faultinfo(pid, &regs->faultinfo);
segv(regs->faultinfo, 0, 1, NULL);
segv(regs->faultinfo, 0, 1, NULL, NULL);
}
static void handle_trap(int pid, struct uml_pt_regs *regs)
@ -525,7 +525,7 @@ void userspace(struct uml_pt_regs *regs)
get_skas_faultinfo(pid,
&regs->faultinfo);
(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
regs);
regs, NULL);
}
else handle_segv(pid, regs);
break;
@ -533,7 +533,7 @@ void userspace(struct uml_pt_regs *regs)
handle_trap(pid, regs);
break;
case SIGTRAP:
relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL);
break;
case SIGALRM:
break;
@ -543,7 +543,7 @@ void userspace(struct uml_pt_regs *regs)
case SIGFPE:
case SIGWINCH:
block_signals_trace();
(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
(*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL);
unblock_signals_trace();
break;
default:

View File

@ -7,12 +7,13 @@ core-y += arch/x86/crypto/
# GCC versions < 11. See:
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652
#
ifeq ($(CONFIG_CC_IS_CLANG),y)
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
ifeq ($(call gcc-min-version, 110000)$(CONFIG_CC_IS_CLANG),y)
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
endif
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
ifeq ($(CONFIG_X86_32),y)
START := 0x8048000

View File

@ -12,9 +12,9 @@
*/
#ifdef CONFIG_X86_32
#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
#define mb() alternative("lock addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
#define rmb() alternative("lock addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
#define wmb() alternative("lock addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
#else /* CONFIG_X86_32 */

View File

@ -1,24 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __UM_MODULE_H
#define __UM_MODULE_H
/* UML is simple */
struct mod_arch_specific
{
};
#ifdef CONFIG_X86_32
#define Elf_Shdr Elf32_Shdr
#define Elf_Sym Elf32_Sym
#define Elf_Ehdr Elf32_Ehdr
#else
#define Elf_Shdr Elf64_Shdr
#define Elf_Sym Elf64_Sym
#define Elf_Ehdr Elf64_Ehdr
#endif
#endif

View File

@ -4,6 +4,7 @@
#include <asm/ptrace.h>
#include <sysdep/ptrace.h>
#include <sysdep/mcontext.h>
#include <arch.h>
void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
{
@ -27,7 +28,17 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
COPY(RIP);
COPY2(EFLAGS, EFL);
COPY2(CS, CSGSFS);
regs->gp[CS / sizeof(unsigned long)] &= 0xffff;
regs->gp[CS / sizeof(unsigned long)] |= 3;
regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48;
#endif
}
void mc_set_rip(void *_mc, void *target)
{
mcontext_t *mc = _mc;
#ifdef __i386__
mc->gregs[REG_EIP] = (unsigned long)target;
#else
mc->gregs[REG_RIP] = (unsigned long)target;
#endif
}

View File

@ -29,4 +29,16 @@ struct faultinfo {
#define PTRACE_FULL_FAULTINFO 0
#define ___backtrack_faulted(_faulted) \
asm volatile ( \
"mov $0, %0\n" \
"movl $__get_kernel_nofault_faulted_%=,%1\n" \
"jmp _end_%=\n" \
"__get_kernel_nofault_faulted_%=:\n" \
"mov $1, %0;" \
"_end_%=:" \
: "=r" (_faulted), \
"=m" (current->thread.segv_continue) :: \
)
#endif

View File

@ -29,4 +29,16 @@ struct faultinfo {
#define PTRACE_FULL_FAULTINFO 1
#define ___backtrack_faulted(_faulted) \
asm volatile ( \
"mov $0, %0\n" \
"movq $__get_kernel_nofault_faulted_%=,%1\n" \
"jmp _end_%=\n" \
"__get_kernel_nofault_faulted_%=:\n" \
"mov $1, %0;" \
"_end_%=:" \
: "=r" (_faulted), \
"=m" (current->thread.segv_continue) :: \
)
#endif

View File

@ -12,33 +12,22 @@
static unsigned int __read_mostly vdso_enabled = 1;
unsigned long um_vdso_addr;
static struct page *um_vdso;
extern unsigned long task_size;
extern char vdso_start[], vdso_end[];
static struct page **vdsop;
static int __init init_vdso(void)
{
struct page *um_vdso;
BUG_ON(vdso_end - vdso_start > PAGE_SIZE);
um_vdso_addr = task_size - PAGE_SIZE;
vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
if (!vdsop)
goto oom;
um_vdso = alloc_page(GFP_KERNEL);
if (!um_vdso) {
kfree(vdsop);
if (!um_vdso)
goto oom;
}
copy_page(page_address(um_vdso), vdso_start);
*vdsop = um_vdso;
return 0;
@ -56,6 +45,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
struct mm_struct *mm = current->mm;
static struct vm_special_mapping vdso_mapping = {
.name = "[vdso]",
.pages = &um_vdso,
};
if (!vdso_enabled)
@ -64,7 +54,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (mmap_write_lock_killable(mm))
return -EINTR;
vdso_mapping.pages = vdsop;
vma = _install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,

View File

@ -60,7 +60,7 @@ struct hostfs_stat {
unsigned int uid;
unsigned int gid;
unsigned long long size;
struct hostfs_timespec atime, mtime, ctime;
struct hostfs_timespec atime, mtime, ctime, btime;
unsigned int blksize;
unsigned long long blocks;
struct {

View File

@ -33,6 +33,7 @@ struct hostfs_inode_info {
struct inode vfs_inode;
struct mutex open_mutex;
dev_t dev;
struct hostfs_timespec btime;
};
static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
@ -547,6 +548,7 @@ static int hostfs_inode_set(struct inode *ino, void *data)
}
HOSTFS_I(ino)->dev = dev;
HOSTFS_I(ino)->btime = st->btime;
ino->i_ino = st->ino;
ino->i_mode = st->mode;
return hostfs_inode_update(ino, st);
@ -557,7 +559,10 @@ static int hostfs_inode_test(struct inode *inode, void *data)
const struct hostfs_stat *st = data;
dev_t dev = MKDEV(st->dev.maj, st->dev.min);
return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev;
return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev &&
(inode->i_mode & S_IFMT) == (st->mode & S_IFMT) &&
HOSTFS_I(inode)->btime.tv_sec == st->btime.tv_sec &&
HOSTFS_I(inode)->btime.tv_nsec == st->btime.tv_nsec;
}
static struct inode *hostfs_iget(struct super_block *sb, char *name)

View File

@ -18,39 +18,48 @@
#include "hostfs.h"
#include <utime.h>
static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
static void statx_to_hostfs(const struct statx *buf, struct hostfs_stat *p)
{
p->ino = buf->st_ino;
p->mode = buf->st_mode;
p->nlink = buf->st_nlink;
p->uid = buf->st_uid;
p->gid = buf->st_gid;
p->size = buf->st_size;
p->atime.tv_sec = buf->st_atime;
p->atime.tv_nsec = 0;
p->ctime.tv_sec = buf->st_ctime;
p->ctime.tv_nsec = 0;
p->mtime.tv_sec = buf->st_mtime;
p->mtime.tv_nsec = 0;
p->blksize = buf->st_blksize;
p->blocks = buf->st_blocks;
p->rdev.maj = os_major(buf->st_rdev);
p->rdev.min = os_minor(buf->st_rdev);
p->dev.maj = os_major(buf->st_dev);
p->dev.min = os_minor(buf->st_dev);
p->ino = buf->stx_ino;
p->mode = buf->stx_mode;
p->nlink = buf->stx_nlink;
p->uid = buf->stx_uid;
p->gid = buf->stx_gid;
p->size = buf->stx_size;
p->atime.tv_sec = buf->stx_atime.tv_sec;
p->atime.tv_nsec = buf->stx_atime.tv_nsec;
p->ctime.tv_sec = buf->stx_ctime.tv_sec;
p->ctime.tv_nsec = buf->stx_ctime.tv_nsec;
p->mtime.tv_sec = buf->stx_mtime.tv_sec;
p->mtime.tv_nsec = buf->stx_mtime.tv_nsec;
if (buf->stx_mask & STATX_BTIME) {
p->btime.tv_sec = buf->stx_btime.tv_sec;
p->btime.tv_nsec = buf->stx_btime.tv_nsec;
} else {
memset(&p->btime, 0, sizeof(p->btime));
}
p->blksize = buf->stx_blksize;
p->blocks = buf->stx_blocks;
p->rdev.maj = buf->stx_rdev_major;
p->rdev.min = buf->stx_rdev_minor;
p->dev.maj = buf->stx_dev_major;
p->dev.min = buf->stx_dev_minor;
}
int stat_file(const char *path, struct hostfs_stat *p, int fd)
{
struct stat64 buf;
struct statx buf;
int flags = AT_SYMLINK_NOFOLLOW;
if (fd >= 0) {
if (fstat64(fd, &buf) < 0)
return -errno;
} else if (lstat64(path, &buf) < 0) {
return -errno;
flags |= AT_EMPTY_PATH;
path = "";
}
stat64_to_hostfs(&buf, p);
if ((statx(fd, path, flags, STATX_BASIC_STATS | STATX_BTIME, &buf)) < 0)
return -errno;
statx_to_hostfs(&buf, p);
return 0;
}