Merge branch 'next' into for-linus

Prepare input updates for 6.15 merge window.
This commit is contained in:
Dmitry Torokhov 2025-04-04 23:04:35 -07:00
commit 946661e3be
10012 changed files with 445379 additions and 148574 deletions

View File

@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
msrv = "1.78.0"
check-private-items = true
disallowed-macros = [

1
.gitignore vendored
View File

@ -22,6 +22,7 @@
*.dtb.S
*.dtbo.S
*.dwo
*.dylib
*.elf
*.gcno
*.gcda

View File

@ -83,6 +83,13 @@ Anirudh Ghayal <quic_aghayal@quicinc.com> <aghayal@codeaurora.org>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
Antonio Quartulli <antonio@mandelbit.com> <antonio@meshcoding.com>
Antonio Quartulli <antonio@mandelbit.com> <antonio@open-mesh.com>
Antonio Quartulli <antonio@mandelbit.com> <antonio.quartulli@open-mesh.com>
Antonio Quartulli <antonio@mandelbit.com> <ordex@autistici.org>
Antonio Quartulli <antonio@mandelbit.com> <ordex@ritirata.org>
Antonio Quartulli <antonio@mandelbit.com> <antonio@openvpn.net>
Antonio Quartulli <antonio@mandelbit.com> <a@unstable.cc>
Anup Patel <anup@brainfault.org> <anup.patel@wdc.com>
Archit Taneja <archit@ti.com>
Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
@ -121,6 +128,8 @@ Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com>
Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de>
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@gmail.com>
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@redhat.com>
Bingwu Zhang <xtex@aosc.io> <xtexchooser@duck.com>
Bingwu Zhang <xtex@aosc.io> <xtex@xtexx.eu.org>
Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se>
Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@linaro.org>
Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@sonymobile.com>
@ -133,13 +142,17 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com>
Brian Avery <b.avery@hp.com>
Brian Cain <bcain@kernel.org> <brian.cain@oss.qualcomm.com>
Brian Cain <bcain@kernel.org> <bcain@quicinc.com>
Brian King <brking@us.ibm.com>
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
Carlos Bilbao <carlos.bilbao.osdev@gmail.com> <carlos.bilbao@amd.com>
Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao@amd.com>
Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao.osdev@gmail.com>
Carlos Bilbao <carlos.bilbao@kernel.org> <bilbao@vt.edu>
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
@ -156,6 +169,7 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
Christian Marangi <ansuelsmth@gmail.com>
Christophe Ricard <christophe.ricard@gmail.com>
Christopher Obbard <christopher.obbard@linaro.org> <chris.obbard@collabora.com>
Christoph Hellwig <hch@lst.de>
Chuck Lever <chuck.lever@oracle.com> <cel@kernel.org>
Chuck Lever <chuck.lever@oracle.com> <cel@netapp.com>
@ -200,6 +214,7 @@ Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
Ethan Carter Edwards <ethan@ethancedwards.com> Ethan Edwards <ethancarteredwards@gmail.com>
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com>
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com>
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
@ -211,6 +226,7 @@ Fangrui Song <i@maskray.me> <maskray@google.com>
Felipe W Damasio <felipewd@terra.com.br>
Felix Kuhling <fxkuehl@gmx.de>
Felix Moeller <felix@derklecks.de>
Feng Tang <feng.79.tang@gmail.com> <feng.tang@intel.com>
Fenglin Wu <quic_fenglinw@quicinc.com> <fenglinw@codeaurora.org>
Filipe Lautert <filipe@icewall.org>
Finn Thain <fthain@linux-m68k.org> <fthain@telegraphics.com.au>
@ -251,6 +267,7 @@ Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com>
Guru Das Srinagesh <quic_gurus@quicinc.com> <gurus@codeaurora.org>
Gustavo Padovan <gustavo@las.ic.unicamp.br>
Gustavo Padovan <padovan@profusion.mobi>
Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> <hamza.mahfooz@amd.com>
Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>
Hans Verkuil <hverkuil@xs4all.nl> <hansverk@cisco.com>
Hans Verkuil <hverkuil@xs4all.nl> <hverkuil-cisco@xs4all.nl>
@ -301,6 +318,8 @@ Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com>
Jean Tourrilhes <jt@hpl.hp.com>
Jeevan Shriram <quic_jshriram@quicinc.com> <jshriram@codeaurora.org>
Jeff Garzik <jgarzik@pretzel.yyz.us>
Jeff Johnson <jeff.johnson@oss.qualcomm.com> <jjohnson@codeaurora.org>
Jeff Johnson <jeff.johnson@oss.qualcomm.com> <quic_jjohnson@quicinc.com>
Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net>
Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
@ -360,6 +379,7 @@ Juha Yrjola <juha.yrjola@solidboot.com>
Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
Iskren Chernev <me@iskren.info> <iskren.chernev@gmail.com>
Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
Kalle Valo <kvalo@kernel.org> <quic_kvalo@quicinc.com>
Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
Karthikeyan Periyasamy <quic_periyasa@quicinc.com> <periyasa@codeaurora.org>
Kathiravan T <quic_kathirav@quicinc.com> <kathirav@codeaurora.org>
@ -405,6 +425,7 @@ Liam Mark <quic_lmark@quicinc.com> <lmark@codeaurora.org>
Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
Linus Lüssing <linus.luessing@c0d3.blue> <ll@simonwunderlich.de>
<linux-hardening@vger.kernel.org> <kernel-hardening@lists.openwall.com>
Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
@ -427,6 +448,8 @@ Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
Marek Behún <kabel@kernel.org> <marek.behun@nic.cz>
Marek Behún <kabel@kernel.org> Marek Behun <marek.behun@nic.cz>
Marek Lindner <marek.lindner@mailbox.org> <lindner_marek@yahoo.de>
Marek Lindner <marek.lindner@mailbox.org> <mareklindner@neomailbox.ch>
Mark Brown <broonie@sirena.org.uk>
Mark Starovoytov <mstarovo@pm.me> <mstarovoitov@marvell.com>
Markus Schneider-Pargmann <msp@baylibre.com> <mpa@pengutronix.de>
@ -435,7 +458,7 @@ Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
Mathieu Othacehe <m.othacehe@gmail.com> <othacehe@gnu.org>
Mathieu Othacehe <othacehe@gnu.org> <m.othacehe@gmail.com>
Mat Martineau <martineau@kernel.org> <mathew.j.martineau@linux.intel.com>
Mat Martineau <martineau@kernel.org> <mathewm@codeaurora.org>
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
@ -511,6 +534,7 @@ Nicholas Piggin <npiggin@gmail.com> <npiggin@kernel.dk>
Nicholas Piggin <npiggin@gmail.com> <npiggin@suse.de>
Nicholas Piggin <npiggin@gmail.com> <nickpiggin@yahoo.com.au>
Nicholas Piggin <npiggin@gmail.com> <piggin@cyberone.com.au>
Nick Desaulniers <nick.desaulniers+lkml@gmail.com> <ndesaulniers@google.com>
Nicolas Ferre <nicolas.ferre@microchip.com> <nicolas.ferre@atmel.com>
Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org>
Nicolas Pitre <nico@fluxnic.net> <nico@linaro.org>
@ -529,6 +553,8 @@ Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
Oleksij Rempel <o.rempel@pengutronix.de>
Oleksij Rempel <o.rempel@pengutronix.de> <ore@pengutronix.de>
Oliver Hartkopp <socketcan@hartkopp.net> <oliver.hartkopp@volkswagen.de>
Oliver Hartkopp <socketcan@hartkopp.net> <oliver@hartkopp.net>
Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
Ondřej Jirman <megi@xff.cz> <megous@megous.com>
Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org>
@ -640,6 +666,11 @@ Simona Vetter <simona.vetter@ffwll.ch> <daniel@biene.ffwll.ch>
Simon Horman <horms@kernel.org> <simon.horman@corigine.com>
Simon Horman <horms@kernel.org> <simon.horman@netronome.com>
Simon Kelley <simon@thekelleys.org.uk>
Simon Wunderlich <sw@simonwunderlich.de> <simon.wunderlich@open-mesh.com>
Simon Wunderlich <sw@simonwunderlich.de> <simon.wunderlich@s2003.tu-chemnitz.de>
Simon Wunderlich <sw@simonwunderlich.de> <simon.wunderlich@saxnet.de>
Simon Wunderlich <sw@simonwunderlich.de> <simon@open-mesh.com>
Simon Wunderlich <sw@simonwunderlich.de> <siwu@hrz.tu-chemnitz.de>
Sricharan Ramabadhran <quic_srichara@quicinc.com> <sricharan@codeaurora.org>
Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
@ -660,6 +691,11 @@ Sudarshan Rajagopalan <quic_sudaraja@quicinc.com> <sudaraja@codeaurora.org>
Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Sumit Semwal <sumit.semwal@ti.com>
Surabhi Vishnoi <quic_svishnoi@quicinc.com> <svishnoi@codeaurora.org>
Sven Eckelmann <sven@narfation.org> <seckelmann@datto.com>
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@gmx.de>
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@open-mesh.com>
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@openmesh.com>
Sven Eckelmann <sven@narfation.org> <sven@open-mesh.com>
Takashi YOSHII <takashi.yoshii.zj@renesas.com>
Tamizh Chelvam Raja <quic_tamizhr@quicinc.com> <tamizhr@codeaurora.org>
Taniya Das <quic_tdas@quicinc.com> <tdas@codeaurora.org>
@ -735,6 +771,8 @@ Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com>
Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com>
Yusuke Goda <goda.yusuke@renesas.com>
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>

20
CREDITS
View File

@ -20,6 +20,10 @@ N: Thomas Abraham
E: thomas.ab@samsung.com
D: Samsung pin controller driver
N: Jose Abreu
E: jose.abreu@synopsys.com
D: Synopsys DesignWare XPCS MDIO/PCS driver.
N: Dragos Acostachioaie
E: dragos@iname.com
W: http://www.arbornet.org/~dragos
@ -1428,6 +1432,10 @@ S: 8124 Constitution Apt. 7
S: Sterling Heights, Michigan 48313
S: USA
N: Andy Gospodarek
E: andy@greyhouse.net
D: Maintenance and contributions to the network interface bonding driver.
N: Wolfgang Grandegger
E: wg@grandegger.com
D: Controller Area Network (device drivers)
@ -1812,6 +1820,10 @@ D: Author/maintainer of most DRM drivers (especially ATI, MGA)
D: Core DRM templates, general DRM and 3D-related hacking
S: No fixed address
N: Woojung Huh
E: woojung.huh@microchip.com
D: Microchip LAN78XX USB Ethernet driver
N: Kenn Humborg
E: kenn@wombat.ie
D: Mods to loop device to support sparse backing files
@ -2503,11 +2515,9 @@ D: SLS distribution
D: Initial implementation of VC's, pty's and select()
N: Pavel Machek
E: pavel@ucw.cz
E: pavel@kernel.org
P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96
D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd,
D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB,
D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk,
D: Altera SoCFPGA and Nokia N900 support.
S: Czech Republic
@ -4327,7 +4337,7 @@ D: Freescale Highspeed USB device driver
D: Freescale QE SoC support and Ethernet driver
S: B-1206 Jingmao Guojigongyu
S: 16 Baliqiao Nanjie, Beijing 101100
S: People's Repulic of China
S: People's Republic of China
N: Vlad Yasevich
E: vyasevich@gmail.com

View File

@ -1,3 +1,6 @@
The cxl driver is no longer maintained, and will be removed from the kernel in
the near future.
Please note that attributes that are shared between devices are stored in
the directory pointed to by the symlink device/.
For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is

View File

@ -0,0 +1,9 @@
What: /sys/class/bluetooth/hci<index>/reset
Date: 14-Jan-2025
KernelVersion: 6.13
Contact: linux-bluetooth@vger.kernel.org
Description: This write-only attribute allows users to trigger the vendor reset
method on the Bluetooth device when arbitrary data is written.
The reset may or may not be done through the device transport
(e.g., UART/USB), and can also be done through an out-of-band
approach such as GPIO.

View File

@ -0,0 +1,15 @@
What: /sys/bus/coresight/devices/dummy_source<N>/enable_source
Date: Dec 2024
KernelVersion: 6.14
Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
Description: (RW) Enable/disable tracing of dummy source. A sink should be activated
before enabling the source. The path of coresight components linking
the source to the sink is configured and managed automatically by the
coresight framework.
What: /sys/bus/coresight/devices/dummy_source<N>/traceid
Date: Dec 2024
KernelVersion: 6.14
Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
Description: (R) Show the trace ID that will appear in the trace stream
coming from this trace entity.

View File

@ -0,0 +1,24 @@
What: /sys/bus/event_source/devices/<pmu>
Date: 2014/02/24
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Performance Monitoring Unit (<pmu>)
Each <pmu> directory, for a PMU device, is a name
optionally followed by an underscore and then either a
decimal or hexadecimal number. For example, cpu is a
PMU name without a suffix as is intel_bts,
uncore_imc_0 is a PMU name with a 0 numeric suffix,
ddr_pmu_87e1b0000000 is a PMU name with a hex
suffix. The hex suffix must be more than two
characters long to avoid ambiguity with PMUs like the
S390 cpum_cf.
Tools can treat PMUs with the same name that differ by
suffix as instances of the same PMU for the sake of,
for example, opening an event. For example, the PMUs
uncore_imc_free_running_0 and
uncore_imc_free_running_1 have an event data_read;
opening the data_read event on a PMU specified as
uncore_imc_free_running should be treated as opening
the data_read event on PMU uncore_imc_free_running_0
and PMU uncore_imc_free_running_1.

View File

@ -37,11 +37,13 @@ Description: Per-pmu performance monitoring events specific to the running syste
performance monitoring event supported by the <pmu>. The name
of the file is the name of the event.
As performance monitoring event names are case
insensitive in the perf tool, the perf tool only looks
for lower or upper case event names in sysfs to avoid
As performance monitoring event names are case insensitive
in the perf tool, the perf tool only looks for all lower
case or all upper case event names in sysfs to avoid
scanning the directory. It is therefore required the
name of the event here is either lower or upper case.
name of the event here is either completely lower or upper
case, with no mixed-case characters. Numbers, '.', '_', and
'-' are also allowed.
File contents:

View File

@ -168,18 +168,6 @@ Description:
is required is a consistent labeling. Units after application
of scale and offset are millivolts.
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
KernelVersion: 3.17
Contact: linux-iio@vger.kernel.org
Description:
Raw (unscaled no bias removal etc.) current measurement from
channel Y. In special cases where the channel does not
correspond to externally available input one of the named
versions may be used. The number must always be specified and
unique to allow association with event codes. Units after
application of scale and offset are milliamps.
What: /sys/bus/iio/devices/iio:deviceX/in_powerY_raw
KernelVersion: 4.5
Contact: linux-iio@vger.kernel.org
@ -227,7 +215,7 @@ Description:
same scaling as _raw.
What: /sys/bus/iio/devices/iio:deviceX/in_temp_raw
What: /sys/bus/iio/devices/iio:deviceX/in_tempX_raw
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_x_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_y_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_ambient_raw
@ -416,11 +404,11 @@ Contact: linux-iio@vger.kernel.org
Description:
Scaled humidity measurement in milli percent.
What: /sys/bus/iio/devices/iio:deviceX/in_X_mean_raw
What: /sys/bus/iio/devices/iio:deviceX/in_Y_mean_raw
KernelVersion: 3.5
Contact: linux-iio@vger.kernel.org
Description:
Averaged raw measurement from channel X. The number of values
Averaged raw measurement from channel Y. The number of values
used for averaging is device specific. The converting rules for
normal raw values also applies to the averaged raw values.
@ -448,7 +436,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_offset
What: /sys/bus/iio/devices/iio:deviceX/in_magn_offset
What: /sys/bus/iio/devices/iio:deviceX/in_rot_offset
What: /sys/bus/iio/devices/iio:deviceX/in_angl_offset
What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceX_offset
What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceY_offset
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@ -508,6 +496,9 @@ What: /sys/bus/iio/devices/iio:deviceX/in_angl_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_x_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_y_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_z_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_scale
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_scale
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
@ -660,10 +651,10 @@ What: /sys/.../iio:deviceX/in_magn_scale_available
What: /sys/.../iio:deviceX/in_illuminance_scale_available
What: /sys/.../iio:deviceX/in_intensity_scale_available
What: /sys/.../iio:deviceX/in_proximity_scale_available
What: /sys/.../iio:deviceX/in_voltageX_scale_available
What: /sys/.../iio:deviceX/in_voltageY_scale_available
What: /sys/.../iio:deviceX/in_voltage-voltage_scale_available
What: /sys/.../iio:deviceX/out_voltageX_scale_available
What: /sys/.../iio:deviceX/out_altvoltageX_scale_available
What: /sys/.../iio:deviceX/out_voltageY_scale_available
What: /sys/.../iio:deviceX/out_altvoltageY_scale_available
What: /sys/.../iio:deviceX/in_capacitance_scale_available
What: /sys/.../iio:deviceX/in_pressure_scale_available
What: /sys/.../iio:deviceX/in_pressureY_scale_available
@ -681,6 +672,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_clear_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_hardwaregain
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@ -1562,7 +1554,7 @@ Description:
This attribute is used to read the amount of quadrature error
present in the device at a given time.
What: /sys/.../iio:deviceX/in_accelX_power_mode
What: /sys/.../iio:deviceX/in_accelY_power_mode
KernelVersion: 3.11
Contact: linux-iio@vger.kernel.org
Description:
@ -1633,6 +1625,10 @@ What: /sys/.../iio:deviceX/in_intensityY_uv_raw
What: /sys/.../iio:deviceX/in_intensityY_uva_raw
What: /sys/.../iio:deviceX/in_intensityY_uvb_raw
What: /sys/.../iio:deviceX/in_intensityY_duv_raw
What: /sys/.../iio:deviceX/in_intensity_red_raw
What: /sys/.../iio:deviceX/in_intensity_green_raw
What: /sys/.../iio:deviceX/in_intensity_blue_raw
What: /sys/.../iio:deviceX/in_intensity_clear_raw
KernelVersion: 3.4
Contact: linux-iio@vger.kernel.org
Description:
@ -1691,16 +1687,19 @@ Description:
Raw value of rotation from true/magnetic north measured with
or without compensation from tilt sensors.
What: /sys/bus/iio/devices/iio:deviceX/in_currentX_raw
What: /sys/bus/iio/devices/iio:deviceX/in_currentX_i_raw
What: /sys/bus/iio/devices/iio:deviceX/in_currentX_q_raw
KernelVersion: 3.18
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_raw
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_raw
KernelVersion: 3.17
Contact: linux-iio@vger.kernel.org
Description:
Raw current measurement from channel X. Units are in milliamps
Raw current measurement from channel Y. Units are in milliamps
after application of scale and offset. If no offset or scale is
present, output should be considered as processed with the
unit in milliamps.
unit in milliamps. In special cases where the channel does not
correspond to externally available input one of the named
versions may be used.
Channels with 'i' and 'q' modifiers always exist in pairs and both
channels refer to the same signal. The 'i' channel contains the in-phase
@ -1864,9 +1863,9 @@ Description:
hardware fifo watermark level.
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibemissivity
What: /sys/bus/iio/devices/iio:deviceX/in_tempX_calibemissivity
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_calibemissivity
What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_calibemissivity
What: /sys/bus/iio/devices/iio:deviceX/in_tempX_object_calibemissivity
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_object_calibemissivity
KernelVersion: 4.1
Contact: linux-iio@vger.kernel.org
Description:
@ -1887,17 +1886,17 @@ Description:
is considered as one sample for <type>[_name]_sampling_frequency.
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_co2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_co2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_ethanol_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_ethanol_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_ethanol_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_h2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_h2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_h2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_o2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_o2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_o2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_voc_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_voc_raw
KernelVersion: 4.3
Contact: linux-iio@vger.kernel.org
Description:
@ -1905,9 +1904,9 @@ Description:
after application of scale and offset are percents.
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_raw
What: /sys/bus/iio/devices/iio:deviceX/in_resistanceX_raw
What: /sys/bus/iio/devices/iio:deviceX/in_resistanceY_raw
What: /sys/bus/iio/devices/iio:deviceX/out_resistance_raw
What: /sys/bus/iio/devices/iio:deviceX/out_resistanceX_raw
What: /sys/bus/iio/devices/iio:deviceX/out_resistanceY_raw
KernelVersion: 4.3
Contact: linux-iio@vger.kernel.org
Description:
@ -2096,7 +2095,7 @@ Description:
One of the following thermocouple types: B, E, J, K, N, R, S, T.
What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_calibambient
What: /sys/bus/iio/devices/iio:deviceX/in_tempX_object_calibambient
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_object_calibambient
KernelVersion: 5.10
Contact: linux-iio@vger.kernel.org
Description:
@ -2172,9 +2171,9 @@ Description:
- a range specified as "[min step max]"
What: /sys/bus/iio/devices/iio:deviceX/in_voltageX_sampling_frequency
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sampling_frequency
What: /sys/bus/iio/devices/iio:deviceX/in_powerY_sampling_frequency
What: /sys/bus/iio/devices/iio:deviceX/in_currentZ_sampling_frequency
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_sampling_frequency
KernelVersion: 5.20
Contact: linux-iio@vger.kernel.org
Description:

View File

@ -0,0 +1,23 @@
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration
KernelVersion: 5.5
Contact: linux-iio@vger.kernel.org
Description:
This attribute, if available, initiates the system calibration procedure. This is done on a
single channel at a time. Write '1' to start the calibration.
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration_mode_available
KernelVersion: 5.5
Contact: linux-iio@vger.kernel.org
Description:
This attribute, if available, returns a list with the possible calibration modes.
There are two available options:
"zero_scale" - calibrate to zero scale
"full_scale" - calibrate to full scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration_mode
KernelVersion: 5.5
Contact: linux-iio@vger.kernel.org
Description:
This attribute, if available, sets up the calibration mode used in the system calibration
procedure. Reading returns the current calibration mode.
Writing sets the system calibration mode.

View File

@ -19,33 +19,9 @@ Description:
the bridge can be disconnected (when it is not being used
using the bridge_switch_en attribute.
What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration
KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Initiates the system calibration procedure. This is done on a
single channel at a time. Write '1' to start the calibration.
What: /sys/bus/iio/devices/iio:deviceX/in_voltage2-voltage2_shorted_raw
KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Measure voltage from AIN2 pin connected to AIN(+)
and AIN(-) shorted.
What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode_available
KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Reading returns a list with the possible calibration modes.
There are two available options:
"zero_scale" - calibrate to zero scale
"full_scale" - calibrate to full scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode
KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Sets up the calibration mode used in the system calibration
procedure. Reading returns the current calibration mode.
Writing sets the system calibration mode.

View File

@ -0,0 +1,48 @@
What: /sys/class/platform-profile/platform-profile-X/name
Date: March 2025
KernelVersion: 6.14
Description: Name of the class device given by the driver.
RO
What: /sys/class/platform-profile/platform-profile-X/choices
Date: March 2025
KernelVersion: 6.14
Description: This file contains a space-separated list of profiles supported
for this device.
Drivers must use the following standard profile-names:
==================== ========================================
low-power Low power consumption
cool Cooler operation
quiet Quieter operation
balanced Balance between low power consumption
and performance
balanced-performance Balance between performance and low
power consumption with a slight bias
towards performance
performance High performance operation
custom Driver defined custom profile
==================== ========================================
RO
What: /sys/class/platform-profile/platform-profile-X/profile
Date: March 2025
KernelVersion: 6.14
Description: Reading this file gives the current selected profile for this
device. Writing this file with one of the strings from
platform_profile_choices changes the profile to the new value.
This file can be monitored for changes by polling for POLLPRI,
POLLPRI will be signaled on any changes, independent of those
changes coming from a userspace write; or coming from another
source such as e.g. a hotkey triggered profile change handled
either directly by the embedded-controller or fully handled
inside the kernel.
This file may also emit the string 'custom' to indicate
that the driver is using a driver defined custom profile.
RW

View File

@ -407,10 +407,30 @@ Description:
Access: Read, Write
Reading this returns the current active value, e.g. 'Standard'.
Check charge_types to get the values supported by the battery.
Valid values:
"Unknown", "N/A", "Trickle", "Fast", "Standard",
"Adaptive", "Custom", "Long Life", "Bypass"
What: /sys/class/power_supply/<supply_name>/charge_types
Date: December 2024
Contact: linux-pm@vger.kernel.org
Description:
Identical to charge_type but reading returns a list of supported
charge-types with the currently active type surrounded by square
brackets, e.g.: "Fast [Standard] Long_Life".
power_supply class devices may support both charge_type and
charge_types for backward compatibility. In this case both will
always have the same active value and the active value can be
changed by writing either property.
Note charge-types which contain a space such as "Long Life" will
have the space replaced by a '_' resulting in e.g. "Long_Life".
When writing charge-types both variants are accepted.
What: /sys/class/power_supply/<supply_name>/charge_term_current
Date: July 2014
Contact: linux-pm@vger.kernel.org
@ -433,7 +453,7 @@ Description:
Valid values:
"Unknown", "Good", "Overheat", "Dead",
"Over voltage", "Unspecified failure", "Cold",
"Over voltage", "Under voltage", "Unspecified failure", "Cold",
"Watchdog timer expire", "Safety timer expire",
"Over current", "Calibration required", "Warm",
"Cool", "Hot", "No battery"
@ -793,3 +813,12 @@ Description:
Access: Read
Valid values: 1-31
What: /sys/class/power_supply/<supply_name>/extensions/<extension_name>
Date: March 2025
Contact: linux-pm@vger.kernel.org
Description:
Reports the extensions registered to the power supply.
Each entry is a link to the device which registered the extension.
Access: Read

View File

@ -0,0 +1,32 @@
What: /sys/class/power_supply/max1720x/temp_ain1
Date: January 2025
KernelVersion: 6.14
Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
Description:
Reports the current temperature reading from AIN1 thermistor.
Access: Read
Valid values: Represented in 1/10 Degrees Celsius
What: /sys/class/power_supply/max1720x/temp_ain2
Date: January 2025
KernelVersion: 6.14
Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
Description:
Reports the current temperature reading from AIN2 thermistor.
Access: Read
Valid values: Represented in 1/10 Degrees Celsius
What: /sys/class/power_supply/max1720x/temp_int
Date: January 2025
KernelVersion: 6.14
Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
Description:
Reports the current temperature reading from internal die.
Access: Read
Valid values: Represented in 1/10 Degrees Celsius

View File

@ -55,6 +55,15 @@ Description:
An attribute which indicates whether the patch supports
atomic-replace.
What: /sys/kernel/livepatch/<patch>/stack_order
Date: Jan 2025
KernelVersion: 6.14.0
Description:
This attribute specifies the sequence in which live patch modules
are applied to the system. If multiple live patches modify the same
function, the implementation with the biggest 'stack_order' number
is used, unless a transition is currently in progress.
What: /sys/kernel/livepatch/<patch>/<object>
Date: Nov 2014
KernelVersion: 3.19.0

View File

@ -355,10 +355,15 @@ Description: If 'target' is written to the 'type' file, writing to or
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/matching
Date: Dec 2022
Contact: SeongJae Park <sj@kernel.org>
Description: Writing 'Y' or 'N' to this file sets whether to filter out
pages that do or do not match to the 'type' and 'memcg_path',
respectively. Filter out means the action of the scheme will
not be applied to.
Description: Writing 'Y' or 'N' to this file sets whether the filter is for
the memory of the 'type', or all except the 'type'.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/allow
Date: Jan 2025
Contact: SeongJae Park <sj@kernel.org>
Description: Writing 'Y' or 'N' to this file sets whether to allow or reject
applying the scheme's action to the memory that satisfies the
'type' and the 'matching' of the directory.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
Date: Mar 2022
@ -384,6 +389,12 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Reading this file returns the total size of regions that the
action of the scheme has successfully applied in bytes.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_ops_filter_passed
Date: Dec 2024
Contact: SeongJae Park <sj@kernel.org>
Description: Reading this file returns the total size of memory that passed
DAMON operations layer-handled filters of the scheme in bytes.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
Date: Mar 2022
Contact: SeongJae Park <sj@kernel.org>
@ -424,3 +435,10 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Reading this file returns the 'age' of a memory region that
corresponding DAMON-based Operation Scheme's action has tried
to be applied.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/sz_filter_passed
Date: Dec 2024
Contact: SeongJae Park <sj@kernel.org>
Description: Reading this file returns the size of the memory in the region
that passed DAMON operations layer-handled filters of the
scheme in bytes.

View File

@ -0,0 +1,64 @@
HID Driver Description
MLNXBFD0 mlxbf-pmc Performance counters (BlueField-1)
MLNXBFD1 mlxbf-pmc Performance counters (BlueField-2)
MLNXBFD2 mlxbf-pmc Performance counters (BlueField-3)
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/event_list
Date: Dec 2020
KernelVersion: 5.10
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
Description:
List of events supported by the counters in the specific block.
It is used to extract the event number or ID associated with
each event.
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/event<N>
Date: Dec 2020
KernelVersion: 5.10
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
Description:
Event monitored by corresponding counter. This is used to
program or read back the event that should be or is currently
being monitored by counter<N>.
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/counter<N>
Date: Dec 2020
KernelVersion: 5.10
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
Description:
Counter value of the event being monitored. This is used to
read the counter value of the event which was programmed using
event<N>. This is also used to clear or reset the counter value
by writing 0 to the counter sysfs.
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/enable
Date: Dec 2020
KernelVersion: 5.10
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
Description:
Start or stop counters. This is used to start the counters
for monitoring the programmed events and also to stop the
counters after the desired duration. Writing value 1 will
start all the counters in the block, and writing 0 will
stop all the counters together.
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/<reg>
Date: Dec 2020
KernelVersion: 5.10
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
Description:
Value of register. This is used to read or reset the registers
where various performance statistics are counted for each block.
Writing 0 to the sysfs will clear the counter, writing any other
value is not allowed.
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/count_clock
Date: Mar 2025
KernelVersion: 6.14
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
Description:
Use a counter for counting cycles. This is used to repurpose/dedicate
any of the counters in the block to counting cycles. Each counter is
represented by a bit (bit 0 for counter0, bit1 for counter1 and so on)
and setting the corresponding bit will reserve that specific counter
for counting cycles and override the event<N> setting.

View File

@ -33,3 +33,8 @@ Description: Reading this file gives the current selected profile for this
source such as e.g. a hotkey triggered profile change handled
either directly by the embedded-controller or fully handled
inside the kernel.
This file may also emit the string 'custom' to indicate
that multiple platform profiles drivers are in use but
have different values. This string can not be written to
this interface and is solely for informational purposes.

View File

@ -0,0 +1,43 @@
What: /sys/class/pps-gen/
Date: February 2025
KernelVersion: 6.13
Contact: Rodolfo Giometti <giometti@enneenne.com>
Description:
The /sys/class/pps-gen/ directory contains files and
directories that provide a unified interface to the PPS
generators.
What: /sys/class/pps-gen/pps-genX/
Date: February 2025
KernelVersion: 6.13
Contact: Rodolfo Giometti <giometti@enneenne.com>
Description:
The /sys/class/pps-gen/pps-genX/ directory is related to X-th
PPS generator in the system. Each directory contain files to
manage and control its PPS generator.
What: /sys/class/pps-gen/pps-genX/enable
Date: February 2025
KernelVersion: 6.13
Contact: Rodolfo Giometti <giometti@enneenne.com>
Description:
This write-only file enables or disables generation of the
PPS signal.
What: /sys/class/pps-gen/pps-genX/system
Date: February 2025
KernelVersion: 6.13
Contact: Rodolfo Giometti <giometti@enneenne.com>
Description:
This read-only file returns "1" if the generator takes the
timing from the system clock, while it returns "0" if not
(i.e. from a peripheral device clock).
What: /sys/class/pps-gen/pps-genX/time
Date: February 2025
KernelVersion: 6.13
Contact: Rodolfo Giometti <giometti@enneenne.com>
Description:
This read-only file contains the current time stored into the
generator clock as two integers representing the current time
seconds and nanoseconds.

View File

@ -104,7 +104,7 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst
YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec
YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs
YNL_TOOL:=$(srctree)/tools/net/ynl/ynl-gen-rst.py
YNL_TOOL:=$(srctree)/tools/net/ynl/pyynl/ynl_gen_rst.py
YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml))
YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP))

View File

@ -15,6 +15,7 @@ PCI Endpoint Framework
pci-ntb-howto
pci-vntb-function
pci-vntb-howto
pci-nvme-function
function/binding/pci-test
function/binding/pci-ntb

View File

@ -0,0 +1,13 @@
.. SPDX-License-Identifier: GPL-2.0
=================
PCI NVMe Function
=================
:Author: Damien Le Moal <dlemoal@kernel.org>
The PCI NVMe endpoint function implements a PCI NVMe controller using the NVMe
subsystem target core code. The driver for this function resides with the NVMe
subsystem as drivers/nvme/target/nvmet-pciep.c.
See Documentation/nvme/nvme-pci-endpoint-target.rst for more details.

View File

@ -81,8 +81,8 @@ device, the following commands can be used::
# echo 0x104c > functions/pci_epf_test/func1/vendorid
# echo 0xb500 > functions/pci_epf_test/func1/deviceid
# echo 16 > functions/pci_epf_test/func1/msi_interrupts
# echo 8 > functions/pci_epf_test/func1/msix_interrupts
# echo 32 > functions/pci_epf_test/func1/msi_interrupts
# echo 2048 > functions/pci_epf_test/func1/msix_interrupts
Binding pci-epf-test Device to EP Controller
@ -123,113 +123,83 @@ above::
Using Endpoint Test function Device
-----------------------------------
pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
tests. To compile this tool the following commands should be used::
Kselftest added in tools/testing/selftests/pci_endpoint can be used to run all
the default PCI endpoint tests. To build the Kselftest for PCI endpoint
subsystem, the following commands should be used::
# cd <kernel-dir>
# make -C tools/pci
# make -C tools/testing/selftests/pci_endpoint
or if you desire to compile and install in your system::
# cd <kernel-dir>
# make -C tools/pci install
# make -C tools/testing/selftests/pci_endpoint INSTALL_PATH=/usr/bin install
The tool and script will be located in <rootfs>/usr/bin/
The test will be located in <rootfs>/usr/bin/
pcitest.sh Output
~~~~~~~~~~~~~~~~~
Kselftest Output
~~~~~~~~~~~~~~~~
::
# pcitest.sh
BAR tests
# pci_endpoint_test
TAP version 13
1..16
# Starting 16 tests from 9 test cases.
# RUN pci_ep_bar.BAR0.BAR_TEST ...
# OK pci_ep_bar.BAR0.BAR_TEST
ok 1 pci_ep_bar.BAR0.BAR_TEST
# RUN pci_ep_bar.BAR1.BAR_TEST ...
# OK pci_ep_bar.BAR1.BAR_TEST
ok 2 pci_ep_bar.BAR1.BAR_TEST
# RUN pci_ep_bar.BAR2.BAR_TEST ...
# OK pci_ep_bar.BAR2.BAR_TEST
ok 3 pci_ep_bar.BAR2.BAR_TEST
# RUN pci_ep_bar.BAR3.BAR_TEST ...
# OK pci_ep_bar.BAR3.BAR_TEST
ok 4 pci_ep_bar.BAR3.BAR_TEST
# RUN pci_ep_bar.BAR4.BAR_TEST ...
# OK pci_ep_bar.BAR4.BAR_TEST
ok 5 pci_ep_bar.BAR4.BAR_TEST
# RUN pci_ep_bar.BAR5.BAR_TEST ...
# OK pci_ep_bar.BAR5.BAR_TEST
ok 6 pci_ep_bar.BAR5.BAR_TEST
# RUN pci_ep_basic.CONSECUTIVE_BAR_TEST ...
# OK pci_ep_basic.CONSECUTIVE_BAR_TEST
ok 7 pci_ep_basic.CONSECUTIVE_BAR_TEST
# RUN pci_ep_basic.LEGACY_IRQ_TEST ...
# OK pci_ep_basic.LEGACY_IRQ_TEST
ok 8 pci_ep_basic.LEGACY_IRQ_TEST
# RUN pci_ep_basic.MSI_TEST ...
# OK pci_ep_basic.MSI_TEST
ok 9 pci_ep_basic.MSI_TEST
# RUN pci_ep_basic.MSIX_TEST ...
# OK pci_ep_basic.MSIX_TEST
ok 10 pci_ep_basic.MSIX_TEST
# RUN pci_ep_data_transfer.memcpy.READ_TEST ...
# OK pci_ep_data_transfer.memcpy.READ_TEST
ok 11 pci_ep_data_transfer.memcpy.READ_TEST
# RUN pci_ep_data_transfer.memcpy.WRITE_TEST ...
# OK pci_ep_data_transfer.memcpy.WRITE_TEST
ok 12 pci_ep_data_transfer.memcpy.WRITE_TEST
# RUN pci_ep_data_transfer.memcpy.COPY_TEST ...
# OK pci_ep_data_transfer.memcpy.COPY_TEST
ok 13 pci_ep_data_transfer.memcpy.COPY_TEST
# RUN pci_ep_data_transfer.dma.READ_TEST ...
# OK pci_ep_data_transfer.dma.READ_TEST
ok 14 pci_ep_data_transfer.dma.READ_TEST
# RUN pci_ep_data_transfer.dma.WRITE_TEST ...
# OK pci_ep_data_transfer.dma.WRITE_TEST
ok 15 pci_ep_data_transfer.dma.WRITE_TEST
# RUN pci_ep_data_transfer.dma.COPY_TEST ...
# OK pci_ep_data_transfer.dma.COPY_TEST
ok 16 pci_ep_data_transfer.dma.COPY_TEST
# PASSED: 16 / 16 tests passed.
# Totals: pass:16 fail:0 xfail:0 xpass:0 skip:0 error:0
BAR0: OKAY
BAR1: OKAY
BAR2: OKAY
BAR3: OKAY
BAR4: NOT OKAY
BAR5: NOT OKAY
Interrupt tests
Testcase 16 (pci_ep_data_transfer.dma.COPY_TEST) will fail for most of the DMA
capable endpoint controllers due to the absence of the MEMCPY over DMA. For such
controllers, it is advisable to skip this testcase using this
command::
SET IRQ TYPE TO LEGACY: OKAY
LEGACY IRQ: NOT OKAY
SET IRQ TYPE TO MSI: OKAY
MSI1: OKAY
MSI2: OKAY
MSI3: OKAY
MSI4: OKAY
MSI5: OKAY
MSI6: OKAY
MSI7: OKAY
MSI8: OKAY
MSI9: OKAY
MSI10: OKAY
MSI11: OKAY
MSI12: OKAY
MSI13: OKAY
MSI14: OKAY
MSI15: OKAY
MSI16: OKAY
MSI17: NOT OKAY
MSI18: NOT OKAY
MSI19: NOT OKAY
MSI20: NOT OKAY
MSI21: NOT OKAY
MSI22: NOT OKAY
MSI23: NOT OKAY
MSI24: NOT OKAY
MSI25: NOT OKAY
MSI26: NOT OKAY
MSI27: NOT OKAY
MSI28: NOT OKAY
MSI29: NOT OKAY
MSI30: NOT OKAY
MSI31: NOT OKAY
MSI32: NOT OKAY
SET IRQ TYPE TO MSI-X: OKAY
MSI-X1: OKAY
MSI-X2: OKAY
MSI-X3: OKAY
MSI-X4: OKAY
MSI-X5: OKAY
MSI-X6: OKAY
MSI-X7: OKAY
MSI-X8: OKAY
MSI-X9: NOT OKAY
MSI-X10: NOT OKAY
MSI-X11: NOT OKAY
MSI-X12: NOT OKAY
MSI-X13: NOT OKAY
MSI-X14: NOT OKAY
MSI-X15: NOT OKAY
MSI-X16: NOT OKAY
[...]
MSI-X2047: NOT OKAY
MSI-X2048: NOT OKAY
Read Tests
SET IRQ TYPE TO MSI: OKAY
READ ( 1 bytes): OKAY
READ ( 1024 bytes): OKAY
READ ( 1025 bytes): OKAY
READ (1024000 bytes): OKAY
READ (1024001 bytes): OKAY
Write Tests
WRITE ( 1 bytes): OKAY
WRITE ( 1024 bytes): OKAY
WRITE ( 1025 bytes): OKAY
WRITE (1024000 bytes): OKAY
WRITE (1024001 bytes): OKAY
Copy Tests
COPY ( 1 bytes): OKAY
COPY ( 1024 bytes): OKAY
COPY ( 1025 bytes): OKAY
COPY (1024000 bytes): OKAY
COPY (1024001 bytes): OKAY
# pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma

View File

@ -0,0 +1,281 @@
.. SPDX-License-Identifier: GPL-2.0-only
.. include:: <isonum.txt>
=========
AMD NPU
=========
:Copyright: |copy| 2024 Advanced Micro Devices, Inc.
:Author: Sonal Santan <sonal.santan@amd.com>
Overview
========
AMD NPU (Neural Processing Unit) is a multi-user AI inference accelerator
integrated into AMD client APU. NPU enables efficient execution of Machine
Learning applications like CNN, LLM, etc. NPU is based on
`AMD XDNA Architecture`_. NPU is managed by **amdxdna** driver.
Hardware Description
====================
AMD NPU consists of the following hardware components:
AMD XDNA Array
--------------
AMD XDNA Array comprises of 2D array of compute and memory tiles built with
`AMD AI Engine Technology`_. Each column has 4 rows of compute tiles and 1
row of memory tile. Each compute tile contains a VLIW processor with its own
dedicated program and data memory. The memory tile acts as L2 memory. The 2D
array can be partitioned at a column boundary creating a spatially isolated
partition which can be bound to a workload context.
Each column also has dedicated DMA engines to move data between host DDR and
memory tile.
AMD Phoenix and AMD Hawk Point client NPU have a 4x5 topology, i.e., 4 rows of
compute tiles arranged into 5 columns. AMD Strix Point client APU have 4x8
topology, i.e., 4 rows of compute tiles arranged into 8 columns.
Shared L2 Memory
----------------
The single row of memory tiles create a pool of software managed on chip L2
memory. DMA engines are used to move data between host DDR and memory tiles.
AMD Phoenix and AMD Hawk Point NPUs have a total of 2560 KB of L2 memory.
AMD Strix Point NPU has a total of 4096 KB of L2 memory.
Microcontroller
---------------
A microcontroller runs NPU Firmware which is responsible for command processing,
XDNA Array partition setup, XDNA Array configuration, workload context
management and workload orchestration.
NPU Firmware uses a dedicated instance of an isolated non-privileged context
called ERT to service each workload context. ERT is also used to execute user
provided ``ctrlcode`` associated with the workload context.
NPU Firmware uses a single isolated privileged context called MERT to service
management commands from the amdxdna driver.
Mailboxes
---------
The microcontroller and amdxdna driver use a privileged channel for management
tasks like setting up of contexts, telemetry, query, error handling, setting up
user channel, etc. As mentioned before, privileged channel requests are
serviced by MERT. The privileged channel is bound to a single mailbox.
The microcontroller and amdxdna driver use a dedicated user channel per
workload context. The user channel is primarily used for submitting work to
the NPU. As mentioned before, a user channel requests are serviced by an
instance of ERT. Each user channel is bound to its own dedicated mailbox.
PCIe EP
-------
NPU is visible to the x86 host CPU as a PCIe device with multiple BARs and some
MSI-X interrupt vectors. NPU uses a dedicated high bandwidth SoC level fabric
for reading or writing into host memory. Each instance of ERT gets its own
dedicated MSI-X interrupt. MERT gets a single instance of MSI-X interrupt.
The number of PCIe BARs varies depending on the specific device. Based on their
functions, PCIe BARs can generally be categorized into the following types.
* PSP BAR: Expose the AMD PSP (Platform Security Processor) function
* SMU BAR: Expose the AMD SMU (System Management Unit) function
* SRAM BAR: Expose ring buffers for the mailbox
* Mailbox BAR: Expose the mailbox control registers (head, tail and ISR
registers etc.)
* Public Register BAR: Expose public registers
On specific devices, the above-mentioned BAR type might be combined into a
single physical PCIe BAR. Or a module might require two physical PCIe BARs to
be fully functional. For example,
* On AMD Phoenix device, PSP, SMU, Public Register BARs are on PCIe BAR index 0.
* On AMD Strix Point device, Mailbox and Public Register BARs are on PCIe BAR
index 0. The PSP has some registers in PCIe BAR index 0 (Public Register BAR)
and PCIe BAR index 4 (PSP BAR).
Process Isolation Hardware
--------------------------
As explained before, XDNA Array can be dynamically divided into isolated
spatial partitions, each of which may have one or more columns. The spatial
partition is setup by programming the column isolation registers by the
microcontroller. Each spatial partition is associated with a PASID which is
also programmed by the microcontroller. Hence multiple spatial partitions in
the NPU can make concurrent host access protected by PASID.
The NPU FW itself uses microcontroller MMU enforced isolated contexts for
servicing user and privileged channel requests.
Mixed Spatial and Temporal Scheduling
=====================================
AMD XDNA architecture supports mixed spatial and temporal (time sharing)
scheduling of 2D array. This means that spatial partitions may be setup and
torn down dynamically to accommodate various workloads. A *spatial* partition
may be *exclusively* bound to one workload context while another partition may
be *temporarily* bound to more than one workload contexts. The microcontroller
updates the PASID for a temporarily shared partition to match the context that
has been bound to the partition at any moment.
Resource Solver
---------------
The Resource Solver component of the amdxdna driver manages the allocation
of 2D array among various workloads. Every workload describes the number
of columns required to run the NPU binary in its metadata. The Resource Solver
component uses hints passed by the workload and its own heuristics to
decide 2D array (re)partition strategy and mapping of workloads for spatial and
temporal sharing of columns. The FW enforces the context-to-column(s) resource
binding decisions made by the Resource Solver.
AMD Phoenix and AMD Hawk Point client NPU can support 6 concurrent workload
contexts. AMD Strix Point can support 16 concurrent workload contexts.
Application Binaries
====================
A NPU application workload is comprised of two separate binaries which are
generated by the NPU compiler.
1. AMD XDNA Array overlay, which is used to configure a NPU spatial partition.
The overlay contains instructions for setting up the stream switch
configuration and ELF for the compute tiles. The overlay is loaded on the
spatial partition bound to the workload by the associated ERT instance.
Refer to the
`Versal Adaptive SoC AIE-ML Architecture Manual (AM020)`_ for more details.
2. ``ctrlcode``, used for orchestrating the overlay loaded on the spatial
partition. ``ctrlcode`` is executed by the ERT running in protected mode on
the microcontroller in the context of the workload. ``ctrlcode`` is made up
of a sequence of opcodes named ``XAie_TxnOpcode``. Refer to the
`AI Engine Run Time`_ for more details.
Special Host Buffers
====================
Per-context Instruction Buffer
------------------------------
Every workload context uses a host resident 64 MB buffer which is memory
mapped into the ERT instance created to service the workload. The ``ctrlcode``
used by the workload is copied into this special memory. This buffer is
protected by PASID like all other input/output buffers used by that workload.
Instruction buffer is also mapped into the user space of the workload.
Global Privileged Buffer
------------------------
In addition, the driver also allocates a single buffer for maintenance tasks
like recording errors from MERT. This global buffer uses the global IOMMU
domain and is only accessible by MERT.
High-level Use Flow
===================
Here are the steps to run a workload on AMD NPU:
1. Compile the workload into an overlay and a ``ctrlcode`` binary.
2. Userspace opens a context in the driver and provides the overlay.
3. The driver checks with the Resource Solver for provisioning a set of columns
for the workload.
4. The driver then asks MERT to create a context on the device with the desired
columns.
5. MERT then creates an instance of ERT. MERT also maps the Instruction Buffer
into ERT memory.
6. The userspace then copies the ``ctrlcode`` to the Instruction Buffer.
7. Userspace then creates a command buffer with pointers to input, output, and
instruction buffer; it then submits command buffer with the driver and goes
to sleep waiting for completion.
8. The driver sends the command over the Mailbox to ERT.
9. ERT *executes* the ``ctrlcode`` in the instruction buffer.
10. Execution of the ``ctrlcode`` kicks off DMAs to and from the host DDR while
AMD XDNA Array is running.
11. When ERT reaches end of ``ctrlcode``, it raises an MSI-X to send completion
signal to the driver which then wakes up the waiting workload.
Boot Flow
=========
amdxdna driver uses PSP to securely load signed NPU FW and kick off the boot
of the NPU microcontroller. amdxdna driver then waits for the alive signal in
a special location on BAR 0. The NPU is switched off during SoC suspend and
turned on after resume where the NPU FW is reloaded, and the handshake is
performed again.
Userspace components
====================
Compiler
--------
Peano is an LLVM based open-source compiler for AMD XDNA Array compute tile
available at:
https://github.com/Xilinx/llvm-aie
The open-source IREE compiler supports graph compilation of ML models for AMD
NPU and uses Peano underneath. It is available at:
https://github.com/nod-ai/iree-amd-aie
Usermode Driver (UMD)
---------------------
The open-source XRT runtime stack interfaces with amdxdna kernel driver. XRT
can be found at:
https://github.com/Xilinx/XRT
The open-source XRT shim for NPU is can be found at:
https://github.com/amd/xdna-driver
DMA Operation
=============
DMA operation instructions are encoded in the ``ctrlcode`` as
``XAIE_IO_BLOCKWRITE`` opcode. When ERT executes ``XAIE_IO_BLOCKWRITE``, DMA
operations between host DDR and L2 memory are effected.
Error Handling
==============
When MERT detects an error in AMD XDNA Array, it pauses execution for that
workload context and sends an asynchronous message to the driver over the
privileged channel. The driver then sends a buffer pointer to MERT to capture
the register states for the partition bound to faulting workload context. The
driver then decodes the error by reading the contents of the buffer pointer.
Telemetry
=========
MERT can report various kinds of telemetry information like the following:
* L1 interrupt counter
* DMA counter
* Deep Sleep counter
* etc.
References
==========
- `AMD XDNA Architecture <https://www.amd.com/en/technologies/xdna.html>`_
- `AMD AI Engine Technology <https://www.xilinx.com/products/technology/ai-engine.html>`_
- `Peano <https://github.com/Xilinx/llvm-aie>`_
- `Versal Adaptive SoC AIE-ML Architecture Manual (AM020) <https://docs.amd.com/r/en-US/am020-versal-aie-ml>`_
- `AI Engine Run Time <https://github.com/Xilinx/aie-rt/tree/release/main_aig>`_

View File

@ -0,0 +1,11 @@
.. SPDX-License-Identifier: GPL-2.0-only
=====================================
accel/amdxdna NPU driver
=====================================
The accel/amdxdna driver supports the AMD NPU (Neural Processing Unit).
.. toctree::
amdnpu

View File

@ -8,6 +8,7 @@ Compute Accelerators
:maxdepth: 1
introduction
amdxdna/index
qaic/index
.. only:: subproject and html

View File

@ -100,29 +100,29 @@ Get delays, since system boot, for pid 10::
# ./getdelays -d -p 10
(output similar to next case)
Get sum of delays, since system boot, for all pids with tgid 5::
Get sum and peak of delays, since system boot, for all pids with tgid 242::
# ./getdelays -d -t 5
bash-4.4# ./getdelays -d -t 242
print delayacct stats ON
TGID 5
TGID 242
CPU count real total virtual total delay total delay average
8 7000000 6872122 3382277 0.423ms
IO count delay total delay average
0 0 0.000ms
SWAP count delay total delay average
0 0 0.000ms
RECLAIM count delay total delay average
0 0 0.000ms
THRASHING count delay total delay average
0 0 0.000ms
COMPACT count delay total delay average
0 0 0.000ms
WPCOPY count delay total delay average
0 0 0.000ms
IRQ count delay total delay average
0 0 0.000ms
CPU count real total virtual total delay total delay average delay max delay min
39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms
IO count delay total delay average delay max delay min
0 0 0.000ms 0.000000ms 0.000000ms
SWAP count delay total delay average delay max delay min
0 0 0.000ms 0.000000ms 0.000000ms
RECLAIM count delay total delay average delay max delay min
0 0 0.000ms 0.000000ms 0.000000ms
THRASHING count delay total delay average delay max delay min
0 0 0.000ms 0.000000ms 0.000000ms
COMPACT count delay total delay average delay max delay min
0 0 0.000ms 0.000000ms 0.000000ms
WPCOPY count delay total delay average delay max delay min
156 11215873 0.072ms 0.207403ms 0.033913ms
IRQ count delay total delay average delay max delay min
0 0 0.000ms 0.000000ms 0.000000ms
Get IO accounting for pid 1, it works only with -p::

View File

@ -47,7 +47,7 @@ should not change the relative position of each field within the struct.
1) Common and basic accounting fields::
/* The version number of this struct. This field is always set to
* TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
* TASKSTATS_VERSION, which is defined in <linux/taskstats.h>.
* Each time the struct is changed, the value should be incremented.
*/
__u16 version;

View File

@ -356,5 +356,5 @@ instructions at 'Documentation/admin-guide/reporting-issues.rst'.
Hints on understanding kernel bug reports are in
'Documentation/admin-guide/bug-hunting.rst'. More on debugging the kernel
with gdb is in 'Documentation/dev-tools/gdb-kernel-debugging.rst' and
'Documentation/dev-tools/kgdb.rst'.
with gdb is in 'Documentation/process/debugging/gdb-kernel-debugging.rst' and
'Documentation/process/debugging/kgdb.rst'.

View File

@ -121,14 +121,14 @@ compression algorithm to use external pre-trained dictionary, pass full
path to the `dict` along with other parameters::
#pass path to pre-trained zstd dictionary
echo "algo=zstd dict=/etc/dictioary" > /sys/block/zram0/algorithm_params
echo "algo=zstd dict=/etc/dictionary" > /sys/block/zram0/algorithm_params
#same, but using algorithm priority
echo "priority=1 dict=/etc/dictioary" > \
echo "priority=1 dict=/etc/dictionary" > \
/sys/block/zram0/algorithm_params
#pass path to pre-trained zstd dictionary and compression level
echo "algo=zstd level=8 dict=/etc/dictioary" > \
echo "algo=zstd level=8 dict=/etc/dictionary" > \
/sys/block/zram0/algorithm_params
Parameters are algorithm specific: not all algorithms support pre-trained

View File

@ -21,8 +21,8 @@ override the baud rate to 115200, etc.
By default, the braille device will just show the last kernel message (console
mode). To review previous messages, press the Insert key to switch to the VT
review mode. In review mode, the arrow keys permit to browse in the VT content,
:kbd:`PAGE-UP`/:kbd:`PAGE-DOWN` keys go at the top/bottom of the screen, and
the :kbd:`HOME` key goes back
`PAGE-UP`/`PAGE-DOWN` keys go at the top/bottom of the screen, and
the `HOME` key goes back
to the cursor, hence providing very basic screen reviewing facility.
Sound feedback can be obtained by adding the ``braille_console.sound=1`` kernel

View File

@ -368,12 +368,3 @@ processed by ``klogd``::
Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128]
Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3
---------------------------------------------------------------------------
::
Dr. G.W. Wettstein Oncology Research Div. Computing Facility
Roger Maris Cancer Center INTERNET: greg@wind.rmcc.com
820 4th St. N.
Fargo, ND 58122
Phone: 701-234-7556

View File

@ -64,13 +64,14 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
5-6. Device
5-7. RDMA
5-7-1. RDMA Interface Files
5-8. HugeTLB
5.8-1. HugeTLB Interface Files
5-9. Misc
5.9-1 Miscellaneous cgroup Interface Files
5.9-2 Migration and Ownership
5-10. Others
5-10-1. perf_event
5-8. DMEM
5-9. HugeTLB
5.9-1. HugeTLB Interface Files
5-10. Misc
5.10-1 Miscellaneous cgroup Interface Files
5.10-2 Migration and Ownership
5-11. Others
5-11-1. perf_event
5-N. Non-normative information
5-N-1. CPU controller root cgroup process behaviour
5-N-2. IO controller root cgroup process behaviour
@ -2626,6 +2627,49 @@ RDMA Interface Files
mlx4_0 hca_handle=1 hca_object=20
ocrdma1 hca_handle=1 hca_object=23
DMEM
----
The "dmem" controller regulates the distribution and accounting of
device memory regions. Because each memory region may have its own page size,
which does not have to be equal to the system page size, the units are always bytes.
DMEM Interface Files
~~~~~~~~~~~~~~~~~~~~
dmem.max, dmem.min, dmem.low
A readwrite nested-keyed file that exists for all the cgroups
except root that describes current configured resource limit
for a region.
An example for xe follows::
drm/0000:03:00.0/vram0 1073741824
drm/0000:03:00.0/stolen max
The semantics are the same as for the memory cgroup controller, and are
calculated in the same way.
dmem.capacity
A read-only file that describes maximum region capacity.
It only exists on the root cgroup. Not all memory can be
allocated by cgroups, as the kernel reserves some for
internal use.
An example for xe follows::
drm/0000:03:00.0/vram0 8514437120
drm/0000:03:00.0/stolen 67108864
dmem.current
A read-only file that describes current resource usage.
It exists for all the cgroup except root.
An example for xe follows::
drm/0000:03:00.0/vram0 12550144
drm/0000:03:00.0/stolen 8650752
HugeTLB
-------

View File

@ -7,6 +7,9 @@ added to the kernel over time. There is, as yet, little overall order or
organization here — this material was not written to be a single, coherent
document! With luck things will improve quickly over time.
General guides to kernel administration
---------------------------------------
This initial section contains overall information, including the README
file describing the kernel as a whole, documentation on kernel parameters,
etc.
@ -15,19 +18,44 @@ etc.
:maxdepth: 1
README
kernel-parameters
devices
sysctl/index
abi
features
This section describes CPU vulnerabilities and their mitigations.
A big part of the kernel's administrative interface is the /proc and sysfs
virtual filesystems; these documents describe how to interact with tem
.. toctree::
:maxdepth: 1
sysfs-rules
sysctl/index
cputopology
abi
Security-related documentation:
.. toctree::
:maxdepth: 1
hw-vuln/index
LSM/index
perf-security
Booting the kernel
------------------
.. toctree::
:maxdepth: 1
bootconfig
kernel-parameters
efi-stub
initrd
Tracking down and identifying problems
--------------------------------------
Here is a set of documents aimed at users who are trying to track down
problems and bugs in particular.
@ -48,15 +76,97 @@ problems and bugs in particular.
kdump/index
perf/index
pstore-blk
clearing-warn-once
kernel-per-CPU-kthreads
lockup-watchdogs
RAS/index
sysrq
This is the beginning of a section with information of interest to
application developers. Documents covering various aspects of the kernel
ABI will be found here.
Core-kernel subsystems
----------------------
These documents describe core-kernel administration interfaces that are
likely to be of interest on almost any system.
.. toctree::
:maxdepth: 1
sysfs-rules
cgroup-v2
cgroup-v1/index
cpu-load
mm/index
module-signing
namespaces/index
numastat
pm/index
syscall-user-dispatch
Support for non-native binary formats. Note that some of these
documents are ... old ...
.. toctree::
:maxdepth: 1
binfmt-misc
java
mono
Block-layer and filesystem administration
-----------------------------------------
.. toctree::
:maxdepth: 1
bcache
binderfs
blockdev/index
cifs/index
device-mapper/index
ext4
filesystem-monitoring
nfs/index
iostats
jfs
md
ufs
xfs
Device-specific guides
----------------------
How to configure your hardware within your Linux system.
.. toctree::
:maxdepth: 1
acpi/index
aoe/index
auxdisplay/index
braille-console
btmrvl
dell_rbu
edid
gpio/index
hw_random
laptops/index
lcd-panel-cgram
media/index
nvme-multipath
parport
pnp
rapidio
rtc
serial-console
svga
thermal/index
thunderbolt
vga-softcursor
video-output
Workload analysis
-----------------
This is the beginning of a section with information of interest to
application developers and system integrators doing analysis of the
@ -69,73 +179,17 @@ subsystems expectations will be found here.
workload-tracing
The rest of this manual consists of various unordered guides on how to
configure specific aspects of kernel behavior to your liking.
Everything else
---------------
A few hard-to-categorize and generally obsolete documents.
.. toctree::
:maxdepth: 1
acpi/index
aoe/index
auxdisplay/index
bcache
binderfs
binfmt-misc
blockdev/index
bootconfig
braille-console
btmrvl
cgroup-v1/index
cgroup-v2
cifs/index
clearing-warn-once
cpu-load
cputopology
dell_rbu
device-mapper/index
edid
efi-stub
ext4
filesystem-monitoring
nfs/index
gpio/index
highuid
hw_random
initrd
iostats
java
jfs
kernel-per-CPU-kthreads
laptops/index
lcd-panel-cgram
ldm
lockup-watchdogs
LSM/index
md
media/index
mm/index
module-signing
mono
namespaces/index
numastat
parport
perf-security
pm/index
pnp
rapidio
RAS/index
rtc
serial-console
svga
syscall-user-dispatch
sysrq
thermal/index
thunderbolt
ufs
unicode
vga-softcursor
video-output
xfs
.. only:: subproject and html

View File

@ -194,8 +194,6 @@ is applicable::
WDT Watchdog support is enabled.
X86-32 X86-32, aka i386 architecture is enabled.
X86-64 X86-64 architecture is enabled.
More X86-64 boot options can be found in
Documentation/arch/x86/x86_64/boot-options.rst.
X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
X86_UV SGI UV support is enabled.
XEN Xen support is enabled
@ -213,7 +211,6 @@ Do not modify the syntax of boot loader parameters without extreme
need or coordination with <Documentation/arch/x86/boot.rst>.
There are also arch-specific kernel-parameters not documented here.
See for example <Documentation/arch/x86/x86_64/boot-options.rst>.
Note that ALL kernel parameters listed below are CASE SENSITIVE, and that
a trailing = on the name of any parameter states that that parameter will

View File

@ -21,6 +21,10 @@
strictly ACPI specification compliant.
rsdt -- prefer RSDT over (default) XSDT
copy_dsdt -- copy DSDT to memory
nocmcff -- Disable firmware first mode for corrected
errors. This disables parsing the HEST CMC error
source to check if firmware has set the FF flag. This
may result in duplicate corrected error reports.
nospcr -- disable console in ACPI SPCR table as
default _serial_ console on ARM64
For ARM64, ONLY "acpi=off", "acpi=on", "acpi=force" or
@ -405,6 +409,8 @@
not play well with APC CPU idle - disable it if you have
APC and your system crashes randomly.
apic [APIC,X86-64] Use IO-APIC. Default.
apic= [APIC,X86,EARLY] Advanced Programmable Interrupt Controller
Change the output verbosity while booting
Format: { quiet (default) | verbose | debug }
@ -424,6 +430,10 @@
useful so that a dump capture kernel won't be
shot down by NMI
apicpmtimer Do APIC timer calibration using the pmtimer. Implies
apicmaintimer. Useful when your PIT timer is totally
broken.
autoconf= [IPV6]
See Documentation/networking/ipv6.rst.
@ -1726,6 +1736,8 @@
off: Disable GDS mitigation.
gbpages [X86] Use GB pages for kernel direct mappings.
gcov_persist= [GCOV] When non-zero (default), profiling data for
kernel modules is saved and remains accessible via
debugfs, even when the module is unloaded/reloaded.
@ -2008,12 +2020,21 @@
idle= [X86,EARLY]
Format: idle=poll, idle=halt, idle=nomwait
Poll forces a polling idle loop that can slightly
improve the performance of waking up a idle CPU, but
will use a lot of power and make the system run hot.
Not recommended.
idle=poll: Don't do power saving in the idle loop
using HLT, but poll for rescheduling event. This will
make the CPUs eat a lot more power, but may be useful
to get slightly better performance in multiprocessor
benchmarks. It also makes some profiling using
performance counters more accurate. Please note that
on systems with MONITOR/MWAIT support (like Intel
EM64T CPUs) this option has no performance advantage
over the normal idle loop. It may also interact badly
with hyperthreading.
idle=halt: Halt is forced to be used for CPU idle.
In such case C2/C3 won't be used again.
idle=nomwait: Disable mwait for CPU C-states
idxd.sva= [HW]
@ -2311,20 +2332,73 @@
relaxed
iommu= [X86,EARLY]
off
Don't initialize and use any kind of IOMMU.
force
Force the use of the hardware IOMMU even when
it is not actually needed (e.g. because < 3 GB
memory).
noforce
Don't force hardware IOMMU usage when it is not
needed. (default).
biomerge
panic
nopanic
merge
nomerge
soft
pt [X86]
nopt [X86]
nobypass [PPC/POWERNV]
Use software bounce buffering (SWIOTLB) (default for
Intel machines). This can be used to prevent the usage
of an available hardware IOMMU.
[X86]
pt
[X86]
nopt
[PPC/POWERNV]
nobypass
Disable IOMMU bypass, using IOMMU for PCI devices.
[X86]
AMD Gart HW IOMMU-specific options:
<size>
Set the size of the remapping area in bytes.
allowed
Overwrite iommu off workarounds for specific chipsets
fullflush
Flush IOMMU on each allocation (default).
nofullflush
Don't use IOMMU fullflush.
memaper[=<order>]
Allocate an own aperture over RAM with size
32MB<<order. (default: order=1, i.e. 64MB)
merge
Do scatter-gather (SG) merging. Implies "force"
(experimental).
nomerge
Don't do scatter-gather (SG) merging.
noaperture
Ask the IOMMU not to touch the aperture for AGP.
noagp
Don't initialize the AGP driver and use full aperture.
panic
Always panic when IOMMU overflows.
iommu.forcedac= [ARM64,X86,EARLY] Control IOVA allocation for PCI devices.
Format: { "0" | "1" }
0 - Try to allocate a 32-bit DMA address first, before
@ -2432,7 +2506,9 @@
specified in the flag list (default: domain):
nohz
Disable the tick when a single task runs.
Disable the tick when a single task runs as well as
disabling other kernel noises like having RCU callbacks
offloaded. This is equivalent to the nohz_full parameter.
A residual 1Hz tick is offloaded to workqueues, which you
need to affine to housekeeping through the global
@ -2695,7 +2771,7 @@
VMs, i.e. on the 0=>1 and 1=>0 transitions of the
number of VMs.
Enabling virtualization at module lode avoids potential
Enabling virtualization at module load avoids potential
latency for creation of the 0=>1 VM, as KVM serializes
virtualization enabling across all online CPUs. The
"cost" of enabling virtualization when KVM is loaded,
@ -2748,17 +2824,21 @@
nvhe: Standard nVHE-based mode, without support for
protected guests.
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
protected: Mode with support for guests whose state is
kept private from the host, using VHE or
nVHE depending on HW support.
nested: VHE-based mode with support for nested
virtualization. Requires at least ARMv8.3
hardware.
virtualization. Requires at least ARMv8.4
hardware (with FEAT_NV2).
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
for the host. "nested" is experimental and should be
used with extreme caution.
for the host. To force nVHE on VHE hardware, add
"arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the
command-line.
"nested" is experimental and should be used with
extreme caution.
kvm-arm.vgic_v3_group0_trap=
[KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
@ -3259,9 +3339,77 @@
devices can be requested on-demand with the
/dev/loop-control interface.
mce [X86-32] Machine Check Exception
mce= [X86-{32,64}]
Please see Documentation/arch/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
off
disable machine check
no_cmci
disable CMCI(Corrected Machine Check Interrupt) that
Intel processor supports. Usually this disablement is
not recommended, but it might be handy if your
hardware is misbehaving.
Note that you'll get more problems without CMCI than
with due to the shared banks, i.e. you might get
duplicated error logs.
dont_log_ce
don't make logs for corrected errors. All events
reported as corrected are silently cleared by OS. This
option will be useful if you have no interest in any
of corrected errors.
ignore_ce
disable features for corrected errors, e.g.
polling timer and CMCI. All events reported as
corrected are not cleared by OS and remained in its
error banks.
Usually this disablement is not recommended, however
if there is an agent checking/clearing corrected
errors (e.g. BIOS or hardware monitoring
applications), conflicting with OS's error handling,
and you cannot deactivate the agent, then this option
will be a help.
no_lmce
do not opt-in to Local MCE delivery. Use legacy method
to broadcast MCEs.
bootlog
enable logging of machine checks left over from
booting. Disabled by default on AMD Fam10h and older
because some BIOS leave bogus ones.
If your BIOS doesn't do that it's a good idea to
enable though to make sure you log even machine check
events that result in a reboot. On Intel systems it is
enabled by default.
nobootlog
disable boot machine check logging.
monarchtimeout (number)
sets the time in us to wait for other CPUs on machine
checks. 0 to disable.
bios_cmci_threshold
don't overwrite the bios-set CMCI threshold. This boot
option prevents Linux from overwriting the CMCI
threshold set by the bios. Without this option, Linux
always sets the CMCI threshold to 1. Enabling this may
make memory predictive failure analysis less effective
if the bios sets thresholds for memory errors since we
will not see details for all errors.
recovery
force-enable recoverable machine check code paths
Everything else is in sysfs now.
mce=option [X86-64] See Documentation/arch/x86/x86_64/boot-options.rst
md= [HW] RAID subsystems devices and level
See Documentation/admin-guide/md.rst.
@ -3351,8 +3499,8 @@
[KNL] Set the initial state for the memory hotplug
onlining policy. If not specified, the default value is
set according to the
CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
option.
CONFIG_MHP_DEFAULT_ONLINE_TYPE kernel config
options.
See Documentation/admin-guide/mm/memory-hotplug.rst.
memmap=exactmap [KNL,X86,EARLY] Enable setting of an exact
@ -3887,6 +4035,8 @@
noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any
IOAPICs that may be present in the system.
noapictimer [APIC,X86] Don't set up the APIC timer
noautogroup Disable scheduler automatic task group creation.
nocache [ARM,EARLY]
@ -3934,6 +4084,8 @@
register save and restore. The kernel will only save
legacy floating-point registers on task switch.
nogbpages [X86] Do not use GB pages for kernel direct mappings.
no_hash_pointers
[KNL,EARLY]
Force pointers printed to the console or buffers to be
@ -3960,6 +4112,8 @@
the impact of the sleep instructions. This is also
useful when using JTAG debugger.
nohpet [X86] Don't use the HPET timer.
nohugeiomap [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge I/O mappings.
nohugevmalloc [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge vmalloc mappings.
@ -4111,8 +4265,10 @@
nosync [HW,M68K] Disables sync negotiation for all devices.
no_timer_check [X86,APIC] Disables the code which tests for
broken timer IRQ sources.
no_timer_check [X86,APIC] Disables the code which tests for broken
timer IRQ sources, i.e., the IO-APIC timer. This can
work around problems with incorrect timer
initialization on some boards.
no_uaccess_flush
[PPC,EARLY] Don't flush the L1-D cache after accessing user data.
@ -4192,6 +4348,11 @@
If given as an integer followed by 'U', it will
divide each physical node into N emulated nodes.
numa=noacpi [X86] Don't parse the SRAT table for NUMA setup
numa=nohmat [X86] Don't parse the HMAT table for NUMA setup, or
soft-reserved memory partitioning.
numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
NUMA balancing.
Allowed values are enable and disable
@ -4673,7 +4834,7 @@
'1' force enabled
'x' unchanged
For example,
pci=config_acs=10x
pci=config_acs=10x@pci:0:0
would configure all devices that support
ACS to enable P2P Request Redirect, disable
Translation Blocking, and leave Source
@ -5367,7 +5528,42 @@
rcutorture.gp_cond= [KNL]
Use conditional/asynchronous update-side
primitives, if available.
normal-grace-period primitives, if available.
rcutorture.gp_cond_exp= [KNL]
Use conditional/asynchronous update-side
expedited-grace-period primitives, if available.
rcutorture.gp_cond_full= [KNL]
Use conditional/asynchronous update-side
normal-grace-period primitives that also take
concurrent expedited grace periods into account,
if available.
rcutorture.gp_cond_exp_full= [KNL]
Use conditional/asynchronous update-side
expedited-grace-period primitives that also take
concurrent normal grace periods into account,
if available.
rcutorture.gp_cond_wi= [KNL]
Nominal wait interval for normal conditional
grace periods (specified by rcutorture's
gp_cond and gp_cond_full module parameters),
in microseconds. The actual wait interval will
be randomly selected to nanosecond granularity up
to this wait interval. Defaults to 16 jiffies,
for example, 16,000 microseconds on a system
with HZ=1000.
rcutorture.gp_cond_wi_exp= [KNL]
Nominal wait interval for expedited conditional
grace periods (specified by rcutorture's
gp_cond_exp and gp_cond_exp_full module
parameters), in microseconds. The actual wait
interval will be randomly selected to nanosecond
granularity up to this wait interval. Defaults to
128 microseconds.
rcutorture.gp_exp= [KNL]
Use expedited update-side primitives, if available.
@ -5376,6 +5572,43 @@
Use normal (non-expedited) asynchronous
update-side primitives, if available.
rcutorture.gp_poll= [KNL]
Use polled update-side normal-grace-period
primitives, if available.
rcutorture.gp_poll_exp= [KNL]
Use polled update-side expedited-grace-period
primitives, if available.
rcutorture.gp_poll_full= [KNL]
Use polled update-side normal-grace-period
primitives that also take concurrent expedited
grace periods into account, if available.
rcutorture.gp_poll_exp_full= [KNL]
Use polled update-side expedited-grace-period
primitives that also take concurrent normal
grace periods into account, if available.
rcutorture.gp_poll_wi= [KNL]
Nominal wait interval for normal conditional
grace periods (specified by rcutorture's
gp_poll and gp_poll_full module parameters),
in microseconds. The actual wait interval will
be randomly selected to nanosecond granularity up
to this wait interval. Defaults to 16 jiffies,
for example, 16,000 microseconds on a system
with HZ=1000.
rcutorture.gp_poll_wi_exp= [KNL]
Nominal wait interval for expedited conditional
grace periods (specified by rcutorture's
gp_poll_exp and gp_poll_exp_full module
parameters), in microseconds. The actual wait
interval will be randomly selected to nanosecond
granularity up to this wait interval. Defaults to
128 microseconds.
rcutorture.gp_sync= [KNL]
Use normal (non-expedited) synchronous
update-side primitives, if available. If all
@ -5429,6 +5662,22 @@
Set time (jiffies) between CPU-hotplug operations,
or zero to disable CPU-hotplug testing.
rcutorture.preempt_duration= [KNL]
Set duration (in milliseconds) of preemptions
by a high-priority FIFO real-time task. Set to
zero (the default) to disable. The CPUs to
preempt are selected randomly from the set that
are online at a given point in time. Races with
CPUs going offline are ignored, with that attempt
at preemption skipped.
rcutorture.preempt_interval= [KNL]
Set interval (in milliseconds, defaulting to one
second) between preemptions by a high-priority
FIFO real-time task. This delay is mediated
by an hrtimer and is further fuzzed to avoid
inadvertent synchronizations.
rcutorture.read_exit_burst= [KNL]
The number of times in a given read-then-exit
episode that a set of read-then-exit kthreads
@ -5715,6 +5964,55 @@
reboot_cpu is s[mp]#### with #### being the processor
to be used for rebooting.
acpi
Use the ACPI RESET_REG in the FADT. If ACPI is not
configured or the ACPI reset does not work, the reboot
path attempts the reset using the keyboard controller.
bios
Use the CPU reboot vector for warm reset
cold
Set the cold reboot flag
default
There are some built-in platform specific "quirks"
- you may see: "reboot: <name> series board detected.
Selecting <type> for reboots." In the case where you
think the quirk is in error (e.g. you have newer BIOS,
or newer board) using this option will ignore the
built-in quirk table, and use the generic default
reboot actions.
efi
Use efi reset_system runtime service. If EFI is not
configured or the EFI reset does not work, the reboot
path attempts the reset using the keyboard controller.
force
Don't stop other CPUs on reboot. This can make reboot
more reliable in some cases.
kbd
Use the keyboard controller. cold reset (default)
pci
Use a write to the PCI config space register 0xcf9 to
trigger reboot.
triple
Force a triple fault (init)
warm
Don't set the cold reboot flag
Using warm reset will be much faster especially on big
memory systems because the BIOS will not go through
the memory check. Disadvantage is that not all
hardware will be completely reinitialized on reboot so
there may be boot problems on some systems.
refscale.holdoff= [KNL]
Set test-start holdoff period. The purpose of
this parameter is to delay the start of the
@ -6106,7 +6404,16 @@
serialnumber [BUGS=X86-32]
sev=option[,option...] [X86-64] See Documentation/arch/x86/x86_64/boot-options.rst
sev=option[,option...] [X86-64]
debug
Enable debug messages.
nosnp
Do not enable SEV-SNP (applies to host/hypervisor
only). Setting 'nosnp' avoids the RMP check overhead
in memory accesses when users do not want to run
SEV-SNP guests.
shapers= [NET]
Maximal number of shapers.
@ -6858,6 +7165,14 @@
comma-separated list of trace events to enable. See
also Documentation/trace/events.rst
To enable modules, use :mod: keyword:
trace_event=:mod:<module>
The value before :mod: will only enable specific events
that are part of the module. See the above mentioned
document for more information.
trace_instance=[instance-info]
[FTRACE] Create a ring buffer instance early in boot up.
This will be listed in:
@ -6992,6 +7307,13 @@
See Documentation/admin-guide/mm/transhuge.rst
for more details.
transparent_hugepage_tmpfs= [KNL]
Format: [always|within_size|advise|never]
Can be used to control the default hugepage allocation policy
for the tmpfs mount.
See Documentation/admin-guide/mm/transhuge.rst
for more details.
trusted.source= [KEYS]
Format: <string>
This parameter identifies the trust source as a backend
@ -7474,7 +7796,7 @@
vt.cur_default= [VT] Default cursor shape.
Format: 0xCCBBAA, where AA, BB, and CC are the same as
the parameters of the <Esc>[?A;B;Cc escape sequence;
see VGA-softcursor.txt. Default: 2 = underline.
see vga-softcursor.rst. Default: 2 = underline.
vt.default_blu= [VT]
Format: <blue0>,<blue1>,<blue2>,...,<blue15>

View File

@ -445,8 +445,10 @@ event code Key Notes
0x1008 0x07 FN+F8 IBM: toggle screen expand
Lenovo: configure UltraNav,
or toggle screen expand.
On newer platforms (2024+)
replaced by 0x131f (see below)
On 2024 platforms replaced by
0x131f (see below) and on newer
platforms (2025 +) keycode is
replaced by 0x1401 (see below).
0x1009 0x08 FN+F9 -
@ -506,9 +508,11 @@ event code Key Notes
0x1019 0x18 unknown
0x131f ... FN+F8 Platform Mode change.
0x131f ... FN+F8 Platform Mode change (2024 systems).
Implemented in driver.
0x1401 ... FN+F8 Platform Mode change (2025 + systems).
Implemented in driver.
... ... ...
0x1020 0x1F unknown

View File

@ -98,7 +98,7 @@ frames in packed raw Bayer format to IPU3 CSI2 receiver.
# and that ov5670 sensor is connected to i2c bus 10 with address 0x36
export SDEV=$(media-ctl -d $MDEV -e "ov5670 10-0036")
# Establish the link for the media devices using media-ctl [#f3]_
# Establish the link for the media devices using media-ctl
media-ctl -d $MDEV -l "ov5670:0 -> ipu3-csi2 0:0[1]"
# Set the format for the media devices
@ -589,12 +589,8 @@ preserved.
References
==========
.. [#f5] drivers/staging/media/ipu3/include/uapi/intel-ipu3.h
.. [#f1] https://github.com/intel/nvt
.. [#f2] http://git.ideasonboard.org/yavta.git
.. [#f3] http://git.ideasonboard.org/?p=media-ctl.git;a=summary
.. [#f4] ImgU limitation requires an additional 16x16 for all input resolutions

View File

@ -42,32 +42,45 @@ the execution. ::
$ git clone https://github.com/sjp38/masim; cd masim; make
$ sudo damo start "./masim ./configs/stairs.cfg --quiet"
$ sudo ./damo show
0 addr [85.541 TiB , 85.541 TiB ) (57.707 MiB ) access 0 % age 10.400 s
1 addr [85.541 TiB , 85.542 TiB ) (413.285 MiB) access 0 % age 11.400 s
2 addr [127.649 TiB , 127.649 TiB) (57.500 MiB ) access 0 % age 1.600 s
3 addr [127.649 TiB , 127.649 TiB) (32.500 MiB ) access 0 % age 500 ms
4 addr [127.649 TiB , 127.649 TiB) (9.535 MiB ) access 100 % age 300 ms
5 addr [127.649 TiB , 127.649 TiB) (8.000 KiB ) access 60 % age 0 ns
6 addr [127.649 TiB , 127.649 TiB) (6.926 MiB ) access 0 % age 1 s
7 addr [127.998 TiB , 127.998 TiB) (120.000 KiB) access 0 % age 11.100 s
8 addr [127.998 TiB , 127.998 TiB) (8.000 KiB ) access 40 % age 100 ms
9 addr [127.998 TiB , 127.998 TiB) (4.000 KiB ) access 0 % age 11 s
total size: 577.590 MiB
$ sudo ./damo stop
$ sudo damo report access
heatmap: 641111111000000000000000000000000000000000000000000000[...]33333333333333335557984444[...]7
# min/max temperatures: -1,840,000,000, 370,010,000, column size: 3.925 MiB
0 addr 86.182 TiB size 8.000 KiB access 0 % age 14.900 s
1 addr 86.182 TiB size 8.000 KiB access 60 % age 0 ns
2 addr 86.182 TiB size 3.422 MiB access 0 % age 4.100 s
3 addr 86.182 TiB size 2.004 MiB access 95 % age 2.200 s
4 addr 86.182 TiB size 29.688 MiB access 0 % age 14.100 s
5 addr 86.182 TiB size 29.516 MiB access 0 % age 16.700 s
6 addr 86.182 TiB size 29.633 MiB access 0 % age 17.900 s
7 addr 86.182 TiB size 117.652 MiB access 0 % age 18.400 s
8 addr 126.990 TiB size 62.332 MiB access 0 % age 9.500 s
9 addr 126.990 TiB size 13.980 MiB access 0 % age 5.200 s
10 addr 126.990 TiB size 9.539 MiB access 100 % age 3.700 s
11 addr 126.990 TiB size 16.098 MiB access 0 % age 6.400 s
12 addr 127.987 TiB size 132.000 KiB access 0 % age 2.900 s
total size: 314.008 MiB
$ sudo damo stop
The first command of the above example downloads and builds an artificial
memory access generator program called ``masim``. The second command asks DAMO
to execute the artificial generator process start via the given command and
make DAMON monitors the generator process. The third command retrieves the
current snapshot of the monitored access pattern of the process from DAMON and
shows the pattern in a human readable format.
to start the program via the given command and make DAMON monitors the newly
started process. The third command retrieves the current snapshot of the
monitored access pattern of the process from DAMON and shows the pattern in a
human readable format.
Each line of the output shows which virtual address range (``addr [XX, XX)``)
of the process is how frequently (``access XX %``) accessed for how long time
(``age XX``). For example, the fifth region of ~9 MiB size is being most
frequently accessed for last 300 milliseconds. Finally, the fourth command
stops DAMON.
The first line of the output shows the relative access temperature (hotness) of
the regions in a single row hetmap format. Each column on the heatmap
represents regions of same size on the monitored virtual address space. The
position of the colun on the row and the number on the column represents the
relative location and access temperature of the region. ``[...]`` means
unmapped huge regions on the virtual address spaces. The second line shows
additional information for better understanding the heatmap.
Each line of the output from the third line shows which virtual address range
(``addr XX size XX``) of the process is how frequently (``access XX %``)
accessed for how long time (``age XX``). For example, the evelenth region of
~9.5 MiB size is being most frequently accessed for last 3.7 seconds. Finally,
the fourth command stops DAMON.
Note that DAMON can monitor not only virtual address spaces but multiple types
of address spaces including the physical address space.
@ -95,7 +108,7 @@ Visualizing Recorded Patterns
You can visualize the pattern in a heatmap, showing which memory region
(x-axis) got accessed when (y-axis) and how frequently (number).::
$ sudo damo report heats --heatmap stdout
$ sudo damo report heatmap
22222222222222222222222222222222222222211111111111111111111111111111111111111100
44444444444444444444444444444444444444434444444444444444444444444444444444443200
44444444444444444444444444444444444444433444444444444444444444444444444444444200
@ -160,6 +173,6 @@ Data Access Pattern Aware Memory Management
Below command makes every memory region of size >=4K that has not accessed for
>=60 seconds in your workload to be swapped out. ::
$ sudo damo schemes --damos_access_rate 0 0 --damos_sz_region 4K max \
--damos_age 60s max --damos_action pageout \
<pid of your workload>
$ sudo damo start --damos_access_rate 0 0 --damos_sz_region 4K max \
--damos_age 60s max --damos_action pageout \
<pid of your workload>

View File

@ -26,12 +26,6 @@ DAMON provides below interfaces for different users.
writing kernel space DAMON application programs for you. You can even extend
DAMON for various address spaces. For detail, please refer to the interface
:doc:`document </mm/damon/api>`.
- *debugfs interface. (DEPRECATED!)*
:ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
<sysfs_interface>`. This is deprecated, so users should move to the
:ref:`sysfs interface <sysfs_interface>`. If you depend on this and cannot
move, please report your usecase to damon@lists.linux.dev and
linux-mm@kvack.org.
.. _sysfs_interface:
@ -89,10 +83,10 @@ comma (",").
│ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value
│ │ │ │ │ │ │ :ref:`watermarks <sysfs_watermarks>`/metric,interval_us,high,mid,low
│ │ │ │ │ │ │ :ref:`filters <sysfs_filters>`/nr_filters
│ │ │ │ │ │ │ │ 0/type,matching,memcg_id
│ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
│ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx
│ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,sz_ops_filter_passed,qt_exceeds
│ │ │ │ │ │ │ :ref:`tried_regions <sysfs_schemes_tried_regions>`/total_bytes
│ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age
│ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age,sz_filter_passed
│ │ │ │ │ │ │ │ ...
│ │ │ │ │ │ ...
│ │ │ │ ...
@ -412,59 +406,62 @@ number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``. Each directory represents each filter. The filters are evaluated
in the numeric order.
Each filter directory contains six files, namely ``type``, ``matcing``,
``memcg_path``, ``addr_start``, ``addr_end``, and ``target_idx``. To ``type``
file, you can write one of five special keywords: ``anon`` for anonymous pages,
``memcg`` for specific memory cgroup, ``young`` for young pages, ``addr`` for
specific address range (an open-ended interval), or ``target`` for specific
DAMON monitoring target filtering. In case of the memory cgroup filtering, you
can specify the memory cgroup of the interest by writing the path of the memory
cgroup from the cgroups mount point to ``memcg_path`` file. In case of the
address range filtering, you can specify the start and end address of the range
to ``addr_start`` and ``addr_end`` files, respectively. For the DAMON
monitoring target filtering, you can specify the index of the target between
the list of the DAMON context's monitoring targets list to ``target_idx`` file.
You can write ``Y`` or ``N`` to ``matching`` file to filter out pages that does
or does not match to the type, respectively. Then, the scheme's action will
not be applied to the pages that specified to be filtered out.
Each filter directory contains seven files, namely ``type``, ``matching``,
``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, and ``target_idx``.
To ``type`` file, you can write one of five special keywords: ``anon`` for
anonymous pages, ``memcg`` for specific memory cgroup, ``young`` for young
pages, ``addr`` for specific address range (an open-ended interval), or
``target`` for specific DAMON monitoring target filtering. Meaning of the
types are same to the description on the :ref:`design doc
<damon_design_damos_filters>`.
In case of the memory cgroup filtering, you can specify the memory cgroup of
the interest by writing the path of the memory cgroup from the cgroups mount
point to ``memcg_path`` file. In case of the address range filtering, you can
specify the start and end address of the range to ``addr_start`` and
``addr_end`` files, respectively. For the DAMON monitoring target filtering,
you can specify the index of the target between the list of the DAMON context's
monitoring targets list to ``target_idx`` file.
You can write ``Y`` or ``N`` to ``matching`` file to specify whether the filter
is for memory that matches the ``type``. You can write ``Y`` or ``N`` to
``allow`` file to specify if applying the action to the memory that satisfies
the ``type`` and ``matching`` should be allowed or not.
For example, below restricts a DAMOS action to be applied to only non-anonymous
pages of all memory cgroups except ``/having_care_already``.::
# echo 2 > nr_filters
# # filter out anonymous pages
# # disallow anonymous pages
echo anon > 0/type
echo Y > 0/matching
echo N > 0/allow
# # further filter out all cgroups except one at '/having_care_already'
echo memcg > 1/type
echo /having_care_already > 1/memcg_path
echo Y > 1/matching
echo N > 1/allow
Note that ``anon`` and ``memcg`` filters are currently supported only when
``paddr`` :ref:`implementation <sysfs_context>` is being used.
Also, memory regions that are filtered out by ``addr`` or ``target`` filters
are not counted as the scheme has tried to those, while regions that filtered
out by other type filters are counted as the scheme has tried to. The
difference is applied to :ref:`stats <damos_stats>` and
:ref:`tried regions <sysfs_schemes_tried_regions>`.
Refer to the :ref:`DAMOS filters design documentation
<damon_design_damos_filters>` for more details including how multiple filters
of different ``allow`` works, when each of the filters are supported, and
differences on stats.
.. _sysfs_schemes_stats:
schemes/<N>/stats/
------------------
DAMON counts the total number and bytes of regions that each scheme is tried to
be applied, the two numbers for the regions that each scheme is successfully
applied, and the total number of the quota limit exceeds. This statistics can
be used for online analysis or tuning of the schemes.
DAMON counts statistics for each scheme. This statistics can be used for
online analysis or tuning of the schemes. Refer to :ref:`design doc
<damon_design_damos_stat>` for more details about the stats.
The statistics can be retrieved by reading the files under ``stats`` directory
(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``, and
``qt_exceeds``), respectively. The files are not updated in real time, so you
should ask DAMON sysfs interface to update the content of the files for the
stats by writing a special keyword, ``update_schemes_stats`` to the relevant
``kdamonds/<N>/state`` file.
(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``,
``sz_ops_filter_passed``, and ``qt_exceeds``), respectively. The files are not
updated in real time, so you should ask DAMON sysfs interface to update the
content of the files for the stats by writing a special keyword,
``update_schemes_stats`` to the relevant ``kdamonds/<N>/state`` file.
.. _sysfs_schemes_tried_regions:
@ -501,10 +498,10 @@ set the ``access pattern`` as their interested pattern that they want to query.
tried_regions/<N>/
------------------
In each region directory, you will find four files (``start``, ``end``,
``nr_accesses``, and ``age``). Reading the files will show the start and end
addresses, ``nr_accesses``, and ``age`` of the region that corresponding
DAMON-based operation scheme ``action`` has tried to be applied.
In each region directory, you will find five files (``start``, ``end``,
``nr_accesses``, ``age``, and ``sz_filter_passed``). Reading the files will
show the properties of the region that corresponding DAMON-based operation
scheme ``action`` has tried to be applied.
Example
~~~~~~~
@ -600,306 +597,3 @@ fields are as usual. It shows the index of the DAMON context (``ctx_idx=X``)
of the scheme in the list of the contexts of the context's kdamond, the index
of the scheme (``scheme_idx=X``) in the list of the schemes of the context, in
addition to the output of ``damon_aggregated`` tracepoint.
.. _debugfs_interface:
debugfs Interface (DEPRECATED!)
===============================
.. note::
THIS IS DEPRECATED!
DAMON debugfs interface is deprecated, so users should move to the
:ref:`sysfs interface <sysfs_interface>`. If you depend on this and cannot
move, please report your usecase to damon@lists.linux.dev and
linux-mm@kvack.org.
DAMON exports nine files, ``DEPRECATED``, ``attrs``, ``target_ids``,
``init_regions``, ``schemes``, ``monitor_on_DEPRECATED``, ``kdamond_pid``,
``mk_contexts`` and ``rm_contexts`` under its debugfs directory,
``<debugfs>/damon/``.
``DEPRECATED`` is a read-only file for the DAMON debugfs interface deprecation
notice. Reading it returns the deprecation notice, as below::
# cat DEPRECATED
DAMON debugfs interface is deprecated, so users should move to DAMON_SYSFS. If you cannot, please report your usecase to damon@lists.linux.dev and linux-mm@kvack.org.
Attributes
----------
Users can get and set the ``sampling interval``, ``aggregation interval``,
``update interval``, and min/max number of monitoring target regions by
reading from and writing to the ``attrs`` file. To know about the monitoring
attributes in detail, please refer to the :doc:`/mm/damon/design`. For
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
1000, and then check it again::
# cd <debugfs>/damon
# echo 5000 100000 1000000 10 1000 > attrs
# cat attrs
5000 100000 1000000 10 1000
Target IDs
----------
Some types of address spaces supports multiple monitoring target. For example,
the virtual memory address spaces monitoring can have multiple processes as the
monitoring targets. Users can set the targets by writing relevant id values of
the targets to, and get the ids of the current targets by reading from the
``target_ids`` file. In case of the virtual address spaces monitoring, the
values should be pids of the monitoring target processes. For example, below
commands set processes having pids 42 and 4242 as the monitoring targets and
check it again::
# cd <debugfs>/damon
# echo 42 4242 > target_ids
# cat target_ids
42 4242
Users can also monitor the physical memory address space of the system by
writing a special keyword, "``paddr\n``" to the file. Because physical address
space monitoring doesn't support multiple targets, reading the file will show a
fake value, ``42``, as below::
# cd <debugfs>/damon
# echo paddr > target_ids
# cat target_ids
42
Note that setting the target ids doesn't start the monitoring.
Initial Monitoring Target Regions
---------------------------------
In case of the virtual address space monitoring, DAMON automatically sets and
updates the monitoring target regions so that entire memory mappings of target
processes can be covered. However, users can want to limit the monitoring
region to specific address ranges, such as the heap, the stack, or specific
file-mapped area. Or, some users can know the initial access pattern of their
workloads and therefore want to set optimal initial regions for the 'adaptive
regions adjustment'.
In contrast, DAMON do not automatically sets and updates the monitoring target
regions in case of physical memory monitoring. Therefore, users should set the
monitoring target regions by themselves.
In such cases, users can explicitly set the initial monitoring target regions
as they want, by writing proper values to the ``init_regions`` file. The input
should be a sequence of three integers separated by white spaces that represent
one region in below form.::
<target idx> <start address> <end address>
The ``target idx`` should be the index of the target in ``target_ids`` file,
starting from ``0``, and the regions should be passed in address order. For
example, below commands will set a couple of address ranges, ``1-100`` and
``100-200`` as the initial monitoring target region of pid 42, which is the
first one (index ``0``) in ``target_ids``, and another couple of address
ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
(index ``1``) in ``target_ids``.::
# cd <debugfs>/damon
# cat target_ids
42 4242
# echo "0 1 100 \
0 100 200 \
1 20 40 \
1 50 100" > init_regions
Note that this sets the initial monitoring target regions only. In case of
virtual memory monitoring, DAMON will automatically updates the boundary of the
regions after one ``update interval``. Therefore, users should set the
``update interval`` large enough in this case, if they don't want the
update.
Schemes
-------
Users can get and set the DAMON-based operation :ref:`schemes
<damon_design_damos>` by reading from and writing to ``schemes`` debugfs file.
Reading the file also shows the statistics of each scheme. To the file, each
of the schemes should be represented in each line in below form::
<target access pattern> <action> <quota> <watermarks>
You can disable schemes by simply writing an empty string to the file.
Target Access Pattern
~~~~~~~~~~~~~~~~~~~~~
The target access :ref:`pattern <damon_design_damos_access_pattern>` of the
scheme. The ``<target access pattern>`` is constructed with three ranges in
below form::
min-size max-size min-acc max-acc min-age max-age
Specifically, bytes for the size of regions (``min-size`` and ``max-size``),
number of monitored accesses per aggregate interval for access frequency
(``min-acc`` and ``max-acc``), number of aggregate intervals for the age of
regions (``min-age`` and ``max-age``) are specified. Note that the ranges are
closed interval.
Action
~~~~~~
The ``<action>`` is a predefined integer for memory management :ref:`actions
<damon_design_damos_action>`. The mapping between the ``<action>`` values and
the memory management actions is as below. For the detailed meaning of the
action and DAMON operations set supporting each action, please refer to the
list on :ref:`design doc <damon_design_damos_action>`.
- 0: ``willneed``
- 1: ``cold``
- 2: ``pageout``
- 3: ``hugepage``
- 4: ``nohugepage``
- 5: ``stat``
Quota
~~~~~
Users can set the :ref:`quotas <damon_design_damos_quotas>` of the given scheme
via the ``<quota>`` in below form::
<ms> <sz> <reset interval> <priority weights>
This makes DAMON to try to use only up to ``<ms>`` milliseconds for applying
the action to memory regions of the ``target access pattern`` within the
``<reset interval>`` milliseconds, and to apply the action to only up to
``<sz>`` bytes of memory regions within the ``<reset interval>``. Setting both
``<ms>`` and ``<sz>`` zero disables the quota limits.
For the :ref:`prioritization <damon_design_damos_quotas_prioritization>`, users
can set the weights for the three properties in ``<priority weights>`` in below
form::
<size weight> <access frequency weight> <age weight>
Watermarks
~~~~~~~~~~
Users can specify :ref:`watermarks <damon_design_damos_watermarks>` of the
given scheme via ``<watermarks>`` in below form::
<metric> <check interval> <high mark> <middle mark> <low mark>
``<metric>`` is a predefined integer for the metric to be checked. The
supported numbers and their meanings are as below.
- 0: Ignore the watermarks
- 1: System's free memory rate (per thousand)
The value of the metric is checked every ``<check interval>`` microseconds.
If the value is higher than ``<high mark>`` or lower than ``<low mark>``, the
scheme is deactivated. If the value is lower than ``<mid mark>``, the scheme
is activated.
.. _damos_stats:
Statistics
~~~~~~~~~~
It also counts the total number and bytes of regions that each scheme is tried
to be applied, the two numbers for the regions that each scheme is successfully
applied, and the total number of the quota limit exceeds. This statistics can
be used for online analysis or tuning of the schemes.
The statistics can be shown by reading the ``schemes`` file. Reading the file
will show each scheme you entered in each line, and the five numbers for the
statistics will be added at the end of each line.
Example
~~~~~~~
Below commands applies a scheme saying "If a memory region of size in [4KiB,
8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
interval in [10, 20], page out the region. For the paging out, use only up to
10ms per second, and also don't page out more than 1GiB per second. Under the
limitation, page out memory regions having longer age first. Also, check the
free memory rate of the system every 5 seconds, start the monitoring and paging
out when the free memory rate becomes lower than 50%, but stop it if the free
memory rate becomes larger than 60%, or lower than 30%".::
# cd <debugfs>/damon
# scheme="4096 8192 0 5 10 20 2" # target access pattern and action
# scheme+=" 10 $((1024*1024*1024)) 1000" # quotas
# scheme+=" 0 0 100" # prioritization weights
# scheme+=" 1 5000000 600 500 300" # watermarks
# echo "$scheme" > schemes
Turning On/Off
--------------
Setting the files as described above doesn't incur effect unless you explicitly
start the monitoring. You can start, stop, and check the current status of the
monitoring by writing to and reading from the ``monitor_on_DEPRECATED`` file.
Writing ``on`` to the file starts the monitoring of the targets with the
attributes. Writing ``off`` to the file stops those. DAMON also stops if
every target process is terminated. Below example commands turn on, off, and
check the status of DAMON::
# cd <debugfs>/damon
# echo on > monitor_on_DEPRECATED
# echo off > monitor_on_DEPRECATED
# cat monitor_on_DEPRECATED
off
Please note that you cannot write to the above-mentioned debugfs files while
the monitoring is turned on. If you write to the files while DAMON is running,
an error code such as ``-EBUSY`` will be returned.
Monitoring Thread PID
---------------------
DAMON does requested monitoring with a kernel thread called ``kdamond``. You
can get the pid of the thread by reading the ``kdamond_pid`` file. When the
monitoring is turned off, reading the file returns ``none``. ::
# cd <debugfs>/damon
# cat monitor_on_DEPRECATED
off
# cat kdamond_pid
none
# echo on > monitor_on_DEPRECATED
# cat kdamond_pid
18594
Using Multiple Monitoring Threads
---------------------------------
One ``kdamond`` thread is created for each monitoring context. You can create
and remove monitoring contexts for multiple ``kdamond`` required use case using
the ``mk_contexts`` and ``rm_contexts`` files.
Writing the name of the new context to the ``mk_contexts`` file creates a
directory of the name on the DAMON debugfs directory. The directory will have
DAMON debugfs files for the context. ::
# cd <debugfs>/damon
# ls foo
# ls: cannot access 'foo': No such file or directory
# echo foo > mk_contexts
# ls foo
# attrs init_regions kdamond_pid schemes target_ids
If the context is not needed anymore, you can remove it and the corresponding
directory by putting the name of the context to the ``rm_contexts`` file. ::
# echo foo > rm_contexts
# ls foo
# ls: cannot access 'foo': No such file or directory
Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on_DEPRECATED`` files
are in the root directory only.

View File

@ -280,8 +280,8 @@ The following files are currently defined:
blocks; configure auto-onlining.
The default value depends on the
CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel configuration
option.
CONFIG_MHP_DEFAULT_ONLINE_TYPE kernel configuration
options.
See the ``state`` property of memory blocks for details.
``block_size_bytes`` read-only: the size in bytes of a memory block.

View File

@ -332,6 +332,12 @@ allocation policy for the internal shmem mount by using the kernel parameter
seven valid policies for shmem (``always``, ``within_size``, ``advise``,
``never``, ``deny``, and ``force``).
Similarly to ``transparent_hugepage_shmem``, you can control the default
hugepage allocation policy for the tmpfs mount by using the kernel parameter
``transparent_hugepage_tmpfs=<policy>``, where ``<policy>`` is one of the
four valid policies for tmpfs (``always``, ``within_size``, ``advise``,
``never``). The tmpfs mount default policy is ``never``.
In the same manner as ``thp_anon`` controls each supported anonymous THP
size, ``thp_shmem`` controls each supported shmem THP size. ``thp_shmem``
has the same format as ``thp_anon``, but also supports the policy
@ -352,8 +358,21 @@ default to ``never``.
Hugepages in tmpfs/shmem
========================
You can control hugepage allocation policy in tmpfs with mount option
``huge=``. It can have following values:
Traditionally, tmpfs only supported a single huge page size ("PMD"). Today,
it also supports smaller sizes just like anonymous memory, often referred
to as "multi-size THP" (mTHP). Huge pages of any size are commonly
represented in the kernel as "large folios".
While there is fine control over the huge page sizes to use for the internal
shmem mount (see below), ordinary tmpfs mounts will make use of all available
huge page sizes without any control over the exact sizes, behaving more like
other file systems.
tmpfs mounts
------------
The THP allocation policy for tmpfs mounts can be adjusted using the mount
option: ``huge=``. It can have following values:
always
Attempt to allocate huge pages every time we need a new page;
@ -363,24 +382,24 @@ never
within_size
Only allocate huge page if it will be fully within i_size.
Also respect fadvise()/madvise() hints;
Also respect madvise() hints;
advise
Only allocate huge pages if requested with fadvise()/madvise();
Only allocate huge pages if requested with madvise();
The default policy is ``never``.
Remember, that the kernel may use huge pages of all available sizes, and
that no fine control as for the internal tmpfs mount is available.
The default policy in the past was ``never``, but it can now be adjusted
using the kernel parameter ``transparent_hugepage_tmpfs=<policy>``.
``mount -o remount,huge= /mountpoint`` works fine after mount: remounting
``huge=never`` will not attempt to break up huge pages at all, just stop more
from being allocated.
There's also sysfs knob to control hugepage allocation policy for internal
shmem mount: /sys/kernel/mm/transparent_hugepage/shmem_enabled. The mount
is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or
MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
In addition to policies listed above, shmem_enabled allows two further
values:
In addition to policies listed above, the sysfs knob
/sys/kernel/mm/transparent_hugepage/shmem_enabled will affect the
allocation policy of tmpfs mounts, when set to the following values:
deny
For use in emergencies, to force the huge option off from
@ -388,13 +407,24 @@ deny
force
Force the huge option on for all - very useful for testing;
Shmem can also use "multi-size THP" (mTHP) by adding a new sysfs knob to
control mTHP allocation:
'/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/shmem_enabled',
and its value for each mTHP is essentially consistent with the global
setting. An 'inherit' option is added to ensure compatibility with these
global settings. Conversely, the options 'force' and 'deny' are dropped,
which are rather testing artifacts from the old ages.
shmem / internal tmpfs
----------------------
The mount internal tmpfs mount is used for SysV SHM, memfds, shared anonymous
mmaps (of /dev/zero or MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
To control the THP allocation policy for this internal tmpfs mount, the
sysfs knob /sys/kernel/mm/transparent_hugepage/shmem_enabled and the knobs
per THP size in
'/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/shmem_enabled'
can be used.
The global knob has the same semantics as the ``huge=`` mount options
for tmpfs mounts, except that the different huge page sizes can be controlled
individually, and will only use the setting of the global knob when the
per-size knob is set to 'inherit'.
The options 'force' and 'deny' are dropped for the individual sizes, which
are rather testing artifacts from the old ages.
always
Attempt to allocate <size> huge pages every time we need a new page;
@ -408,10 +438,10 @@ never
within_size
Only allocate <size> huge page if it will be fully within i_size.
Also respect fadvise()/madvise() hints;
Also respect madvise() hints;
advise
Only allocate <size> huge pages if requested with fadvise()/madvise();
Only allocate <size> huge pages if requested with madvise();
Need of application restart
===========================
@ -436,7 +466,7 @@ AnonHugePmdMapped).
The number of file transparent huge pages mapped to userspace is available
by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
To identify what applications are mapping file transparent huge pages, it
is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
is necessary to read ``/proc/PID/smaps`` and count the FilePmdMapped fields
for each mapping.
Note that reading the smaps file is expensive and reading it
@ -561,6 +591,16 @@ swpin
is incremented every time a huge page is swapped in from a non-zswap
swap device in one piece.
swpin_fallback
is incremented if swapin fails to allocate or charge a huge page
and instead falls back to using huge pages with lower orders or
small pages.
swpin_fallback_charge
is incremented if swapin fails to charge a huge page and instead
falls back to using huge pages with lower orders or small pages
even though the allocation was successful.
swpout
is incremented every time a huge page is swapped out to a non-zswap
swap device in one piece without splitting.

View File

@ -0,0 +1,72 @@
.. SPDX-License-Identifier: GPL-2.0
====================
Linux NVMe multipath
====================
This document describes NVMe multipath and its path selection policies supported
by the Linux NVMe host driver.
Introduction
============
The NVMe multipath feature in Linux integrates namespaces with the same
identifier into a single block device. Using multipath enhances the reliability
and stability of I/O access while improving bandwidth performance. When a user
sends I/O to this merged block device, the multipath mechanism selects one of
the underlying block devices (paths) according to the configured policy.
Different policies result in different path selections.
Policies
========
All policies follow the ANA (Asymmetric Namespace Access) mechanism, meaning
that when an optimized path is available, it will be chosen over a non-optimized
one. Current the NVMe multipath policies include numa(default), round-robin and
queue-depth.
To set the desired policy (e.g., round-robin), use one of the following methods:
1. echo -n "round-robin" > /sys/module/nvme_core/parameters/iopolicy
2. or add the "nvme_core.iopolicy=round-robin" to cmdline.
NUMA
----
The NUMA policy selects the path closest to the NUMA node of the current CPU for
I/O distribution. This policy maintains the nearest paths to each NUMA node
based on network interface connections.
When to use the NUMA policy:
1. Multi-core Systems: Optimizes memory access in multi-core and
multi-processor systems, especially under NUMA architecture.
2. High Affinity Workloads: Binds I/O processing to the CPU to reduce
communication and data transfer delays across nodes.
Round-Robin
-----------
The round-robin policy distributes I/O requests evenly across all paths to
enhance throughput and resource utilization. Each I/O operation is sent to the
next path in sequence.
When to use the round-robin policy:
1. Balanced Workloads: Effective for balanced and predictable workloads with
similar I/O size and type.
2. Homogeneous Path Performance: Utilizes all paths efficiently when
performance characteristics (e.g., latency, bandwidth) are similar.
Queue-Depth
-----------
The queue-depth policy manages I/O requests based on the current queue depth
of each path, selecting the path with the least number of in-flight I/Os.
When to use the queue-depth policy:
1. High load with small I/Os: Effectively balances load across paths when
the load is high, and I/O operations consist of small, relatively
fixed-sized requests.

View File

@ -60,7 +60,7 @@ description of available events and configuration options in sysfs, see
The "format" directory describes format of the config fields of the
perf_event_attr structure. The "events" directory provides configuration
templates for all documented events. For example,
"Rx_PCIe_TLP_Data_Payload" is an equivalent of "eventid=0x22,type=0x1".
"rx_pcie_tlp_data_payload" is an equivalent of "eventid=0x21,type=0x0".
The "perf list" command shall list the available events from sysfs, e.g.::
@ -79,8 +79,8 @@ Example usage of counting PCIe RX TLP data payload (Units of bytes)::
The average RX/TX bandwidth can be calculated using the following formula:
PCIe RX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window
PCIe TX Bandwidth = Tx_PCIe_TLP_Data_Payload / Measure_Time_Window
PCIe RX Bandwidth = rx_pcie_tlp_data_payload / Measure_Time_Window
PCIe TX Bandwidth = tx_pcie_tlp_data_payload / Measure_Time_Window
Lane Event Usage
-------------------------------

View File

@ -35,7 +35,10 @@ e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
SCCL ID #1.
The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
ID used to count the uncore PMU event.
ID used to count the uncore PMU event. An "associated_cpus" sysfs attribute is
also provided to show the CPUs associated with this PMU. The "cpumask" indicates
the CPUs to open the events, usually as a hint for userspaces tools like perf.
It only contains one associated CPU from the "associated_cpus".
Example usage of perf::

View File

@ -14,6 +14,8 @@ Performance monitor support
qcom_l2_pmu
qcom_l3_pmu
starfive_starlink_pmu
mrvl-odyssey-ddr-pmu
mrvl-odyssey-tad-pmu
arm-ccn
arm-cmn
arm-ni

View File

@ -0,0 +1,80 @@
===================================================================
Marvell Odyssey DDR PMU Performance Monitoring Unit (PMU UNCORE)
===================================================================
Odyssey DRAM Subsystem supports eight counters for monitoring performance
and software can program those counters to monitor any of the defined
performance events. Supported performance events include those counted
at the interface between the DDR controller and the PHY, interface between
the DDR Controller and the CHI interconnect, or within the DDR Controller.
Additionally DSS also supports two fixed performance event counters, one
for ddr reads and the other for ddr writes.
The counter will be operating in either manual or auto mode.
The PMU driver exposes the available events and format options under sysfs::
/sys/bus/event_source/devices/mrvl_ddr_pmu_<>/events/
/sys/bus/event_source/devices/mrvl_ddr_pmu_<>/format/
Examples::
$ perf list | grep ddr
mrvl_ddr_pmu_<>/ddr_act_bypass_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_bsm_alloc/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_bsm_starvation/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_cam_active_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_cam_mwr/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_cam_rd_active_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_cam_rd_or_wr_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_cam_read/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_cam_wr_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_cam_write/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_capar_error/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_crit_ref/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_ddr_reads/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_ddr_writes/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_dfi_cmd_is_retry/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_dfi_cycles/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_dfi_parity_poison/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_dfi_rd_data_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_dfi_wr_data_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_dqsosc_mpc/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_dqsosc_mrr/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_enter_mpsm/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_enter_powerdown/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_enter_selfref/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_hif_pri_rdaccess/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_hif_rd_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_hif_rd_or_wr_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_hif_rmw_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_hif_wr_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_hpri_sched_rd_crit_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_load_mode/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_lpri_sched_rd_crit_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_precharge/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_precharge_for_other/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_precharge_for_rdwr/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_raw_hazard/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_rd_bypass_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_rd_crc_error/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_rd_uc_ecc_error/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_rdwr_transitions/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_refresh/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_retry_fifo_full/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_spec_ref/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_tcr_mrr/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_war_hazard/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_waw_hazard/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_win_limit_reached_rd/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_win_limit_reached_wr/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_wr_crc_error/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_wr_trxn_crit_access/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_write_combine/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_zqcl/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_zqlatch/ [Kernel PMU event]
mrvl_ddr_pmu_<>/ddr_zqstart/ [Kernel PMU event]
$ perf stat -e ddr_cam_read,ddr_cam_write,ddr_cam_active_access,ddr_cam
rd_or_wr_access,ddr_cam_rd_active_access,ddr_cam_mwr <workload>

View File

@ -0,0 +1,37 @@
====================================================================
Marvell Odyssey LLC-TAD Performance Monitoring Unit (PMU UNCORE)
====================================================================
Each TAD provides eight 64-bit counters for monitoring
cache behavior.The driver always configures the same counter for
all the TADs. The user would end up effectively reserving one of
eight counters in every TAD to look across all TADs.
The occurrences of events are aggregated and presented to the user
at the end of running the workload. The driver does not provide a
way for the user to partition TADs so that different TADs are used for
different applications.
The performance events reflect various internal or interface activities.
By combining the values from multiple performance counters, cache
performance can be measured in terms such as: cache miss rate, cache
allocations, interface retry rate, internal resource occupancy, etc.
The PMU driver exposes the available events and format options under sysfs::
/sys/bus/event_source/devices/tad/events/
/sys/bus/event_source/devices/tad/format/
Examples::
$ perf list | grep tad
tad/tad_alloc_any/ [Kernel PMU event]
tad/tad_alloc_dtg/ [Kernel PMU event]
tad/tad_alloc_ltg/ [Kernel PMU event]
tad/tad_hit_any/ [Kernel PMU event]
tad/tad_hit_dtg/ [Kernel PMU event]
tad/tad_hit_ltg/ [Kernel PMU event]
tad/tad_req_msh_in_exlmn/ [Kernel PMU event]
tad/tad_tag_rd/ [Kernel PMU event]
tad/tad_tot_cycle/ [Kernel PMU event]
$ perf stat -e tad_alloc_dtg,tad_alloc_ltg,tad_alloc_any,tad_hit_dtg,tad_hit_ltg,tad_hit_any,tad_tag_rd <workload>

View File

@ -34,7 +34,7 @@ strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_scf_pmu_<socket-id>.
see /sys/bus/event_source/devices/nvidia_scf_pmu_<socket-id>.
Example usage:
@ -66,7 +66,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
the PMU traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
see /sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
Example usage:
@ -86,6 +86,22 @@ Example usage:
perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
The NVLink-C2C has two ports that can be connected to one GPU (occupying both
ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
parameter to select the port(s) to monitor. Each bit represents the port number,
e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
PMU will monitor both ports by default if not specified.
Example for port filtering:
* Count event id 0x0 from the GPU connected with socket 0 on port 0::
perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/
* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/
NVLink-C2C1 PMU
-------------------
@ -96,7 +112,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
the PMU traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
see /sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
Example usage:
@ -116,6 +132,22 @@ Example usage:
perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
The NVLink-C2C has two ports that can be connected to one GPU (occupying both
ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
parameter to select the port(s) to monitor. Each bit represents the port number,
e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
PMU will monitor both ports by default if not specified.
Example for port filtering:
* Count event id 0x0 from the GPU connected with socket 0 on port 0::
perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/
* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/
CNVLink PMU
---------------
@ -125,13 +157,14 @@ to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
for more info about the PMU traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>.
see /sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>.
Each SoC socket can be connected to one or more sockets via CNVLink. The user can
use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
socket 1 to 3.
/sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
socket 1 to 3. The PMU will monitor all remote sockets by default if not
specified.
/sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
shows the valid bits that can be set in the "rem_socket" parameter.
The PMU can not distinguish the remote traffic initiator, therefore it does not
@ -165,12 +198,13 @@ local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section
for more info about the PMU traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>.
see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>.
Each SoC socket can support multiple root ports. The user can use
"root_port" bitmap parameter to select the port(s) to monitor, i.e.
"root_port=0xF" corresponds to root port 0 to 3.
/sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
"root_port=0xF" corresponds to root port 0 to 3. The PMU will monitor all root
ports by default if not specified.
/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
shows the valid bits that can be set in the "root_port" parameter.
Example usage:

View File

@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
table, so we need to expose it to sysfs. If boost is not active, but
still supported, this maximum frequency will be larger than the one in
``cpuinfo``. On systems that support preferred core, the driver will have
different values for some cores than others and this will reflect the values
advertised by the platform at bootup.
``cpuinfo``.
This attribute is read-only.
``amd_pstate_lowest_nonlinear_freq``

View File

@ -269,27 +269,7 @@ Namely, when invoked to select an idle state for a CPU (i.e. an idle state that
the CPU will ask the processor hardware to enter), it attempts to predict the
idle duration and uses the predicted value for idle state selection.
It first obtains the time until the closest timer event with the assumption
that the scheduler tick will be stopped. That time, referred to as the *sleep
length* in what follows, is the upper bound on the time before the next CPU
wakeup. It is used to determine the sleep length range, which in turn is needed
to get the sleep length correction factor.
The ``menu`` governor maintains two arrays of sleep length correction factors.
One of them is used when tasks previously running on the given CPU are waiting
for some I/O operations to complete and the other one is used when that is not
the case. Each array contains several correction factor values that correspond
to different sleep length ranges organized so that each range represented in the
array is approximately 10 times wider than the previous one.
The correction factor for the given sleep length range (determined before
selecting the idle state for the CPU) is updated after the CPU has been woken
up and the closer the sleep length is to the observed idle duration, the closer
to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
The sleep length is multiplied by the correction factor for the range that it
falls into to obtain the first approximation of the predicted idle duration.
Next, the governor uses a simple pattern recognition algorithm to refine its
It first uses a simple pattern recognition algorithm to obtain a preliminary
idle duration prediction. Namely, it saves the last 8 observed idle duration
values and, when predicting the idle duration next time, it computes the average
and variance of them. If the variance is small (smaller than 400 square
@ -301,29 +281,39 @@ Again, if the variance of them is small (in the above sense), the average is
taken as the "typical interval" value and so on, until either the "typical
interval" is determined or too many data points are disregarded, in which case
the "typical interval" is assumed to equal "infinity" (the maximum unsigned
integer value). The "typical interval" computed this way is compared with the
sleep length multiplied by the correction factor and the minimum of the two is
taken as the predicted idle duration.
integer value).
Then, the governor computes an extra latency limit to help "interactive"
workloads. It uses the observation that if the exit latency of the selected
idle state is comparable with the predicted idle duration, the total time spent
in that state probably will be very short and the amount of energy to save by
entering it will be relatively small, so likely it is better to avoid the
overhead related to entering that state and exiting it. Thus selecting a
shallower state is likely to be a better option then. The first approximation
of the extra latency limit is the predicted idle duration itself which
additionally is divided by a value depending on the number of tasks that
previously ran on the given CPU and now they are waiting for I/O operations to
complete. The result of that division is compared with the latency limit coming
from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
framework and the minimum of the two is taken as the limit for the idle states'
exit latency.
If the "typical interval" computed this way is long enough, the governor obtains
the time until the closest timer event with the assumption that the scheduler
tick will be stopped. That time, referred to as the *sleep length* in what follows,
is the upper bound on the time before the next CPU wakeup. It is used to determine
the sleep length range, which in turn is needed to get the sleep length correction
factor.
The ``menu`` governor maintains an array containing several correction factor
values that correspond to different sleep length ranges organized so that each
range represented in the array is approximately 10 times wider than the previous
one.
The correction factor for the given sleep length range (determined before
selecting the idle state for the CPU) is updated after the CPU has been woken
up and the closer the sleep length is to the observed idle duration, the closer
to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
The sleep length is multiplied by the correction factor for the range that it
falls into to obtain an approximation of the predicted idle duration that is
compared to the "typical interval" determined previously and the minimum of
the two is taken as the idle duration prediction.
If the "typical interval" value is small, which means that the CPU is likely
to be woken up soon enough, the sleep length computation is skipped as it may
be costly and the idle duration is simply predicted to equal the "typical
interval" value.
Now, the governor is ready to walk the list of idle states and choose one of
them. For this purpose, it compares the target residency of each state with
the predicted idle duration and the exit latency of it with the computed latency
limit. It selects the state with the target residency closest to the predicted
the predicted idle duration and the exit latency of it with the with the latency
limit coming from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
framework. It selects the state with the target residency closest to the predicted
idle duration, but still below it, and exit latency that does not exceed the
limit.

View File

@ -733,7 +733,7 @@ can easily happen that your self-built kernel will lack modules for tasks you
did not perform before utilizing this make target. That's because those tasks
require kernel modules that are normally autoloaded when you perform that task
for the first time; if you didn't perform that task at least once before using
localmodonfig, the latter will thus assume these modules are superfluous and
localmodconfig, the latter will thus assume these modules are superfluous and
disable them.
You can try to avoid this by performing typical tasks that often will autoload

View File

@ -41,7 +41,7 @@ pre-allocation or re-sizing of any kernel data structures.
dentry-negative
----------------------------
Policy for negative dentries. Set to 1 to to always delete the dentry when a
Policy for negative dentries. Set to 1 to always delete the dentry when a
file is removed, and 0 to disable it. By default, this behavior is disabled.
dentry-state

View File

@ -1544,6 +1544,13 @@ constant ``FUTEX_TID_MASK`` (0x3fffffff).
If a value outside of this range is written to ``threads-max`` an
``EINVAL`` error occurs.
timer_migration
===============
When set to a non-zero value, attempt to migrate timers away from idle cpus to
allow them to remain in low power states longer.
Default is set (1).
traceoff_on_warning
===================

View File

@ -49,26 +49,26 @@ How do I use the magic SysRq key?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On x86
You press the key combo :kbd:`ALT-SysRq-<command key>`.
You press the key combo `ALT-SysRq-<command key>`.
.. note::
Some
keyboards may not have a key labeled 'SysRq'. The 'SysRq' key is
also known as the 'Print Screen' key. Also some keyboards cannot
handle so many keys being pressed at the same time, so you might
have better luck with press :kbd:`Alt`, press :kbd:`SysRq`,
release :kbd:`SysRq`, press :kbd:`<command key>`, release everything.
have better luck with press `Alt`, press `SysRq`,
release `SysRq`, press `<command key>`, release everything.
On SPARC
You press :kbd:`ALT-STOP-<command key>`, I believe.
You press `ALT-STOP-<command key>`, I believe.
On the serial console (PC style standard serial ports only)
You send a ``BREAK``, then within 5 seconds a command key. Sending
``BREAK`` twice is interpreted as a normal BREAK.
On PowerPC
Press :kbd:`ALT - Print Screen` (or :kbd:`F13`) - :kbd:`<command key>`.
:kbd:`Print Screen` (or :kbd:`F13`) - :kbd:`<command key>` may suffice.
Press `ALT - Print Screen` (or `F13`) - `<command key>`.
`Print Screen` (or `F13`) - `<command key>` may suffice.
On other
If you know of the key combos for other architectures, please
@ -88,7 +88,7 @@ On all
echo _reisub > /proc/sysrq-trigger
The :kbd:`<command key>` is case sensitive.
The `<command key>` is case sensitive.
What are the 'command' keys?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -225,9 +225,9 @@ Sometimes SysRq seems to get 'stuck' after using it, what can I do?
When this happens, try tapping shift, alt and control on both sides of the
keyboard, and hitting an invalid sysrq sequence again. (i.e., something like
:kbd:`alt-sysrq-z`).
`alt-sysrq-z`).
Switching to another virtual console (:kbd:`ALT+Fn`) and then back again
Switching to another virtual console (`ALT+Fn`) and then back again
should also help.
I hit SysRq, but nothing seems to happen, what's wrong?
@ -290,7 +290,7 @@ exception the header line from the sysrq command is passed to all console
consumers as if the current loglevel was maximum. If only the header
is emitted it is almost certain that the kernel loglevel is too low.
Should you require the output on the console channel then you will need
to temporarily up the console loglevel using :kbd:`alt-sysrq-8` or::
to temporarily up the console loglevel using `alt-sysrq-8` or::
echo 8 > /proc/sysrq-trigger

View File

@ -1431,7 +1431,7 @@ can easily happen that your self-built kernels will lack modules for tasks you
did not perform at least once before utilizing this make target. That happens
when a task requires kernel modules which are only autoloaded when you execute
it for the first time. So when you never performed that task since starting your
kernel the modules will not have been loaded -- and from localmodonfig's point
kernel the modules will not have been loaded -- and from localmodconfig's point
of view look superfluous, which thus disables them to reduce the amount of code
to be compiled.

View File

@ -83,7 +83,7 @@ scripts/ver_linux is a good way to check if your system already has
the necessary tools::
sudo apt-get build-essentials flex bison yacc
sudo apt install libelf-dev systemtap-sdt-dev libaudit-dev libslang2-dev libperl-dev libdw-dev
sudo apt install libelf-dev systemtap-sdt-dev libslang2-dev libperl-dev libdw-dev
cscope is a good tool to browse kernel sources. Let's install it now::

View File

@ -153,3 +153,11 @@ asymmetric system, a broken guest at EL1 could still attempt to execute
mode will return to host userspace with an ``exit_reason`` of
``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
NOHZ FULL
---------
To avoid perturbing an adaptive-ticks CPU (specified using
``nohz_full=``) when a 32-bit task is forcefully migrated, these CPUs
are treated as 64-bit-only when support for asymmetric 32-bit systems
is enabled.

View File

@ -449,6 +449,18 @@ Before jumping into the kernel, the following conditions must be met:
- HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
- For CPUs with debug architecture i.e FEAT_Debugv8pN (all versions):
- If EL3 is present:
- MDCR_EL3.TDA (bit 9) must be initialized to 0b0
- For CPUs with FEAT_PMUv3:
- If EL3 is present:
- MDCR_EL3.TPM (bit 6) must be initialized to 0b0
The requirements described above for CPU mode, caches, MMUs, architected
timers, coherency and system registers apply to all CPUs. All CPUs must
enter the kernel in the same exception level. Where the values documented

View File

@ -174,26 +174,82 @@ HWCAP_GCS
Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
described by Documentation/arch/arm64/gcs.rst.
HWCAP_CMPBR
Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0010.
HWCAP_FPRCVT
Functionality implied by ID_AA64ISAR3_EL1.FPRCVT == 0b0001.
HWCAP_F8MM8
Functionality implied by ID_AA64FPFR0_EL1.F8MM8 == 0b0001.
HWCAP_F8MM4
Functionality implied by ID_AA64FPFR0_EL1.F8MM4 == 0b0001.
HWCAP_SVE_F16MM
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.F16MM == 0b0001.
HWCAP_SVE_ELTPERM
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.ELTPERM == 0b0001.
HWCAP_SVE_AES2
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.AES == 0b0011.
HWCAP_SVE_BFSCALE
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.B16B16 == 0b0010.
HWCAP_SVE2P2
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.SVEver == 0b0011.
HWCAP_SME2P2
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0011.
HWCAP_SME_SBITPERM
Functionality implied by ID_AA64SMFR0_EL1.SBitPerm == 0b1.
HWCAP_SME_AES
Functionality implied by ID_AA64SMFR0_EL1.AES == 0b1.
HWCAP_SME_SFEXPA
Functionality implied by ID_AA64SMFR0_EL1.SFEXPA == 0b1.
HWCAP_SME_STMOP
Functionality implied by ID_AA64SMFR0_EL1.STMOP == 0b1.
HWCAP_SME_SMOP4
Functionality implied by ID_AA64SMFR0_EL1.SMOP4 == 0b1.
HWCAP2_DCPODP
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
HWCAP2_SVE2
Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.SVEver == 0b0001.
HWCAP2_SVEAES
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.AES == 0b0001.
HWCAP2_SVEPMULL
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.AES == 0b0010.
HWCAP2_SVEBITPERM
Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.BitPerm == 0b0001.
HWCAP2_SVESHA3
Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.SHA3 == 0b0001.
HWCAP2_SVESM4
Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.SM4 == 0b0001.
HWCAP2_FLAGM2
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
@ -202,16 +258,20 @@ HWCAP2_FRINT
Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
HWCAP2_SVEI8MM
Functionality implied by ID_AA64ZFR0_EL1.I8MM == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.I8MM == 0b0001.
HWCAP2_SVEF32MM
Functionality implied by ID_AA64ZFR0_EL1.F32MM == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.F32MM == 0b0001.
HWCAP2_SVEF64MM
Functionality implied by ID_AA64ZFR0_EL1.F64MM == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.F64MM == 0b0001.
HWCAP2_SVEBF16
Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.BF16 == 0b0001.
HWCAP2_I8MM
Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
@ -277,7 +337,8 @@ HWCAP2_EBF16
Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
HWCAP2_SVE_EBF16
Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0010.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.BF16 == 0b0010.
HWCAP2_CSSC
Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0001.
@ -286,7 +347,8 @@ HWCAP2_RPRFM
Functionality implied by ID_AA64ISAR2_EL1.RPRFM == 0b0001.
HWCAP2_SVE2P1
Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.SVEver == 0b0010.
HWCAP2_SME2
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
@ -313,7 +375,8 @@ HWCAP2_HBC
Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
HWCAP2_SVE_B16B16
Functionality implied by ID_AA64ZFR0_EL1.B16B16 == 0b0001.
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
ID_AA64ZFR0_EL1.B16B16 == 0b0001.
HWCAP2_LRCPC3
Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0011.

View File

@ -37,7 +37,7 @@ intended to be exhaustive.
shadow stacks rather than GCS.
* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
AT_HWCAP2 entry.
AT_HWCAP entry.
* GCS is enabled per thread. While there is support for disabling GCS
at runtime this should be done with great care.

View File

@ -23,71 +23,6 @@ swapper_pg_dir contains only kernel (global) mappings while the user pgd
contains only user (non-global) mappings. The swapper_pg_dir address is
written to TTBR1 and never written to TTBR0.
AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
Start End Size Use
-----------------------------------------------------------------------
0000000000000000 0000ffffffffffff 256TB user
ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
[ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
ffff800000000000 ffff80007fffffff 2GB modules
ffff800080000000 fffffbffefffffff 124TB vmalloc
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
fffffbffff800000 fffffbffffffffff 8MB [guard region]
fffffc0000000000 fffffdffffffffff 2TB vmemmap
fffffe0000000000 ffffffffffffffff 2TB [guard region]
AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::
Start End Size Use
-----------------------------------------------------------------------
0000000000000000 000fffffffffffff 4PB user
fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
[fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
ffff800000000000 ffff80007fffffff 2GB modules
ffff800080000000 fffffbffefffffff 124TB vmalloc
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
fffffbffff800000 fffffbffffffffff 8MB [guard region]
fffffc0000000000 ffffffdfffffffff ~4TB vmemmap
ffffffe000000000 ffffffffffffffff 128GB [guard region]
Translation table lookup with 4KB pages::
+--------+--------+--------+--------+--------+--------+--------+--------+
|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
+--------+--------+--------+--------+--------+--------+--------+--------+
| | | | | |
| | | | | v
| | | | | [11:0] in-page offset
| | | | +-> [20:12] L3 index
| | | +-----------> [29:21] L2 index
| | +---------------------> [38:30] L1 index
| +-------------------------------> [47:39] L0 index
+----------------------------------------> [55] TTBR0/1
Translation table lookup with 64KB pages::
+--------+--------+--------+--------+--------+--------+--------+--------+
|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
+--------+--------+--------+--------+--------+--------+--------+--------+
| | | | |
| | | | v
| | | | [15:0] in-page offset
| | | +----------> [28:16] L3 index
| | +--------------------------> [41:29] L2 index
| +-------------------------------> [47:42] L1 index (48-bit)
| [51:42] L1 index (52-bit)
+----------------------------------------> [55] TTBR0/1
When using KVM without the Virtualization Host Extensions, the
hypervisor maps kernel pages in EL2 at a fixed (and potentially
random) offset from the linear mapping. See the kern_hyp_va macro and

View File

@ -198,7 +198,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A |
| ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA|
| | | #562869,1047329 | |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-600 | #1076982,1209401| N/A |
+----------------+-----------------+-----------------+-----------------------------+

View File

@ -293,3 +293,13 @@ The following keys are defined:
* :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED`: Misaligned vector accesses are
not supported at all and will generate a misaligned address fault.
* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0`: A bitmask containing the
thead vendor extensions that are compatible with the
:c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
* T-HEAD
* :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor
extension is supported in the T-Head ISA extensions spec starting from
commit a18c801634 ("Add T-Head VECTOR vendor extension. ").

View File

@ -130,8 +130,126 @@ SNP feature support.
More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR
Reverse Map Table (RMP)
=======================
The RMP is a structure in system memory that is used to ensure a one-to-one
mapping between system physical addresses and guest physical addresses. Each
page of memory that is potentially assignable to guests has one entry within
the RMP.
The RMP table can be either contiguous in memory or a collection of segments
in memory.
Contiguous RMP
--------------
Support for this form of the RMP is present when support for SEV-SNP is
present, which can be determined using the CPUID instruction::
0x8000001f[eax]:
Bit[4] indicates support for SEV-SNP
The location of the RMP is identified to the hardware through two MSRs::
0xc0010132 (RMP_BASE):
System physical address of the first byte of the RMP
0xc0010133 (RMP_END):
System physical address of the last byte of the RMP
Hardware requires that RMP_BASE and (RPM_END + 1) be 8KB aligned, but SEV
firmware increases the alignment requirement to require a 1MB alignment.
The RMP consists of a 16KB region used for processor bookkeeping followed
by the RMP entries, which are 16 bytes in size. The size of the RMP
determines the range of physical memory that the hypervisor can assign to
SEV-SNP guests. The RMP covers the system physical address from::
0 to ((RMP_END + 1 - RMP_BASE - 16KB) / 16B) x 4KB.
The current Linux support relies on BIOS to allocate/reserve the memory for
the RMP and to set RMP_BASE and RMP_END appropriately. Linux uses the MSR
values to locate the RMP and determine the size of the RMP. The RMP must
cover all of system memory in order for Linux to enable SEV-SNP.
Segmented RMP
-------------
Segmented RMP support is a new way of representing the layout of an RMP.
Initial RMP support required the RMP table to be contiguous in memory.
RMP accesses from a NUMA node on which the RMP doesn't reside
can take longer than accesses from a NUMA node on which the RMP resides.
Segmented RMP support allows the RMP entries to be located on the same
node as the memory the RMP is covering, potentially reducing latency
associated with accessing an RMP entry associated with the memory. Each
RMP segment covers a specific range of system physical addresses.
Support for this form of the RMP can be determined using the CPUID
instruction::
0x8000001f[eax]:
Bit[23] indicates support for segmented RMP
If supported, segmented RMP attributes can be found using the CPUID
instruction::
0x80000025[eax]:
Bits[5:0] minimum supported RMP segment size
Bits[11:6] maximum supported RMP segment size
0x80000025[ebx]:
Bits[9:0] number of cacheable RMP segment definitions
Bit[10] indicates if the number of cacheable RMP segments
is a hard limit
To enable a segmented RMP, a new MSR is available::
0xc0010136 (RMP_CFG):
Bit[0] indicates if segmented RMP is enabled
Bits[13:8] contains the size of memory covered by an RMP
segment (expressed as a power of 2)
The RMP segment size defined in the RMP_CFG MSR applies to all segments
of the RMP. Therefore each RMP segment covers a specific range of system
physical addresses. For example, if the RMP_CFG MSR value is 0x2401, then
the RMP segment coverage value is 0x24 => 36, meaning the size of memory
covered by an RMP segment is 64GB (1 << 36). So the first RMP segment
covers physical addresses from 0 to 0xF_FFFF_FFFF, the second RMP segment
covers physical addresses from 0x10_0000_0000 to 0x1F_FFFF_FFFF, etc.
When a segmented RMP is enabled, RMP_BASE points to the RMP bookkeeping
area as it does today (16K in size). However, instead of RMP entries
beginning immediately after the bookkeeping area, there is a 4K RMP
segment table (RST). Each entry in the RST is 8-bytes in size and represents
an RMP segment::
Bits[19:0] mapped size (in GB)
The mapped size can be less than the defined segment size.
A value of zero, indicates that no RMP exists for the range
of system physical addresses associated with this segment.
Bits[51:20] segment physical address
This address is left shift 20-bits (or just masked when
read) to form the physical address of the segment (1MB
alignment).
The RST can hold 512 segment entries but can be limited in size to the number
of cacheable RMP segments (CPUID 0x80000025_EBX[9:0]) if the number of cacheable
RMP segments is a hard limit (CPUID 0x80000025_EBX[10]).
The current Linux support relies on BIOS to allocate/reserve the memory for
the segmented RMP (the bookkeeping area, RST, and all segments), build the RST
and to set RMP_BASE, RMP_END, and RMP_CFG appropriately. Linux uses the MSR
values to locate the RMP and determine the size and location of the RMP
segments. The RMP must cover all of system memory in order for Linux to enable
SEV-SNP.
More details in the AMD64 APM Vol 2, section "15.36.3 Reverse Map Table",
docID: 24593.
Secure VM Service Module (SVSM)
===============================
SNP provides a feature called Virtual Machine Privilege Levels (VMPL) which
defines four privilege levels at which guest software can run. The most
privileged level is 0 and numerically higher numbers have lesser privileges.

View File

@ -77,7 +77,7 @@ Protocol 2.14 BURNT BY INCORRECT COMMIT
Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
============= ============================================================
.. note::
.. note::
The protocol version number should be changed only if the setup header
is changed. There is no need to update the version number if boot_params
or kernel_info are changed. Additionally, it is recommended to use
@ -95,27 +95,27 @@ Memory Layout
The traditional memory map for the kernel loader, used for Image or
zImage kernels, typically looks like::
| |
0A0000 +------------------------+
| Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
09A000 +------------------------+
| Command line |
| Stack/heap | For use by the kernel real-mode code.
098000 +------------------------+
| Kernel setup | The kernel real-mode code.
090200 +------------------------+
| Kernel boot sector | The kernel legacy boot sector.
090000 +------------------------+
| Protected-mode kernel | The bulk of the kernel image.
010000 +------------------------+
| Boot loader | <- Boot sector entry point 0000:7C00
001000 +------------------------+
| Reserved for MBR/BIOS |
000800 +------------------------+
| Typically used by MBR |
000600 +------------------------+
| BIOS use only |
000000 +------------------------+
| |
0A0000 +------------------------+
| Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
09A000 +------------------------+
| Command line |
| Stack/heap | For use by the kernel real-mode code.
098000 +------------------------+
| Kernel setup | The kernel real-mode code.
090200 +------------------------+
| Kernel boot sector | The kernel legacy boot sector.
090000 +------------------------+
| Protected-mode kernel | The bulk of the kernel image.
010000 +------------------------+
| Boot loader | <- Boot sector entry point 0000:7C00
001000 +------------------------+
| Reserved for MBR/BIOS |
000800 +------------------------+
| Typically used by MBR |
000600 +------------------------+
| BIOS use only |
000000 +------------------------+
When using bzImage, the protected-mode kernel was relocated to
0x100000 ("high memory"), and the kernel real-mode block (boot sector,
@ -142,28 +142,28 @@ above the 0x9A000 point; too many BIOSes will break above that point.
For a modern bzImage kernel with boot protocol version >= 2.02, a
memory layout like the following is suggested::
~ ~
| Protected-mode kernel |
100000 +------------------------+
| I/O memory hole |
0A0000 +------------------------+
| Reserved for BIOS | Leave as much as possible unused
~ ~
| Command line | (Can also be below the X+10000 mark)
X+10000 +------------------------+
| Stack/heap | For use by the kernel real-mode code.
X+08000 +------------------------+
| Kernel setup | The kernel real-mode code.
| Kernel boot sector | The kernel legacy boot sector.
X +------------------------+
| Boot loader | <- Boot sector entry point 0000:7C00
001000 +------------------------+
| Reserved for MBR/BIOS |
000800 +------------------------+
| Typically used by MBR |
000600 +------------------------+
| BIOS use only |
000000 +------------------------+
~ ~
| Protected-mode kernel |
100000 +------------------------+
| I/O memory hole |
0A0000 +------------------------+
| Reserved for BIOS | Leave as much as possible unused
~ ~
| Command line | (Can also be below the X+10000 mark)
X+10000 +------------------------+
| Stack/heap | For use by the kernel real-mode code.
X+08000 +------------------------+
| Kernel setup | The kernel real-mode code.
| Kernel boot sector | The kernel legacy boot sector.
X +------------------------+
| Boot loader | <- Boot sector entry point 0000:7C00
001000 +------------------------+
| Reserved for MBR/BIOS |
000800 +------------------------+
| Typically used by MBR |
000600 +------------------------+
| BIOS use only |
000000 +------------------------+
... where the address X is as low as the design of the boot loader permits.
@ -229,22 +229,22 @@ Offset/Size Proto Name Meaning
=========== ======== ===================== ============================================
.. note::
(1) For backwards compatibility, if the setup_sects field contains 0, the
real value is 4.
(1) For backwards compatibility, if the setup_sects field contains 0,
the real value is 4.
(2) For boot protocol prior to 2.04, the upper two bytes of the syssize
field are unusable, which means the size of a bzImage kernel
cannot be determined.
(2) For boot protocol prior to 2.04, the upper two bytes of the syssize
field are unusable, which means the size of a bzImage kernel
cannot be determined.
(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
the boot protocol version is "old". Loading an old kernel, the
following parameters should be assumed::
Image type = zImage
initrd not supported
Real-mode kernel must be located at 0x90000.
Image type = zImage
initrd not supported
Real-mode kernel must be located at 0x90000.
Otherwise, the "version" field contains the protocol version,
e.g. protocol version 2.01 will contain 0x0201 in this field. When
@ -265,7 +265,7 @@ All general purpose boot loaders should write the fields marked
nonstandard address should fill in the fields marked (reloc); other
boot loaders can ignore those fields.
The byte order of all fields is littleendian (this is x86, after all.)
The byte order of all fields is little endian (this is x86, after all.)
============ ===========
Field name: setup_sects
@ -365,7 +365,7 @@ Offset/size: 0x206/2
Protocol: 2.00+
============ =======
Contains the boot protocol version, in (major << 8)+minor format,
Contains the boot protocol version, in (major << 8) + minor format,
e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
10.17.
@ -397,17 +397,17 @@ Protocol: 2.00+
If set to a nonzero value, contains a pointer to a NUL-terminated
human-readable kernel version number string, less 0x200. This can
be used to display the kernel version to the user. This value
should be less than (0x200*setup_sects).
should be less than (0x200 * setup_sects).
For example, if this value is set to 0x1c00, the kernel version
number string can be found at offset 0x1e00 in the kernel file.
This is a valid value if and only if the "setup_sects" field
contains the value 15 or higher, as::
0x1c00 < 15*0x200 (= 0x1e00) but
0x1c00 >= 14*0x200 (= 0x1c00)
0x1c00 < 15 * 0x200 (= 0x1e00) but
0x1c00 >= 14 * 0x200 (= 0x1c00)
0x1c00 >> 9 = 14, So the minimum value for setup_secs is 15.
0x1c00 >> 9 = 14, So the minimum value for setup_secs is 15.
============ ==================
Field name: type_of_loader
@ -427,9 +427,9 @@ Protocol: 2.00+
For example, for T = 0x15, V = 0x234, write::
type_of_loader <- 0xE4
ext_loader_type <- 0x05
ext_loader_ver <- 0x23
type_of_loader <- 0xE4
ext_loader_type <- 0x05
ext_loader_ver <- 0x23
Assigned boot loader ids (hexadecimal):
@ -686,7 +686,7 @@ Protocol: 2.10+
If a boot loader makes use of this field, it should update the
kernel_alignment field with the alignment unit desired; typically::
kernel_alignment = 1 << min_alignment
kernel_alignment = 1 << min_alignment;
There may be a considerable performance cost with an excessively
misaligned kernel. Therefore, a loader should typically try each
@ -754,7 +754,7 @@ Protocol: 2.07+
0x00000000 The default x86/PC environment
0x00000001 lguest
0x00000002 Xen
0x00000003 Moorestown MID
0x00000003 Intel MID (Moorestown, CloverTrail, Merrifield, Moorefield)
0x00000004 CE4100 TV Platform
========== ==============================
@ -808,13 +808,13 @@ Protocol: 2.09+
parameters passing mechanism. The definition of struct setup_data is
as follow::
struct setup_data {
u64 next;
u32 type;
u32 len;
u8 data[0];
};
struct setup_data {
__u64 next;
__u32 type;
__u32 len;
__u8 data[];
}
Where, the next is a 64-bit physical pointer to the next node of
linked list, the next field of the last node is 0; the type is used
to identify the contents of data; the len is the length of data
@ -834,12 +834,12 @@ Protocol: 2.09+
Thus setup_indirect struct and SETUP_INDIRECT type were introduced in
protocol 2.15::
struct setup_indirect {
__u32 type;
__u32 reserved; /* Reserved, must be set to zero. */
__u64 len;
__u64 addr;
};
struct setup_indirect {
__u32 type;
__u32 reserved; /* Reserved, must be set to zero. */
__u64 len;
__u64 addr;
};
The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
SETUP_INDIRECT itself since making the setup_indirect a tree structure
@ -849,17 +849,17 @@ Protocol: 2.09+
Let's give an example how to point to SETUP_E820_EXT data using setup_indirect.
In this case setup_data and setup_indirect will look like this::
struct setup_data {
__u64 next = 0 or <addr_of_next_setup_data_struct>;
__u32 type = SETUP_INDIRECT;
__u32 len = sizeof(setup_indirect);
__u8 data[sizeof(setup_indirect)] = struct setup_indirect {
__u32 type = SETUP_INDIRECT | SETUP_E820_EXT;
__u32 reserved = 0;
__u64 len = <len_of_SETUP_E820_EXT_data>;
__u64 addr = <addr_of_SETUP_E820_EXT_data>;
}
}
struct setup_data {
.next = 0, /* or <addr_of_next_setup_data_struct> */
.type = SETUP_INDIRECT,
.len = sizeof(setup_indirect),
.data[sizeof(setup_indirect)] = (struct setup_indirect) {
.type = SETUP_INDIRECT | SETUP_E820_EXT,
.reserved = 0,
.len = <len_of_SETUP_E820_EXT_data>,
.addr = <addr_of_SETUP_E820_EXT_data>,
},
}
.. note::
SETUP_INDIRECT | SETUP_NONE objects cannot be properly distinguished
@ -896,19 +896,19 @@ Offset/size: 0x260/4
The kernel runtime start address is determined by the following algorithm::
if (relocatable_kernel) {
if (load_address < pref_address)
load_address = pref_address;
runtime_start = align_up(load_address, kernel_alignment);
} else {
runtime_start = pref_address;
}
if (relocatable_kernel) {
if (load_address < pref_address)
load_address = pref_address;
runtime_start = align_up(load_address, kernel_alignment);
} else {
runtime_start = pref_address;
}
Hence the necessary memory window location and size can be estimated by
a boot loader as::
memory_window_start = runtime_start;
memory_window_size = init_size;
memory_window_start = runtime_start;
memory_window_size = init_size;
============ ===============
Field name: handover_offset
@ -938,12 +938,12 @@ The kernel_info
===============
The relationships between the headers are analogous to the various data
sections:
sections::
setup_header = .data
boot_params/setup_data = .bss
What is missing from the above list? That's right:
What is missing from the above list? That's right::
kernel_info = .rodata
@ -975,22 +975,22 @@ after kernel_info_var_len_data label. Each chunk of variable size data has to
be prefixed with header/magic and its size, e.g.::
kernel_info:
.ascii "LToP" /* Header, Linux top (structure). */
.long kernel_info_var_len_data - kernel_info
.long kernel_info_end - kernel_info
.long 0x01234567 /* Some fixed size data for the bootloaders. */
.ascii "LToP" /* Header, Linux top (structure). */
.long kernel_info_var_len_data - kernel_info
.long kernel_info_end - kernel_info
.long 0x01234567 /* Some fixed size data for the bootloaders. */
kernel_info_var_len_data:
example_struct: /* Some variable size data for the bootloaders. */
.ascii "0123" /* Header/Magic. */
.long example_struct_end - example_struct
.ascii "Struct"
.long 0x89012345
example_struct: /* Some variable size data for the bootloaders. */
.ascii "0123" /* Header/Magic. */
.long example_struct_end - example_struct
.ascii "Struct"
.long 0x89012345
example_struct_end:
example_strings: /* Some variable size data for the bootloaders. */
.ascii "ABCD" /* Header/Magic. */
.long example_strings_end - example_strings
.asciz "String_0"
.asciz "String_1"
example_strings: /* Some variable size data for the bootloaders. */
.ascii "ABCD" /* Header/Magic. */
.long example_strings_end - example_strings
.asciz "String_0"
.asciz "String_1"
example_strings_end:
kernel_info_end:
@ -1139,67 +1139,63 @@ mode segment.
Such a boot loader should enter the following fields in the header::
unsigned long base_ptr; /* base address for real-mode segment */
unsigned long base_ptr; /* base address for real-mode segment */
if ( setup_sects == 0 ) {
setup_sects = 4;
}
if (setup_sects == 0)
setup_sects = 4;
if ( protocol >= 0x0200 ) {
type_of_loader = <type code>;
if ( loading_initrd ) {
ramdisk_image = <initrd_address>;
ramdisk_size = <initrd_size>;
}
if (protocol >= 0x0200) {
type_of_loader = <type code>;
if (loading_initrd) {
ramdisk_image = <initrd_address>;
ramdisk_size = <initrd_size>;
}
if ( protocol >= 0x0202 && loadflags & 0x01 )
heap_end = 0xe000;
else
heap_end = 0x9800;
if (protocol >= 0x0202 && loadflags & 0x01)
heap_end = 0xe000;
else
heap_end = 0x9800;
if ( protocol >= 0x0201 ) {
heap_end_ptr = heap_end - 0x200;
loadflags |= 0x80; /* CAN_USE_HEAP */
}
if (protocol >= 0x0201) {
heap_end_ptr = heap_end - 0x200;
loadflags |= 0x80; /* CAN_USE_HEAP */
}
if ( protocol >= 0x0202 ) {
cmd_line_ptr = base_ptr + heap_end;
strcpy(cmd_line_ptr, cmdline);
} else {
cmd_line_magic = 0xA33F;
cmd_line_offset = heap_end;
setup_move_size = heap_end + strlen(cmdline)+1;
strcpy(base_ptr+cmd_line_offset, cmdline);
}
} else {
/* Very old kernel */
if (protocol >= 0x0202) {
cmd_line_ptr = base_ptr + heap_end;
strcpy(cmd_line_ptr, cmdline);
} else {
cmd_line_magic = 0xA33F;
cmd_line_offset = heap_end;
setup_move_size = heap_end + strlen(cmdline) + 1;
strcpy(base_ptr + cmd_line_offset, cmdline);
}
} else {
/* Very old kernel */
heap_end = 0x9800;
heap_end = 0x9800;
cmd_line_magic = 0xA33F;
cmd_line_offset = heap_end;
cmd_line_magic = 0xA33F;
cmd_line_offset = heap_end;
/* A very old kernel MUST have its real-mode code
loaded at 0x90000 */
/* A very old kernel MUST have its real-mode code loaded at 0x90000 */
if (base_ptr != 0x90000) {
/* Copy the real-mode kernel */
memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
base_ptr = 0x90000; /* Relocated */
}
if ( base_ptr != 0x90000 ) {
/* Copy the real-mode kernel */
memcpy(0x90000, base_ptr, (setup_sects+1)*512);
base_ptr = 0x90000; /* Relocated */
}
strcpy(0x90000 + cmd_line_offset, cmdline);
strcpy(0x90000+cmd_line_offset, cmdline);
/* It is recommended to clear memory up to the 32K mark */
memset(0x90000 + (setup_sects+1)*512, 0,
(64-(setup_sects+1))*512);
}
/* It is recommended to clear memory up to the 32K mark */
memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
}
Loading The Rest of The Kernel
==============================
The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512
The 32-bit (non-real-mode) kernel starts at offset (setup_sects + 1) * 512
in the kernel file (again, if setup_sects == 0 the real value is 4.)
It should be loaded at address 0x10000 for Image/zImage kernels and
0x100000 for bzImage kernels.
@ -1207,13 +1203,14 @@ It should be loaded at address 0x10000 for Image/zImage kernels and
The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01
bit (LOAD_HIGH) in the loadflags field is set::
is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
load_address = is_bzImage ? 0x100000 : 0x10000;
is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
load_address = is_bzImage ? 0x100000 : 0x10000;
Note that Image/zImage kernels can be up to 512K in size, and thus use
the entire 0x10000-0x90000 range of memory. This means it is pretty
much a requirement for these kernels to load the real-mode part at
0x90000. bzImage kernels allow much more flexibility.
.. note::
Image/zImage kernels can be up to 512K in size, and thus use the entire
0x10000-0x90000 range of memory. This means it is pretty much a
requirement for these kernels to load the real-mode part at 0x90000.
bzImage kernels allow much more flexibility.
Special Command Line Options
============================
@ -1282,19 +1279,20 @@ es = ss.
In our example from above, we would do::
/* Note: in the case of the "old" kernel protocol, base_ptr must
be == 0x90000 at this point; see the previous sample code */
/*
* Note: in the case of the "old" kernel protocol, base_ptr must
* be == 0x90000 at this point; see the previous sample code.
*/
seg = base_ptr >> 4;
seg = base_ptr >> 4;
cli(); /* Enter with interrupts disabled! */
cli(); /* Enter with interrupts disabled! */
/* Set up the real-mode kernel stack */
_SS = seg;
_SP = heap_end;
/* Set up the real-mode kernel stack */
_SS = seg;
_SP = heap_end;
_DS = _ES = _FS = _GS = seg;
jmp_far(seg+0x20, 0); /* Run the kernel */
_DS = _ES = _FS = _GS = seg;
jmp_far(seg + 0x20, 0); /* Run the kernel */
If your boot sector accesses a floppy drive, it is recommended to
switch off the floppy motor before running the kernel, since the
@ -1349,7 +1347,7 @@ from offset 0x01f1 of kernel image on should be loaded into struct
boot_params and examined. The end of setup header can be calculated as
follow::
0x0202 + byte value at offset 0x0201
0x0202 + byte value at offset 0x0201
In addition to read/modify/write the setup header of the struct
boot_params as that of 16-bit boot protocol, the boot loader should
@ -1385,7 +1383,7 @@ Then, the setup header at offset 0x01f1 of kernel image on should be
loaded into struct boot_params and examined. The end of setup header
can be calculated as follows::
0x0202 + byte value at offset 0x0201
0x0202 + byte value at offset 0x0201
In addition to read/modify/write the setup header of the struct
boot_params as that of 16-bit boot protocol, the boot loader should
@ -1427,7 +1425,7 @@ execution context provided by the EFI firmware.
The function prototype for the handover entry point looks like this::
efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)
void efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp);
'handle' is the EFI image handle passed to the boot loader by the EFI
firmware, 'table' is the EFI system table - these are the first two
@ -1442,12 +1440,13 @@ The boot loader *must* fill out the following fields in bp::
All other fields should be zero.
NOTE: The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF
entry point, combined with the LINUX_EFI_INITRD_MEDIA_GUID based initrd
loading protocol (refer to [0] for an example of the bootloader side of
this), which removes the need for any knowledge on the part of the EFI
bootloader regarding the internal representation of boot_params or any
requirements/limitations regarding the placement of the command line
and ramdisk in memory, or the placement of the kernel image itself.
.. note::
The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF
entry point, combined with the LINUX_EFI_INITRD_MEDIA_GUID based initrd
loading protocol (refer to [0] for an example of the bootloader side of
this), which removes the need for any knowledge on the part of the EFI
bootloader regarding the internal representation of boot_params or any
requirements/limitations regarding the placement of the command line
and ramdisk in memory, or the placement of the kernel image itself.
[0] https://github.com/u-boot/u-boot/commit/ec80b4735a593961fe701cc3a5d717d4739b0fd0

View File

@ -384,6 +384,16 @@ When monitoring is enabled all MON groups will also contain:
Available only with debug option. The identifier used by hardware
for the monitor group. On x86 this is the RMID.
When the "mba_MBps" mount option is used all CTRL_MON groups will also contain:
"mba_MBps_event":
Reading this file shows which memory bandwidth event is used
as input to the software feedback loop that keeps memory bandwidth
below the value specified in the schemata file. Writing the
name of one of the supported memory bandwidth events found in
/sys/fs/resctrl/info/L3_MON/mon_features changes the input
event.
Resource allocation rules
-------------------------

View File

@ -25,7 +25,7 @@ to cache translations for virtual addresses. The IOMMU driver uses the
mmu_notifier() support to keep the device TLB cache and the CPU cache in
sync. When an ATS lookup fails for a virtual address, the device should
use the PRI in order to request the virtual address to be paged into the
CPU page tables. The device must use ATS again in order the fetch the
CPU page tables. The device must use ATS again in order to fetch the
translation before use.
Shared Hardware Workqueues
@ -216,7 +216,7 @@ submitting work and processing completions.
Single Root I/O Virtualization (SR-IOV) focuses on providing independent
hardware interfaces for virtualizing hardware. Hence, it's required to be
almost fully functional interface to software supporting the traditional
an almost fully functional interface to software supporting the traditional
BARs, space for interrupts via MSI-X, its own register layout.
Virtual Functions (VFs) are assisted by the Physical Function (PF)
driver.

View File

@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
"core_id."
- topology_logical_core_id();
The logical core ID to which a thread belongs.
System topology examples

View File

@ -1,312 +0,0 @@
.. SPDX-License-Identifier: GPL-2.0
===========================
AMD64 Specific Boot Options
===========================
There are many others (usually documented in driver documentation), but
only the AMD64 specific ones are listed here.
Machine check
=============
Please see Documentation/arch/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
mce=off
Disable machine check
mce=no_cmci
Disable CMCI(Corrected Machine Check Interrupt) that
Intel processor supports. Usually this disablement is
not recommended, but it might be handy if your hardware
is misbehaving.
Note that you'll get more problems without CMCI than with
due to the shared banks, i.e. you might get duplicated
error logs.
mce=dont_log_ce
Don't make logs for corrected errors. All events reported
as corrected are silently cleared by OS.
This option will be useful if you have no interest in any
of corrected errors.
mce=ignore_ce
Disable features for corrected errors, e.g. polling timer
and CMCI. All events reported as corrected are not cleared
by OS and remained in its error banks.
Usually this disablement is not recommended, however if
there is an agent checking/clearing corrected errors
(e.g. BIOS or hardware monitoring applications), conflicting
with OS's error handling, and you cannot deactivate the agent,
then this option will be a help.
mce=no_lmce
Do not opt-in to Local MCE delivery. Use legacy method
to broadcast MCEs.
mce=bootlog
Enable logging of machine checks left over from booting.
Disabled by default on AMD Fam10h and older because some BIOS
leave bogus ones.
If your BIOS doesn't do that it's a good idea to enable though
to make sure you log even machine check events that result
in a reboot. On Intel systems it is enabled by default.
mce=nobootlog
Disable boot machine check logging.
mce=monarchtimeout (number)
monarchtimeout:
Sets the time in us to wait for other CPUs on machine checks. 0
to disable.
mce=bios_cmci_threshold
Don't overwrite the bios-set CMCI threshold. This boot option
prevents Linux from overwriting the CMCI threshold set by the
bios. Without this option, Linux always sets the CMCI
threshold to 1. Enabling this may make memory predictive failure
analysis less effective if the bios sets thresholds for memory
errors since we will not see details for all errors.
mce=recovery
Force-enable recoverable machine check code paths
nomce (for compatibility with i386)
same as mce=off
Everything else is in sysfs now.
APICs
=====
apic
Use IO-APIC. Default
noapic
Don't use the IO-APIC.
disableapic
Don't use the local APIC
nolapic
Don't use the local APIC (alias for i386 compatibility)
pirq=...
See Documentation/arch/x86/i386/IO-APIC.rst
noapictimer
Don't set up the APIC timer
no_timer_check
Don't check the IO-APIC timer. This can work around
problems with incorrect timer initialization on some boards.
apicpmtimer
Do APIC timer calibration using the pmtimer. Implies
apicmaintimer. Useful when your PIT timer is totally broken.
Timing
======
notsc
Deprecated, use tsc=unstable instead.
nohpet
Don't use the HPET timer.
Idle loop
=========
idle=poll
Don't do power saving in the idle loop using HLT, but poll for rescheduling
event. This will make the CPUs eat a lot more power, but may be useful
to get slightly better performance in multiprocessor benchmarks. It also
makes some profiling using performance counters more accurate.
Please note that on systems with MONITOR/MWAIT support (like Intel EM64T
CPUs) this option has no performance advantage over the normal idle loop.
It may also interact badly with hyperthreading.
Rebooting
=========
reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] | p[ci] [, [w]arm | [c]old]
bios
Use the CPU reboot vector for warm reset
warm
Don't set the cold reboot flag
cold
Set the cold reboot flag
triple
Force a triple fault (init)
kbd
Use the keyboard controller. cold reset (default)
acpi
Use the ACPI RESET_REG in the FADT. If ACPI is not configured or
the ACPI reset does not work, the reboot path attempts the reset
using the keyboard controller.
efi
Use efi reset_system runtime service. If EFI is not configured or
the EFI reset does not work, the reboot path attempts the reset using
the keyboard controller.
pci
Use a write to the PCI config space register 0xcf9 to trigger reboot.
Using warm reset will be much faster especially on big memory
systems because the BIOS will not go through the memory check.
Disadvantage is that not all hardware will be completely reinitialized
on reboot so there may be boot problems on some systems.
reboot=force
Don't stop other CPUs on reboot. This can make reboot more reliable
in some cases.
reboot=default
There are some built-in platform specific "quirks" - you may see:
"reboot: <name> series board detected. Selecting <type> for reboots."
In the case where you think the quirk is in error (e.g. you have
newer BIOS, or newer board) using this option will ignore the built-in
quirk table, and use the generic default reboot actions.
NUMA
====
numa=off
Only set up a single NUMA node spanning all memory.
numa=noacpi
Don't parse the SRAT table for NUMA setup
numa=nohmat
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
partitioning.
ACPI
====
acpi=off
Don't enable ACPI
acpi=ht
Use ACPI boot table parsing, but don't enable ACPI interpreter
acpi=force
Force ACPI on (currently not needed)
acpi=strict
Disable out of spec ACPI workarounds.
acpi_sci={edge,level,high,low}
Set up ACPI SCI interrupt.
acpi=noirq
Don't route interrupts
acpi=nocmcff
Disable firmware first mode for corrected errors. This
disables parsing the HEST CMC error source to check if
firmware has set the FF flag. This may result in
duplicate corrected error reports.
PCI
===
pci=off
Don't use PCI
pci=conf1
Use conf1 access.
pci=conf2
Use conf2 access.
pci=rom
Assign ROMs.
pci=assign-busses
Assign busses
pci=irqmask=MASK
Set PCI interrupt mask to MASK
pci=lastbus=NUMBER
Scan up to NUMBER busses, no matter what the mptable says.
pci=noacpi
Don't use ACPI to set up PCI interrupt routing.
IOMMU (input/output memory management unit)
===========================================
Multiple x86-64 PCI-DMA mapping implementations exist, for example:
1. <kernel/dma/direct.c>: use no hardware/software IOMMU at all
(e.g. because you have < 3 GB memory).
Kernel boot message: "PCI-DMA: Disabling IOMMU"
2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
Kernel boot message: "PCI-DMA: using GART IOMMU"
3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
e.g. if there is no hardware IOMMU in the system and it is need because
you have >3GB memory or told the kernel to us it (iommu=soft))
Kernel boot message: "PCI-DMA: Using software bounce buffering
for IO (SWIOTLB)"
::
iommu=[<size>][,noagp][,off][,force][,noforce]
[,memaper[=<order>]][,merge][,fullflush][,nomerge]
[,noaperture]
General iommu options:
off
Don't initialize and use any kind of IOMMU.
noforce
Don't force hardware IOMMU usage when it is not needed. (default).
force
Force the use of the hardware IOMMU even when it is
not actually needed (e.g. because < 3 GB memory).
soft
Use software bounce buffering (SWIOTLB) (default for
Intel machines). This can be used to prevent the usage
of an available hardware IOMMU.
iommu options only relevant to the AMD GART hardware IOMMU:
<size>
Set the size of the remapping area in bytes.
allowed
Overwrite iommu off workarounds for specific chipsets.
fullflush
Flush IOMMU on each allocation (default).
nofullflush
Don't use IOMMU fullflush.
memaper[=<order>]
Allocate an own aperture over RAM with size 32MB<<order.
(default: order=1, i.e. 64MB)
merge
Do scatter-gather (SG) merging. Implies "force" (experimental).
nomerge
Don't do scatter-gather (SG) merging.
noaperture
Ask the IOMMU not to touch the aperture for AGP.
noagp
Don't initialize the AGP driver and use full aperture.
panic
Always panic when IOMMU overflows.
iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
implementation:
swiotlb=<slots>[,force,noforce]
<slots>
Prereserve that many 2K slots for the software IO bounce buffering.
force
Force all IO through the software TLB.
noforce
Do not initialize the software TLB.
Miscellaneous
=============
nogbpages
Do not use GB pages for kernel direct mappings.
gbpages
Use GB pages for kernel direct mappings.
AMD SEV (Secure Encrypted Virtualization)
=========================================
Options relating to AMD SEV, specified via the following format:
::
sev=option1[,option2]
The available options are:
debug
Enable debug messages.
nosnp
Do not enable SEV-SNP (applies to host/hypervisor only). Setting
'nosnp' avoids the RMP check overhead in memory accesses when
users do not want to run SEV-SNP guests.

View File

@ -18,7 +18,7 @@ For more information on the features of cpusets, see
Documentation/admin-guide/cgroup-v1/cpusets.rst.
There are a number of different configurations you can use for your needs. For
more information on the numa=fake command line option and its various ways of
configuring fake nodes, see Documentation/arch/x86/x86_64/boot-options.rst.
configuring fake nodes, see Documentation/admin-guide/kernel-parameters.txt
For the purposes of this introduction, we'll assume a very primitive NUMA
emulation setup of "numa=fake=4*512,". This will split our system memory into

View File

@ -7,7 +7,6 @@ x86_64 Support
.. toctree::
:maxdepth: 2
boot-options
uefi
mm
5level-paging

View File

@ -12,14 +12,20 @@ with EFI firmware and specifications are listed below.
1. UEFI specification: http://www.uefi.org
2. Booting Linux kernel on UEFI x86_64 platform requires bootloader
support. Elilo with x86_64 support can be used.
2. Booting Linux kernel on UEFI x86_64 platform can either be
done using the <Documentation/admin-guide/efi-stub.rst> or using a
separate bootloader.
3. x86_64 platform with EFI/UEFI firmware.
Mechanics
---------
Refer to <Documentation/admin-guide/efi-stub.rst> to learn how to use the EFI stub.
Below are general EFI setup guidelines on the x86_64 platform,
regardless of whether you use the EFI stub or a separate bootloader.
- Build the kernel with the following configuration::
CONFIG_FB_EFI=y
@ -31,16 +37,27 @@ Mechanics
CONFIG_EFI=y
CONFIG_EFIVAR_FS=y or m # optional
- Create a VFAT partition on the disk
- Copy the following to the VFAT partition:
- Create a VFAT partition on the disk with the EFI System flag
You can do this with fdisk with the following commands:
elilo bootloader with x86_64 support, elilo configuration file,
kernel image built in first step and corresponding
initrd. Instructions on building elilo and its dependencies
can be found in the elilo sourceforge project.
1. g - initialize a GPT partition table
2. n - create a new partition
3. t - change the partition type to "EFI System" (number 1)
4. w - write and save the changes
Afterwards, initialize the VFAT filesystem by running mkfs::
mkfs.fat /dev/<your-partition>
- Copy the boot files to the VFAT partition:
If you use the EFI stub method, the kernel acts also as an EFI executable.
You can just copy the bzImage to the EFI/boot/bootx64.efi path on the partition
so that it will automatically get booted, see the <Documentation/admin-guide/efi-stub.rst> page
for additional instructions regarding passage of kernel parameters and initramfs.
If you use a custom bootloader, refer to the relevant documentation for help on this part.
- Boot to EFI shell and invoke elilo choosing the kernel image built
in first step.
- If some or all EFI runtime services don't work, you can try following
kernel command line parameters to turn off some or all EFI runtime
services.

View File

@ -333,6 +333,4 @@ References
.. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README
.. [#stefan] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
.. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/

View File

@ -0,0 +1,9 @@
==================
Cgroup Kernel APIs
==================
Device Memory Cgroup API (dmemcg)
=================================
.. kernel-doc:: kernel/cgroup/dmem.c
:export:

View File

@ -53,6 +53,7 @@ Library functionality that is used throughout the kernel.
floating-point
union_find
min_heap
parser
Low level entry and exit
========================
@ -109,6 +110,7 @@ more memory-management documentation in Documentation/mm/index.rst.
dma-isa-lpc
swiotlb
mm-api
cgroup
genalloc
pin_user_pages
boot-time-mm

View File

@ -3,7 +3,7 @@ Adding reference counters (krefs) to kernel objects
===================================================
:Author: Corey Minyard <minyard@acm.org>
:Author: Thomas Hellstrom <thellstrom@vmware.com>
:Author: Thomas Hellström <thomas.hellstrom@linux.intel.com>
A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
presentation on krefs, which can be found at:
@ -321,3 +321,8 @@ rcu grace period after release_entry_rcu was called. That can be accomplished
by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
before using kfree, but note that synchronize_rcu() may sleep for a
substantial amount of time.
Functions and structures
========================
.. kernel-doc:: include/linux/kref.h

View File

@ -4,6 +4,8 @@
Min Heap API
============
:Author: Kuan-Wei Chiu <visitorckw@gmail.com>
Introduction
============

View File

@ -227,11 +227,119 @@ Intended use
Drivers that opt to use this API first need to identify which of the above 3
quirk combinations (for a total of 8) match what the hardware documentation
describes. Then they should wrap the packing() function, creating a new
xxx_packing() that calls it using the proper QUIRK_* one-hot bits set.
describes.
There are 3 supported usage patterns, detailed below.
packing()
^^^^^^^^^
This API function is deprecated.
The packing() function returns an int-encoded error code, which protects the
programmer against incorrect API use. The errors are not expected to occur
during runtime, therefore it is reasonable for xxx_packing() to return void
and simply swallow those errors. Optionally it can dump stack or print the
error description.
during runtime, therefore it is reasonable to wrap packing() into a custom
function which returns void and swallows those errors. Optionally it can
dump stack or print the error description.
.. code-block:: c
void my_packing(void *buf, u64 *val, int startbit, int endbit,
size_t len, enum packing_op op)
{
int err;
/* Adjust quirks accordingly */
err = packing(buf, val, startbit, endbit, len, op, QUIRK_LSW32_IS_FIRST);
if (likely(!err))
return;
if (err == -EINVAL) {
pr_err("Start bit (%d) expected to be larger than end (%d)\n",
startbit, endbit);
} else if (err == -ERANGE) {
if ((startbit - endbit + 1) > 64)
pr_err("Field %d-%d too large for 64 bits!\n",
startbit, endbit);
else
pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n",
*val, startbit, endbit);
}
dump_stack();
}
pack() and unpack()
^^^^^^^^^^^^^^^^^^^
These are const-correct variants of packing(), and eliminate the last "enum
packing_op op" argument.
Calling pack(...) is equivalent, and preferred, to calling packing(..., PACK).
Calling unpack(...) is equivalent, and preferred, to calling packing(..., UNPACK).
pack_fields() and unpack_fields()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The library exposes optimized functions for the scenario where there are many
fields represented in a buffer, and it encourages consumer drivers to avoid
repetitive calls to pack() and unpack() for each field, but instead use
pack_fields() and unpack_fields(), which reduces the code footprint.
These APIs use field definitions in arrays of ``struct packed_field_u8`` or
``struct packed_field_u16``, allowing consumer drivers to minimize the size
of these arrays according to their custom requirements.
The pack_fields() and unpack_fields() API functions are actually macros which
automatically select the appropriate function at compile time, based on the
type of the fields array passed in.
An additional benefit over pack() and unpack() is that sanity checks on the
field definitions are handled at compile time with ``BUILD_BUG_ON`` rather
than only when the offending code is executed. These functions return void and
wrapping them to handle unexpected errors is not necessary.
It is recommended, but not required, that you wrap your packed buffer into a
structured type with a fixed size. This generally makes it easier for the
compiler to enforce that the correct size buffer is used.
Here is an example of how to use the fields APIs:
.. code-block:: c
/* Ordering inside the unpacked structure is flexible and can be different
* from the packed buffer. Here, it is optimized to reduce padding.
*/
struct data {
u64 field3;
u32 field4;
u16 field1;
u8 field2;
};
#define SIZE 13
typdef struct __packed { u8 buf[SIZE]; } packed_buf_t;
static const struct packed_field_u8 fields[] = {
PACKED_FIELD(100, 90, struct data, field1),
PACKED_FIELD(90, 87, struct data, field2),
PACKED_FIELD(86, 30, struct data, field3),
PACKED_FIELD(29, 0, struct data, field4),
};
void unpack_your_data(const packed_buf_t *buf, struct data *unpacked)
{
BUILD_BUG_ON(sizeof(*buf) != SIZE;
unpack_fields(buf, sizeof(*buf), unpacked, fields,
QUIRK_LITTLE_ENDIAN);
}
void pack_your_data(const struct data *unpacked, packed_buf_t *buf)
{
BUILD_BUG_ON(sizeof(*buf) != SIZE;
pack_fields(buf, sizeof(*buf), unpacked, fields,
QUIRK_LITTLE_ENDIAN);
}

View File

@ -0,0 +1,17 @@
.. SPDX-License-Identifier: GPL-2.0+
==============
Generic parser
==============
Overview
========
The generic parser is a simple parser for parsing mount options,
filesystem options, driver options, subsystem options, etc.
Parser API
==========
.. kernel-doc:: lib/parser.c
:export:

View File

@ -41,9 +41,9 @@ entries.
In addition to the macros EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(), that allow
exporting of kernel symbols to the kernel symbol table, variants of these are
available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace.
Please note that due to macro expansion that argument needs to be a
preprocessor symbol. E.g. to export the symbol ``usb_stor_suspend`` into the
EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace as a
string constant. Note that this string must not contain whitespaces.
E.g. to export the symbol ``usb_stor_suspend`` into the
namespace ``USB_STORAGE``, use::
EXPORT_SYMBOL_NS(usb_stor_suspend, "USB_STORAGE");
@ -78,11 +78,10 @@ as this argument has preference over a default symbol namespace.
A second option to define the default namespace is directly in the compilation
unit as preprocessor statement. The above example would then read::
#undef DEFAULT_SYMBOL_NAMESPACE
#define DEFAULT_SYMBOL_NAMESPACE "USB_COMMON"
within the corresponding compilation unit before any EXPORT_SYMBOL macro is
used.
within the corresponding compilation unit before the #include for
<linux/export.h>. Typically it's placed before the first #include statement.
3. How to use Symbols exported in Namespaces
============================================

View File

@ -42,8 +42,8 @@ call xa_tag_pointer() to create an entry with a tag, xa_untag_pointer()
to turn a tagged entry back into an untagged pointer and xa_pointer_tag()
to retrieve the tag of an entry. Tagged pointers use the same bits that
are used to distinguish value entries from normal pointers, so you must
decide whether they want to store value entries or tagged pointers in
any particular XArray.
decide whether you want to store value entries or tagged pointers in any
particular XArray.
The XArray does not support storing IS_ERR() pointers as some
conflict with value entries or internal entries.
@ -52,8 +52,9 @@ An unusual feature of the XArray is the ability to create entries which
occupy a range of indices. Once stored to, looking up any index in
the range will return the same entry as looking up any other index in
the range. Storing to any index will store to all of them. Multi-index
entries can be explicitly split into smaller entries, or storing ``NULL``
into any entry will cause the XArray to forget about the range.
entries can be explicitly split into smaller entries. Unsetting (using
xa_erase() or xa_store() with ``NULL``) any entry will cause the XArray
to forget about the range.
Normal API
==========
@ -63,13 +64,14 @@ for statically allocated XArrays or xa_init() for dynamically
allocated ones. A freshly-initialised XArray contains a ``NULL``
pointer at every index.
You can then set entries using xa_store() and get entries
using xa_load(). xa_store will overwrite any entry with the
new entry and return the previous entry stored at that index. You can
use xa_erase() instead of calling xa_store() with a
``NULL`` entry. There is no difference between an entry that has never
been stored to, one that has been erased and one that has most recently
had ``NULL`` stored to it.
You can then set entries using xa_store() and get entries using
xa_load(). xa_store() will overwrite any entry with the new entry and
return the previous entry stored at that index. You can unset entries
using xa_erase() or by setting the entry to ``NULL`` using xa_store().
There is no difference between an entry that has never been stored to
and one that has been erased with xa_erase(); an entry that has most
recently had ``NULL`` stored to it is also equivalent except if the
XArray was initialized with ``XA_FLAGS_ALLOC``.
You can conditionally replace an entry at an index by using
xa_cmpxchg(). Like cmpxchg(), it will only succeed if

View File

@ -10,6 +10,9 @@ whole; patches welcome!
A brief overview of testing-specific tools can be found in
Documentation/dev-tools/testing-overview.rst
Tools that are specific to debugging can be found in
Documentation/process/debugging/index.rst
.. toctree::
:caption: Table of contents
:maxdepth: 2
@ -27,8 +30,6 @@ Documentation/dev-tools/testing-overview.rst
kmemleak
kcsan
kfence
gdb-kernel-debugging
kgdb
kselftest
kunit/index
ktap

View File

@ -1,25 +0,0 @@
Altera SOCFPGA System Manager
Required properties:
- compatible : "altr,sys-mgr"
- reg : Should contain 1 register ranges(address and length)
- cpu1-start-addr : CPU1 start address in hex.
Example:
sysmgr@ffd08000 {
compatible = "altr,sys-mgr";
reg = <0xffd08000 0x1000>;
cpu1-start-addr = <0xffd080c4>;
};
ARM64 - Stratix10
Required properties:
- compatible : "altr,sys-mgr-s10"
- reg : Should contain 1 register range(address and length)
for system manager register.
Example:
sysmgr@ffd12000 {
compatible = "altr,sys-mgr-s10";
reg = <0xffd12000 0x228>;
};

View File

@ -38,6 +38,12 @@ properties:
enum:
- arm,coresight-dummy-source
arm,static-trace-id:
description: If dummy source needs static id support, use this to set trace id.
$ref: /schemas/types.yaml#/definitions/uint32
minimum: 1
maximum: 111
out-ports:
$ref: /schemas/graph.yaml#/properties/ports

View File

@ -45,7 +45,22 @@ properties:
patternProperties:
'^port@[01]$':
description: Output connections to CoreSight Trace bus
$ref: /schemas/graph.yaml#/properties/port
$ref: /schemas/graph.yaml#/$defs/port-base
unevaluatedProperties: false
properties:
endpoint:
$ref: /schemas/graph.yaml#/$defs/endpoint-base
unevaluatedProperties: false
properties:
filter-source:
$ref: /schemas/types.yaml#/definitions/phandle
description:
phandle to the coresight trace source device matching the
hard coded filtering for this port
remote-endpoint: true
required:
- compatible
@ -72,6 +87,7 @@ examples:
reg = <0>;
replicator_out_port0: endpoint {
remote-endpoint = <&etb_in_port>;
filter-source = <&tpdm_video>;
};
};
@ -79,6 +95,7 @@ examples:
reg = <1>;
replicator_out_port1: endpoint {
remote-endpoint = <&tpiu_in_port>;
filter-source = <&tpdm_mdss>;
};
};
};

View File

@ -23,7 +23,7 @@ description: |
properties:
$nodename:
pattern: "^ete([0-9a-f]+)$"
pattern: "^ete(-[0-9]+)?$"
compatible:
items:
- const: arm,embedded-trace-extension
@ -55,13 +55,13 @@ examples:
# An ETE node without legacy CoreSight connections
- |
ete0 {
ete-0 {
compatible = "arm,embedded-trace-extension";
cpu = <&cpu_0>;
};
# An ETE node with legacy CoreSight connections
- |
ete1 {
ete-1 {
compatible = "arm,embedded-trace-extension";
cpu = <&cpu_1>;

View File

@ -74,6 +74,7 @@ properties:
- description: AST2600 based boards
items:
- enum:
- ampere,mtjefferson-bmc
- ampere,mtmitchell-bmc
- aspeed,ast2600-evb
- aspeed,ast2600-evb-a1
@ -91,6 +92,7 @@ properties:
- ibm,everest-bmc
- ibm,fuji-bmc
- ibm,rainier-bmc
- ibm,sbp1-bmc
- ibm,system1-bmc
- ibm,tacoma-bmc
- inventec,starscream-bmc

View File

@ -180,6 +180,13 @@ properties:
- const: atmel,sama5d4
- const: atmel,sama5
- description: Microchip SAMA7D65 Curiosity Board
items:
- const: microchip,sama7d65-curiosity
- const: microchip,sama7d65
- const: microchip,sama7d6
- const: microchip,sama7
- items:
- const: microchip,sama7g5ek # SAMA7G5 Evaluation Kit
- const: microchip,sama7g5

View File

@ -13,6 +13,7 @@ PIT Timer required properties:
PIT64B Timer required properties:
- compatible: Should be "microchip,sam9x60-pit64b" or
"microchip,sam9x7-pit64b", "microchip,sam9x60-pit64b"
"microchip,sama7d65-pit64b", "microchip,sam9x60-pit64b"
- reg: Should contain registers location and length
- interrupts: Should contain interrupt for PIT64B timer
- clocks: Should contain the available clock sources for PIT64B timer.
@ -27,12 +28,13 @@ Its subnodes can be:
- watchdog: compatible should be "atmel,at91rm9200-wdt"
RAMC SDRAM/DDR Controller required properties:
- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
"atmel,at91sam9260-sdramc",
"atmel,at91sam9g45-ddramc",
"atmel,sama5d3-ddramc",
"microchip,sam9x60-ddramc",
"microchip,sama7g5-uddrc",
- compatible: Should be "atmel,at91rm9200-sdramc", "syscon" or
"atmel,at91sam9260-sdramc" or
"atmel,at91sam9g45-ddramc" or
"atmel,sama5d3-ddramc" or
"microchip,sam9x60-ddramc" or
"microchip,sama7g5-uddrc" or
"microchip,sama7d65-uddrc", "microchip,sama7g5-uddrc" or
"microchip,sam9x7-ddramc", "atmel,sama5d3-ddramc".
- reg: Should contain registers location and length

View File

@ -34,6 +34,7 @@ properties:
- enum:
- netgear,r8000p
- tplink,archer-c2300-v1
- zyxel,ex3510b
- const: brcm,bcm4906
- const: brcm,bcm4908
- const: brcm,bcmbca
@ -115,6 +116,7 @@ properties:
items:
- enum:
- brcm,bcm96846
- genexis,xg6846b
- const: brcm,bcm6846
- const: brcm,bcmbca

View File

@ -0,0 +1,40 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/arm/blaize.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Blaize Platforms
maintainers:
- James Cowgill <james.cowgill@blaize.com>
- Matt Redfearn <matt.redfearn@blaize.com>
- Neil Jones <neil.jones@blaize.com>
- Nikolaos Pasaloukos <nikolaos.pasaloukos@blaize.com>
description: |
Blaize Platforms using SoCs designed by Blaize Inc.
The products based on the BLZP1600 SoC:
- BLZP1600-SoM: SoM (System on Module)
- BLZP1600-CB2: Development board CB2 based on BLZP1600-SoM
BLZP1600 SoC integrates a dual core ARM Cortex A53 cluster
and a Blaize Graph Streaming Processor for AI and ML workloads,
plus a suite of connectivity and other peripherals.
properties:
$nodename:
const: '/'
compatible:
oneOf:
- description: Blaize BLZP1600 based boards
items:
- enum:
- blaize,blzp1600-cb2
- const: blaize,blzp1600
additionalProperties: true
...

View File

@ -1091,6 +1091,7 @@ properties:
- dmo,imx8mp-data-modul-edm-sbc # i.MX8MP eDM SBC
- emcraft,imx8mp-navqp # i.MX8MP Emcraft Systems NavQ+ Kit
- fsl,imx8mp-evk # i.MX8MP EVK Board
- fsl,imx8mp-evk-revb4 # i.MX8MP EVK Rev B4 Board
- gateworks,imx8mp-gw71xx-2x # i.MX8MP Gateworks Board
- gateworks,imx8mp-gw72xx-2x # i.MX8MP Gateworks Board
- gateworks,imx8mp-gw73xx-2x # i.MX8MP Gateworks Board
@ -1106,6 +1107,15 @@ properties:
- ysoft,imx8mp-iota2-lumpy # Y Soft i.MX8MP IOTA2 Lumpy Board
- const: fsl,imx8mp
- description: ABB Boards with i.MX8M Plus Modules from ADLink
items:
- enum:
- abb,imx8mp-aristanetos3-adpismarc # i.MX8MP ABB SoM on PI SMARC Board
- abb,imx8mp-aristanetos3-helios # i.MX8MP ABB SoM on helios Board
- abb,imx8mp-aristanetos3-proton2s # i.MX8MP ABB SoM on proton2s Board
- const: abb,imx8mp-aristanetos3-som # i.MX8MP ABB SoM
- const: fsl,imx8mp
- description: Avnet (MSC Branded) Boards with SM2S i.MX8M Plus Modules
items:
- const: avnet,sm2s-imx8mp-14N0600E-ep1 # SM2S-IMX8PLUS-14N0600E on SM2-MB-EP1 Carrier Board
@ -1262,6 +1272,7 @@ properties:
items:
- enum:
- fsl,imx8qm-mek # i.MX8QM MEK Board
- fsl,imx8qm-mek-revd # i.MX8QM MEK Rev D Board
- toradex,apalis-imx8 # Apalis iMX8 Modules
- toradex,apalis-imx8-v1.1 # Apalis iMX8 V1.1 Modules
- const: fsl,imx8qm
@ -1290,6 +1301,7 @@ properties:
- enum:
- einfochips,imx8qxp-ai_ml # i.MX8QXP AI_ML Board
- fsl,imx8qxp-mek # i.MX8QXP MEK Board
- fsl,imx8qxp-mek-wcpu # i.MX8QXP MEK WCPU Board
- const: fsl,imx8qxp
- description: i.MX8DXL based Boards

View File

@ -239,6 +239,34 @@ properties:
- enum:
- mediatek,mt8183-pumpkin
- const: mediatek,mt8183
- description: Google Chinchou (Asus Chromebook CZ1104CM2A/CZ1204CM2A)
items:
- const: google,chinchou-sku0
- const: google,chinchou-sku2
- const: google,chinchou-sku4
- const: google,chinchou-sku5
- const: google,chinchou
- const: mediatek,mt8186
- description: Google Chinchou (Asus Chromebook CZ1104FM2A/CZ1204FM2A/CZ1104CM2A/CZ1204CM2A)
items:
- const: google,chinchou-sku1
- const: google,chinchou-sku3
- const: google,chinchou-sku6
- const: google,chinchou-sku7
- const: google,chinchou-sku17
- const: google,chinchou-sku20
- const: google,chinchou-sku22
- const: google,chinchou-sku23
- const: google,chinchou
- const: mediatek,mt8186
- description: Google Chinchou360 (Asus Chromebook CZ1104FM2A/CZ1204FM2A Flip)
items:
- const: google,chinchou-sku16
- const: google,chinchou-sku18
- const: google,chinchou-sku19
- const: google,chinchou-sku21
- const: google,chinchou
- const: mediatek,mt8186
- description: Google Magneton (Lenovo IdeaPad Slim 3 Chromebook (14M868))
items:
- const: google,steelix-sku393219
@ -263,6 +291,19 @@ properties:
- const: google,steelix-sku196608
- const: google,steelix
- const: mediatek,mt8186
- description: Google Starmie (ASUS Chromebook Enterprise CM30 (CM3001))
items:
- const: google,starmie-sku0
- const: google,starmie-sku2
- const: google,starmie-sku3
- const: google,starmie
- const: mediatek,mt8186
- description: Google Starmie (ASUS Chromebook Enterprise CM30 (CM3001))
items:
- const: google,starmie-sku1
- const: google,starmie-sku4
- const: google,starmie
- const: mediatek,mt8186
- description: Google Steelix (Lenovo 300e Yoga Chromebook Gen 4)
items:
- enum:
@ -307,6 +348,19 @@ properties:
- enum:
- mediatek,mt8186-evb
- const: mediatek,mt8186
- description: Google Ciri (Lenovo Chromebook Duet (11", 9))
items:
- enum:
- google,ciri-sku0
- google,ciri-sku1
- google,ciri-sku2
- google,ciri-sku3
- google,ciri-sku4
- google,ciri-sku5
- google,ciri-sku6
- google,ciri-sku7
- const: google,ciri
- const: mediatek,mt8188
- items:
- enum:
- mediatek,mt8188-evb
@ -316,12 +370,6 @@ properties:
- const: google,hayato-rev1
- const: google,hayato
- const: mediatek,mt8192
- description: Google Hayato rev5
items:
- const: google,hayato-rev5-sku2
- const: google,hayato-sku2
- const: google,hayato
- const: mediatek,mt8192
- description: Google Spherion (Acer Chromebook 514)
items:
- const: google,spherion-rev3
@ -330,11 +378,6 @@ properties:
- const: google,spherion-rev0
- const: google,spherion
- const: mediatek,mt8192
- description: Google Spherion rev4 (Acer Chromebook 514)
items:
- const: google,spherion-rev4
- const: google,spherion
- const: mediatek,mt8192
- items:
- enum:
- mediatek,mt8192-evb

View File

@ -23,7 +23,7 @@ description: |
select:
properties:
compatible:
pattern: "^qcom,.*(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1e)[0-9]+.*$"
pattern: "^qcom,.*(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sar|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
required:
- compatible
@ -31,7 +31,8 @@ properties:
compatible:
oneOf:
# Preferred naming style for compatibles of SoC components:
- pattern: "^qcom,(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1e)[0-9]+(pro)?-.*$"
- pattern: "^qcom,(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+(pro)?-.*$"
- pattern: "^qcom,sar[0-9]+[a-z]?-.*$"
- pattern: "^qcom,(sa|sc)8[0-9]+[a-z][a-z]?-.*$"
# Legacy namings - variations of existing patterns/compatibles are OK,
@ -39,9 +40,9 @@ properties:
- pattern: "^qcom,[ak]pss-wdt-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
- pattern: "^qcom,gcc-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
- pattern: "^qcom,mmcc-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
- pattern: "^qcom,pcie-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1e)[0-9]+.*$"
- pattern: "^qcom,pcie-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
- pattern: "^qcom,rpm-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
- pattern: "^qcom,scm-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1e)[0-9]+.*$"
- pattern: "^qcom,scm-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
- enum:
- qcom,dsi-ctrl-6g-qcm2290
- qcom,gpucc-sdm630

View File

@ -19,29 +19,42 @@ description: |
apq8016
apq8026
apq8064
apq8074
apq8084
apq8094
apq8096
ipq4018
ipq4019
ipq5018
ipq5332
ipq5424
ipq6018
ipq8064
ipq8074
ipq9574
mdm9615
msm8226
msm8660
msm8916
msm8917
msm8926
msm8929
msm8939
msm8953
msm8956
msm8960
msm8974
msm8974pro
msm8976
msm8992
msm8994
msm8996
msm8996pro
msm8998
qcs404
qcs615
qcs8300
qcs8550
qcm2290
qcm6490
@ -53,6 +66,7 @@ description: |
sa8155p
sa8540p
sa8775p
sar2130p
sc7180
sc7280
sc8180x
@ -84,7 +98,10 @@ description: |
sm8450
sm8550
sm8650
sm8750
x1e78100
x1e80100
x1p42100
There are many devices in the list below that run the standard ChromeOS
bootloader setup and use the open source depthcharge bootloader to boot the
@ -250,6 +267,11 @@ properties:
- yiming,uz801-v3
- const: qcom,msm8916
- items:
- enum:
- xiaomi,riva
- const: qcom,msm8917
- items:
- enum:
- motorola,potter
@ -352,6 +374,11 @@ properties:
- qcom,ipq5332-ap-mi01.9
- const: qcom,ipq5332
- items:
- enum:
- qcom,ipq5424-rdp466
- const: qcom,ipq5424
- items:
- enum:
- mikrotik,rb3011
@ -408,6 +435,12 @@ properties:
- qcom,qru1000-idp
- const: qcom,qru1000
- description: Qualcomm AR2 Gen1 platform
items:
- enum:
- qcom,qar2130p
- const: qcom,sar2130p
- items:
- enum:
- acer,aspire1
@ -822,8 +855,10 @@ properties:
- items:
- enum:
- huawei,gaokun3
- lenovo,thinkpad-x13s
- microsoft,arcata
- microsoft,blackrock
- qcom,sc8280xp-crd
- qcom,sc8280xp-qrd
- const: qcom,sc8280xp
@ -898,6 +933,16 @@ properties:
- const: qcom,qcs404-evb
- const: qcom,qcs404
- items:
- enum:
- qcom,qcs8300-ride
- const: qcom,qcs8300
- items:
- enum:
- qcom,qcs615-ride
- const: qcom,qcs615
- items:
- enum:
- qcom,sa8155p-adp
@ -1064,6 +1109,18 @@ properties:
- qcom,sm8650-qrd
- const: qcom,sm8650
- items:
- enum:
- qcom,sm8750-mtp
- qcom,sm8750-qrd
- const: qcom,sm8750
- items:
- enum:
- qcom,x1e001de-devkit
- const: qcom,x1e001de
- const: qcom,x1e80100
- items:
- enum:
- lenovo,thinkpad-t14s
@ -1074,6 +1131,7 @@ properties:
- enum:
- asus,vivobook-s15
- dell,xps13-9345
- hp,omnibook-x14
- lenovo,yoga-slim7x
- microsoft,romulus13
- microsoft,romulus15
@ -1081,6 +1139,11 @@ properties:
- qcom,x1e80100-qcp
- const: qcom,x1e80100
- items:
- enum:
- qcom,x1p42100-crd
- const: qcom,x1p42100
# Board compatibles go above
qcom,msm-id:
@ -1158,6 +1221,7 @@ allOf:
- qcom,apq8026
- qcom,apq8094
- qcom,apq8096
- qcom,msm8917
- qcom,msm8939
- qcom,msm8953
- qcom,msm8956

View File

@ -81,6 +81,17 @@ properties:
- const: azw,beelink-a1
- const: rockchip,rk3328
- description: BigTreeTech CB2 Manta M4/8P
items:
- const: bigtreetech,cb2-manta
- const: bigtreetech,cb2
- const: rockchip,rk3566
- description: BigTreeTech Pi 2
items:
- const: bigtreetech,pi2
- const: rockchip,rk3566
- description: bq Curie 2 tablet
items:
- const: mundoreader,bq-curie2
@ -167,6 +178,13 @@ properties:
- const: engicam,px30-core
- const: rockchip,px30
- description: Firefly Core-3588J-based boards
items:
- enum:
- firefly,itx-3588j
- const: firefly,core-3588j
- const: rockchip,rk3588
- description: Firefly Core-PX30-JD4 on MB-JD4-PX30 baseboard
items:
- const: firefly,px30-jd4-core-mb
@ -597,6 +615,11 @@ properties:
- const: google,veyron
- const: rockchip,rk3288
- description: H96 Max V58 TV Box
items:
- const: haochuangyi,h96-max-v58
- const: rockchip,rk3588
- description: Haoyu MarsBoard RK3066
items:
- const: haoyu,marsboard-rk3066
@ -812,6 +835,12 @@ properties:
- const: radxa,e20c
- const: rockchip,rk3528
- description: Radxa E52C
items:
- const: radxa,e52c
- const: rockchip,rk3582
- const: rockchip,rk3588s
- description: Radxa Rock
items:
- const: radxa,rock
@ -1006,6 +1035,21 @@ properties:
- const: rockchip,rk3399-sapphire-excavator
- const: rockchip,rk3399
- description: Rockchip RK3566 BOX Evaluation Demo board
items:
- const: rockchip,rk3566-box-demo
- const: rockchip,rk3566
- description: Rockchip RK3568 Evaluation board
items:
- const: rockchip,rk3568-evb1-v10
- const: rockchip,rk3568
- description: Rockchip RK3576 Evaluation board
items:
- const: rockchip,rk3576-evb1-v10
- const: rockchip,rk3576
- description: Rockchip RK3588 Evaluation board
items:
- const: rockchip,rk3588-evb1-v10
@ -1026,6 +1070,23 @@ properties:
- const: rockchip,rk3588-toybrick-x0
- const: rockchip,rk3588
- description: Sinovoip RK3308 Banana Pi P2 Pro
items:
- const: sinovoip,rk3308-bpi-p2pro
- const: rockchip,rk3308
- description: Sinovoip RK3568 Banana Pi R2 Pro
items:
- const: sinovoip,rk3568-bpi-r2pro
- const: rockchip,rk3568
- description: Sonoff iHost Smart Home Hub
items:
- const: itead,sonoff-ihost
- enum:
- rockchip,rv1126
- rockchip,rv1109
- description: Theobroma Systems PX30-uQ7 with Haikou baseboard
items:
- const: tsd,px30-ringneck-haikou
@ -1075,9 +1136,11 @@ properties:
- const: xunlong,orangepi-3b
- const: rockchip,rk3566
- description: Xunlong Orange Pi 5 Plus
- description: Xunlong Orange Pi 5 Max/Plus
items:
- const: xunlong,orangepi-5-plus
- enum:
- xunlong,orangepi-5-max
- xunlong,orangepi-5-plus
- const: rockchip,rk3588
- description: Xunlong Orange Pi R1 Plus / LTS
@ -1099,33 +1162,6 @@ properties:
- const: zkmagic,a95x-z2
- const: rockchip,rk3318
- description: Rockchip RK3566 BOX Evaluation Demo board
items:
- const: rockchip,rk3566-box-demo
- const: rockchip,rk3566
- description: Rockchip RK3568 Evaluation board
items:
- const: rockchip,rk3568-evb1-v10
- const: rockchip,rk3568
- description: Sinovoip RK3308 Banana Pi P2 Pro
items:
- const: sinovoip,rk3308-bpi-p2pro
- const: rockchip,rk3308
- description: Sinovoip RK3568 Banana Pi R2 Pro
items:
- const: sinovoip,rk3568-bpi-r2pro
- const: rockchip,rk3568
- description: Sonoff iHost Smart Home Hub
items:
- const: itead,sonoff-ihost
- enum:
- rockchip,rv1126
- rockchip,rv1109
additionalProperties: true
...

View File

@ -53,11 +53,17 @@ properties:
reg:
maxItems: 1
power-controller:
type: object
reboot-mode:
type: object
required:
- compatible
- reg
additionalProperties: true
additionalProperties: false
examples:
- |

View File

@ -240,6 +240,9 @@ properties:
items:
- enum:
- samsung,c1s # Samsung Galaxy Note20 5G (SM-N981B)
- samsung,r8s # Samsung Galaxy S20 FE (SM-G780F)
- samsung,x1s # Samsung Galaxy S20 5G (SM-G981B)
- samsung,x1slte # Samsung Galaxy S20 (SM-G980F)
- const: samsung,exynos990
- description: Exynos Auto v9 based boards

Some files were not shown because too many files have changed in this diff Show More