mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/
synced 2025-04-19 20:58:31 +09:00
Merge branch 'next' into for-linus
Prepare input updates for 6.15 merge window.
This commit is contained in:
commit
946661e3be
@ -1,5 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
msrv = "1.78.0"
|
||||
|
||||
check-private-items = true
|
||||
|
||||
disallowed-macros = [
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -22,6 +22,7 @@
|
||||
*.dtb.S
|
||||
*.dtbo.S
|
||||
*.dwo
|
||||
*.dylib
|
||||
*.elf
|
||||
*.gcno
|
||||
*.gcda
|
||||
|
42
.mailmap
42
.mailmap
@ -83,6 +83,13 @@ Anirudh Ghayal <quic_aghayal@quicinc.com> <aghayal@codeaurora.org>
|
||||
Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
|
||||
Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
|
||||
Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
|
||||
Antonio Quartulli <antonio@mandelbit.com> <antonio@meshcoding.com>
|
||||
Antonio Quartulli <antonio@mandelbit.com> <antonio@open-mesh.com>
|
||||
Antonio Quartulli <antonio@mandelbit.com> <antonio.quartulli@open-mesh.com>
|
||||
Antonio Quartulli <antonio@mandelbit.com> <ordex@autistici.org>
|
||||
Antonio Quartulli <antonio@mandelbit.com> <ordex@ritirata.org>
|
||||
Antonio Quartulli <antonio@mandelbit.com> <antonio@openvpn.net>
|
||||
Antonio Quartulli <antonio@mandelbit.com> <a@unstable.cc>
|
||||
Anup Patel <anup@brainfault.org> <anup.patel@wdc.com>
|
||||
Archit Taneja <archit@ti.com>
|
||||
Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
|
||||
@ -121,6 +128,8 @@ Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com>
|
||||
Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de>
|
||||
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@gmail.com>
|
||||
Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@redhat.com>
|
||||
Bingwu Zhang <xtex@aosc.io> <xtexchooser@duck.com>
|
||||
Bingwu Zhang <xtex@aosc.io> <xtex@xtexx.eu.org>
|
||||
Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se>
|
||||
Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@linaro.org>
|
||||
Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@sonymobile.com>
|
||||
@ -133,13 +142,17 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
|
||||
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
|
||||
Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com>
|
||||
Brian Avery <b.avery@hp.com>
|
||||
Brian Cain <bcain@kernel.org> <brian.cain@oss.qualcomm.com>
|
||||
Brian Cain <bcain@kernel.org> <bcain@quicinc.com>
|
||||
Brian King <brking@us.ibm.com>
|
||||
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
|
||||
Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
|
||||
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
|
||||
Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
|
||||
Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
|
||||
Carlos Bilbao <carlos.bilbao.osdev@gmail.com> <carlos.bilbao@amd.com>
|
||||
Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao@amd.com>
|
||||
Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao.osdev@gmail.com>
|
||||
Carlos Bilbao <carlos.bilbao@kernel.org> <bilbao@vt.edu>
|
||||
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
|
||||
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
|
||||
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
|
||||
@ -156,6 +169,7 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
|
||||
Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
|
||||
Christian Marangi <ansuelsmth@gmail.com>
|
||||
Christophe Ricard <christophe.ricard@gmail.com>
|
||||
Christopher Obbard <christopher.obbard@linaro.org> <chris.obbard@collabora.com>
|
||||
Christoph Hellwig <hch@lst.de>
|
||||
Chuck Lever <chuck.lever@oracle.com> <cel@kernel.org>
|
||||
Chuck Lever <chuck.lever@oracle.com> <cel@netapp.com>
|
||||
@ -200,6 +214,7 @@ Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
|
||||
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
|
||||
Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
|
||||
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
|
||||
Ethan Carter Edwards <ethan@ethancedwards.com> Ethan Edwards <ethancarteredwards@gmail.com>
|
||||
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com>
|
||||
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com>
|
||||
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
|
||||
@ -211,6 +226,7 @@ Fangrui Song <i@maskray.me> <maskray@google.com>
|
||||
Felipe W Damasio <felipewd@terra.com.br>
|
||||
Felix Kuhling <fxkuehl@gmx.de>
|
||||
Felix Moeller <felix@derklecks.de>
|
||||
Feng Tang <feng.79.tang@gmail.com> <feng.tang@intel.com>
|
||||
Fenglin Wu <quic_fenglinw@quicinc.com> <fenglinw@codeaurora.org>
|
||||
Filipe Lautert <filipe@icewall.org>
|
||||
Finn Thain <fthain@linux-m68k.org> <fthain@telegraphics.com.au>
|
||||
@ -251,6 +267,7 @@ Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com>
|
||||
Guru Das Srinagesh <quic_gurus@quicinc.com> <gurus@codeaurora.org>
|
||||
Gustavo Padovan <gustavo@las.ic.unicamp.br>
|
||||
Gustavo Padovan <padovan@profusion.mobi>
|
||||
Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> <hamza.mahfooz@amd.com>
|
||||
Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>
|
||||
Hans Verkuil <hverkuil@xs4all.nl> <hansverk@cisco.com>
|
||||
Hans Verkuil <hverkuil@xs4all.nl> <hverkuil-cisco@xs4all.nl>
|
||||
@ -301,6 +318,8 @@ Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com>
|
||||
Jean Tourrilhes <jt@hpl.hp.com>
|
||||
Jeevan Shriram <quic_jshriram@quicinc.com> <jshriram@codeaurora.org>
|
||||
Jeff Garzik <jgarzik@pretzel.yyz.us>
|
||||
Jeff Johnson <jeff.johnson@oss.qualcomm.com> <jjohnson@codeaurora.org>
|
||||
Jeff Johnson <jeff.johnson@oss.qualcomm.com> <quic_jjohnson@quicinc.com>
|
||||
Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net>
|
||||
Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
|
||||
Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
|
||||
@ -360,6 +379,7 @@ Juha Yrjola <juha.yrjola@solidboot.com>
|
||||
Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
|
||||
Iskren Chernev <me@iskren.info> <iskren.chernev@gmail.com>
|
||||
Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
|
||||
Kalle Valo <kvalo@kernel.org> <quic_kvalo@quicinc.com>
|
||||
Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
|
||||
Karthikeyan Periyasamy <quic_periyasa@quicinc.com> <periyasa@codeaurora.org>
|
||||
Kathiravan T <quic_kathirav@quicinc.com> <kathirav@codeaurora.org>
|
||||
@ -405,6 +425,7 @@ Liam Mark <quic_lmark@quicinc.com> <lmark@codeaurora.org>
|
||||
Linas Vepstas <linas@austin.ibm.com>
|
||||
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
|
||||
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
|
||||
Linus Lüssing <linus.luessing@c0d3.blue> <ll@simonwunderlich.de>
|
||||
<linux-hardening@vger.kernel.org> <kernel-hardening@lists.openwall.com>
|
||||
Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
|
||||
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
|
||||
@ -427,6 +448,8 @@ Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
|
||||
Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
|
||||
Marek Behún <kabel@kernel.org> <marek.behun@nic.cz>
|
||||
Marek Behún <kabel@kernel.org> Marek Behun <marek.behun@nic.cz>
|
||||
Marek Lindner <marek.lindner@mailbox.org> <lindner_marek@yahoo.de>
|
||||
Marek Lindner <marek.lindner@mailbox.org> <mareklindner@neomailbox.ch>
|
||||
Mark Brown <broonie@sirena.org.uk>
|
||||
Mark Starovoytov <mstarovo@pm.me> <mstarovoitov@marvell.com>
|
||||
Markus Schneider-Pargmann <msp@baylibre.com> <mpa@pengutronix.de>
|
||||
@ -435,7 +458,7 @@ Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
|
||||
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
|
||||
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
|
||||
Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
|
||||
Mathieu Othacehe <m.othacehe@gmail.com> <othacehe@gnu.org>
|
||||
Mathieu Othacehe <othacehe@gnu.org> <m.othacehe@gmail.com>
|
||||
Mat Martineau <martineau@kernel.org> <mathew.j.martineau@linux.intel.com>
|
||||
Mat Martineau <martineau@kernel.org> <mathewm@codeaurora.org>
|
||||
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
|
||||
@ -511,6 +534,7 @@ Nicholas Piggin <npiggin@gmail.com> <npiggin@kernel.dk>
|
||||
Nicholas Piggin <npiggin@gmail.com> <npiggin@suse.de>
|
||||
Nicholas Piggin <npiggin@gmail.com> <nickpiggin@yahoo.com.au>
|
||||
Nicholas Piggin <npiggin@gmail.com> <piggin@cyberone.com.au>
|
||||
Nick Desaulniers <nick.desaulniers+lkml@gmail.com> <ndesaulniers@google.com>
|
||||
Nicolas Ferre <nicolas.ferre@microchip.com> <nicolas.ferre@atmel.com>
|
||||
Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org>
|
||||
Nicolas Pitre <nico@fluxnic.net> <nico@linaro.org>
|
||||
@ -529,6 +553,8 @@ Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
|
||||
Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
|
||||
Oleksij Rempel <o.rempel@pengutronix.de>
|
||||
Oleksij Rempel <o.rempel@pengutronix.de> <ore@pengutronix.de>
|
||||
Oliver Hartkopp <socketcan@hartkopp.net> <oliver.hartkopp@volkswagen.de>
|
||||
Oliver Hartkopp <socketcan@hartkopp.net> <oliver@hartkopp.net>
|
||||
Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
|
||||
Ondřej Jirman <megi@xff.cz> <megous@megous.com>
|
||||
Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org>
|
||||
@ -640,6 +666,11 @@ Simona Vetter <simona.vetter@ffwll.ch> <daniel@biene.ffwll.ch>
|
||||
Simon Horman <horms@kernel.org> <simon.horman@corigine.com>
|
||||
Simon Horman <horms@kernel.org> <simon.horman@netronome.com>
|
||||
Simon Kelley <simon@thekelleys.org.uk>
|
||||
Simon Wunderlich <sw@simonwunderlich.de> <simon.wunderlich@open-mesh.com>
|
||||
Simon Wunderlich <sw@simonwunderlich.de> <simon.wunderlich@s2003.tu-chemnitz.de>
|
||||
Simon Wunderlich <sw@simonwunderlich.de> <simon.wunderlich@saxnet.de>
|
||||
Simon Wunderlich <sw@simonwunderlich.de> <simon@open-mesh.com>
|
||||
Simon Wunderlich <sw@simonwunderlich.de> <siwu@hrz.tu-chemnitz.de>
|
||||
Sricharan Ramabadhran <quic_srichara@quicinc.com> <sricharan@codeaurora.org>
|
||||
Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
|
||||
Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
|
||||
@ -660,6 +691,11 @@ Sudarshan Rajagopalan <quic_sudaraja@quicinc.com> <sudaraja@codeaurora.org>
|
||||
Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
|
||||
Sumit Semwal <sumit.semwal@ti.com>
|
||||
Surabhi Vishnoi <quic_svishnoi@quicinc.com> <svishnoi@codeaurora.org>
|
||||
Sven Eckelmann <sven@narfation.org> <seckelmann@datto.com>
|
||||
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@gmx.de>
|
||||
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@open-mesh.com>
|
||||
Sven Eckelmann <sven@narfation.org> <sven.eckelmann@openmesh.com>
|
||||
Sven Eckelmann <sven@narfation.org> <sven@open-mesh.com>
|
||||
Takashi YOSHII <takashi.yoshii.zj@renesas.com>
|
||||
Tamizh Chelvam Raja <quic_tamizhr@quicinc.com> <tamizhr@codeaurora.org>
|
||||
Taniya Das <quic_tdas@quicinc.com> <tdas@codeaurora.org>
|
||||
@ -735,6 +771,8 @@ Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
|
||||
Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
|
||||
Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
|
||||
Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn>
|
||||
Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com>
|
||||
Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com>
|
||||
Yusuke Goda <goda.yusuke@renesas.com>
|
||||
Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com>
|
||||
Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
|
||||
|
20
CREDITS
20
CREDITS
@ -20,6 +20,10 @@ N: Thomas Abraham
|
||||
E: thomas.ab@samsung.com
|
||||
D: Samsung pin controller driver
|
||||
|
||||
N: Jose Abreu
|
||||
E: jose.abreu@synopsys.com
|
||||
D: Synopsys DesignWare XPCS MDIO/PCS driver.
|
||||
|
||||
N: Dragos Acostachioaie
|
||||
E: dragos@iname.com
|
||||
W: http://www.arbornet.org/~dragos
|
||||
@ -1428,6 +1432,10 @@ S: 8124 Constitution Apt. 7
|
||||
S: Sterling Heights, Michigan 48313
|
||||
S: USA
|
||||
|
||||
N: Andy Gospodarek
|
||||
E: andy@greyhouse.net
|
||||
D: Maintenance and contributions to the network interface bonding driver.
|
||||
|
||||
N: Wolfgang Grandegger
|
||||
E: wg@grandegger.com
|
||||
D: Controller Area Network (device drivers)
|
||||
@ -1812,6 +1820,10 @@ D: Author/maintainer of most DRM drivers (especially ATI, MGA)
|
||||
D: Core DRM templates, general DRM and 3D-related hacking
|
||||
S: No fixed address
|
||||
|
||||
N: Woojung Huh
|
||||
E: woojung.huh@microchip.com
|
||||
D: Microchip LAN78XX USB Ethernet driver
|
||||
|
||||
N: Kenn Humborg
|
||||
E: kenn@wombat.ie
|
||||
D: Mods to loop device to support sparse backing files
|
||||
@ -2503,11 +2515,9 @@ D: SLS distribution
|
||||
D: Initial implementation of VC's, pty's and select()
|
||||
|
||||
N: Pavel Machek
|
||||
E: pavel@ucw.cz
|
||||
E: pavel@kernel.org
|
||||
P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96
|
||||
D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd,
|
||||
D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB,
|
||||
D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
|
||||
D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk,
|
||||
D: Altera SoCFPGA and Nokia N900 support.
|
||||
S: Czech Republic
|
||||
|
||||
@ -4327,7 +4337,7 @@ D: Freescale Highspeed USB device driver
|
||||
D: Freescale QE SoC support and Ethernet driver
|
||||
S: B-1206 Jingmao Guojigongyu
|
||||
S: 16 Baliqiao Nanjie, Beijing 101100
|
||||
S: People's Repulic of China
|
||||
S: People's Republic of China
|
||||
|
||||
N: Vlad Yasevich
|
||||
E: vyasevich@gmail.com
|
||||
|
@ -1,3 +1,6 @@
|
||||
The cxl driver is no longer maintained, and will be removed from the kernel in
|
||||
the near future.
|
||||
|
||||
Please note that attributes that are shared between devices are stored in
|
||||
the directory pointed to by the symlink device/.
|
||||
For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
|
9
Documentation/ABI/stable/sysfs-class-bluetooth
Normal file
9
Documentation/ABI/stable/sysfs-class-bluetooth
Normal file
@ -0,0 +1,9 @@
|
||||
What: /sys/class/bluetooth/hci<index>/reset
|
||||
Date: 14-Jan-2025
|
||||
KernelVersion: 6.13
|
||||
Contact: linux-bluetooth@vger.kernel.org
|
||||
Description: This write-only attribute allows users to trigger the vendor reset
|
||||
method on the Bluetooth device when arbitrary data is written.
|
||||
The reset may or may not be done through the device transport
|
||||
(e.g., UART/USB), and can also be done through an out-of-band
|
||||
approach such as GPIO.
|
@ -0,0 +1,15 @@
|
||||
What: /sys/bus/coresight/devices/dummy_source<N>/enable_source
|
||||
Date: Dec 2024
|
||||
KernelVersion: 6.14
|
||||
Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
|
||||
Description: (RW) Enable/disable tracing of dummy source. A sink should be activated
|
||||
before enabling the source. The path of coresight components linking
|
||||
the source to the sink is configured and managed automatically by the
|
||||
coresight framework.
|
||||
|
||||
What: /sys/bus/coresight/devices/dummy_source<N>/traceid
|
||||
Date: Dec 2024
|
||||
KernelVersion: 6.14
|
||||
Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
|
||||
Description: (R) Show the trace ID that will appear in the trace stream
|
||||
coming from this trace entity.
|
24
Documentation/ABI/testing/sysfs-bus-event_source-devices
Normal file
24
Documentation/ABI/testing/sysfs-bus-event_source-devices
Normal file
@ -0,0 +1,24 @@
|
||||
What: /sys/bus/event_source/devices/<pmu>
|
||||
Date: 2014/02/24
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description: Performance Monitoring Unit (<pmu>)
|
||||
|
||||
Each <pmu> directory, for a PMU device, is a name
|
||||
optionally followed by an underscore and then either a
|
||||
decimal or hexadecimal number. For example, cpu is a
|
||||
PMU name without a suffix as is intel_bts,
|
||||
uncore_imc_0 is a PMU name with a 0 numeric suffix,
|
||||
ddr_pmu_87e1b0000000 is a PMU name with a hex
|
||||
suffix. The hex suffix must be more than two
|
||||
characters long to avoid ambiguity with PMUs like the
|
||||
S390 cpum_cf.
|
||||
|
||||
Tools can treat PMUs with the same name that differ by
|
||||
suffix as instances of the same PMU for the sake of,
|
||||
for example, opening an event. For example, the PMUs
|
||||
uncore_imc_free_running_0 and
|
||||
uncore_imc_free_running_1 have an event data_read;
|
||||
opening the data_read event on a PMU specified as
|
||||
uncore_imc_free_running should be treated as opening
|
||||
the data_read event on PMU uncore_imc_free_running_0
|
||||
and PMU uncore_imc_free_running_1.
|
@ -37,11 +37,13 @@ Description: Per-pmu performance monitoring events specific to the running syste
|
||||
performance monitoring event supported by the <pmu>. The name
|
||||
of the file is the name of the event.
|
||||
|
||||
As performance monitoring event names are case
|
||||
insensitive in the perf tool, the perf tool only looks
|
||||
for lower or upper case event names in sysfs to avoid
|
||||
As performance monitoring event names are case insensitive
|
||||
in the perf tool, the perf tool only looks for all lower
|
||||
case or all upper case event names in sysfs to avoid
|
||||
scanning the directory. It is therefore required that the
|
||||
name of the event here is either lower or upper case.
|
||||
name of the event here is either completely lower or upper
|
||||
case, with no mixed-case characters. Numbers, '.', '_', and
|
||||
'-' are also allowed.
|
||||
|
||||
File contents:
|
||||
|
||||
|
@ -168,18 +168,6 @@ Description:
|
||||
is required is a consistent labeling. Units after application
|
||||
of scale and offset are millivolts.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
|
||||
KernelVersion: 3.17
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Raw (unscaled no bias removal etc.) current measurement from
|
||||
channel Y. In special cases where the channel does not
|
||||
correspond to externally available input one of the named
|
||||
versions may be used. The number must always be specified and
|
||||
unique to allow association with event codes. Units after
|
||||
application of scale and offset are milliamps.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_powerY_raw
|
||||
KernelVersion: 4.5
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
@ -227,7 +215,7 @@ Description:
|
||||
same scaling as _raw.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempX_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_x_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_y_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_ambient_raw
|
||||
@ -416,11 +404,11 @@ Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Scaled humidity measurement in milli percent.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_X_mean_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_Y_mean_raw
|
||||
KernelVersion: 3.5
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Averaged raw measurement from channel X. The number of values
|
||||
Averaged raw measurement from channel Y. The number of values
|
||||
used for averaging is device specific. The converting rules for
|
||||
normal raw values also apply to the averaged raw values.
|
||||
|
||||
@ -448,7 +436,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_magn_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_rot_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_angl_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceX_offset
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceY_offset
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -508,6 +496,9 @@ What: /sys/bus/iio/devices/iio:deviceX/in_angl_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_x_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_y_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_z_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_scale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_scale
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
@ -660,10 +651,10 @@ What: /sys/.../iio:deviceX/in_magn_scale_available
|
||||
What: /sys/.../iio:deviceX/in_illuminance_scale_available
|
||||
What: /sys/.../iio:deviceX/in_intensity_scale_available
|
||||
What: /sys/.../iio:deviceX/in_proximity_scale_available
|
||||
What: /sys/.../iio:deviceX/in_voltageX_scale_available
|
||||
What: /sys/.../iio:deviceX/in_voltageY_scale_available
|
||||
What: /sys/.../iio:deviceX/in_voltage-voltage_scale_available
|
||||
What: /sys/.../iio:deviceX/out_voltageX_scale_available
|
||||
What: /sys/.../iio:deviceX/out_altvoltageX_scale_available
|
||||
What: /sys/.../iio:deviceX/out_voltageY_scale_available
|
||||
What: /sys/.../iio:deviceX/out_altvoltageY_scale_available
|
||||
What: /sys/.../iio:deviceX/in_capacitance_scale_available
|
||||
What: /sys/.../iio:deviceX/in_pressure_scale_available
|
||||
What: /sys/.../iio:deviceX/in_pressureY_scale_available
|
||||
@ -681,6 +672,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_hardwaregain
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_hardwaregain
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_hardwaregain
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_clear_hardwaregain
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_hardwaregain
|
||||
KernelVersion: 2.6.35
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -1562,7 +1554,7 @@ Description:
|
||||
This attribute is used to read the amount of quadrature error
|
||||
present in the device at a given time.
|
||||
|
||||
What: /sys/.../iio:deviceX/in_accelX_power_mode
|
||||
What: /sys/.../iio:deviceX/in_accelY_power_mode
|
||||
KernelVersion: 3.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -1633,6 +1625,10 @@ What: /sys/.../iio:deviceX/in_intensityY_uv_raw
|
||||
What: /sys/.../iio:deviceX/in_intensityY_uva_raw
|
||||
What: /sys/.../iio:deviceX/in_intensityY_uvb_raw
|
||||
What: /sys/.../iio:deviceX/in_intensityY_duv_raw
|
||||
What: /sys/.../iio:deviceX/in_intensity_red_raw
|
||||
What: /sys/.../iio:deviceX/in_intensity_green_raw
|
||||
What: /sys/.../iio:deviceX/in_intensity_blue_raw
|
||||
What: /sys/.../iio:deviceX/in_intensity_clear_raw
|
||||
KernelVersion: 3.4
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -1691,16 +1687,19 @@ Description:
|
||||
Raw value of rotation from true/magnetic north measured with
|
||||
or without compensation from tilt sensors.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentX_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentX_i_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentX_q_raw
|
||||
KernelVersion: 3.18
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_raw
|
||||
KernelVersion: 3.17
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Raw current measurement from channel X. Units are in milliamps
|
||||
Raw current measurement from channel Y. Units are in milliamps
|
||||
after application of scale and offset. If no offset or scale is
|
||||
present, output should be considered as processed with the
|
||||
unit in milliamps.
|
||||
unit in milliamps. In special cases where the channel does not
|
||||
correspond to externally available input one of the named
|
||||
versions may be used.
|
||||
|
||||
Channels with 'i' and 'q' modifiers always exist in pairs and both
|
||||
channels refer to the same signal. The 'i' channel contains the in-phase
|
||||
@ -1864,9 +1863,9 @@ Description:
|
||||
hardware fifo watermark level.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibemissivity
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempX_calibemissivity
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_calibemissivity
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_calibemissivity
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempX_object_calibemissivity
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_object_calibemissivity
|
||||
KernelVersion: 4.1
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -1887,17 +1886,17 @@ Description:
|
||||
is considered as one sample for <type>[_name]_sampling_frequency.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_co2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_co2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_ethanol_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_ethanol_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_ethanol_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_h2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_h2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_h2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_o2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_o2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_o2_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_voc_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_voc_raw
|
||||
KernelVersion: 4.3
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -1905,9 +1904,9 @@ Description:
|
||||
after application of scale and offset are percents.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistanceX_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistanceY_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_resistance_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_resistanceX_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_resistanceY_raw
|
||||
KernelVersion: 4.3
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -2096,7 +2095,7 @@ Description:
|
||||
One of the following thermocouple types: B, E, J, K, N, R, S, T.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_calibambient
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempX_object_calibambient
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_object_calibambient
|
||||
KernelVersion: 5.10
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
@ -2172,9 +2171,9 @@ Description:
|
||||
|
||||
- a range specified as "[min step max]"
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageX_sampling_frequency
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sampling_frequency
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_powerY_sampling_frequency
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentZ_sampling_frequency
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_sampling_frequency
|
||||
KernelVersion: 5.20
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
|
23
Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta
Normal file
23
Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta
Normal file
@ -0,0 +1,23 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration
|
||||
KernelVersion: 5.5
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This attribute, if available, initiates the system calibration procedure. This is done on a
|
||||
single channel at a time. Write '1' to start the calibration.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration_mode_available
|
||||
KernelVersion: 5.5
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This attribute, if available, returns a list with the possible calibration modes.
|
||||
There are two available options:
|
||||
"zero_scale" - calibrate to zero scale
|
||||
"full_scale" - calibrate to full scale
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration_mode
|
||||
KernelVersion: 5.5
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This attribute, if available, sets up the calibration mode used in the system calibration
|
||||
procedure. Reading returns the current calibration mode.
|
||||
Writing sets the system calibration mode.
|
@ -19,33 +19,9 @@ Description:
|
||||
the bridge can be disconnected (when it is not being used)
|
||||
using the bridge_switch_en attribute.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration
|
||||
KernelVersion:
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Initiates the system calibration procedure. This is done on a
|
||||
single channel at a time. Write '1' to start the calibration.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage2-voltage2_shorted_raw
|
||||
KernelVersion:
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Measure voltage from AIN2 pin connected to AIN(+)
|
||||
and AIN(-) shorted.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode_available
|
||||
KernelVersion:
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Reading returns a list with the possible calibration modes.
|
||||
There are two available options:
|
||||
"zero_scale" - calibrate to zero scale
|
||||
"full_scale" - calibrate to full scale
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode
|
||||
KernelVersion:
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Sets up the calibration mode used in the system calibration
|
||||
procedure. Reading returns the current calibration mode.
|
||||
Writing sets the system calibration mode.
|
||||
|
48
Documentation/ABI/testing/sysfs-class-platform-profile
Normal file
48
Documentation/ABI/testing/sysfs-class-platform-profile
Normal file
@ -0,0 +1,48 @@
|
||||
What: /sys/class/platform-profile/platform-profile-X/name
|
||||
Date: March 2025
|
||||
KernelVersion: 6.14
|
||||
Description: Name of the class device given by the driver.
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/class/platform-profile/platform-profile-X/choices
|
||||
Date: March 2025
|
||||
KernelVersion: 6.14
|
||||
Description: This file contains a space-separated list of profiles supported
|
||||
for this device.
|
||||
|
||||
Drivers must use the following standard profile-names:
|
||||
|
||||
==================== ========================================
|
||||
low-power Low power consumption
|
||||
cool Cooler operation
|
||||
quiet Quieter operation
|
||||
balanced Balance between low power consumption
|
||||
and performance
|
||||
balanced-performance Balance between performance and low
|
||||
power consumption with a slight bias
|
||||
towards performance
|
||||
performance High performance operation
|
||||
custom Driver defined custom profile
|
||||
==================== ========================================
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/class/platform-profile/platform-profile-X/profile
|
||||
Date: March 2025
|
||||
KernelVersion: 6.14
|
||||
Description: Reading this file gives the current selected profile for this
|
||||
device. Writing this file with one of the strings from
|
||||
platform_profile_choices changes the profile to the new value.
|
||||
|
||||
This file can be monitored for changes by polling for POLLPRI,
|
||||
POLLPRI will be signaled on any changes, independent of those
|
||||
changes coming from a userspace write; or coming from another
|
||||
source such as e.g. a hotkey triggered profile change handled
|
||||
either directly by the embedded-controller or fully handled
|
||||
inside the kernel.
|
||||
|
||||
This file may also emit the string 'custom' to indicate
|
||||
that the driver is using a driver defined custom profile.
|
||||
|
||||
RW
|
@ -407,10 +407,30 @@ Description:
|
||||
|
||||
Access: Read, Write
|
||||
|
||||
Reading this returns the current active value, e.g. 'Standard'.
|
||||
Check charge_types to get the values supported by the battery.
|
||||
|
||||
Valid values:
|
||||
"Unknown", "N/A", "Trickle", "Fast", "Standard",
|
||||
"Adaptive", "Custom", "Long Life", "Bypass"
|
||||
|
||||
What: /sys/class/power_supply/<supply_name>/charge_types
|
||||
Date: December 2024
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description:
|
||||
Identical to charge_type but reading returns a list of supported
|
||||
charge-types with the currently active type surrounded by square
|
||||
brackets, e.g.: "Fast [Standard] Long_Life".
|
||||
|
||||
power_supply class devices may support both charge_type and
|
||||
charge_types for backward compatibility. In this case both will
|
||||
always have the same active value and the active value can be
|
||||
changed by writing either property.
|
||||
|
||||
Note charge-types which contain a space such as "Long Life" will
|
||||
have the space replaced by a '_' resulting in e.g. "Long_Life".
|
||||
When writing charge-types both variants are accepted.
|
||||
|
||||
What: /sys/class/power_supply/<supply_name>/charge_term_current
|
||||
Date: July 2014
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
@ -433,7 +453,7 @@ Description:
|
||||
|
||||
Valid values:
|
||||
"Unknown", "Good", "Overheat", "Dead",
|
||||
"Over voltage", "Unspecified failure", "Cold",
|
||||
"Over voltage", "Under voltage", "Unspecified failure", "Cold",
|
||||
"Watchdog timer expire", "Safety timer expire",
|
||||
"Over current", "Calibration required", "Warm",
|
||||
"Cool", "Hot", "No battery"
|
||||
@ -793,3 +813,12 @@ Description:
|
||||
|
||||
Access: Read
|
||||
Valid values: 1-31
|
||||
|
||||
What: /sys/class/power_supply/<supply_name>/extensions/<extension_name>
|
||||
Date: March 2025
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description:
|
||||
Reports the extensions registered to the power supply.
|
||||
Each entry is a link to the device which registered the extension.
|
||||
|
||||
Access: Read
|
||||
|
32
Documentation/ABI/testing/sysfs-class-power-max1720x
Normal file
32
Documentation/ABI/testing/sysfs-class-power-max1720x
Normal file
@ -0,0 +1,32 @@
|
||||
What: /sys/class/power_supply/max1720x/temp_ain1
|
||||
Date: January 2025
|
||||
KernelVersion: 6.14
|
||||
Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
|
||||
Description:
|
||||
Reports the current temperature reading from AIN1 thermistor.
|
||||
|
||||
Access: Read
|
||||
|
||||
Valid values: Represented in 1/10 Degrees Celsius
|
||||
|
||||
What: /sys/class/power_supply/max1720x/temp_ain2
|
||||
Date: January 2025
|
||||
KernelVersion: 6.14
|
||||
Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
|
||||
Description:
|
||||
Reports the current temperature reading from AIN2 thermistor.
|
||||
|
||||
Access: Read
|
||||
|
||||
Valid values: Represented in 1/10 Degrees Celsius
|
||||
|
||||
What: /sys/class/power_supply/max1720x/temp_int
|
||||
Date: January 2025
|
||||
KernelVersion: 6.14
|
||||
Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
|
||||
Description:
|
||||
Reports the current temperature reading from internal die.
|
||||
|
||||
Access: Read
|
||||
|
||||
Valid values: Represented in 1/10 Degrees Celsius
|
@ -55,6 +55,15 @@ Description:
|
||||
An attribute which indicates whether the patch supports
|
||||
atomic-replace.
|
||||
|
||||
What: /sys/kernel/livepatch/<patch>/stack_order
|
||||
Date: Jan 2025
|
||||
KernelVersion: 6.14.0
|
||||
Description:
|
||||
This attribute specifies the sequence in which live patch modules
|
||||
are applied to the system. If multiple live patches modify the same
|
||||
function, the implementation with the biggest 'stack_order' number
|
||||
is used, unless a transition is currently in progress.
|
||||
|
||||
What: /sys/kernel/livepatch/<patch>/<object>
|
||||
Date: Nov 2014
|
||||
KernelVersion: 3.19.0
|
||||
|
@ -355,10 +355,15 @@ Description: If 'target' is written to the 'type' file, writing to or
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/matching
|
||||
Date: Dec 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing 'Y' or 'N' to this file sets whether to filter out
|
||||
pages that do or do not match to the 'type' and 'memcg_path',
|
||||
respectively. Filter out means the action of the scheme will
|
||||
not be applied to.
|
||||
Description: Writing 'Y' or 'N' to this file sets whether the filter is for
|
||||
the memory of the 'type', or all except the 'type'.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/allow
|
||||
Date: Jan 2025
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing 'Y' or 'N' to this file sets whether to allow or reject
|
||||
applying the scheme's action to the memory that satisfies the
|
||||
'type' and the 'matching' of the directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
|
||||
Date: Mar 2022
|
||||
@ -384,6 +389,12 @@ Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the total size of regions that the
|
||||
action of the scheme has successfully applied in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_ops_filter_passed
|
||||
Date: Dec 2024
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the total size of memory that passed
|
||||
DAMON operations layer-handled filters of the scheme in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
@ -424,3 +435,10 @@ Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the 'age' of a memory region that
|
||||
corresponding DAMON-based Operation Scheme's action has tried
|
||||
to be applied.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/sz_filter_passed
|
||||
Date: Dec 2024
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the size of the memory in the region
|
||||
that passed DAMON operations layer-handled filters of the
|
||||
scheme in bytes.
|
||||
|
64
Documentation/ABI/testing/sysfs-platform-mellanox-pmc
Normal file
64
Documentation/ABI/testing/sysfs-platform-mellanox-pmc
Normal file
@ -0,0 +1,64 @@
|
||||
HID Driver Description
|
||||
MLNXBFD0 mlxbf-pmc Performance counters (BlueField-1)
|
||||
MLNXBFD1 mlxbf-pmc Performance counters (BlueField-2)
|
||||
MLNXBFD2 mlxbf-pmc Performance counters (BlueField-3)
|
||||
|
||||
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/event_list
|
||||
Date: Dec 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
|
||||
Description:
|
||||
List of events supported by the counters in the specific block.
|
||||
It is used to extract the event number or ID associated with
|
||||
each event.
|
||||
|
||||
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/event<N>
|
||||
Date: Dec 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
|
||||
Description:
|
||||
Event monitored by corresponding counter. This is used to
|
||||
program or read back the event that should be or is currently
|
||||
being monitored by counter<N>.
|
||||
|
||||
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/counter<N>
|
||||
Date: Dec 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
|
||||
Description:
|
||||
Counter value of the event being monitored. This is used to
|
||||
read the counter value of the event which was programmed using
|
||||
event<N>. This is also used to clear or reset the counter value
|
||||
by writing 0 to the counter sysfs.
|
||||
|
||||
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/enable
|
||||
Date: Dec 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
|
||||
Description:
|
||||
Start or stop counters. This is used to start the counters
|
||||
for monitoring the programmed events and also to stop the
|
||||
counters after the desired duration. Writing value 1 will
|
||||
start all the counters in the block, and writing 0 will
|
||||
stop all the counters together.
|
||||
|
||||
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/<reg>
|
||||
Date: Dec 2020
|
||||
KernelVersion: 5.10
|
||||
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
|
||||
Description:
|
||||
Value of register. This is used to read or reset the registers
|
||||
where various performance statistics are counted for each block.
|
||||
Writing 0 to the sysfs will clear the counter, writing any other
|
||||
value is not allowed.
|
||||
|
||||
What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/count_clock
|
||||
Date: Mar 2025
|
||||
KernelVersion: 6.14
|
||||
Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
|
||||
Description:
|
||||
Use a counter for counting cycles. This is used to repurpose/dedicate
|
||||
any of the counters in the block to counting cycles. Each counter is
|
||||
represented by a bit (bit 0 for counter0, bit1 for counter1 and so on)
|
||||
and setting the corresponding bit will reserve that specific counter
|
||||
for counting cycles and override the event<N> setting.
|
@ -33,3 +33,8 @@ Description: Reading this file gives the current selected profile for this
|
||||
source such as e.g. a hotkey triggered profile change handled
|
||||
either directly by the embedded-controller or fully handled
|
||||
inside the kernel.
|
||||
|
||||
This file may also emit the string 'custom' to indicate
|
||||
that multiple platform profile drivers are in use but
|
||||
have different values. This string can not be written to
|
||||
this interface and is solely for informational purposes.
|
||||
|
43
Documentation/ABI/testing/sysfs-pps-gen
Normal file
43
Documentation/ABI/testing/sysfs-pps-gen
Normal file
@ -0,0 +1,43 @@
|
||||
What: /sys/class/pps-gen/
|
||||
Date: February 2025
|
||||
KernelVersion: 6.13
|
||||
Contact: Rodolfo Giometti <giometti@enneenne.com>
|
||||
Description:
|
||||
The /sys/class/pps-gen/ directory contains files and
|
||||
directories that provide a unified interface to the PPS
|
||||
generators.
|
||||
|
||||
What: /sys/class/pps-gen/pps-genX/
|
||||
Date: February 2025
|
||||
KernelVersion: 6.13
|
||||
Contact: Rodolfo Giometti <giometti@enneenne.com>
|
||||
Description:
|
||||
The /sys/class/pps-gen/pps-genX/ directory is related to X-th
|
||||
PPS generator in the system. Each directory contains files to
|
||||
manage and control its PPS generator.
|
||||
|
||||
What: /sys/class/pps-gen/pps-genX/enable
|
||||
Date: February 2025
|
||||
KernelVersion: 6.13
|
||||
Contact: Rodolfo Giometti <giometti@enneenne.com>
|
||||
Description:
|
||||
This write-only file enables or disables generation of the
|
||||
PPS signal.
|
||||
|
||||
What: /sys/class/pps-gen/pps-genX/system
|
||||
Date: February 2025
|
||||
KernelVersion: 6.13
|
||||
Contact: Rodolfo Giometti <giometti@enneenne.com>
|
||||
Description:
|
||||
This read-only file returns "1" if the generator takes the
|
||||
timing from the system clock, while it returns "0" if not
|
||||
(i.e. from a peripheral device clock).
|
||||
|
||||
What: /sys/class/pps-gen/pps-genX/time
|
||||
Date: February 2025
|
||||
KernelVersion: 6.13
|
||||
Contact: Rodolfo Giometti <giometti@enneenne.com>
|
||||
Description:
|
||||
This read-only file contains the current time stored into the
|
||||
generator clock as two integers representing the current time
|
||||
seconds and nanoseconds.
|
@ -104,7 +104,7 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
|
||||
YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst
|
||||
YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec
|
||||
YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs
|
||||
YNL_TOOL:=$(srctree)/tools/net/ynl/ynl-gen-rst.py
|
||||
YNL_TOOL:=$(srctree)/tools/net/ynl/pyynl/ynl_gen_rst.py
|
||||
|
||||
YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml))
|
||||
YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP))
|
||||
|
@ -15,6 +15,7 @@ PCI Endpoint Framework
|
||||
pci-ntb-howto
|
||||
pci-vntb-function
|
||||
pci-vntb-howto
|
||||
pci-nvme-function
|
||||
|
||||
function/binding/pci-test
|
||||
function/binding/pci-ntb
|
||||
|
13
Documentation/PCI/endpoint/pci-nvme-function.rst
Normal file
13
Documentation/PCI/endpoint/pci-nvme-function.rst
Normal file
@ -0,0 +1,13 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================
|
||||
PCI NVMe Function
|
||||
=================
|
||||
|
||||
:Author: Damien Le Moal <dlemoal@kernel.org>
|
||||
|
||||
The PCI NVMe endpoint function implements a PCI NVMe controller using the NVMe
|
||||
subsystem target core code. The driver for this function resides with the NVMe
|
||||
subsystem as drivers/nvme/target/nvmet-pciep.c.
|
||||
|
||||
See Documentation/nvme/nvme-pci-endpoint-target.rst for more details.
|
@ -81,8 +81,8 @@ device, the following commands can be used::
|
||||
|
||||
# echo 0x104c > functions/pci_epf_test/func1/vendorid
|
||||
# echo 0xb500 > functions/pci_epf_test/func1/deviceid
|
||||
# echo 16 > functions/pci_epf_test/func1/msi_interrupts
|
||||
# echo 8 > functions/pci_epf_test/func1/msix_interrupts
|
||||
# echo 32 > functions/pci_epf_test/func1/msi_interrupts
|
||||
# echo 2048 > functions/pci_epf_test/func1/msix_interrupts
|
||||
|
||||
|
||||
Binding pci-epf-test Device to EP Controller
|
||||
@ -123,113 +123,83 @@ above::
|
||||
Using Endpoint Test function Device
|
||||
-----------------------------------
|
||||
|
||||
pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
|
||||
tests. To compile this tool the following commands should be used::
|
||||
Kselftest added in tools/testing/selftests/pci_endpoint can be used to run all
|
||||
the default PCI endpoint tests. To build the Kselftest for PCI endpoint
|
||||
subsystem, the following commands should be used::
|
||||
|
||||
# cd <kernel-dir>
|
||||
# make -C tools/pci
|
||||
# make -C tools/testing/selftests/pci_endpoint
|
||||
|
||||
or if you desire to compile and install in your system::
|
||||
|
||||
# cd <kernel-dir>
|
||||
# make -C tools/pci install
|
||||
# make -C tools/testing/selftests/pci_endpoint INSTALL_PATH=/usr/bin install
|
||||
|
||||
The tool and script will be located in <rootfs>/usr/bin/
|
||||
The test will be located in <rootfs>/usr/bin/
|
||||
|
||||
|
||||
pcitest.sh Output
|
||||
~~~~~~~~~~~~~~~~~
|
||||
Kselftest Output
|
||||
~~~~~~~~~~~~~~~~
|
||||
::
|
||||
|
||||
# pcitest.sh
|
||||
BAR tests
|
||||
# pci_endpoint_test
|
||||
TAP version 13
|
||||
1..16
|
||||
# Starting 16 tests from 9 test cases.
|
||||
# RUN pci_ep_bar.BAR0.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR0.BAR_TEST
|
||||
ok 1 pci_ep_bar.BAR0.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR1.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR1.BAR_TEST
|
||||
ok 2 pci_ep_bar.BAR1.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR2.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR2.BAR_TEST
|
||||
ok 3 pci_ep_bar.BAR2.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR3.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR3.BAR_TEST
|
||||
ok 4 pci_ep_bar.BAR3.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR4.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR4.BAR_TEST
|
||||
ok 5 pci_ep_bar.BAR4.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR5.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR5.BAR_TEST
|
||||
ok 6 pci_ep_bar.BAR5.BAR_TEST
|
||||
# RUN pci_ep_basic.CONSECUTIVE_BAR_TEST ...
|
||||
# OK pci_ep_basic.CONSECUTIVE_BAR_TEST
|
||||
ok 7 pci_ep_basic.CONSECUTIVE_BAR_TEST
|
||||
# RUN pci_ep_basic.LEGACY_IRQ_TEST ...
|
||||
# OK pci_ep_basic.LEGACY_IRQ_TEST
|
||||
ok 8 pci_ep_basic.LEGACY_IRQ_TEST
|
||||
# RUN pci_ep_basic.MSI_TEST ...
|
||||
# OK pci_ep_basic.MSI_TEST
|
||||
ok 9 pci_ep_basic.MSI_TEST
|
||||
# RUN pci_ep_basic.MSIX_TEST ...
|
||||
# OK pci_ep_basic.MSIX_TEST
|
||||
ok 10 pci_ep_basic.MSIX_TEST
|
||||
# RUN pci_ep_data_transfer.memcpy.READ_TEST ...
|
||||
# OK pci_ep_data_transfer.memcpy.READ_TEST
|
||||
ok 11 pci_ep_data_transfer.memcpy.READ_TEST
|
||||
# RUN pci_ep_data_transfer.memcpy.WRITE_TEST ...
|
||||
# OK pci_ep_data_transfer.memcpy.WRITE_TEST
|
||||
ok 12 pci_ep_data_transfer.memcpy.WRITE_TEST
|
||||
# RUN pci_ep_data_transfer.memcpy.COPY_TEST ...
|
||||
# OK pci_ep_data_transfer.memcpy.COPY_TEST
|
||||
ok 13 pci_ep_data_transfer.memcpy.COPY_TEST
|
||||
# RUN pci_ep_data_transfer.dma.READ_TEST ...
|
||||
# OK pci_ep_data_transfer.dma.READ_TEST
|
||||
ok 14 pci_ep_data_transfer.dma.READ_TEST
|
||||
# RUN pci_ep_data_transfer.dma.WRITE_TEST ...
|
||||
# OK pci_ep_data_transfer.dma.WRITE_TEST
|
||||
ok 15 pci_ep_data_transfer.dma.WRITE_TEST
|
||||
# RUN pci_ep_data_transfer.dma.COPY_TEST ...
|
||||
# OK pci_ep_data_transfer.dma.COPY_TEST
|
||||
ok 16 pci_ep_data_transfer.dma.COPY_TEST
|
||||
# PASSED: 16 / 16 tests passed.
|
||||
# Totals: pass:16 fail:0 xfail:0 xpass:0 skip:0 error:0
|
||||
|
||||
BAR0: OKAY
|
||||
BAR1: OKAY
|
||||
BAR2: OKAY
|
||||
BAR3: OKAY
|
||||
BAR4: NOT OKAY
|
||||
BAR5: NOT OKAY
|
||||
|
||||
Interrupt tests
|
||||
Testcase 16 (pci_ep_data_transfer.dma.COPY_TEST) will fail for most of the DMA
|
||||
capable endpoint controllers due to the absence of the MEMCPY over DMA. For such
|
||||
controllers, it is advisable to skip this testcase using this
|
||||
command::
|
||||
|
||||
SET IRQ TYPE TO LEGACY: OKAY
|
||||
LEGACY IRQ: NOT OKAY
|
||||
SET IRQ TYPE TO MSI: OKAY
|
||||
MSI1: OKAY
|
||||
MSI2: OKAY
|
||||
MSI3: OKAY
|
||||
MSI4: OKAY
|
||||
MSI5: OKAY
|
||||
MSI6: OKAY
|
||||
MSI7: OKAY
|
||||
MSI8: OKAY
|
||||
MSI9: OKAY
|
||||
MSI10: OKAY
|
||||
MSI11: OKAY
|
||||
MSI12: OKAY
|
||||
MSI13: OKAY
|
||||
MSI14: OKAY
|
||||
MSI15: OKAY
|
||||
MSI16: OKAY
|
||||
MSI17: NOT OKAY
|
||||
MSI18: NOT OKAY
|
||||
MSI19: NOT OKAY
|
||||
MSI20: NOT OKAY
|
||||
MSI21: NOT OKAY
|
||||
MSI22: NOT OKAY
|
||||
MSI23: NOT OKAY
|
||||
MSI24: NOT OKAY
|
||||
MSI25: NOT OKAY
|
||||
MSI26: NOT OKAY
|
||||
MSI27: NOT OKAY
|
||||
MSI28: NOT OKAY
|
||||
MSI29: NOT OKAY
|
||||
MSI30: NOT OKAY
|
||||
MSI31: NOT OKAY
|
||||
MSI32: NOT OKAY
|
||||
SET IRQ TYPE TO MSI-X: OKAY
|
||||
MSI-X1: OKAY
|
||||
MSI-X2: OKAY
|
||||
MSI-X3: OKAY
|
||||
MSI-X4: OKAY
|
||||
MSI-X5: OKAY
|
||||
MSI-X6: OKAY
|
||||
MSI-X7: OKAY
|
||||
MSI-X8: OKAY
|
||||
MSI-X9: NOT OKAY
|
||||
MSI-X10: NOT OKAY
|
||||
MSI-X11: NOT OKAY
|
||||
MSI-X12: NOT OKAY
|
||||
MSI-X13: NOT OKAY
|
||||
MSI-X14: NOT OKAY
|
||||
MSI-X15: NOT OKAY
|
||||
MSI-X16: NOT OKAY
|
||||
[...]
|
||||
MSI-X2047: NOT OKAY
|
||||
MSI-X2048: NOT OKAY
|
||||
|
||||
Read Tests
|
||||
|
||||
SET IRQ TYPE TO MSI: OKAY
|
||||
READ ( 1 bytes): OKAY
|
||||
READ ( 1024 bytes): OKAY
|
||||
READ ( 1025 bytes): OKAY
|
||||
READ (1024000 bytes): OKAY
|
||||
READ (1024001 bytes): OKAY
|
||||
|
||||
Write Tests
|
||||
|
||||
WRITE ( 1 bytes): OKAY
|
||||
WRITE ( 1024 bytes): OKAY
|
||||
WRITE ( 1025 bytes): OKAY
|
||||
WRITE (1024000 bytes): OKAY
|
||||
WRITE (1024001 bytes): OKAY
|
||||
|
||||
Copy Tests
|
||||
|
||||
COPY ( 1 bytes): OKAY
|
||||
COPY ( 1024 bytes): OKAY
|
||||
COPY ( 1025 bytes): OKAY
|
||||
COPY (1024000 bytes): OKAY
|
||||
COPY (1024001 bytes): OKAY
|
||||
# pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma
|
||||
|
281
Documentation/accel/amdxdna/amdnpu.rst
Normal file
281
Documentation/accel/amdxdna/amdnpu.rst
Normal file
@ -0,0 +1,281 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
=========
|
||||
AMD NPU
|
||||
=========
|
||||
|
||||
:Copyright: |copy| 2024 Advanced Micro Devices, Inc.
|
||||
:Author: Sonal Santan <sonal.santan@amd.com>
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
AMD NPU (Neural Processing Unit) is a multi-user AI inference accelerator
|
||||
integrated into AMD client APU. NPU enables efficient execution of Machine
|
||||
Learning applications like CNN, LLM, etc. NPU is based on
|
||||
`AMD XDNA Architecture`_. NPU is managed by **amdxdna** driver.
|
||||
|
||||
|
||||
Hardware Description
|
||||
====================
|
||||
|
||||
AMD NPU consists of the following hardware components:
|
||||
|
||||
AMD XDNA Array
|
||||
--------------
|
||||
|
||||
AMD XDNA Array comprises a 2D array of compute and memory tiles built with
|
||||
`AMD AI Engine Technology`_. Each column has 4 rows of compute tiles and 1
|
||||
row of memory tile. Each compute tile contains a VLIW processor with its own
|
||||
dedicated program and data memory. The memory tile acts as L2 memory. The 2D
|
||||
array can be partitioned at a column boundary creating a spatially isolated
|
||||
partition which can be bound to a workload context.
|
||||
|
||||
Each column also has dedicated DMA engines to move data between host DDR and
|
||||
memory tile.
|
||||
|
||||
AMD Phoenix and AMD Hawk Point client NPU have a 4x5 topology, i.e., 4 rows of
|
||||
compute tiles arranged into 5 columns. AMD Strix Point client APU has a 4x8
|
||||
topology, i.e., 4 rows of compute tiles arranged into 8 columns.
|
||||
|
||||
Shared L2 Memory
|
||||
----------------
|
||||
|
||||
The single row of memory tiles creates a pool of software-managed on-chip L2
|
||||
memory. DMA engines are used to move data between host DDR and memory tiles.
|
||||
AMD Phoenix and AMD Hawk Point NPUs have a total of 2560 KB of L2 memory.
|
||||
AMD Strix Point NPU has a total of 4096 KB of L2 memory.
|
||||
|
||||
Microcontroller
|
||||
---------------
|
||||
|
||||
A microcontroller runs NPU Firmware which is responsible for command processing,
|
||||
XDNA Array partition setup, XDNA Array configuration, workload context
|
||||
management and workload orchestration.
|
||||
|
||||
NPU Firmware uses a dedicated instance of an isolated non-privileged context
|
||||
called ERT to service each workload context. ERT is also used to execute user
|
||||
provided ``ctrlcode`` associated with the workload context.
|
||||
|
||||
NPU Firmware uses a single isolated privileged context called MERT to service
|
||||
management commands from the amdxdna driver.
|
||||
|
||||
Mailboxes
|
||||
---------
|
||||
|
||||
The microcontroller and amdxdna driver use a privileged channel for management
|
||||
tasks like setting up of contexts, telemetry, query, error handling, setting up
|
||||
user channel, etc. As mentioned before, privileged channel requests are
|
||||
serviced by MERT. The privileged channel is bound to a single mailbox.
|
||||
|
||||
The microcontroller and amdxdna driver use a dedicated user channel per
|
||||
workload context. The user channel is primarily used for submitting work to
|
||||
the NPU. As mentioned before, user channel requests are serviced by an
|
||||
instance of ERT. Each user channel is bound to its own dedicated mailbox.
|
||||
|
||||
PCIe EP
|
||||
-------
|
||||
|
||||
NPU is visible to the x86 host CPU as a PCIe device with multiple BARs and some
|
||||
MSI-X interrupt vectors. NPU uses a dedicated high bandwidth SoC level fabric
|
||||
for reading or writing into host memory. Each instance of ERT gets its own
|
||||
dedicated MSI-X interrupt. MERT gets a single instance of MSI-X interrupt.
|
||||
|
||||
The number of PCIe BARs varies depending on the specific device. Based on their
|
||||
functions, PCIe BARs can generally be categorized into the following types.
|
||||
|
||||
* PSP BAR: Expose the AMD PSP (Platform Security Processor) function
|
||||
* SMU BAR: Expose the AMD SMU (System Management Unit) function
|
||||
* SRAM BAR: Expose ring buffers for the mailbox
|
||||
* Mailbox BAR: Expose the mailbox control registers (head, tail and ISR
|
||||
registers etc.)
|
||||
* Public Register BAR: Expose public registers
|
||||
|
||||
On specific devices, the above-mentioned BAR types might be combined into a
|
||||
single physical PCIe BAR. Or a module might require two physical PCIe BARs to
|
||||
be fully functional. For example,
|
||||
|
||||
* On AMD Phoenix device, PSP, SMU, Public Register BARs are on PCIe BAR index 0.
|
||||
* On AMD Strix Point device, Mailbox and Public Register BARs are on PCIe BAR
|
||||
index 0. The PSP has some registers in PCIe BAR index 0 (Public Register BAR)
|
||||
and PCIe BAR index 4 (PSP BAR).
|
||||
|
||||
Process Isolation Hardware
|
||||
--------------------------
|
||||
|
||||
As explained before, XDNA Array can be dynamically divided into isolated
|
||||
spatial partitions, each of which may have one or more columns. The spatial
|
||||
partition is set up by programming the column isolation registers by the
|
||||
microcontroller. Each spatial partition is associated with a PASID which is
|
||||
also programmed by the microcontroller. Hence multiple spatial partitions in
|
||||
the NPU can make concurrent host access protected by PASID.
|
||||
|
||||
The NPU FW itself uses microcontroller MMU enforced isolated contexts for
|
||||
servicing user and privileged channel requests.
|
||||
|
||||
|
||||
Mixed Spatial and Temporal Scheduling
|
||||
=====================================
|
||||
|
||||
AMD XDNA architecture supports mixed spatial and temporal (time sharing)
|
||||
scheduling of the 2D array. This means that spatial partitions may be set up and
|
||||
torn down dynamically to accommodate various workloads. A *spatial* partition
|
||||
may be *exclusively* bound to one workload context while another partition may
|
||||
be *temporarily* bound to more than one workload context. The microcontroller
|
||||
updates the PASID for a temporarily shared partition to match the context that
|
||||
has been bound to the partition at any moment.
|
||||
|
||||
Resource Solver
|
||||
---------------
|
||||
|
||||
The Resource Solver component of the amdxdna driver manages the allocation
|
||||
of 2D array among various workloads. Every workload describes the number
|
||||
of columns required to run the NPU binary in its metadata. The Resource Solver
|
||||
component uses hints passed by the workload and its own heuristics to
|
||||
decide 2D array (re)partition strategy and mapping of workloads for spatial and
|
||||
temporal sharing of columns. The FW enforces the context-to-column(s) resource
|
||||
binding decisions made by the Resource Solver.
|
||||
|
||||
AMD Phoenix and AMD Hawk Point client NPU can support 6 concurrent workload
|
||||
contexts. AMD Strix Point can support 16 concurrent workload contexts.
|
||||
|
||||
|
||||
Application Binaries
|
||||
====================
|
||||
|
||||
An NPU application workload is comprised of two separate binaries which are
|
||||
generated by the NPU compiler.
|
||||
|
||||
1. AMD XDNA Array overlay, which is used to configure an NPU spatial partition.
|
||||
The overlay contains instructions for setting up the stream switch
|
||||
configuration and ELF for the compute tiles. The overlay is loaded on the
|
||||
spatial partition bound to the workload by the associated ERT instance.
|
||||
Refer to the
|
||||
`Versal Adaptive SoC AIE-ML Architecture Manual (AM020)`_ for more details.
|
||||
|
||||
2. ``ctrlcode``, used for orchestrating the overlay loaded on the spatial
|
||||
partition. ``ctrlcode`` is executed by the ERT running in protected mode on
|
||||
the microcontroller in the context of the workload. ``ctrlcode`` is made up
|
||||
of a sequence of opcodes named ``XAie_TxnOpcode``. Refer to the
|
||||
`AI Engine Run Time`_ for more details.
|
||||
|
||||
|
||||
Special Host Buffers
|
||||
====================
|
||||
|
||||
Per-context Instruction Buffer
|
||||
------------------------------
|
||||
|
||||
Every workload context uses a host resident 64 MB buffer which is memory
|
||||
mapped into the ERT instance created to service the workload. The ``ctrlcode``
|
||||
used by the workload is copied into this special memory. This buffer is
|
||||
protected by PASID like all other input/output buffers used by that workload.
|
||||
Instruction buffer is also mapped into the user space of the workload.
|
||||
|
||||
Global Privileged Buffer
|
||||
------------------------
|
||||
|
||||
In addition, the driver also allocates a single buffer for maintenance tasks
|
||||
like recording errors from MERT. This global buffer uses the global IOMMU
|
||||
domain and is only accessible by MERT.
|
||||
|
||||
|
||||
High-level Use Flow
|
||||
===================
|
||||
|
||||
Here are the steps to run a workload on AMD NPU:
|
||||
|
||||
1. Compile the workload into an overlay and a ``ctrlcode`` binary.
|
||||
2. Userspace opens a context in the driver and provides the overlay.
|
||||
3. The driver checks with the Resource Solver for provisioning a set of columns
|
||||
for the workload.
|
||||
4. The driver then asks MERT to create a context on the device with the desired
|
||||
columns.
|
||||
5. MERT then creates an instance of ERT. MERT also maps the Instruction Buffer
|
||||
into ERT memory.
|
||||
6. The userspace then copies the ``ctrlcode`` to the Instruction Buffer.
|
||||
7. Userspace then creates a command buffer with pointers to input, output, and
|
||||
instruction buffer; it then submits the command buffer to the driver and goes
|
||||
to sleep waiting for completion.
|
||||
8. The driver sends the command over the Mailbox to ERT.
|
||||
9. ERT *executes* the ``ctrlcode`` in the instruction buffer.
|
||||
10. Execution of the ``ctrlcode`` kicks off DMAs to and from the host DDR while
|
||||
AMD XDNA Array is running.
|
||||
11. When ERT reaches end of ``ctrlcode``, it raises an MSI-X to send completion
|
||||
signal to the driver which then wakes up the waiting workload.
|
||||
|
||||
|
||||
Boot Flow
|
||||
=========
|
||||
|
||||
amdxdna driver uses PSP to securely load signed NPU FW and kick off the boot
|
||||
of the NPU microcontroller. amdxdna driver then waits for the alive signal in
|
||||
a special location on BAR 0. The NPU is switched off during SoC suspend and
|
||||
turned on after resume where the NPU FW is reloaded, and the handshake is
|
||||
performed again.
|
||||
|
||||
|
||||
Userspace components
|
||||
====================
|
||||
|
||||
Compiler
|
||||
--------
|
||||
|
||||
Peano is an LLVM based open-source compiler for AMD XDNA Array compute tile
|
||||
available at:
|
||||
https://github.com/Xilinx/llvm-aie
|
||||
|
||||
The open-source IREE compiler supports graph compilation of ML models for AMD
|
||||
NPU and uses Peano underneath. It is available at:
|
||||
https://github.com/nod-ai/iree-amd-aie
|
||||
|
||||
Usermode Driver (UMD)
|
||||
---------------------
|
||||
|
||||
The open-source XRT runtime stack interfaces with amdxdna kernel driver. XRT
|
||||
can be found at:
|
||||
https://github.com/Xilinx/XRT
|
||||
|
||||
The open-source XRT shim for NPU can be found at:
|
||||
https://github.com/amd/xdna-driver
|
||||
|
||||
|
||||
DMA Operation
|
||||
=============
|
||||
|
||||
DMA operation instructions are encoded in the ``ctrlcode`` as
|
||||
``XAIE_IO_BLOCKWRITE`` opcode. When ERT executes ``XAIE_IO_BLOCKWRITE``, DMA
|
||||
operations between host DDR and L2 memory are effected.
|
||||
|
||||
|
||||
Error Handling
|
||||
==============
|
||||
|
||||
When MERT detects an error in AMD XDNA Array, it pauses execution for that
|
||||
workload context and sends an asynchronous message to the driver over the
|
||||
privileged channel. The driver then sends a buffer pointer to MERT to capture
|
||||
the register states for the partition bound to faulting workload context. The
|
||||
driver then decodes the error by reading the contents of the buffer pointer.
|
||||
|
||||
|
||||
Telemetry
|
||||
=========
|
||||
|
||||
MERT can report various kinds of telemetry information like the following:
|
||||
|
||||
* L1 interrupt counter
|
||||
* DMA counter
|
||||
* Deep Sleep counter
|
||||
* etc.
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
- `AMD XDNA Architecture <https://www.amd.com/en/technologies/xdna.html>`_
|
||||
- `AMD AI Engine Technology <https://www.xilinx.com/products/technology/ai-engine.html>`_
|
||||
- `Peano <https://github.com/Xilinx/llvm-aie>`_
|
||||
- `Versal Adaptive SoC AIE-ML Architecture Manual (AM020) <https://docs.amd.com/r/en-US/am020-versal-aie-ml>`_
|
||||
- `AI Engine Run Time <https://github.com/Xilinx/aie-rt/tree/release/main_aig>`_
|
11
Documentation/accel/amdxdna/index.rst
Normal file
11
Documentation/accel/amdxdna/index.rst
Normal file
@ -0,0 +1,11 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
=====================================
|
||||
accel/amdxdna NPU driver
|
||||
=====================================
|
||||
|
||||
The accel/amdxdna driver supports the AMD NPU (Neural Processing Unit).
|
||||
|
||||
.. toctree::
|
||||
|
||||
amdnpu
|
@ -8,6 +8,7 @@ Compute Accelerators
|
||||
:maxdepth: 1
|
||||
|
||||
introduction
|
||||
amdxdna/index
|
||||
qaic/index
|
||||
|
||||
.. only:: subproject and html
|
||||
|
@ -100,29 +100,29 @@ Get delays, since system boot, for pid 10::
|
||||
# ./getdelays -d -p 10
|
||||
(output similar to next case)
|
||||
|
||||
Get sum of delays, since system boot, for all pids with tgid 5::
|
||||
Get sum and peak of delays, since system boot, for all pids with tgid 242::
|
||||
|
||||
# ./getdelays -d -t 5
|
||||
bash-4.4# ./getdelays -d -t 242
|
||||
print delayacct stats ON
|
||||
TGID 5
|
||||
TGID 242
|
||||
|
||||
|
||||
CPU count real total virtual total delay total delay average
|
||||
8 7000000 6872122 3382277 0.423ms
|
||||
IO count delay total delay average
|
||||
0 0 0.000ms
|
||||
SWAP count delay total delay average
|
||||
0 0 0.000ms
|
||||
RECLAIM count delay total delay average
|
||||
0 0 0.000ms
|
||||
THRASHING count delay total delay average
|
||||
0 0 0.000ms
|
||||
COMPACT count delay total delay average
|
||||
0 0 0.000ms
|
||||
WPCOPY count delay total delay average
|
||||
0 0 0.000ms
|
||||
IRQ count delay total delay average
|
||||
0 0 0.000ms
|
||||
CPU count real total virtual total delay total delay average delay max delay min
|
||||
39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms
|
||||
IO count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
SWAP count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
RECLAIM count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
THRASHING count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
COMPACT count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
WPCOPY count delay total delay average delay max delay min
|
||||
156 11215873 0.072ms 0.207403ms 0.033913ms
|
||||
IRQ count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
|
||||
Get IO accounting for pid 1, it works only with -p::
|
||||
|
||||
|
@ -47,7 +47,7 @@ should not change the relative position of each field within the struct.
|
||||
1) Common and basic accounting fields::
|
||||
|
||||
/* The version number of this struct. This field is always set to
|
||||
* TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
|
||||
* TASKSTATS_VERSION, which is defined in <linux/taskstats.h>.
|
||||
* Each time the struct is changed, the value should be incremented.
|
||||
*/
|
||||
__u16 version;
|
||||
|
@ -356,5 +356,5 @@ instructions at 'Documentation/admin-guide/reporting-issues.rst'.
|
||||
|
||||
Hints on understanding kernel bug reports are in
|
||||
'Documentation/admin-guide/bug-hunting.rst'. More on debugging the kernel
|
||||
with gdb is in 'Documentation/dev-tools/gdb-kernel-debugging.rst' and
|
||||
'Documentation/dev-tools/kgdb.rst'.
|
||||
with gdb is in 'Documentation/process/debugging/gdb-kernel-debugging.rst' and
|
||||
'Documentation/process/debugging/kgdb.rst'.
|
||||
|
@ -121,14 +121,14 @@ compression algorithm to use external pre-trained dictionary, pass full
|
||||
path to the `dict` along with other parameters::
|
||||
|
||||
#pass path to pre-trained zstd dictionary
|
||||
echo "algo=zstd dict=/etc/dictioary" > /sys/block/zram0/algorithm_params
|
||||
echo "algo=zstd dict=/etc/dictionary" > /sys/block/zram0/algorithm_params
|
||||
|
||||
#same, but using algorithm priority
|
||||
echo "priority=1 dict=/etc/dictioary" > \
|
||||
echo "priority=1 dict=/etc/dictionary" > \
|
||||
/sys/block/zram0/algorithm_params
|
||||
|
||||
#pass path to pre-trained zstd dictionary and compression level
|
||||
echo "algo=zstd level=8 dict=/etc/dictioary" > \
|
||||
echo "algo=zstd level=8 dict=/etc/dictionary" > \
|
||||
/sys/block/zram0/algorithm_params
|
||||
|
||||
Parameters are algorithm specific: not all algorithms support pre-trained
|
||||
|
@ -21,8 +21,8 @@ override the baud rate to 115200, etc.
|
||||
By default, the braille device will just show the last kernel message (console
|
||||
mode). To review previous messages, press the Insert key to switch to the VT
|
||||
review mode. In review mode, the arrow keys permit to browse in the VT content,
|
||||
:kbd:`PAGE-UP`/:kbd:`PAGE-DOWN` keys go at the top/bottom of the screen, and
|
||||
the :kbd:`HOME` key goes back
|
||||
`PAGE-UP`/`PAGE-DOWN` keys go at the top/bottom of the screen, and
|
||||
the `HOME` key goes back
|
||||
to the cursor, hence providing very basic screen reviewing facility.
|
||||
|
||||
Sound feedback can be obtained by adding the ``braille_console.sound=1`` kernel
|
||||
|
@ -368,12 +368,3 @@ processed by ``klogd``::
|
||||
Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128]
|
||||
Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
::
|
||||
|
||||
Dr. G.W. Wettstein Oncology Research Div. Computing Facility
|
||||
Roger Maris Cancer Center INTERNET: greg@wind.rmcc.com
|
||||
820 4th St. N.
|
||||
Fargo, ND 58122
|
||||
Phone: 701-234-7556
|
||||
|
@ -64,13 +64,14 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
|
||||
5-6. Device
|
||||
5-7. RDMA
|
||||
5-7-1. RDMA Interface Files
|
||||
5-8. HugeTLB
|
||||
5.8-1. HugeTLB Interface Files
|
||||
5-9. Misc
|
||||
5.9-1 Miscellaneous cgroup Interface Files
|
||||
5.9-2 Migration and Ownership
|
||||
5-10. Others
|
||||
5-10-1. perf_event
|
||||
5-8. DMEM
|
||||
5-9. HugeTLB
|
||||
5.9-1. HugeTLB Interface Files
|
||||
5-10. Misc
|
||||
5.10-1 Miscellaneous cgroup Interface Files
|
||||
5.10-2 Migration and Ownership
|
||||
5-11. Others
|
||||
5-11-1. perf_event
|
||||
5-N. Non-normative information
|
||||
5-N-1. CPU controller root cgroup process behaviour
|
||||
5-N-2. IO controller root cgroup process behaviour
|
||||
@ -2626,6 +2627,49 @@ RDMA Interface Files
|
||||
mlx4_0 hca_handle=1 hca_object=20
|
||||
ocrdma1 hca_handle=1 hca_object=23
|
||||
|
||||
DMEM
|
||||
----
|
||||
|
||||
The "dmem" controller regulates the distribution and accounting of
|
||||
device memory regions. Because each memory region may have its own page size,
|
||||
which does not have to be equal to the system page size, the units are always bytes.
|
||||
|
||||
DMEM Interface Files
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
dmem.max, dmem.min, dmem.low
|
||||
A read-write nested-keyed file that exists for all the cgroups
|
||||
except root that describes current configured resource limit
|
||||
for a region.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 1073741824
|
||||
drm/0000:03:00.0/stolen max
|
||||
|
||||
The semantics are the same as for the memory cgroup controller, and are
|
||||
calculated in the same way.
|
||||
|
||||
dmem.capacity
|
||||
A read-only file that describes maximum region capacity.
|
||||
It only exists on the root cgroup. Not all memory can be
|
||||
allocated by cgroups, as the kernel reserves some for
|
||||
internal use.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 8514437120
|
||||
drm/0000:03:00.0/stolen 67108864
|
||||
|
||||
dmem.current
|
||||
A read-only file that describes current resource usage.
|
||||
It exists for all the cgroups except root.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 12550144
|
||||
drm/0000:03:00.0/stolen 8650752
|
||||
|
||||
HugeTLB
|
||||
-------
|
||||
|
||||
|
@ -7,6 +7,9 @@ added to the kernel over time. There is, as yet, little overall order or
|
||||
organization here — this material was not written to be a single, coherent
|
||||
document! With luck things will improve quickly over time.
|
||||
|
||||
General guides to kernel administration
|
||||
---------------------------------------
|
||||
|
||||
This initial section contains overall information, including the README
|
||||
file describing the kernel as a whole, documentation on kernel parameters,
|
||||
etc.
|
||||
@ -15,19 +18,44 @@ etc.
|
||||
:maxdepth: 1
|
||||
|
||||
README
|
||||
kernel-parameters
|
||||
devices
|
||||
sysctl/index
|
||||
|
||||
abi
|
||||
features
|
||||
|
||||
This section describes CPU vulnerabilities and their mitigations.
|
||||
A big part of the kernel's administrative interface is the /proc and sysfs
|
||||
virtual filesystems; these documents describe how to interact with them.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
sysfs-rules
|
||||
sysctl/index
|
||||
cputopology
|
||||
abi
|
||||
|
||||
Security-related documentation:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
hw-vuln/index
|
||||
LSM/index
|
||||
perf-security
|
||||
|
||||
Booting the kernel
|
||||
------------------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
bootconfig
|
||||
kernel-parameters
|
||||
efi-stub
|
||||
initrd
|
||||
|
||||
|
||||
Tracking down and identifying problems
|
||||
--------------------------------------
|
||||
|
||||
Here is a set of documents aimed at users who are trying to track down
|
||||
problems and bugs in particular.
|
||||
@ -48,15 +76,97 @@ problems and bugs in particular.
|
||||
kdump/index
|
||||
perf/index
|
||||
pstore-blk
|
||||
clearing-warn-once
|
||||
kernel-per-CPU-kthreads
|
||||
lockup-watchdogs
|
||||
RAS/index
|
||||
sysrq
|
||||
|
||||
This is the beginning of a section with information of interest to
|
||||
application developers. Documents covering various aspects of the kernel
|
||||
ABI will be found here.
|
||||
|
||||
Core-kernel subsystems
|
||||
----------------------
|
||||
|
||||
These documents describe core-kernel administration interfaces that are
|
||||
likely to be of interest on almost any system.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
sysfs-rules
|
||||
cgroup-v2
|
||||
cgroup-v1/index
|
||||
cpu-load
|
||||
mm/index
|
||||
module-signing
|
||||
namespaces/index
|
||||
numastat
|
||||
pm/index
|
||||
syscall-user-dispatch
|
||||
|
||||
Support for non-native binary formats. Note that some of these
|
||||
documents are ... old ...
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
binfmt-misc
|
||||
java
|
||||
mono
|
||||
|
||||
|
||||
Block-layer and filesystem administration
|
||||
-----------------------------------------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
bcache
|
||||
binderfs
|
||||
blockdev/index
|
||||
cifs/index
|
||||
device-mapper/index
|
||||
ext4
|
||||
filesystem-monitoring
|
||||
nfs/index
|
||||
iostats
|
||||
jfs
|
||||
md
|
||||
ufs
|
||||
xfs
|
||||
|
||||
Device-specific guides
|
||||
----------------------
|
||||
|
||||
How to configure your hardware within your Linux system.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
acpi/index
|
||||
aoe/index
|
||||
auxdisplay/index
|
||||
braille-console
|
||||
btmrvl
|
||||
dell_rbu
|
||||
edid
|
||||
gpio/index
|
||||
hw_random
|
||||
laptops/index
|
||||
lcd-panel-cgram
|
||||
media/index
|
||||
nvme-multipath
|
||||
parport
|
||||
pnp
|
||||
rapidio
|
||||
rtc
|
||||
serial-console
|
||||
svga
|
||||
thermal/index
|
||||
thunderbolt
|
||||
vga-softcursor
|
||||
video-output
|
||||
|
||||
Workload analysis
|
||||
-----------------
|
||||
|
||||
This is the beginning of a section with information of interest to
|
||||
application developers and system integrators doing analysis of the
|
||||
@ -69,73 +179,17 @@ subsystems expectations will be found here.
|
||||
|
||||
workload-tracing
|
||||
|
||||
The rest of this manual consists of various unordered guides on how to
|
||||
configure specific aspects of kernel behavior to your liking.
|
||||
Everything else
|
||||
---------------
|
||||
|
||||
A few hard-to-categorize and generally obsolete documents.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
acpi/index
|
||||
aoe/index
|
||||
auxdisplay/index
|
||||
bcache
|
||||
binderfs
|
||||
binfmt-misc
|
||||
blockdev/index
|
||||
bootconfig
|
||||
braille-console
|
||||
btmrvl
|
||||
cgroup-v1/index
|
||||
cgroup-v2
|
||||
cifs/index
|
||||
clearing-warn-once
|
||||
cpu-load
|
||||
cputopology
|
||||
dell_rbu
|
||||
device-mapper/index
|
||||
edid
|
||||
efi-stub
|
||||
ext4
|
||||
filesystem-monitoring
|
||||
nfs/index
|
||||
gpio/index
|
||||
highuid
|
||||
hw_random
|
||||
initrd
|
||||
iostats
|
||||
java
|
||||
jfs
|
||||
kernel-per-CPU-kthreads
|
||||
laptops/index
|
||||
lcd-panel-cgram
|
||||
ldm
|
||||
lockup-watchdogs
|
||||
LSM/index
|
||||
md
|
||||
media/index
|
||||
mm/index
|
||||
module-signing
|
||||
mono
|
||||
namespaces/index
|
||||
numastat
|
||||
parport
|
||||
perf-security
|
||||
pm/index
|
||||
pnp
|
||||
rapidio
|
||||
RAS/index
|
||||
rtc
|
||||
serial-console
|
||||
svga
|
||||
syscall-user-dispatch
|
||||
sysrq
|
||||
thermal/index
|
||||
thunderbolt
|
||||
ufs
|
||||
unicode
|
||||
vga-softcursor
|
||||
video-output
|
||||
xfs
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
@ -194,8 +194,6 @@ is applicable::
|
||||
WDT Watchdog support is enabled.
|
||||
X86-32 X86-32, aka i386 architecture is enabled.
|
||||
X86-64 X86-64 architecture is enabled.
|
||||
More X86-64 boot options can be found in
|
||||
Documentation/arch/x86/x86_64/boot-options.rst.
|
||||
X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
|
||||
X86_UV SGI UV support is enabled.
|
||||
XEN Xen support is enabled
|
||||
@ -213,7 +211,6 @@ Do not modify the syntax of boot loader parameters without extreme
|
||||
need or coordination with <Documentation/arch/x86/boot.rst>.
|
||||
|
||||
There are also arch-specific kernel-parameters not documented here.
|
||||
See for example <Documentation/arch/x86/x86_64/boot-options.rst>.
|
||||
|
||||
Note that ALL kernel parameters listed below are CASE SENSITIVE, and that
|
||||
a trailing = on the name of any parameter states that that parameter will
|
||||
|
@ -21,6 +21,10 @@
|
||||
strictly ACPI specification compliant.
|
||||
rsdt -- prefer RSDT over (default) XSDT
|
||||
copy_dsdt -- copy DSDT to memory
|
||||
nocmcff -- Disable firmware first mode for corrected
|
||||
errors. This disables parsing the HEST CMC error
|
||||
source to check if firmware has set the FF flag. This
|
||||
may result in duplicate corrected error reports.
|
||||
nospcr -- disable console in ACPI SPCR table as
|
||||
default _serial_ console on ARM64
|
||||
For ARM64, ONLY "acpi=off", "acpi=on", "acpi=force" or
|
||||
@ -405,6 +409,8 @@
|
||||
not play well with APC CPU idle - disable it if you have
|
||||
APC and your system crashes randomly.
|
||||
|
||||
apic [APIC,X86-64] Use IO-APIC. Default.
|
||||
|
||||
apic= [APIC,X86,EARLY] Advanced Programmable Interrupt Controller
|
||||
Change the output verbosity while booting
|
||||
Format: { quiet (default) | verbose | debug }
|
||||
@ -424,6 +430,10 @@
|
||||
useful so that a dump capture kernel won't be
|
||||
shot down by NMI
|
||||
|
||||
apicpmtimer Do APIC timer calibration using the pmtimer. Implies
|
||||
apicmaintimer. Useful when your PIT timer is totally
|
||||
broken.
|
||||
|
||||
autoconf= [IPV6]
|
||||
See Documentation/networking/ipv6.rst.
|
||||
|
||||
@ -1726,6 +1736,8 @@
|
||||
|
||||
off: Disable GDS mitigation.
|
||||
|
||||
gbpages [X86] Use GB pages for kernel direct mappings.
|
||||
|
||||
gcov_persist= [GCOV] When non-zero (default), profiling data for
|
||||
kernel modules is saved and remains accessible via
|
||||
debugfs, even when the module is unloaded/reloaded.
|
||||
@ -2008,12 +2020,21 @@
|
||||
|
||||
idle= [X86,EARLY]
|
||||
Format: idle=poll, idle=halt, idle=nomwait
|
||||
Poll forces a polling idle loop that can slightly
|
||||
improve the performance of waking up a idle CPU, but
|
||||
will use a lot of power and make the system run hot.
|
||||
Not recommended.
|
||||
|
||||
idle=poll: Don't do power saving in the idle loop
|
||||
using HLT, but poll for rescheduling event. This will
|
||||
make the CPUs eat a lot more power, but may be useful
|
||||
to get slightly better performance in multiprocessor
|
||||
benchmarks. It also makes some profiling using
|
||||
performance counters more accurate. Please note that
|
||||
on systems with MONITOR/MWAIT support (like Intel
|
||||
EM64T CPUs) this option has no performance advantage
|
||||
over the normal idle loop. It may also interact badly
|
||||
with hyperthreading.
|
||||
|
||||
idle=halt: Halt is forced to be used for CPU idle.
|
||||
In such case C2/C3 won't be used again.
|
||||
|
||||
idle=nomwait: Disable mwait for CPU C-states
|
||||
|
||||
idxd.sva= [HW]
|
||||
@ -2311,20 +2332,73 @@
|
||||
relaxed
|
||||
|
||||
iommu= [X86,EARLY]
|
||||
|
||||
off
|
||||
Don't initialize and use any kind of IOMMU.
|
||||
|
||||
force
|
||||
Force the use of the hardware IOMMU even when
|
||||
it is not actually needed (e.g. because < 3 GB
|
||||
memory).
|
||||
|
||||
noforce
|
||||
Don't force hardware IOMMU usage when it is not
|
||||
needed. (default).
|
||||
|
||||
biomerge
|
||||
panic
|
||||
nopanic
|
||||
merge
|
||||
nomerge
|
||||
|
||||
soft
|
||||
pt [X86]
|
||||
nopt [X86]
|
||||
nobypass [PPC/POWERNV]
|
||||
Use software bounce buffering (SWIOTLB) (default for
|
||||
Intel machines). This can be used to prevent the usage
|
||||
of an available hardware IOMMU.
|
||||
|
||||
[X86]
|
||||
pt
|
||||
[X86]
|
||||
nopt
|
||||
[PPC/POWERNV]
|
||||
nobypass
|
||||
Disable IOMMU bypass, using IOMMU for PCI devices.
|
||||
|
||||
[X86]
|
||||
AMD Gart HW IOMMU-specific options:
|
||||
|
||||
<size>
|
||||
Set the size of the remapping area in bytes.
|
||||
|
||||
allowed
|
||||
Overwrite iommu off workarounds for specific chipsets
|
||||
|
||||
fullflush
|
||||
Flush IOMMU on each allocation (default).
|
||||
|
||||
nofullflush
|
||||
Don't use IOMMU fullflush.
|
||||
|
||||
memaper[=<order>]
|
||||
Allocate an own aperture over RAM with size
|
||||
32MB<<order. (default: order=1, i.e. 64MB)
|
||||
|
||||
merge
|
||||
Do scatter-gather (SG) merging. Implies "force"
|
||||
(experimental).
|
||||
|
||||
nomerge
|
||||
Don't do scatter-gather (SG) merging.
|
||||
|
||||
noaperture
|
||||
Ask the IOMMU not to touch the aperture for AGP.
|
||||
|
||||
noagp
|
||||
Don't initialize the AGP driver and use full aperture.
|
||||
|
||||
panic
|
||||
Always panic when IOMMU overflows.
|
||||
|
||||
iommu.forcedac= [ARM64,X86,EARLY] Control IOVA allocation for PCI devices.
|
||||
Format: { "0" | "1" }
|
||||
0 - Try to allocate a 32-bit DMA address first, before
|
||||
@ -2432,7 +2506,9 @@
|
||||
specified in the flag list (default: domain):
|
||||
|
||||
nohz
|
||||
Disable the tick when a single task runs.
|
||||
Disable the tick when a single task runs as well as
|
||||
disabling other kernel noises like having RCU callbacks
|
||||
offloaded. This is equivalent to the nohz_full parameter.
|
||||
|
||||
A residual 1Hz tick is offloaded to workqueues, which you
|
||||
need to affine to housekeeping through the global
|
||||
@ -2695,7 +2771,7 @@
|
||||
VMs, i.e. on the 0=>1 and 1=>0 transitions of the
|
||||
number of VMs.
|
||||
|
||||
Enabling virtualization at module lode avoids potential
|
||||
Enabling virtualization at module load avoids potential
|
||||
latency for creation of the 0=>1 VM, as KVM serializes
|
||||
virtualization enabling across all online CPUs. The
|
||||
"cost" of enabling virtualization when KVM is loaded,
|
||||
@ -2748,17 +2824,21 @@
|
||||
nvhe: Standard nVHE-based mode, without support for
|
||||
protected guests.
|
||||
|
||||
protected: nVHE-based mode with support for guests whose
|
||||
state is kept private from the host.
|
||||
protected: Mode with support for guests whose state is
|
||||
kept private from the host, using VHE or
|
||||
nVHE depending on HW support.
|
||||
|
||||
nested: VHE-based mode with support for nested
|
||||
virtualization. Requires at least ARMv8.3
|
||||
hardware.
|
||||
virtualization. Requires at least ARMv8.4
|
||||
hardware (with FEAT_NV2).
|
||||
|
||||
Defaults to VHE/nVHE based on hardware support. Setting
|
||||
mode to "protected" will disable kexec and hibernation
|
||||
for the host. "nested" is experimental and should be
|
||||
used with extreme caution.
|
||||
for the host. To force nVHE on VHE hardware, add
|
||||
"arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the
|
||||
command-line.
|
||||
"nested" is experimental and should be used with
|
||||
extreme caution.
|
||||
|
||||
kvm-arm.vgic_v3_group0_trap=
|
||||
[KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
|
||||
@ -3259,9 +3339,77 @@
|
||||
devices can be requested on-demand with the
|
||||
/dev/loop-control interface.
|
||||
|
||||
mce [X86-32] Machine Check Exception
|
||||
mce= [X86-{32,64}]
|
||||
|
||||
Please see Documentation/arch/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
|
||||
|
||||
off
|
||||
disable machine check
|
||||
|
||||
no_cmci
|
||||
disable CMCI(Corrected Machine Check Interrupt) that
|
||||
Intel processor supports. Usually this disablement is
|
||||
not recommended, but it might be handy if your
|
||||
hardware is misbehaving.
|
||||
|
||||
Note that you'll get more problems without CMCI than
|
||||
with due to the shared banks, i.e. you might get
|
||||
duplicated error logs.
|
||||
|
||||
dont_log_ce
|
||||
don't make logs for corrected errors. All events
|
||||
reported as corrected are silently cleared by OS. This
|
||||
option will be useful if you have no interest in any
|
||||
of corrected errors.
|
||||
|
||||
ignore_ce
|
||||
disable features for corrected errors, e.g.
|
||||
polling timer and CMCI. All events reported as
|
||||
corrected are not cleared by OS and remain in its
|
||||
error banks.
|
||||
|
||||
Usually this disablement is not recommended, however
|
||||
if there is an agent checking/clearing corrected
|
||||
errors (e.g. BIOS or hardware monitoring
|
||||
applications), conflicting with OS's error handling,
|
||||
and you cannot deactivate the agent, then this option
|
||||
will be a help.
|
||||
|
||||
no_lmce
|
||||
do not opt-in to Local MCE delivery. Use legacy method
|
||||
to broadcast MCEs.
|
||||
|
||||
bootlog
|
||||
enable logging of machine checks left over from
|
||||
booting. Disabled by default on AMD Fam10h and older
|
||||
because some BIOS leave bogus ones.
|
||||
|
||||
If your BIOS doesn't do that it's a good idea to
|
||||
enable though to make sure you log even machine check
|
||||
events that result in a reboot. On Intel systems it is
|
||||
enabled by default.
|
||||
|
||||
nobootlog
|
||||
disable boot machine check logging.
|
||||
|
||||
monarchtimeout (number)
|
||||
sets the time in us to wait for other CPUs on machine
|
||||
checks. 0 to disable.
|
||||
|
||||
bios_cmci_threshold
|
||||
don't overwrite the bios-set CMCI threshold. This boot
|
||||
option prevents Linux from overwriting the CMCI
|
||||
threshold set by the bios. Without this option, Linux
|
||||
always sets the CMCI threshold to 1. Enabling this may
|
||||
make memory predictive failure analysis less effective
|
||||
if the bios sets thresholds for memory errors since we
|
||||
will not see details for all errors.
|
||||
|
||||
recovery
|
||||
force-enable recoverable machine check code paths
|
||||
|
||||
Everything else is in sysfs now.
|
||||
|
||||
mce=option [X86-64] See Documentation/arch/x86/x86_64/boot-options.rst
|
||||
|
||||
md= [HW] RAID subsystems devices and level
|
||||
See Documentation/admin-guide/md.rst.
|
||||
@ -3351,8 +3499,8 @@
|
||||
[KNL] Set the initial state for the memory hotplug
|
||||
onlining policy. If not specified, the default value is
|
||||
set according to the
|
||||
CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
|
||||
option.
|
||||
CONFIG_MHP_DEFAULT_ONLINE_TYPE kernel config
|
||||
options.
|
||||
See Documentation/admin-guide/mm/memory-hotplug.rst.
|
||||
|
||||
memmap=exactmap [KNL,X86,EARLY] Enable setting of an exact
|
||||
@ -3887,6 +4035,8 @@
|
||||
noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any
|
||||
IOAPICs that may be present in the system.
|
||||
|
||||
noapictimer [APIC,X86] Don't set up the APIC timer
|
||||
|
||||
noautogroup Disable scheduler automatic task group creation.
|
||||
|
||||
nocache [ARM,EARLY]
|
||||
@ -3934,6 +4084,8 @@
|
||||
register save and restore. The kernel will only save
|
||||
legacy floating-point registers on task switch.
|
||||
|
||||
nogbpages [X86] Do not use GB pages for kernel direct mappings.
|
||||
|
||||
no_hash_pointers
|
||||
[KNL,EARLY]
|
||||
Force pointers printed to the console or buffers to be
|
||||
@ -3960,6 +4112,8 @@
|
||||
the impact of the sleep instructions. This is also
|
||||
useful when using JTAG debugger.
|
||||
|
||||
nohpet [X86] Don't use the HPET timer.
|
||||
|
||||
nohugeiomap [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge I/O mappings.
|
||||
|
||||
nohugevmalloc [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge vmalloc mappings.
|
||||
@ -4111,8 +4265,10 @@
|
||||
|
||||
nosync [HW,M68K] Disables sync negotiation for all devices.
|
||||
|
||||
no_timer_check [X86,APIC] Disables the code which tests for
|
||||
broken timer IRQ sources.
|
||||
no_timer_check [X86,APIC] Disables the code which tests for broken
|
||||
timer IRQ sources, i.e., the IO-APIC timer. This can
|
||||
work around problems with incorrect timer
|
||||
initialization on some boards.
|
||||
|
||||
no_uaccess_flush
|
||||
[PPC,EARLY] Don't flush the L1-D cache after accessing user data.
|
||||
@ -4192,6 +4348,11 @@
|
||||
If given as an integer followed by 'U', it will
|
||||
divide each physical node into N emulated nodes.
|
||||
|
||||
numa=noacpi [X86] Don't parse the SRAT table for NUMA setup
|
||||
|
||||
numa=nohmat [X86] Don't parse the HMAT table for NUMA setup, or
|
||||
soft-reserved memory partitioning.
|
||||
|
||||
numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
|
||||
NUMA balancing.
|
||||
Allowed values are enable and disable
|
||||
@ -4673,7 +4834,7 @@
|
||||
'1' – force enabled
|
||||
'x' – unchanged
|
||||
For example,
|
||||
pci=config_acs=10x
|
||||
pci=config_acs=10x@pci:0:0
|
||||
would configure all devices that support
|
||||
ACS to enable P2P Request Redirect, disable
|
||||
Translation Blocking, and leave Source
|
||||
@ -5367,7 +5528,42 @@
|
||||
|
||||
rcutorture.gp_cond= [KNL]
|
||||
Use conditional/asynchronous update-side
|
||||
primitives, if available.
|
||||
normal-grace-period primitives, if available.
|
||||
|
||||
rcutorture.gp_cond_exp= [KNL]
|
||||
Use conditional/asynchronous update-side
|
||||
expedited-grace-period primitives, if available.
|
||||
|
||||
rcutorture.gp_cond_full= [KNL]
|
||||
Use conditional/asynchronous update-side
|
||||
normal-grace-period primitives that also take
|
||||
concurrent expedited grace periods into account,
|
||||
if available.
|
||||
|
||||
rcutorture.gp_cond_exp_full= [KNL]
|
||||
Use conditional/asynchronous update-side
|
||||
expedited-grace-period primitives that also take
|
||||
concurrent normal grace periods into account,
|
||||
if available.
|
||||
|
||||
rcutorture.gp_cond_wi= [KNL]
|
||||
Nominal wait interval for normal conditional
|
||||
grace periods (specified by rcutorture's
|
||||
gp_cond and gp_cond_full module parameters),
|
||||
in microseconds. The actual wait interval will
|
||||
be randomly selected to nanosecond granularity up
|
||||
to this wait interval. Defaults to 16 jiffies,
|
||||
for example, 16,000 microseconds on a system
|
||||
with HZ=1000.
|
||||
|
||||
rcutorture.gp_cond_wi_exp= [KNL]
|
||||
Nominal wait interval for expedited conditional
|
||||
grace periods (specified by rcutorture's
|
||||
gp_cond_exp and gp_cond_exp_full module
|
||||
parameters), in microseconds. The actual wait
|
||||
interval will be randomly selected to nanosecond
|
||||
granularity up to this wait interval. Defaults to
|
||||
128 microseconds.
|
||||
|
||||
rcutorture.gp_exp= [KNL]
|
||||
Use expedited update-side primitives, if available.
|
||||
@ -5376,6 +5572,43 @@
|
||||
Use normal (non-expedited) asynchronous
|
||||
update-side primitives, if available.
|
||||
|
||||
rcutorture.gp_poll= [KNL]
|
||||
Use polled update-side normal-grace-period
|
||||
primitives, if available.
|
||||
|
||||
rcutorture.gp_poll_exp= [KNL]
|
||||
Use polled update-side expedited-grace-period
|
||||
primitives, if available.
|
||||
|
||||
rcutorture.gp_poll_full= [KNL]
|
||||
Use polled update-side normal-grace-period
|
||||
primitives that also take concurrent expedited
|
||||
grace periods into account, if available.
|
||||
|
||||
rcutorture.gp_poll_exp_full= [KNL]
|
||||
Use polled update-side expedited-grace-period
|
||||
primitives that also take concurrent normal
|
||||
grace periods into account, if available.
|
||||
|
||||
rcutorture.gp_poll_wi= [KNL]
|
||||
Nominal wait interval for normal polled
|
||||
grace periods (specified by rcutorture's
|
||||
gp_poll and gp_poll_full module parameters),
|
||||
in microseconds. The actual wait interval will
|
||||
be randomly selected to nanosecond granularity up
|
||||
to this wait interval. Defaults to 16 jiffies,
|
||||
for example, 16,000 microseconds on a system
|
||||
with HZ=1000.
|
||||
|
||||
rcutorture.gp_poll_wi_exp= [KNL]
|
||||
Nominal wait interval for expedited polled
|
||||
grace periods (specified by rcutorture's
|
||||
gp_poll_exp and gp_poll_exp_full module
|
||||
parameters), in microseconds. The actual wait
|
||||
interval will be randomly selected to nanosecond
|
||||
granularity up to this wait interval. Defaults to
|
||||
128 microseconds.
|
||||
|
||||
rcutorture.gp_sync= [KNL]
|
||||
Use normal (non-expedited) synchronous
|
||||
update-side primitives, if available. If all
|
||||
@ -5429,6 +5662,22 @@
|
||||
Set time (jiffies) between CPU-hotplug operations,
|
||||
or zero to disable CPU-hotplug testing.
|
||||
|
||||
rcutorture.preempt_duration= [KNL]
|
||||
Set duration (in milliseconds) of preemptions
|
||||
by a high-priority FIFO real-time task. Set to
|
||||
zero (the default) to disable. The CPUs to
|
||||
preempt are selected randomly from the set that
|
||||
are online at a given point in time. Races with
|
||||
CPUs going offline are ignored, with that attempt
|
||||
at preemption skipped.
|
||||
|
||||
rcutorture.preempt_interval= [KNL]
|
||||
Set interval (in milliseconds, defaulting to one
|
||||
second) between preemptions by a high-priority
|
||||
FIFO real-time task. This delay is mediated
|
||||
by an hrtimer and is further fuzzed to avoid
|
||||
inadvertent synchronizations.
|
||||
|
||||
rcutorture.read_exit_burst= [KNL]
|
||||
The number of times in a given read-then-exit
|
||||
episode that a set of read-then-exit kthreads
|
||||
@ -5715,6 +5964,55 @@
|
||||
reboot_cpu is s[mp]#### with #### being the processor
|
||||
to be used for rebooting.
|
||||
|
||||
acpi
|
||||
Use the ACPI RESET_REG in the FADT. If ACPI is not
|
||||
configured or the ACPI reset does not work, the reboot
|
||||
path attempts the reset using the keyboard controller.
|
||||
|
||||
bios
|
||||
Use the CPU reboot vector for warm reset
|
||||
|
||||
cold
|
||||
Set the cold reboot flag
|
||||
|
||||
default
|
||||
There are some built-in platform specific "quirks"
|
||||
- you may see: "reboot: <name> series board detected.
|
||||
Selecting <type> for reboots." In the case where you
|
||||
think the quirk is in error (e.g. you have newer BIOS,
|
||||
or newer board) using this option will ignore the
|
||||
built-in quirk table, and use the generic default
|
||||
reboot actions.
|
||||
|
||||
efi
|
||||
Use efi reset_system runtime service. If EFI is not
|
||||
configured or the EFI reset does not work, the reboot
|
||||
path attempts the reset using the keyboard controller.
|
||||
|
||||
force
|
||||
Don't stop other CPUs on reboot. This can make reboot
|
||||
more reliable in some cases.
|
||||
|
||||
kbd
|
||||
Use the keyboard controller. cold reset (default)
|
||||
|
||||
pci
|
||||
Use a write to the PCI config space register 0xcf9 to
|
||||
trigger reboot.
|
||||
|
||||
triple
|
||||
Force a triple fault (init)
|
||||
|
||||
warm
|
||||
Don't set the cold reboot flag
|
||||
|
||||
Using warm reset will be much faster especially on big
|
||||
memory systems because the BIOS will not go through
|
||||
the memory check. Disadvantage is that not all
|
||||
hardware will be completely reinitialized on reboot so
|
||||
there may be boot problems on some systems.
|
||||
|
||||
|
||||
refscale.holdoff= [KNL]
|
||||
Set test-start holdoff period. The purpose of
|
||||
this parameter is to delay the start of the
|
||||
@ -6106,7 +6404,16 @@
|
||||
|
||||
serialnumber [BUGS=X86-32]
|
||||
|
||||
sev=option[,option...] [X86-64] See Documentation/arch/x86/x86_64/boot-options.rst
|
||||
sev=option[,option...] [X86-64]
|
||||
|
||||
debug
|
||||
Enable debug messages.
|
||||
|
||||
nosnp
|
||||
Do not enable SEV-SNP (applies to host/hypervisor
|
||||
only). Setting 'nosnp' avoids the RMP check overhead
|
||||
in memory accesses when users do not want to run
|
||||
SEV-SNP guests.
|
||||
|
||||
shapers= [NET]
|
||||
Maximal number of shapers.
|
||||
@ -6858,6 +7165,14 @@
|
||||
comma-separated list of trace events to enable. See
|
||||
also Documentation/trace/events.rst
|
||||
|
||||
To enable modules, use :mod: keyword:
|
||||
|
||||
trace_event=:mod:<module>
|
||||
|
||||
The value before :mod: will only enable specific events
|
||||
that are part of the module. See the above mentioned
|
||||
document for more information.
|
||||
|
||||
trace_instance=[instance-info]
|
||||
[FTRACE] Create a ring buffer instance early in boot up.
|
||||
This will be listed in:
|
||||
@ -6992,6 +7307,13 @@
|
||||
See Documentation/admin-guide/mm/transhuge.rst
|
||||
for more details.
|
||||
|
||||
transparent_hugepage_tmpfs= [KNL]
|
||||
Format: [always|within_size|advise|never]
|
||||
Can be used to control the default hugepage allocation policy
|
||||
for the tmpfs mount.
|
||||
See Documentation/admin-guide/mm/transhuge.rst
|
||||
for more details.
|
||||
|
||||
trusted.source= [KEYS]
|
||||
Format: <string>
|
||||
This parameter identifies the trust source as a backend
|
||||
@ -7474,7 +7796,7 @@
|
||||
vt.cur_default= [VT] Default cursor shape.
|
||||
Format: 0xCCBBAA, where AA, BB, and CC are the same as
|
||||
the parameters of the <Esc>[?A;B;Cc escape sequence;
|
||||
see VGA-softcursor.txt. Default: 2 = underline.
|
||||
see vga-softcursor.rst. Default: 2 = underline.
|
||||
|
||||
vt.default_blu= [VT]
|
||||
Format: <blue0>,<blue1>,<blue2>,...,<blue15>
|
||||
|
@ -445,8 +445,10 @@ event code Key Notes
|
||||
0x1008 0x07 FN+F8 IBM: toggle screen expand
|
||||
Lenovo: configure UltraNav,
|
||||
or toggle screen expand.
|
||||
On newer platforms (2024+)
|
||||
replaced by 0x131f (see below)
|
||||
On 2024 platforms replaced by
|
||||
0x131f (see below) and on newer
|
||||
platforms (2025 +) keycode is
|
||||
replaced by 0x1401 (see below).
|
||||
|
||||
0x1009 0x08 FN+F9 -
|
||||
|
||||
@ -506,9 +508,11 @@ event code Key Notes
|
||||
|
||||
0x1019 0x18 unknown
|
||||
|
||||
0x131f ... FN+F8 Platform Mode change.
|
||||
0x131f ... FN+F8 Platform Mode change (2024 systems).
|
||||
Implemented in driver.
|
||||
|
||||
0x1401 ... FN+F8 Platform Mode change (2025 + systems).
|
||||
Implemented in driver.
|
||||
... ... ...
|
||||
|
||||
0x1020 0x1F unknown
|
||||
|
@ -98,7 +98,7 @@ frames in packed raw Bayer format to IPU3 CSI2 receiver.
|
||||
# and that ov5670 sensor is connected to i2c bus 10 with address 0x36
|
||||
export SDEV=$(media-ctl -d $MDEV -e "ov5670 10-0036")
|
||||
|
||||
# Establish the link for the media devices using media-ctl [#f3]_
|
||||
# Establish the link for the media devices using media-ctl
|
||||
media-ctl -d $MDEV -l "ov5670:0 -> ipu3-csi2 0:0[1]"
|
||||
|
||||
# Set the format for the media devices
|
||||
@ -589,12 +589,8 @@ preserved.
|
||||
References
|
||||
==========
|
||||
|
||||
.. [#f5] drivers/staging/media/ipu3/include/uapi/intel-ipu3.h
|
||||
|
||||
.. [#f1] https://github.com/intel/nvt
|
||||
|
||||
.. [#f2] http://git.ideasonboard.org/yavta.git
|
||||
|
||||
.. [#f3] http://git.ideasonboard.org/?p=media-ctl.git;a=summary
|
||||
|
||||
.. [#f4] ImgU limitation requires an additional 16x16 for all input resolutions
|
||||
|
@ -42,32 +42,45 @@ the execution. ::
|
||||
|
||||
$ git clone https://github.com/sjp38/masim; cd masim; make
|
||||
$ sudo damo start "./masim ./configs/stairs.cfg --quiet"
|
||||
$ sudo ./damo show
|
||||
0 addr [85.541 TiB , 85.541 TiB ) (57.707 MiB ) access 0 % age 10.400 s
|
||||
1 addr [85.541 TiB , 85.542 TiB ) (413.285 MiB) access 0 % age 11.400 s
|
||||
2 addr [127.649 TiB , 127.649 TiB) (57.500 MiB ) access 0 % age 1.600 s
|
||||
3 addr [127.649 TiB , 127.649 TiB) (32.500 MiB ) access 0 % age 500 ms
|
||||
4 addr [127.649 TiB , 127.649 TiB) (9.535 MiB ) access 100 % age 300 ms
|
||||
5 addr [127.649 TiB , 127.649 TiB) (8.000 KiB ) access 60 % age 0 ns
|
||||
6 addr [127.649 TiB , 127.649 TiB) (6.926 MiB ) access 0 % age 1 s
|
||||
7 addr [127.998 TiB , 127.998 TiB) (120.000 KiB) access 0 % age 11.100 s
|
||||
8 addr [127.998 TiB , 127.998 TiB) (8.000 KiB ) access 40 % age 100 ms
|
||||
9 addr [127.998 TiB , 127.998 TiB) (4.000 KiB ) access 0 % age 11 s
|
||||
total size: 577.590 MiB
|
||||
$ sudo ./damo stop
|
||||
$ sudo damo report access
|
||||
heatmap: 641111111000000000000000000000000000000000000000000000[...]33333333333333335557984444[...]7
|
||||
# min/max temperatures: -1,840,000,000, 370,010,000, column size: 3.925 MiB
|
||||
0 addr 86.182 TiB size 8.000 KiB access 0 % age 14.900 s
|
||||
1 addr 86.182 TiB size 8.000 KiB access 60 % age 0 ns
|
||||
2 addr 86.182 TiB size 3.422 MiB access 0 % age 4.100 s
|
||||
3 addr 86.182 TiB size 2.004 MiB access 95 % age 2.200 s
|
||||
4 addr 86.182 TiB size 29.688 MiB access 0 % age 14.100 s
|
||||
5 addr 86.182 TiB size 29.516 MiB access 0 % age 16.700 s
|
||||
6 addr 86.182 TiB size 29.633 MiB access 0 % age 17.900 s
|
||||
7 addr 86.182 TiB size 117.652 MiB access 0 % age 18.400 s
|
||||
8 addr 126.990 TiB size 62.332 MiB access 0 % age 9.500 s
|
||||
9 addr 126.990 TiB size 13.980 MiB access 0 % age 5.200 s
|
||||
10 addr 126.990 TiB size 9.539 MiB access 100 % age 3.700 s
|
||||
11 addr 126.990 TiB size 16.098 MiB access 0 % age 6.400 s
|
||||
12 addr 127.987 TiB size 132.000 KiB access 0 % age 2.900 s
|
||||
total size: 314.008 MiB
|
||||
$ sudo damo stop
|
||||
|
||||
The first command of the above example downloads and builds an artificial
|
||||
memory access generator program called ``masim``. The second command asks DAMO
|
||||
to execute the artificial generator process start via the given command and
|
||||
make DAMON monitors the generator process. The third command retrieves the
|
||||
current snapshot of the monitored access pattern of the process from DAMON and
|
||||
shows the pattern in a human readable format.
|
||||
to start the program via the given command and make DAMON monitor the newly
|
||||
started process. The third command retrieves the current snapshot of the
|
||||
monitored access pattern of the process from DAMON and shows the pattern in a
|
||||
human readable format.
|
||||
|
||||
Each line of the output shows which virtual address range (``addr [XX, XX)``)
|
||||
of the process is how frequently (``access XX %``) accessed for how long time
|
||||
(``age XX``). For example, the fifth region of ~9 MiB size is being most
|
||||
frequently accessed for last 300 milliseconds. Finally, the fourth command
|
||||
stops DAMON.
|
||||
The first line of the output shows the relative access temperature (hotness) of
|
||||
the regions in a single row heatmap format.  Each column on the heatmap
|
||||
represents regions of same size on the monitored virtual address space. The
|
||||
position of the column on the row and the number on the column represent the
|
||||
relative location and access temperature of the region. ``[...]`` means
|
||||
unmapped huge regions on the virtual address spaces. The second line shows
|
||||
additional information for better understanding the heatmap.
|
||||
|
||||
Each line of the output from the third line shows which virtual address range
|
||||
(``addr XX size XX``) of the process is how frequently (``access XX %``)
|
||||
accessed for how long time (``age XX``).  For example, the eleventh region of
|
||||
~9.5 MiB size is being most frequently accessed for last 3.7 seconds. Finally,
|
||||
the fourth command stops DAMON.
|
||||
|
||||
Note that DAMON can monitor not only virtual address spaces but multiple types
|
||||
of address spaces including the physical address space.
|
||||
@ -95,7 +108,7 @@ Visualizing Recorded Patterns
|
||||
You can visualize the pattern in a heatmap, showing which memory region
|
||||
(x-axis) got accessed when (y-axis) and how frequently (number).::
|
||||
|
||||
$ sudo damo report heats --heatmap stdout
|
||||
$ sudo damo report heatmap
|
||||
22222222222222222222222222222222222222211111111111111111111111111111111111111100
|
||||
44444444444444444444444444444444444444434444444444444444444444444444444444443200
|
||||
44444444444444444444444444444444444444433444444444444444444444444444444444444200
|
||||
@ -160,6 +173,6 @@ Data Access Pattern Aware Memory Management
|
||||
Below command makes every memory region of size >=4K that has not been accessed for
|
||||
>=60 seconds in your workload to be swapped out. ::
|
||||
|
||||
$ sudo damo schemes --damos_access_rate 0 0 --damos_sz_region 4K max \
|
||||
--damos_age 60s max --damos_action pageout \
|
||||
<pid of your workload>
|
||||
$ sudo damo start --damos_access_rate 0 0 --damos_sz_region 4K max \
|
||||
--damos_age 60s max --damos_action pageout \
|
||||
<pid of your workload>
|
||||
|
@ -26,12 +26,6 @@ DAMON provides below interfaces for different users.
|
||||
writing kernel space DAMON application programs for you. You can even extend
|
||||
DAMON for various address spaces. For detail, please refer to the interface
|
||||
:doc:`document </mm/damon/api>`.
|
||||
- *debugfs interface. (DEPRECATED!)*
|
||||
:ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
|
||||
<sysfs_interface>`. This is deprecated, so users should move to the
|
||||
:ref:`sysfs interface <sysfs_interface>`. If you depend on this and cannot
|
||||
move, please report your usecase to damon@lists.linux.dev and
|
||||
linux-mm@kvack.org.
|
||||
|
||||
.. _sysfs_interface:
|
||||
|
||||
@ -89,10 +83,10 @@ comma (",").
|
||||
│ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value
|
||||
│ │ │ │ │ │ │ :ref:`watermarks <sysfs_watermarks>`/metric,interval_us,high,mid,low
|
||||
│ │ │ │ │ │ │ :ref:`filters <sysfs_filters>`/nr_filters
|
||||
│ │ │ │ │ │ │ │ 0/type,matching,memcg_id
|
||||
│ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
|
||||
│ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx
|
||||
│ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,sz_ops_filter_passed,qt_exceeds
|
||||
│ │ │ │ │ │ │ :ref:`tried_regions <sysfs_schemes_tried_regions>`/total_bytes
|
||||
│ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age
|
||||
│ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age,sz_filter_passed
|
||||
│ │ │ │ │ │ │ │ ...
|
||||
│ │ │ │ │ │ ...
|
||||
│ │ │ │ ...
|
||||
@ -412,59 +406,62 @@ number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each filter. The filters are evaluated
|
||||
in the numeric order.
|
||||
|
||||
Each filter directory contains six files, namely ``type``, ``matcing``,
|
||||
``memcg_path``, ``addr_start``, ``addr_end``, and ``target_idx``. To ``type``
|
||||
file, you can write one of five special keywords: ``anon`` for anonymous pages,
|
||||
``memcg`` for specific memory cgroup, ``young`` for young pages, ``addr`` for
|
||||
specific address range (an open-ended interval), or ``target`` for specific
|
||||
DAMON monitoring target filtering. In case of the memory cgroup filtering, you
|
||||
can specify the memory cgroup of the interest by writing the path of the memory
|
||||
cgroup from the cgroups mount point to ``memcg_path`` file. In case of the
|
||||
address range filtering, you can specify the start and end address of the range
|
||||
to ``addr_start`` and ``addr_end`` files, respectively. For the DAMON
|
||||
monitoring target filtering, you can specify the index of the target between
|
||||
the list of the DAMON context's monitoring targets list to ``target_idx`` file.
|
||||
You can write ``Y`` or ``N`` to ``matching`` file to filter out pages that does
|
||||
or does not match to the type, respectively. Then, the scheme's action will
|
||||
not be applied to the pages that specified to be filtered out.
|
||||
Each filter directory contains seven files, namely ``type``, ``matching``,
|
||||
``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, and ``target_idx``.
|
||||
To ``type`` file, you can write one of five special keywords: ``anon`` for
|
||||
anonymous pages, ``memcg`` for specific memory cgroup, ``young`` for young
|
||||
pages, ``addr`` for specific address range (an open-ended interval), or
|
||||
``target`` for specific DAMON monitoring target filtering. The meanings of the
|
||||
types are the same as the description in the :ref:`design doc
|
||||
<damon_design_damos_filters>`.
|
||||
|
||||
In case of the memory cgroup filtering, you can specify the memory cgroup of
|
||||
the interest by writing the path of the memory cgroup from the cgroups mount
|
||||
point to ``memcg_path`` file. In case of the address range filtering, you can
|
||||
specify the start and end address of the range to ``addr_start`` and
|
||||
``addr_end`` files, respectively. For the DAMON monitoring target filtering,
|
||||
you can specify the index of the target in the DAMON context's list of
|
||||
monitoring targets by writing it to ``target_idx`` file.
|
||||
|
||||
You can write ``Y`` or ``N`` to ``matching`` file to specify whether the filter
|
||||
is for memory that matches the ``type``. You can write ``Y`` or ``N`` to
|
||||
``allow`` file to specify if applying the action to the memory that satisfies
|
||||
the ``type`` and ``matching`` should be allowed or not.
|
||||
|
||||
For example, below restricts a DAMOS action to be applied to only non-anonymous
|
||||
pages of all memory cgroups except ``/having_care_already``.::
|
||||
|
||||
# echo 2 > nr_filters
|
||||
# # filter out anonymous pages
|
||||
# # disallow anonymous pages
|
||||
echo anon > 0/type
|
||||
echo Y > 0/matching
|
||||
echo N > 0/allow
|
||||
# # further filter out all cgroups except one at '/having_care_already'
|
||||
echo memcg > 1/type
|
||||
echo /having_care_already > 1/memcg_path
|
||||
echo Y > 1/matching
|
||||
echo N > 1/allow
|
||||
|
||||
Note that ``anon`` and ``memcg`` filters are currently supported only when
|
||||
``paddr`` :ref:`implementation <sysfs_context>` is being used.
|
||||
|
||||
Also, memory regions that are filtered out by ``addr`` or ``target`` filters
|
||||
are not counted as the scheme has tried to those, while regions that filtered
|
||||
out by other type filters are counted as the scheme has tried to. The
|
||||
difference is applied to :ref:`stats <damos_stats>` and
|
||||
:ref:`tried regions <sysfs_schemes_tried_regions>`.
|
||||
Refer to the :ref:`DAMOS filters design documentation
|
||||
<damon_design_damos_filters>` for more details including how multiple filters
|
||||
of different ``allow`` work, when each of the filters is supported, and
|
||||
differences on stats.
|
||||
|
||||
.. _sysfs_schemes_stats:
|
||||
|
||||
schemes/<N>/stats/
|
||||
------------------
|
||||
|
||||
DAMON counts the total number and bytes of regions that each scheme is tried to
|
||||
be applied, the two numbers for the regions that each scheme is successfully
|
||||
applied, and the total number of the quota limit exceeds. This statistics can
|
||||
be used for online analysis or tuning of the schemes.
|
||||
DAMON counts statistics for each scheme. These statistics can be used for
|
||||
online analysis or tuning of the schemes. Refer to :ref:`design doc
|
||||
<damon_design_damos_stat>` for more details about the stats.
|
||||
|
||||
The statistics can be retrieved by reading the files under ``stats`` directory
|
||||
(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``, and
|
||||
``qt_exceeds``), respectively. The files are not updated in real time, so you
|
||||
should ask DAMON sysfs interface to update the content of the files for the
|
||||
stats by writing a special keyword, ``update_schemes_stats`` to the relevant
|
||||
``kdamonds/<N>/state`` file.
|
||||
(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``,
|
||||
``sz_ops_filter_passed``, and ``qt_exceeds``), respectively. The files are not
|
||||
updated in real time, so you should ask DAMON sysfs interface to update the
|
||||
content of the files for the stats by writing a special keyword,
|
||||
``update_schemes_stats`` to the relevant ``kdamonds/<N>/state`` file.
|
||||
|
||||
.. _sysfs_schemes_tried_regions:
|
||||
|
||||
@ -501,10 +498,10 @@ set the ``access pattern`` as their interested pattern that they want to query.
|
||||
tried_regions/<N>/
|
||||
------------------
|
||||
|
||||
In each region directory, you will find four files (``start``, ``end``,
|
||||
``nr_accesses``, and ``age``). Reading the files will show the start and end
|
||||
addresses, ``nr_accesses``, and ``age`` of the region that corresponding
|
||||
DAMON-based operation scheme ``action`` has tried to be applied.
|
||||
In each region directory, you will find five files (``start``, ``end``,
|
||||
``nr_accesses``, ``age``, and ``sz_filter_passed``). Reading the files will
|
||||
show the properties of the region that corresponding DAMON-based operation
|
||||
scheme ``action`` has tried to be applied.
|
||||
|
||||
Example
|
||||
~~~~~~~
|
||||
@ -600,306 +597,3 @@ fields are as usual. It shows the index of the DAMON context (``ctx_idx=X``)
|
||||
of the scheme in the list of the contexts of the context's kdamond, the index
|
||||
of the scheme (``scheme_idx=X``) in the list of the schemes of the context, in
|
||||
addition to the output of ``damon_aggregated`` tracepoint.
|
||||
|
||||
|
||||
.. _debugfs_interface:
|
||||
|
||||
debugfs Interface (DEPRECATED!)
|
||||
===============================
|
||||
|
||||
.. note::
|
||||
|
||||
THIS IS DEPRECATED!
|
||||
|
||||
DAMON debugfs interface is deprecated, so users should move to the
|
||||
:ref:`sysfs interface <sysfs_interface>`. If you depend on this and cannot
|
||||
move, please report your usecase to damon@lists.linux.dev and
|
||||
linux-mm@kvack.org.
|
||||
|
||||
DAMON exports nine files, ``DEPRECATED``, ``attrs``, ``target_ids``,
|
||||
``init_regions``, ``schemes``, ``monitor_on_DEPRECATED``, ``kdamond_pid``,
|
||||
``mk_contexts`` and ``rm_contexts`` under its debugfs directory,
|
||||
``<debugfs>/damon/``.
|
||||
|
||||
|
||||
``DEPRECATED`` is a read-only file for the DAMON debugfs interface deprecation
|
||||
notice. Reading it returns the deprecation notice, as below::
|
||||
|
||||
# cat DEPRECATED
|
||||
DAMON debugfs interface is deprecated, so users should move to DAMON_SYSFS. If you cannot, please report your usecase to damon@lists.linux.dev and linux-mm@kvack.org.
|
||||
|
||||
|
||||
Attributes
|
||||
----------
|
||||
|
||||
Users can get and set the ``sampling interval``, ``aggregation interval``,
|
||||
``update interval``, and min/max number of monitoring target regions by
|
||||
reading from and writing to the ``attrs`` file. To know about the monitoring
|
||||
attributes in detail, please refer to the :doc:`/mm/damon/design`. For
|
||||
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
|
||||
1000, and then check it again::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# echo 5000 100000 1000000 10 1000 > attrs
|
||||
# cat attrs
|
||||
5000 100000 1000000 10 1000
|
||||
|
||||
|
||||
Target IDs
|
||||
----------
|
||||
|
||||
Some types of address spaces supports multiple monitoring target. For example,
|
||||
the virtual memory address spaces monitoring can have multiple processes as the
|
||||
monitoring targets. Users can set the targets by writing relevant id values of
|
||||
the targets to, and get the ids of the current targets by reading from the
|
||||
``target_ids`` file. In case of the virtual address spaces monitoring, the
|
||||
values should be pids of the monitoring target processes. For example, below
|
||||
commands set processes having pids 42 and 4242 as the monitoring targets and
|
||||
check it again::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# echo 42 4242 > target_ids
|
||||
# cat target_ids
|
||||
42 4242
|
||||
|
||||
Users can also monitor the physical memory address space of the system by
|
||||
writing a special keyword, "``paddr\n``" to the file. Because physical address
|
||||
space monitoring doesn't support multiple targets, reading the file will show a
|
||||
fake value, ``42``, as below::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# echo paddr > target_ids
|
||||
# cat target_ids
|
||||
42
|
||||
|
||||
Note that setting the target ids doesn't start the monitoring.
|
||||
|
||||
|
||||
Initial Monitoring Target Regions
|
||||
---------------------------------
|
||||
|
||||
In case of the virtual address space monitoring, DAMON automatically sets and
|
||||
updates the monitoring target regions so that entire memory mappings of target
|
||||
processes can be covered. However, users can want to limit the monitoring
|
||||
region to specific address ranges, such as the heap, the stack, or specific
|
||||
file-mapped area. Or, some users can know the initial access pattern of their
|
||||
workloads and therefore want to set optimal initial regions for the 'adaptive
|
||||
regions adjustment'.
|
||||
|
||||
In contrast, DAMON do not automatically sets and updates the monitoring target
|
||||
regions in case of physical memory monitoring. Therefore, users should set the
|
||||
monitoring target regions by themselves.
|
||||
|
||||
In such cases, users can explicitly set the initial monitoring target regions
|
||||
as they want, by writing proper values to the ``init_regions`` file. The input
|
||||
should be a sequence of three integers separated by white spaces that represent
|
||||
one region in below form.::
|
||||
|
||||
<target idx> <start address> <end address>
|
||||
|
||||
The ``target idx`` should be the index of the target in ``target_ids`` file,
|
||||
starting from ``0``, and the regions should be passed in address order. For
|
||||
example, below commands will set a couple of address ranges, ``1-100`` and
|
||||
``100-200`` as the initial monitoring target region of pid 42, which is the
|
||||
first one (index ``0``) in ``target_ids``, and another couple of address
|
||||
ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
|
||||
(index ``1``) in ``target_ids``.::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# cat target_ids
|
||||
42 4242
|
||||
# echo "0 1 100 \
|
||||
0 100 200 \
|
||||
1 20 40 \
|
||||
1 50 100" > init_regions
|
||||
|
||||
Note that this sets the initial monitoring target regions only. In case of
|
||||
virtual memory monitoring, DAMON will automatically updates the boundary of the
|
||||
regions after one ``update interval``. Therefore, users should set the
|
||||
``update interval`` large enough in this case, if they don't want the
|
||||
update.
|
||||
|
||||
|
||||
Schemes
|
||||
-------
|
||||
|
||||
Users can get and set the DAMON-based operation :ref:`schemes
|
||||
<damon_design_damos>` by reading from and writing to ``schemes`` debugfs file.
|
||||
Reading the file also shows the statistics of each scheme. To the file, each
|
||||
of the schemes should be represented in each line in below form::
|
||||
|
||||
<target access pattern> <action> <quota> <watermarks>
|
||||
|
||||
You can disable schemes by simply writing an empty string to the file.
|
||||
|
||||
Target Access Pattern
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The target access :ref:`pattern <damon_design_damos_access_pattern>` of the
|
||||
scheme. The ``<target access pattern>`` is constructed with three ranges in
|
||||
below form::
|
||||
|
||||
min-size max-size min-acc max-acc min-age max-age
|
||||
|
||||
Specifically, bytes for the size of regions (``min-size`` and ``max-size``),
|
||||
number of monitored accesses per aggregate interval for access frequency
|
||||
(``min-acc`` and ``max-acc``), number of aggregate intervals for the age of
|
||||
regions (``min-age`` and ``max-age``) are specified. Note that the ranges are
|
||||
closed interval.
|
||||
|
||||
Action
|
||||
~~~~~~
|
||||
|
||||
The ``<action>`` is a predefined integer for memory management :ref:`actions
|
||||
<damon_design_damos_action>`. The mapping between the ``<action>`` values and
|
||||
the memory management actions is as below. For the detailed meaning of the
|
||||
action and DAMON operations set supporting each action, please refer to the
|
||||
list on :ref:`design doc <damon_design_damos_action>`.
|
||||
|
||||
- 0: ``willneed``
|
||||
- 1: ``cold``
|
||||
- 2: ``pageout``
|
||||
- 3: ``hugepage``
|
||||
- 4: ``nohugepage``
|
||||
- 5: ``stat``
|
||||
|
||||
Quota
|
||||
~~~~~
|
||||
|
||||
Users can set the :ref:`quotas <damon_design_damos_quotas>` of the given scheme
|
||||
via the ``<quota>`` in below form::
|
||||
|
||||
<ms> <sz> <reset interval> <priority weights>
|
||||
|
||||
This makes DAMON to try to use only up to ``<ms>`` milliseconds for applying
|
||||
the action to memory regions of the ``target access pattern`` within the
|
||||
``<reset interval>`` milliseconds, and to apply the action to only up to
|
||||
``<sz>`` bytes of memory regions within the ``<reset interval>``. Setting both
|
||||
``<ms>`` and ``<sz>`` zero disables the quota limits.
|
||||
|
||||
For the :ref:`prioritization <damon_design_damos_quotas_prioritization>`, users
|
||||
can set the weights for the three properties in ``<priority weights>`` in below
|
||||
form::
|
||||
|
||||
<size weight> <access frequency weight> <age weight>
|
||||
|
||||
Watermarks
|
||||
~~~~~~~~~~
|
||||
|
||||
Users can specify :ref:`watermarks <damon_design_damos_watermarks>` of the
|
||||
given scheme via ``<watermarks>`` in the form below::
|
||||
|
||||
<metric> <check interval> <high mark> <middle mark> <low mark>
|
||||
|
||||
``<metric>`` is a predefined integer for the metric to be checked. The
|
||||
supported numbers and their meanings are as below.
|
||||
|
||||
- 0: Ignore the watermarks
|
||||
- 1: System's free memory rate (per thousand)
|
||||
|
||||
The value of the metric is checked every ``<check interval>`` microseconds.
|
||||
|
||||
If the value is higher than ``<high mark>`` or lower than ``<low mark>``, the
|
||||
scheme is deactivated. If the value is lower than ``<middle mark>``, the scheme
|
||||
is activated.
|
||||
|
||||
.. _damos_stats:
|
||||
|
||||
Statistics
|
||||
~~~~~~~~~~
|
||||
|
||||
It also counts the total number and bytes of regions that each scheme is tried
|
||||
to be applied, the two numbers for the regions that each scheme is successfully
|
||||
applied, and the total number of the quota limit exceeds. These statistics can
|
||||
be used for online analysis or tuning of the schemes.
|
||||
|
||||
The statistics can be shown by reading the ``schemes`` file. Reading the file
|
||||
will show each scheme you entered in each line, and the five numbers for the
|
||||
statistics will be added at the end of each line.
|
||||
|
||||
Example
|
||||
~~~~~~~
|
||||
|
||||
The commands below apply a scheme saying "If a memory region of size in [4KiB,
|
||||
8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
|
||||
interval in [10, 20], page out the region. For the paging out, use only up to
|
||||
10ms per second, and also don't page out more than 1GiB per second. Under the
|
||||
limitation, page out memory regions having longer age first. Also, check the
|
||||
free memory rate of the system every 5 seconds, start the monitoring and paging
|
||||
out when the free memory rate becomes lower than 50%, but stop it if the free
|
||||
memory rate becomes larger than 60%, or lower than 30%".::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# scheme="4096 8192 0 5 10 20 2" # target access pattern and action
|
||||
# scheme+=" 10 $((1024*1024*1024)) 1000" # quotas
|
||||
# scheme+=" 0 0 100" # prioritization weights
|
||||
# scheme+=" 1 5000000 600 500 300" # watermarks
|
||||
# echo "$scheme" > schemes
|
||||
|
||||
|
||||
Turning On/Off
|
||||
--------------
|
||||
|
||||
Setting the files as described above has no effect unless you explicitly
|
||||
start the monitoring. You can start, stop, and check the current status of the
|
||||
monitoring by writing to and reading from the ``monitor_on_DEPRECATED`` file.
|
||||
Writing ``on`` to the file starts the monitoring of the targets with the
|
||||
attributes. Writing ``off`` to the file stops those. DAMON also stops if
|
||||
every target process is terminated. The example commands below turn on, off, and
|
||||
check the status of DAMON::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# echo on > monitor_on_DEPRECATED
|
||||
# echo off > monitor_on_DEPRECATED
|
||||
# cat monitor_on_DEPRECATED
|
||||
off
|
||||
|
||||
Please note that you cannot write to the above-mentioned debugfs files while
|
||||
the monitoring is turned on. If you write to the files while DAMON is running,
|
||||
an error code such as ``-EBUSY`` will be returned.
|
||||
|
||||
|
||||
Monitoring Thread PID
|
||||
---------------------
|
||||
|
||||
DAMON does requested monitoring with a kernel thread called ``kdamond``. You
|
||||
can get the pid of the thread by reading the ``kdamond_pid`` file. When the
|
||||
monitoring is turned off, reading the file returns ``none``. ::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# cat monitor_on_DEPRECATED
|
||||
off
|
||||
# cat kdamond_pid
|
||||
none
|
||||
# echo on > monitor_on_DEPRECATED
|
||||
# cat kdamond_pid
|
||||
18594
|
||||
|
||||
|
||||
Using Multiple Monitoring Threads
|
||||
---------------------------------
|
||||
|
||||
One ``kdamond`` thread is created for each monitoring context. You can create
|
||||
and remove monitoring contexts for use cases requiring multiple ``kdamond`` threads using
|
||||
the ``mk_contexts`` and ``rm_contexts`` files.
|
||||
|
||||
Writing the name of the new context to the ``mk_contexts`` file creates a
|
||||
directory of the name on the DAMON debugfs directory. The directory will have
|
||||
DAMON debugfs files for the context. ::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# ls foo
|
||||
# ls: cannot access 'foo': No such file or directory
|
||||
# echo foo > mk_contexts
|
||||
# ls foo
|
||||
# attrs init_regions kdamond_pid schemes target_ids
|
||||
|
||||
If the context is not needed anymore, you can remove it and the corresponding
|
||||
directory by putting the name of the context to the ``rm_contexts`` file. ::
|
||||
|
||||
# echo foo > rm_contexts
|
||||
# ls foo
|
||||
# ls: cannot access 'foo': No such file or directory
|
||||
|
||||
Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on_DEPRECATED`` files
|
||||
are in the root directory only.
|
||||
|
@ -280,8 +280,8 @@ The following files are currently defined:
|
||||
blocks; configure auto-onlining.
|
||||
|
||||
The default value depends on the
|
||||
CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel configuration
|
||||
option.
|
||||
CONFIG_MHP_DEFAULT_ONLINE_TYPE kernel configuration
|
||||
options.
|
||||
|
||||
See the ``state`` property of memory blocks for details.
|
||||
``block_size_bytes`` read-only: the size in bytes of a memory block.
|
||||
|
@ -332,6 +332,12 @@ allocation policy for the internal shmem mount by using the kernel parameter
|
||||
six valid policies for shmem (``always``, ``within_size``, ``advise``,
|
||||
``never``, ``deny``, and ``force``).
|
||||
|
||||
Similarly to ``transparent_hugepage_shmem``, you can control the default
|
||||
hugepage allocation policy for the tmpfs mount by using the kernel parameter
|
||||
``transparent_hugepage_tmpfs=<policy>``, where ``<policy>`` is one of the
|
||||
four valid policies for tmpfs (``always``, ``within_size``, ``advise``,
|
||||
``never``). The tmpfs mount default policy is ``never``.
|
||||
|
||||
In the same manner as ``thp_anon`` controls each supported anonymous THP
|
||||
size, ``thp_shmem`` controls each supported shmem THP size. ``thp_shmem``
|
||||
has the same format as ``thp_anon``, but also supports the policy
|
||||
@ -352,8 +358,21 @@ default to ``never``.
|
||||
Hugepages in tmpfs/shmem
|
||||
========================
|
||||
|
||||
You can control hugepage allocation policy in tmpfs with mount option
|
||||
``huge=``. It can have following values:
|
||||
Traditionally, tmpfs only supported a single huge page size ("PMD"). Today,
|
||||
it also supports smaller sizes just like anonymous memory, often referred
|
||||
to as "multi-size THP" (mTHP). Huge pages of any size are commonly
|
||||
represented in the kernel as "large folios".
|
||||
|
||||
While there is fine control over the huge page sizes to use for the internal
|
||||
shmem mount (see below), ordinary tmpfs mounts will make use of all available
|
||||
huge page sizes without any control over the exact sizes, behaving more like
|
||||
other file systems.
|
||||
|
||||
tmpfs mounts
|
||||
------------
|
||||
|
||||
The THP allocation policy for tmpfs mounts can be adjusted using the mount
|
||||
option: ``huge=``. It can have the following values:
|
||||
|
||||
always
|
||||
Attempt to allocate huge pages every time we need a new page;
|
||||
@ -363,24 +382,24 @@ never
|
||||
|
||||
within_size
|
||||
Only allocate huge page if it will be fully within i_size.
|
||||
Also respect fadvise()/madvise() hints;
|
||||
Also respect madvise() hints;
|
||||
|
||||
advise
|
||||
Only allocate huge pages if requested with fadvise()/madvise();
|
||||
Only allocate huge pages if requested with madvise();
|
||||
|
||||
The default policy is ``never``.
|
||||
Remember that the kernel may use huge pages of all available sizes, and
|
||||
that no fine control as for the internal tmpfs mount is available.
|
||||
|
||||
The default policy in the past was ``never``, but it can now be adjusted
|
||||
using the kernel parameter ``transparent_hugepage_tmpfs=<policy>``.
|
||||
|
||||
``mount -o remount,huge= /mountpoint`` works fine after mount: remounting
|
||||
``huge=never`` will not attempt to break up huge pages at all, just stop more
|
||||
from being allocated.
|
||||
|
||||
There's also sysfs knob to control hugepage allocation policy for internal
|
||||
shmem mount: /sys/kernel/mm/transparent_hugepage/shmem_enabled. The mount
|
||||
is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or
|
||||
MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
|
||||
|
||||
In addition to policies listed above, shmem_enabled allows two further
|
||||
values:
|
||||
In addition to policies listed above, the sysfs knob
|
||||
/sys/kernel/mm/transparent_hugepage/shmem_enabled will affect the
|
||||
allocation policy of tmpfs mounts, when set to the following values:
|
||||
|
||||
deny
|
||||
For use in emergencies, to force the huge option off from
|
||||
@ -388,13 +407,24 @@ deny
|
||||
force
|
||||
Force the huge option on for all - very useful for testing;
|
||||
|
||||
Shmem can also use "multi-size THP" (mTHP) by adding a new sysfs knob to
|
||||
control mTHP allocation:
|
||||
'/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/shmem_enabled',
|
||||
and its value for each mTHP is essentially consistent with the global
|
||||
setting. An 'inherit' option is added to ensure compatibility with these
|
||||
global settings. Conversely, the options 'force' and 'deny' are dropped,
|
||||
which are rather testing artifacts from the old ages.
|
||||
shmem / internal tmpfs
|
||||
----------------------
|
||||
The internal tmpfs mount is used for SysV SHM, memfds, shared anonymous
|
||||
mmaps (of /dev/zero or MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
|
||||
|
||||
To control the THP allocation policy for this internal tmpfs mount, the
|
||||
sysfs knob /sys/kernel/mm/transparent_hugepage/shmem_enabled and the knobs
|
||||
per THP size in
|
||||
'/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/shmem_enabled'
|
||||
can be used.
|
||||
|
||||
The global knob has the same semantics as the ``huge=`` mount options
|
||||
for tmpfs mounts, except that the different huge page sizes can be controlled
|
||||
individually, and will only use the setting of the global knob when the
|
||||
per-size knob is set to 'inherit'.
|
||||
|
||||
The options 'force' and 'deny' are dropped for the individual sizes, which
|
||||
are rather testing artifacts from the old ages.
|
||||
|
||||
always
|
||||
Attempt to allocate <size> huge pages every time we need a new page;
|
||||
@ -408,10 +438,10 @@ never
|
||||
|
||||
within_size
|
||||
Only allocate <size> huge page if it will be fully within i_size.
|
||||
Also respect fadvise()/madvise() hints;
|
||||
Also respect madvise() hints;
|
||||
|
||||
advise
|
||||
Only allocate <size> huge pages if requested with fadvise()/madvise();
|
||||
Only allocate <size> huge pages if requested with madvise();
|
||||
|
||||
Need of application restart
|
||||
===========================
|
||||
@ -436,7 +466,7 @@ AnonHugePmdMapped).
|
||||
The number of file transparent huge pages mapped to userspace is available
|
||||
by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
|
||||
To identify what applications are mapping file transparent huge pages, it
|
||||
is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
|
||||
is necessary to read ``/proc/PID/smaps`` and count the FilePmdMapped fields
|
||||
for each mapping.
|
||||
|
||||
Note that reading the smaps file is expensive and reading it
|
||||
@ -561,6 +591,16 @@ swpin
|
||||
is incremented every time a huge page is swapped in from a non-zswap
|
||||
swap device in one piece.
|
||||
|
||||
swpin_fallback
|
||||
is incremented if swapin fails to allocate or charge a huge page
|
||||
and instead falls back to using huge pages with lower orders or
|
||||
small pages.
|
||||
|
||||
swpin_fallback_charge
|
||||
is incremented if swapin fails to charge a huge page and instead
|
||||
falls back to using huge pages with lower orders or small pages
|
||||
even though the allocation was successful.
|
||||
|
||||
swpout
|
||||
is incremented every time a huge page is swapped out to a non-zswap
|
||||
swap device in one piece without splitting.
|
||||
|
72
Documentation/admin-guide/nvme-multipath.rst
Normal file
72
Documentation/admin-guide/nvme-multipath.rst
Normal file
@ -0,0 +1,72 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================
|
||||
Linux NVMe multipath
|
||||
====================
|
||||
|
||||
This document describes NVMe multipath and its path selection policies supported
|
||||
by the Linux NVMe host driver.
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
The NVMe multipath feature in Linux integrates namespaces with the same
|
||||
identifier into a single block device. Using multipath enhances the reliability
|
||||
and stability of I/O access while improving bandwidth performance. When a user
|
||||
sends I/O to this merged block device, the multipath mechanism selects one of
|
||||
the underlying block devices (paths) according to the configured policy.
|
||||
Different policies result in different path selections.
|
||||
|
||||
|
||||
Policies
|
||||
========
|
||||
|
||||
All policies follow the ANA (Asymmetric Namespace Access) mechanism, meaning
|
||||
that when an optimized path is available, it will be chosen over a non-optimized
|
||||
one. Currently, the NVMe multipath policies include numa (default), round-robin and
|
||||
queue-depth.
|
||||
|
||||
To set the desired policy (e.g., round-robin), use one of the following methods:
|
||||
1. echo -n "round-robin" > /sys/module/nvme_core/parameters/iopolicy
|
||||
2. or add the "nvme_core.iopolicy=round-robin" to cmdline.
|
||||
|
||||
|
||||
NUMA
|
||||
----
|
||||
|
||||
The NUMA policy selects the path closest to the NUMA node of the current CPU for
|
||||
I/O distribution. This policy maintains the nearest paths to each NUMA node
|
||||
based on network interface connections.
|
||||
|
||||
When to use the NUMA policy:
|
||||
1. Multi-core Systems: Optimizes memory access in multi-core and
|
||||
multi-processor systems, especially under NUMA architecture.
|
||||
2. High Affinity Workloads: Binds I/O processing to the CPU to reduce
|
||||
communication and data transfer delays across nodes.
|
||||
|
||||
|
||||
Round-Robin
|
||||
-----------
|
||||
|
||||
The round-robin policy distributes I/O requests evenly across all paths to
|
||||
enhance throughput and resource utilization. Each I/O operation is sent to the
|
||||
next path in sequence.
|
||||
|
||||
When to use the round-robin policy:
|
||||
1. Balanced Workloads: Effective for balanced and predictable workloads with
|
||||
similar I/O size and type.
|
||||
2. Homogeneous Path Performance: Utilizes all paths efficiently when
|
||||
performance characteristics (e.g., latency, bandwidth) are similar.
|
||||
|
||||
|
||||
Queue-Depth
|
||||
-----------
|
||||
|
||||
The queue-depth policy manages I/O requests based on the current queue depth
|
||||
of each path, selecting the path with the least number of in-flight I/Os.
|
||||
|
||||
When to use the queue-depth policy:
|
||||
1. High load with small I/Os: Effectively balances load across paths when
|
||||
the load is high, and I/O operations consist of small, relatively
|
||||
fixed-sized requests.
|
@ -60,7 +60,7 @@ description of available events and configuration options in sysfs, see
|
||||
The "format" directory describes format of the config fields of the
|
||||
perf_event_attr structure. The "events" directory provides configuration
|
||||
templates for all documented events. For example,
|
||||
"Rx_PCIe_TLP_Data_Payload" is an equivalent of "eventid=0x22,type=0x1".
|
||||
"rx_pcie_tlp_data_payload" is an equivalent of "eventid=0x21,type=0x0".
|
||||
|
||||
The "perf list" command shall list the available events from sysfs, e.g.::
|
||||
|
||||
@ -79,8 +79,8 @@ Example usage of counting PCIe RX TLP data payload (Units of bytes)::
|
||||
|
||||
The average RX/TX bandwidth can be calculated using the following formula:
|
||||
|
||||
PCIe RX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window
|
||||
PCIe TX Bandwidth = Tx_PCIe_TLP_Data_Payload / Measure_Time_Window
|
||||
PCIe RX Bandwidth = rx_pcie_tlp_data_payload / Measure_Time_Window
|
||||
PCIe TX Bandwidth = tx_pcie_tlp_data_payload / Measure_Time_Window
|
||||
|
||||
Lane Event Usage
|
||||
-------------------------------
|
||||
|
@ -35,7 +35,10 @@ e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
|
||||
SCCL ID #1.
|
||||
|
||||
The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
|
||||
ID used to count the uncore PMU event.
|
||||
ID used to count the uncore PMU event. An "associated_cpus" sysfs attribute is
|
||||
also provided to show the CPUs associated with this PMU. The "cpumask" indicates
|
||||
the CPUs to open the events, usually as a hint for userspace tools like perf.
|
||||
It only contains one associated CPU from the "associated_cpus".
|
||||
|
||||
Example usage of perf::
|
||||
|
||||
|
@ -14,6 +14,8 @@ Performance monitor support
|
||||
qcom_l2_pmu
|
||||
qcom_l3_pmu
|
||||
starfive_starlink_pmu
|
||||
mrvl-odyssey-ddr-pmu
|
||||
mrvl-odyssey-tad-pmu
|
||||
arm-ccn
|
||||
arm-cmn
|
||||
arm-ni
|
||||
|
80
Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst
Normal file
80
Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst
Normal file
@ -0,0 +1,80 @@
|
||||
===================================================================
|
||||
Marvell Odyssey DDR PMU Performance Monitoring Unit (PMU UNCORE)
|
||||
===================================================================
|
||||
|
||||
Odyssey DRAM Subsystem supports eight counters for monitoring performance
|
||||
and software can program those counters to monitor any of the defined
|
||||
performance events. Supported performance events include those counted
|
||||
at the interface between the DDR controller and the PHY, interface between
|
||||
the DDR Controller and the CHI interconnect, or within the DDR Controller.
|
||||
|
||||
Additionally DSS also supports two fixed performance event counters, one
|
||||
for ddr reads and the other for ddr writes.
|
||||
|
||||
The counter will be operating in either manual or auto mode.
|
||||
|
||||
The PMU driver exposes the available events and format options under sysfs::
|
||||
|
||||
/sys/bus/event_source/devices/mrvl_ddr_pmu_<>/events/
|
||||
/sys/bus/event_source/devices/mrvl_ddr_pmu_<>/format/
|
||||
|
||||
Examples::
|
||||
|
||||
$ perf list | grep ddr
|
||||
mrvl_ddr_pmu_<>/ddr_act_bypass_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_bsm_alloc/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_bsm_starvation/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_cam_active_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_cam_mwr/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_cam_rd_active_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_cam_rd_or_wr_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_cam_read/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_cam_wr_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_cam_write/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_capar_error/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_crit_ref/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_ddr_reads/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_ddr_writes/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_dfi_cmd_is_retry/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_dfi_cycles/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_dfi_parity_poison/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_dfi_rd_data_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_dfi_wr_data_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_dqsosc_mpc/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_dqsosc_mrr/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_enter_mpsm/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_enter_powerdown/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_enter_selfref/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_hif_pri_rdaccess/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_hif_rd_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_hif_rd_or_wr_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_hif_rmw_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_hif_wr_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_hpri_sched_rd_crit_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_load_mode/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_lpri_sched_rd_crit_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_precharge/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_precharge_for_other/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_precharge_for_rdwr/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_raw_hazard/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_rd_bypass_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_rd_crc_error/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_rd_uc_ecc_error/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_rdwr_transitions/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_refresh/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_retry_fifo_full/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_spec_ref/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_tcr_mrr/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_war_hazard/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_waw_hazard/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_win_limit_reached_rd/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_win_limit_reached_wr/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_wr_crc_error/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_wr_trxn_crit_access/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_write_combine/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_zqcl/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_zqlatch/ [Kernel PMU event]
|
||||
mrvl_ddr_pmu_<>/ddr_zqstart/ [Kernel PMU event]
|
||||
|
||||
$ perf stat -e ddr_cam_read,ddr_cam_write,ddr_cam_active_access,ddr_cam_rd_or_wr_access,ddr_cam_rd_active_access,ddr_cam_mwr <workload>
|
37
Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst
Normal file
37
Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst
Normal file
@ -0,0 +1,37 @@
|
||||
====================================================================
|
||||
Marvell Odyssey LLC-TAD Performance Monitoring Unit (PMU UNCORE)
|
||||
====================================================================
|
||||
|
||||
Each TAD provides eight 64-bit counters for monitoring
|
||||
cache behavior. The driver always configures the same counter for
|
||||
all the TADs. The user would end up effectively reserving one of
|
||||
eight counters in every TAD to look across all TADs.
|
||||
The occurrences of events are aggregated and presented to the user
|
||||
at the end of running the workload. The driver does not provide a
|
||||
way for the user to partition TADs so that different TADs are used for
|
||||
different applications.
|
||||
|
||||
The performance events reflect various internal or interface activities.
|
||||
By combining the values from multiple performance counters, cache
|
||||
performance can be measured in terms such as: cache miss rate, cache
|
||||
allocations, interface retry rate, internal resource occupancy, etc.
|
||||
|
||||
The PMU driver exposes the available events and format options under sysfs::
|
||||
|
||||
/sys/bus/event_source/devices/tad/events/
|
||||
/sys/bus/event_source/devices/tad/format/
|
||||
|
||||
Examples::
|
||||
|
||||
$ perf list | grep tad
|
||||
tad/tad_alloc_any/ [Kernel PMU event]
|
||||
tad/tad_alloc_dtg/ [Kernel PMU event]
|
||||
tad/tad_alloc_ltg/ [Kernel PMU event]
|
||||
tad/tad_hit_any/ [Kernel PMU event]
|
||||
tad/tad_hit_dtg/ [Kernel PMU event]
|
||||
tad/tad_hit_ltg/ [Kernel PMU event]
|
||||
tad/tad_req_msh_in_exlmn/ [Kernel PMU event]
|
||||
tad/tad_tag_rd/ [Kernel PMU event]
|
||||
tad/tad_tot_cycle/ [Kernel PMU event]
|
||||
|
||||
$ perf stat -e tad_alloc_dtg,tad_alloc_ltg,tad_alloc_any,tad_hit_dtg,tad_hit_ltg,tad_hit_any,tad_tag_rd <workload>
|
@ -34,7 +34,7 @@ strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
|
||||
traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_scf_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_scf_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
@ -66,7 +66,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
|
||||
the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
@ -86,6 +86,22 @@ Example usage:
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
|
||||
|
||||
The NVLink-C2C has two ports that can be connected to one GPU (occupying both
|
||||
ports) or to two GPUs (one GPU per port). The user can use the "port" bitmap
|
||||
parameter to select the port(s) to monitor. Each bit represents the port number,
|
||||
e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
|
||||
PMU will monitor both ports by default if not specified.
|
||||
|
||||
Example for port filtering:
|
||||
|
||||
* Count event id 0x0 from the GPU connected with socket 0 on port 0::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/
|
||||
|
||||
* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/
|
||||
|
||||
NVLink-C2C1 PMU
|
||||
-------------------
|
||||
|
||||
@ -96,7 +112,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
|
||||
the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
@ -116,6 +132,22 @@ Example usage:
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
|
||||
|
||||
The NVLink-C2C has two ports that can be connected to one GPU (occupying both
|
||||
ports) or to two GPUs (one GPU per port). The user can use the "port" bitmap
|
||||
parameter to select the port(s) to monitor. Each bit represents the port number,
|
||||
e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
|
||||
PMU will monitor both ports by default if not specified.
|
||||
|
||||
Example for port filtering:
|
||||
|
||||
* Count event id 0x0 from the GPU connected with socket 0 on port 0::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/
|
||||
|
||||
* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/
|
||||
|
||||
CNVLink PMU
|
||||
---------------
|
||||
|
||||
@ -125,13 +157,14 @@ to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
|
||||
for more info about the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>.
|
||||
|
||||
Each SoC socket can be connected to one or more sockets via CNVLink. The user can
|
||||
use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
|
||||
Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
|
||||
socket 1 to 3.
|
||||
/sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
|
||||
socket 1 to 3. The PMU will monitor all remote sockets by default if not
|
||||
specified.
|
||||
/sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
|
||||
shows the valid bits that can be set in the "rem_socket" parameter.
|
||||
|
||||
The PMU can not distinguish the remote traffic initiator, therefore it does not
|
||||
@ -165,12 +198,13 @@ local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section
|
||||
for more info about the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>.
|
||||
|
||||
Each SoC socket can support multiple root ports. The user can use the
|
||||
"root_port" bitmap parameter to select the port(s) to monitor, e.g.
|
||||
"root_port=0xF" corresponds to root port 0 to 3.
|
||||
/sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
|
||||
"root_port=0xF" corresponds to root port 0 to 3. The PMU will monitor all root
|
||||
ports by default if not specified.
|
||||
/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
|
||||
shows the valid bits that can be set in the "root_port" parameter.
|
||||
|
||||
Example usage:
|
||||
|
@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
|
||||
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
|
||||
table, so we need to expose it to sysfs. If boost is not active, but
|
||||
still supported, this maximum frequency will be larger than the one in
|
||||
``cpuinfo``. On systems that support preferred core, the driver will have
|
||||
different values for some cores than others and this will reflect the values
|
||||
advertised by the platform at bootup.
|
||||
``cpuinfo``.
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_lowest_nonlinear_freq``
|
||||
|
@ -269,27 +269,7 @@ Namely, when invoked to select an idle state for a CPU (i.e. an idle state that
|
||||
the CPU will ask the processor hardware to enter), it attempts to predict the
|
||||
idle duration and uses the predicted value for idle state selection.
|
||||
|
||||
It first obtains the time until the closest timer event with the assumption
|
||||
that the scheduler tick will be stopped. That time, referred to as the *sleep
|
||||
length* in what follows, is the upper bound on the time before the next CPU
|
||||
wakeup. It is used to determine the sleep length range, which in turn is needed
|
||||
to get the sleep length correction factor.
|
||||
|
||||
The ``menu`` governor maintains two arrays of sleep length correction factors.
|
||||
One of them is used when tasks previously running on the given CPU are waiting
|
||||
for some I/O operations to complete and the other one is used when that is not
|
||||
the case. Each array contains several correction factor values that correspond
|
||||
to different sleep length ranges organized so that each range represented in the
|
||||
array is approximately 10 times wider than the previous one.
|
||||
|
||||
The correction factor for the given sleep length range (determined before
|
||||
selecting the idle state for the CPU) is updated after the CPU has been woken
|
||||
up and the closer the sleep length is to the observed idle duration, the closer
|
||||
to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
|
||||
The sleep length is multiplied by the correction factor for the range that it
|
||||
falls into to obtain the first approximation of the predicted idle duration.
|
||||
|
||||
Next, the governor uses a simple pattern recognition algorithm to refine its
|
||||
It first uses a simple pattern recognition algorithm to obtain a preliminary
|
||||
idle duration prediction. Namely, it saves the last 8 observed idle duration
|
||||
values and, when predicting the idle duration next time, it computes the average
|
||||
and variance of them. If the variance is small (smaller than 400 square
|
||||
@ -301,29 +281,39 @@ Again, if the variance of them is small (in the above sense), the average is
|
||||
taken as the "typical interval" value and so on, until either the "typical
|
||||
interval" is determined or too many data points are disregarded, in which case
|
||||
the "typical interval" is assumed to equal "infinity" (the maximum unsigned
|
||||
integer value). The "typical interval" computed this way is compared with the
|
||||
sleep length multiplied by the correction factor and the minimum of the two is
|
||||
taken as the predicted idle duration.
|
||||
integer value).
|
||||
|
||||
Then, the governor computes an extra latency limit to help "interactive"
|
||||
workloads. It uses the observation that if the exit latency of the selected
|
||||
idle state is comparable with the predicted idle duration, the total time spent
|
||||
in that state probably will be very short and the amount of energy to save by
|
||||
entering it will be relatively small, so likely it is better to avoid the
|
||||
overhead related to entering that state and exiting it. Thus selecting a
|
||||
shallower state is likely to be a better option then. The first approximation
|
||||
of the extra latency limit is the predicted idle duration itself which
|
||||
additionally is divided by a value depending on the number of tasks that
|
||||
previously ran on the given CPU and now they are waiting for I/O operations to
|
||||
complete. The result of that division is compared with the latency limit coming
|
||||
from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
|
||||
framework and the minimum of the two is taken as the limit for the idle states'
|
||||
exit latency.
|
||||
If the "typical interval" computed this way is long enough, the governor obtains
|
||||
the time until the closest timer event with the assumption that the scheduler
|
||||
tick will be stopped. That time, referred to as the *sleep length* in what follows,
|
||||
is the upper bound on the time before the next CPU wakeup. It is used to determine
|
||||
the sleep length range, which in turn is needed to get the sleep length correction
|
||||
factor.
|
||||
|
||||
The ``menu`` governor maintains an array containing several correction factor
|
||||
values that correspond to different sleep length ranges organized so that each
|
||||
range represented in the array is approximately 10 times wider than the previous
|
||||
one.
|
||||
|
||||
The correction factor for the given sleep length range (determined before
|
||||
selecting the idle state for the CPU) is updated after the CPU has been woken
|
||||
up and the closer the sleep length is to the observed idle duration, the closer
|
||||
to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
|
||||
The sleep length is multiplied by the correction factor for the range that it
|
||||
falls into to obtain an approximation of the predicted idle duration that is
|
||||
compared to the "typical interval" determined previously and the minimum of
|
||||
the two is taken as the idle duration prediction.
|
||||
|
||||
If the "typical interval" value is small, which means that the CPU is likely
|
||||
to be woken up soon enough, the sleep length computation is skipped as it may
|
||||
be costly and the idle duration is simply predicted to equal the "typical
|
||||
interval" value.
|
||||
|
||||
Now, the governor is ready to walk the list of idle states and choose one of
|
||||
them. For this purpose, it compares the target residency of each state with
|
||||
the predicted idle duration and the exit latency of it with the computed latency
|
||||
limit. It selects the state with the target residency closest to the predicted
|
||||
the predicted idle duration and the exit latency of it with the latency
|
||||
limit coming from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
|
||||
framework. It selects the state with the target residency closest to the predicted
|
||||
idle duration, but still below it, and exit latency that does not exceed the
|
||||
limit.
|
||||
|
||||
|
@ -733,7 +733,7 @@ can easily happen that your self-built kernel will lack modules for tasks you
|
||||
did not perform before utilizing this make target. That's because those tasks
|
||||
require kernel modules that are normally autoloaded when you perform that task
|
||||
for the first time; if you didn't perform that task at least once before using
|
||||
localmodonfig, the latter will thus assume these modules are superfluous and
|
||||
localmodconfig, the latter will thus assume these modules are superfluous and
|
||||
disable them.
|
||||
|
||||
You can try to avoid this by performing typical tasks that often will autoload
|
||||
|
@ -41,7 +41,7 @@ pre-allocation or re-sizing of any kernel data structures.
|
||||
dentry-negative
|
||||
----------------------------
|
||||
|
||||
Policy for negative dentries. Set to 1 to to always delete the dentry when a
|
||||
Policy for negative dentries. Set to 1 to always delete the dentry when a
|
||||
file is removed, and 0 to disable it. By default, this behavior is disabled.
|
||||
|
||||
dentry-state
|
||||
|
@ -1544,6 +1544,13 @@ constant ``FUTEX_TID_MASK`` (0x3fffffff).
|
||||
If a value outside of this range is written to ``threads-max`` an
|
||||
``EINVAL`` error occurs.
|
||||
|
||||
timer_migration
|
||||
===============
|
||||
|
||||
When set to a non-zero value, attempt to migrate timers away from idle cpus to
|
||||
allow them to remain in low power states longer.
|
||||
|
||||
Default is set (1).
|
||||
|
||||
traceoff_on_warning
|
||||
===================
|
||||
|
@ -49,26 +49,26 @@ How do I use the magic SysRq key?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
On x86
|
||||
You press the key combo :kbd:`ALT-SysRq-<command key>`.
|
||||
You press the key combo `ALT-SysRq-<command key>`.
|
||||
|
||||
.. note::
|
||||
Some
|
||||
keyboards may not have a key labeled 'SysRq'. The 'SysRq' key is
|
||||
also known as the 'Print Screen' key. Also some keyboards cannot
|
||||
handle so many keys being pressed at the same time, so you might
|
||||
have better luck with press :kbd:`Alt`, press :kbd:`SysRq`,
|
||||
release :kbd:`SysRq`, press :kbd:`<command key>`, release everything.
|
||||
have better luck with press `Alt`, press `SysRq`,
|
||||
release `SysRq`, press `<command key>`, release everything.
|
||||
|
||||
On SPARC
|
||||
You press :kbd:`ALT-STOP-<command key>`, I believe.
|
||||
You press `ALT-STOP-<command key>`, I believe.
|
||||
|
||||
On the serial console (PC style standard serial ports only)
|
||||
You send a ``BREAK``, then within 5 seconds a command key. Sending
|
||||
``BREAK`` twice is interpreted as a normal BREAK.
|
||||
|
||||
On PowerPC
|
||||
Press :kbd:`ALT - Print Screen` (or :kbd:`F13`) - :kbd:`<command key>`.
|
||||
:kbd:`Print Screen` (or :kbd:`F13`) - :kbd:`<command key>` may suffice.
|
||||
Press `ALT - Print Screen` (or `F13`) - `<command key>`.
|
||||
`Print Screen` (or `F13`) - `<command key>` may suffice.
|
||||
|
||||
On other
|
||||
If you know of the key combos for other architectures, please
|
||||
@ -88,7 +88,7 @@ On all
|
||||
|
||||
echo _reisub > /proc/sysrq-trigger
|
||||
|
||||
The :kbd:`<command key>` is case sensitive.
|
||||
The `<command key>` is case sensitive.
|
||||
|
||||
What are the 'command' keys?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -225,9 +225,9 @@ Sometimes SysRq seems to get 'stuck' after using it, what can I do?
|
||||
|
||||
When this happens, try tapping shift, alt and control on both sides of the
|
||||
keyboard, and hitting an invalid sysrq sequence again. (i.e., something like
|
||||
:kbd:`alt-sysrq-z`).
|
||||
`alt-sysrq-z`).
|
||||
|
||||
Switching to another virtual console (:kbd:`ALT+Fn`) and then back again
|
||||
Switching to another virtual console (`ALT+Fn`) and then back again
|
||||
should also help.
|
||||
|
||||
I hit SysRq, but nothing seems to happen, what's wrong?
|
||||
@ -290,7 +290,7 @@ exception the header line from the sysrq command is passed to all console
|
||||
consumers as if the current loglevel was maximum. If only the header
|
||||
is emitted it is almost certain that the kernel loglevel is too low.
|
||||
Should you require the output on the console channel then you will need
|
||||
to temporarily up the console loglevel using :kbd:`alt-sysrq-8` or::
|
||||
to temporarily up the console loglevel using `alt-sysrq-8` or::
|
||||
|
||||
echo 8 > /proc/sysrq-trigger
|
||||
|
||||
|
@ -1431,7 +1431,7 @@ can easily happen that your self-built kernels will lack modules for tasks you
|
||||
did not perform at least once before utilizing this make target. That happens
|
||||
when a task requires kernel modules which are only autoloaded when you execute
|
||||
it for the first time. So when you never performed that task since starting your
|
||||
kernel the modules will not have been loaded -- and from localmodonfig's point
|
||||
kernel the modules will not have been loaded -- and from localmodconfig's point
|
||||
of view look superfluous, which thus disables them to reduce the amount of code
|
||||
to be compiled.
|
||||
|
||||
|
@ -83,7 +83,7 @@ scripts/ver_linux is a good way to check if your system already has
|
||||
the necessary tools::
|
||||
|
||||
sudo apt-get install build-essential flex bison yacc
|
||||
sudo apt install libelf-dev systemtap-sdt-dev libaudit-dev libslang2-dev libperl-dev libdw-dev
|
||||
sudo apt install libelf-dev systemtap-sdt-dev libslang2-dev libperl-dev libdw-dev
|
||||
|
||||
cscope is a good tool to browse kernel sources. Let's install it now::
|
||||
|
||||
|
@ -153,3 +153,11 @@ asymmetric system, a broken guest at EL1 could still attempt to execute
|
||||
mode will return to host userspace with an ``exit_reason`` of
|
||||
``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
|
||||
re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
|
||||
|
||||
NOHZ FULL
|
||||
---------
|
||||
|
||||
To avoid perturbing an adaptive-ticks CPU (specified using
|
||||
``nohz_full=``) when a 32-bit task is forcefully migrated, these CPUs
|
||||
are treated as 64-bit-only when support for asymmetric 32-bit systems
|
||||
is enabled.
|
||||
|
@ -449,6 +449,18 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
|
||||
|
||||
- For CPUs with debug architecture i.e. FEAT_Debugv8pN (all versions):
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- MDCR_EL3.TDA (bit 9) must be initialized to 0b0
|
||||
|
||||
- For CPUs with FEAT_PMUv3:
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- MDCR_EL3.TPM (bit 6) must be initialized to 0b0
|
||||
|
||||
The requirements described above for CPU mode, caches, MMUs, architected
|
||||
timers, coherency and system registers apply to all CPUs. All CPUs must
|
||||
enter the kernel in the same exception level. Where the values documented
|
||||
|
@ -174,26 +174,82 @@ HWCAP_GCS
|
||||
Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
|
||||
described by Documentation/arch/arm64/gcs.rst.
|
||||
|
||||
HWCAP_CMPBR
|
||||
Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0010.
|
||||
|
||||
HWCAP_FPRCVT
|
||||
Functionality implied by ID_AA64ISAR3_EL1.FPRCVT == 0b0001.
|
||||
|
||||
HWCAP_F8MM8
|
||||
Functionality implied by ID_AA64FPFR0_EL1.F8MM8 == 0b0001.
|
||||
|
||||
HWCAP_F8MM4
|
||||
Functionality implied by ID_AA64FPFR0_EL1.F8MM4 == 0b0001.
|
||||
|
||||
HWCAP_SVE_F16MM
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.F16MM == 0b0001.
|
||||
|
||||
HWCAP_SVE_ELTPERM
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.ELTPERM == 0b0001.
|
||||
|
||||
HWCAP_SVE_AES2
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.AES == 0b0011.
|
||||
|
||||
HWCAP_SVE_BFSCALE
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.B16B16 == 0b0010.
|
||||
|
||||
HWCAP_SVE2P2
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SVEver == 0b0011.
|
||||
|
||||
HWCAP_SME2P2
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0011.
|
||||
|
||||
HWCAP_SME_SBITPERM
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SBitPerm == 0b1.
|
||||
|
||||
HWCAP_SME_AES
|
||||
Functionality implied by ID_AA64SMFR0_EL1.AES == 0b1.
|
||||
|
||||
HWCAP_SME_SFEXPA
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SFEXPA == 0b1.
|
||||
|
||||
HWCAP_SME_STMOP
|
||||
Functionality implied by ID_AA64SMFR0_EL1.STMOP == 0b1.
|
||||
|
||||
HWCAP_SME_SMOP4
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMOP4 == 0b1.
|
||||
|
||||
HWCAP2_DCPODP
|
||||
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
|
||||
|
||||
HWCAP2_SVE2
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SVEver == 0b0001.
|
||||
|
||||
HWCAP2_SVEAES
|
||||
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.AES == 0b0001.
|
||||
|
||||
HWCAP2_SVEPMULL
|
||||
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.AES == 0b0010.
|
||||
|
||||
HWCAP2_SVEBITPERM
|
||||
Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.BitPerm == 0b0001.
|
||||
|
||||
HWCAP2_SVESHA3
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SHA3 == 0b0001.
|
||||
|
||||
HWCAP2_SVESM4
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SM4 == 0b0001.
|
||||
|
||||
HWCAP2_FLAGM2
|
||||
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
|
||||
@ -202,16 +258,20 @@ HWCAP2_FRINT
|
||||
Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
|
||||
|
||||
HWCAP2_SVEI8MM
|
||||
Functionality implied by ID_AA64ZFR0_EL1.I8MM == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.I8MM == 0b0001.
|
||||
|
||||
HWCAP2_SVEF32MM
|
||||
Functionality implied by ID_AA64ZFR0_EL1.F32MM == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.F32MM == 0b0001.
|
||||
|
||||
HWCAP2_SVEF64MM
|
||||
Functionality implied by ID_AA64ZFR0_EL1.F64MM == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.F64MM == 0b0001.
|
||||
|
||||
HWCAP2_SVEBF16
|
||||
Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.BF16 == 0b0001.
|
||||
|
||||
HWCAP2_I8MM
|
||||
Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
|
||||
@ -277,7 +337,8 @@ HWCAP2_EBF16
|
||||
Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
|
||||
|
||||
HWCAP2_SVE_EBF16
|
||||
Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0010.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.BF16 == 0b0010.
|
||||
|
||||
HWCAP2_CSSC
|
||||
Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0001.
|
||||
@ -286,7 +347,8 @@ HWCAP2_RPRFM
|
||||
Functionality implied by ID_AA64ISAR2_EL1.RPRFM == 0b0001.
|
||||
|
||||
HWCAP2_SVE2P1
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SVEver == 0b0010.
|
||||
|
||||
HWCAP2_SME2
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
|
||||
@ -313,7 +375,8 @@ HWCAP2_HBC
|
||||
Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
|
||||
|
||||
HWCAP2_SVE_B16B16
|
||||
Functionality implied by ID_AA64ZFR0_EL1.B16B16 == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.B16B16 == 0b0001.
|
||||
|
||||
HWCAP2_LRCPC3
|
||||
Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0011.
|
||||
|
@ -37,7 +37,7 @@ intended to be exhaustive.
|
||||
shadow stacks rather than GCS.
|
||||
|
||||
* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
|
||||
AT_HWCAP2 entry.
|
||||
AT_HWCAP entry.
|
||||
|
||||
* GCS is enabled per thread. While there is support for disabling GCS
|
||||
at runtime this should be done with great care.
|
||||
|
@ -23,71 +23,6 @@ swapper_pg_dir contains only kernel (global) mappings while the user pgd
|
||||
contains only user (non-global) mappings. The swapper_pg_dir address is
|
||||
written to TTBR1 and never written to TTBR0.
|
||||
|
||||
|
||||
AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
|
||||
|
||||
Start End Size Use
|
||||
-----------------------------------------------------------------------
|
||||
0000000000000000 0000ffffffffffff 256TB user
|
||||
ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
|
||||
[ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
|
||||
ffff800000000000 ffff80007fffffff 2GB modules
|
||||
ffff800080000000 fffffbffefffffff 124TB vmalloc
|
||||
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
|
||||
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
|
||||
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
|
||||
fffffbffff800000 fffffbffffffffff 8MB [guard region]
|
||||
fffffc0000000000 fffffdffffffffff 2TB vmemmap
|
||||
fffffe0000000000 ffffffffffffffff 2TB [guard region]
|
||||
|
||||
|
||||
AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::
|
||||
|
||||
Start End Size Use
|
||||
-----------------------------------------------------------------------
|
||||
0000000000000000 000fffffffffffff 4PB user
|
||||
fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
|
||||
[fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
|
||||
ffff800000000000 ffff80007fffffff 2GB modules
|
||||
ffff800080000000 fffffbffefffffff 124TB vmalloc
|
||||
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
|
||||
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
|
||||
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
|
||||
fffffbffff800000 fffffbffffffffff 8MB [guard region]
|
||||
fffffc0000000000 ffffffdfffffffff ~4TB vmemmap
|
||||
ffffffe000000000 ffffffffffffffff 128GB [guard region]
|
||||
|
||||
|
||||
Translation table lookup with 4KB pages::
|
||||
|
||||
+--------+--------+--------+--------+--------+--------+--------+--------+
|
||||
|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
|
||||
+--------+--------+--------+--------+--------+--------+--------+--------+
|
||||
| | | | | |
|
||||
| | | | | v
|
||||
| | | | | [11:0] in-page offset
|
||||
| | | | +-> [20:12] L3 index
|
||||
| | | +-----------> [29:21] L2 index
|
||||
| | +---------------------> [38:30] L1 index
|
||||
| +-------------------------------> [47:39] L0 index
|
||||
+----------------------------------------> [55] TTBR0/1
|
||||
|
||||
|
||||
Translation table lookup with 64KB pages::
|
||||
|
||||
+--------+--------+--------+--------+--------+--------+--------+--------+
|
||||
|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
|
||||
+--------+--------+--------+--------+--------+--------+--------+--------+
|
||||
| | | | |
|
||||
| | | | v
|
||||
| | | | [15:0] in-page offset
|
||||
| | | +----------> [28:16] L3 index
|
||||
| | +--------------------------> [41:29] L2 index
|
||||
| +-------------------------------> [47:42] L1 index (48-bit)
|
||||
| [51:42] L1 index (52-bit)
|
||||
+----------------------------------------> [55] TTBR0/1
|
||||
|
||||
|
||||
When using KVM without the Virtualization Host Extensions, the
|
||||
hypervisor maps kernel pages in EL2 at a fixed (and potentially
|
||||
random) offset from the linear mapping. See the kern_hyp_va macro and
|
||||
|
@ -198,7 +198,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | MMU-500 | #841119,826419 | N/A |
|
||||
| ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA|
|
||||
| | | #562869,1047329 | |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | MMU-600 | #1076982,1209401| N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
|
@ -293,3 +293,13 @@ The following keys are defined:
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED`: Misaligned vector accesses are
|
||||
not supported at all and will generate a misaligned address fault.
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0`: A bitmask containing the
|
||||
thead vendor extensions that are compatible with the
|
||||
:c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
|
||||
|
||||
* T-HEAD
|
||||
|
||||
* :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor
|
||||
extension is supported in the T-Head ISA extensions spec starting from
|
||||
commit a18c801634 ("Add T-Head VECTOR vendor extension. ").
|
||||
|
@ -130,8 +130,126 @@ SNP feature support.
|
||||
|
||||
More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR
|
||||
|
||||
Reverse Map Table (RMP)
|
||||
=======================
|
||||
|
||||
The RMP is a structure in system memory that is used to ensure a one-to-one
|
||||
mapping between system physical addresses and guest physical addresses. Each
|
||||
page of memory that is potentially assignable to guests has one entry within
|
||||
the RMP.
|
||||
|
||||
The RMP table can be either contiguous in memory or a collection of segments
|
||||
in memory.
|
||||
|
||||
Contiguous RMP
|
||||
--------------
|
||||
|
||||
Support for this form of the RMP is present when support for SEV-SNP is
|
||||
present, which can be determined using the CPUID instruction::
|
||||
|
||||
0x8000001f[eax]:
|
||||
Bit[4] indicates support for SEV-SNP
|
||||
|
||||
The location of the RMP is identified to the hardware through two MSRs::
|
||||
|
||||
0xc0010132 (RMP_BASE):
|
||||
System physical address of the first byte of the RMP
|
||||
|
||||
0xc0010133 (RMP_END):
|
||||
System physical address of the last byte of the RMP
|
||||
|
||||
Hardware requires that RMP_BASE and (RMP_END + 1) be 8KB aligned, but SEV
|
||||
firmware increases the alignment requirement to require a 1MB alignment.
|
||||
|
||||
The RMP consists of a 16KB region used for processor bookkeeping followed
|
||||
by the RMP entries, which are 16 bytes in size. The size of the RMP
|
||||
determines the range of physical memory that the hypervisor can assign to
|
||||
SEV-SNP guests. The RMP covers the system physical address from::
|
||||
|
||||
0 to ((RMP_END + 1 - RMP_BASE - 16KB) / 16B) x 4KB.
|
||||
|
||||
The current Linux support relies on BIOS to allocate/reserve the memory for
|
||||
the RMP and to set RMP_BASE and RMP_END appropriately. Linux uses the MSR
|
||||
values to locate the RMP and determine the size of the RMP. The RMP must
|
||||
cover all of system memory in order for Linux to enable SEV-SNP.
|
||||
|
||||
Segmented RMP
|
||||
-------------
|
||||
|
||||
Segmented RMP support is a new way of representing the layout of an RMP.
|
||||
Initial RMP support required the RMP table to be contiguous in memory.
|
||||
RMP accesses from a NUMA node on which the RMP doesn't reside
|
||||
can take longer than accesses from a NUMA node on which the RMP resides.
|
||||
Segmented RMP support allows the RMP entries to be located on the same
|
||||
node as the memory the RMP is covering, potentially reducing latency
|
||||
associated with accessing an RMP entry associated with the memory. Each
|
||||
RMP segment covers a specific range of system physical addresses.
|
||||
|
||||
Support for this form of the RMP can be determined using the CPUID
|
||||
instruction::
|
||||
|
||||
0x8000001f[eax]:
|
||||
Bit[23] indicates support for segmented RMP
|
||||
|
||||
If supported, segmented RMP attributes can be found using the CPUID
|
||||
instruction::
|
||||
|
||||
0x80000025[eax]:
|
||||
Bits[5:0] minimum supported RMP segment size
|
||||
Bits[11:6] maximum supported RMP segment size
|
||||
|
||||
0x80000025[ebx]:
|
||||
Bits[9:0] number of cacheable RMP segment definitions
|
||||
Bit[10] indicates if the number of cacheable RMP segments
|
||||
is a hard limit
|
||||
|
||||
To enable a segmented RMP, a new MSR is available::
|
||||
|
||||
0xc0010136 (RMP_CFG):
|
||||
Bit[0] indicates if segmented RMP is enabled
|
||||
Bits[13:8] contains the size of memory covered by an RMP
|
||||
segment (expressed as a power of 2)
|
||||
|
||||
The RMP segment size defined in the RMP_CFG MSR applies to all segments
|
||||
of the RMP. Therefore each RMP segment covers a specific range of system
|
||||
physical addresses. For example, if the RMP_CFG MSR value is 0x2401, then
|
||||
the RMP segment coverage value is 0x24 => 36, meaning the size of memory
|
||||
covered by an RMP segment is 64GB (1 << 36). So the first RMP segment
|
||||
covers physical addresses from 0 to 0xF_FFFF_FFFF, the second RMP segment
|
||||
covers physical addresses from 0x10_0000_0000 to 0x1F_FFFF_FFFF, etc.
|
||||
|
||||
When a segmented RMP is enabled, RMP_BASE points to the RMP bookkeeping
|
||||
area as it does today (16K in size). However, instead of RMP entries
|
||||
beginning immediately after the bookkeeping area, there is a 4K RMP
|
||||
segment table (RST). Each entry in the RST is 8 bytes in size and represents
|
||||
an RMP segment::
|
||||
|
||||
Bits[19:0] mapped size (in GB)
|
||||
The mapped size can be less than the defined segment size.
|
||||
A value of zero indicates that no RMP exists for the range
|
||||
of system physical addresses associated with this segment.
|
||||
Bits[51:20] segment physical address
|
||||
This address is left-shifted by 20 bits (or just masked when
|
||||
read) to form the physical address of the segment (1MB
|
||||
alignment).
|
||||
|
||||
The RST can hold 512 segment entries but can be limited in size to the number
|
||||
of cacheable RMP segments (CPUID 0x80000025_EBX[9:0]) if the number of cacheable
|
||||
RMP segments is a hard limit (CPUID 0x80000025_EBX[10]).
|
||||
|
||||
The current Linux support relies on BIOS to allocate/reserve the memory for
|
||||
the segmented RMP (the bookkeeping area, RST, and all segments), build the RST
|
||||
and to set RMP_BASE, RMP_END, and RMP_CFG appropriately. Linux uses the MSR
|
||||
values to locate the RMP and determine the size and location of the RMP
|
||||
segments. The RMP must cover all of system memory in order for Linux to enable
|
||||
SEV-SNP.
|
||||
|
||||
More details in the AMD64 APM Vol 2, section "15.36.3 Reverse Map Table",
|
||||
docID: 24593.
|
||||
|
||||
Secure VM Service Module (SVSM)
|
||||
===============================
|
||||
|
||||
SNP provides a feature called Virtual Machine Privilege Levels (VMPL) which
|
||||
defines four privilege levels at which guest software can run. The most
|
||||
privileged level is 0 and numerically higher numbers have lesser privileges.
|
||||
|
@ -77,7 +77,7 @@ Protocol 2.14 BURNT BY INCORRECT COMMIT
|
||||
Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
|
||||
============= ============================================================
|
||||
|
||||
.. note::
|
||||
.. note::
|
||||
The protocol version number should be changed only if the setup header
|
||||
is changed. There is no need to update the version number if boot_params
|
||||
or kernel_info are changed. Additionally, it is recommended to use
|
||||
@ -95,27 +95,27 @@ Memory Layout
|
||||
The traditional memory map for the kernel loader, used for Image or
|
||||
zImage kernels, typically looks like::
|
||||
|
||||
| |
|
||||
0A0000 +------------------------+
|
||||
| Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
|
||||
09A000 +------------------------+
|
||||
| Command line |
|
||||
| Stack/heap | For use by the kernel real-mode code.
|
||||
098000 +------------------------+
|
||||
| Kernel setup | The kernel real-mode code.
|
||||
090200 +------------------------+
|
||||
| Kernel boot sector | The kernel legacy boot sector.
|
||||
090000 +------------------------+
|
||||
| Protected-mode kernel | The bulk of the kernel image.
|
||||
010000 +------------------------+
|
||||
| Boot loader | <- Boot sector entry point 0000:7C00
|
||||
001000 +------------------------+
|
||||
| Reserved for MBR/BIOS |
|
||||
000800 +------------------------+
|
||||
| Typically used by MBR |
|
||||
000600 +------------------------+
|
||||
| BIOS use only |
|
||||
000000 +------------------------+
|
||||
| |
|
||||
0A0000 +------------------------+
|
||||
| Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
|
||||
09A000 +------------------------+
|
||||
| Command line |
|
||||
| Stack/heap | For use by the kernel real-mode code.
|
||||
098000 +------------------------+
|
||||
| Kernel setup | The kernel real-mode code.
|
||||
090200 +------------------------+
|
||||
| Kernel boot sector | The kernel legacy boot sector.
|
||||
090000 +------------------------+
|
||||
| Protected-mode kernel | The bulk of the kernel image.
|
||||
010000 +------------------------+
|
||||
| Boot loader | <- Boot sector entry point 0000:7C00
|
||||
001000 +------------------------+
|
||||
| Reserved for MBR/BIOS |
|
||||
000800 +------------------------+
|
||||
| Typically used by MBR |
|
||||
000600 +------------------------+
|
||||
| BIOS use only |
|
||||
000000 +------------------------+
|
||||
|
||||
When using bzImage, the protected-mode kernel was relocated to
|
||||
0x100000 ("high memory"), and the kernel real-mode block (boot sector,
|
||||
@ -142,28 +142,28 @@ above the 0x9A000 point; too many BIOSes will break above that point.
|
||||
For a modern bzImage kernel with boot protocol version >= 2.02, a
|
||||
memory layout like the following is suggested::
|
||||
|
||||
~ ~
|
||||
| Protected-mode kernel |
|
||||
100000 +------------------------+
|
||||
| I/O memory hole |
|
||||
0A0000 +------------------------+
|
||||
| Reserved for BIOS | Leave as much as possible unused
|
||||
~ ~
|
||||
| Command line | (Can also be below the X+10000 mark)
|
||||
X+10000 +------------------------+
|
||||
| Stack/heap | For use by the kernel real-mode code.
|
||||
X+08000 +------------------------+
|
||||
| Kernel setup | The kernel real-mode code.
|
||||
| Kernel boot sector | The kernel legacy boot sector.
|
||||
X +------------------------+
|
||||
| Boot loader | <- Boot sector entry point 0000:7C00
|
||||
001000 +------------------------+
|
||||
| Reserved for MBR/BIOS |
|
||||
000800 +------------------------+
|
||||
| Typically used by MBR |
|
||||
000600 +------------------------+
|
||||
| BIOS use only |
|
||||
000000 +------------------------+
|
||||
~ ~
|
||||
| Protected-mode kernel |
|
||||
100000 +------------------------+
|
||||
| I/O memory hole |
|
||||
0A0000 +------------------------+
|
||||
| Reserved for BIOS | Leave as much as possible unused
|
||||
~ ~
|
||||
| Command line | (Can also be below the X+10000 mark)
|
||||
X+10000 +------------------------+
|
||||
| Stack/heap | For use by the kernel real-mode code.
|
||||
X+08000 +------------------------+
|
||||
| Kernel setup | The kernel real-mode code.
|
||||
| Kernel boot sector | The kernel legacy boot sector.
|
||||
X +------------------------+
|
||||
| Boot loader | <- Boot sector entry point 0000:7C00
|
||||
001000 +------------------------+
|
||||
| Reserved for MBR/BIOS |
|
||||
000800 +------------------------+
|
||||
| Typically used by MBR |
|
||||
000600 +------------------------+
|
||||
| BIOS use only |
|
||||
000000 +------------------------+
|
||||
|
||||
... where the address X is as low as the design of the boot loader permits.
|
||||
|
||||
@ -229,22 +229,22 @@ Offset/Size Proto Name Meaning
|
||||
=========== ======== ===================== ============================================
|
||||
|
||||
.. note::
|
||||
(1) For backwards compatibility, if the setup_sects field contains 0, the
|
||||
real value is 4.
|
||||
(1) For backwards compatibility, if the setup_sects field contains 0,
|
||||
the real value is 4.
|
||||
|
||||
(2) For boot protocol prior to 2.04, the upper two bytes of the syssize
|
||||
field are unusable, which means the size of a bzImage kernel
|
||||
cannot be determined.
|
||||
(2) For boot protocol prior to 2.04, the upper two bytes of the syssize
|
||||
field are unusable, which means the size of a bzImage kernel
|
||||
cannot be determined.
|
||||
|
||||
(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
|
||||
(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
|
||||
|
||||
If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
|
||||
the boot protocol version is "old". Loading an old kernel, the
|
||||
following parameters should be assumed::
|
||||
|
||||
Image type = zImage
|
||||
initrd not supported
|
||||
Real-mode kernel must be located at 0x90000.
|
||||
Image type = zImage
|
||||
initrd not supported
|
||||
Real-mode kernel must be located at 0x90000.
|
||||
|
||||
Otherwise, the "version" field contains the protocol version,
|
||||
e.g. protocol version 2.01 will contain 0x0201 in this field. When
|
||||
@ -265,7 +265,7 @@ All general purpose boot loaders should write the fields marked
|
||||
nonstandard address should fill in the fields marked (reloc); other
|
||||
boot loaders can ignore those fields.
|
||||
|
||||
The byte order of all fields is littleendian (this is x86, after all.)
|
||||
The byte order of all fields is little endian (this is x86, after all.)
|
||||
|
||||
============ ===========
|
||||
Field name: setup_sects
|
||||
@ -365,7 +365,7 @@ Offset/size: 0x206/2
|
||||
Protocol: 2.00+
|
||||
============ =======
|
||||
|
||||
Contains the boot protocol version, in (major << 8)+minor format,
|
||||
Contains the boot protocol version, in (major << 8) + minor format,
|
||||
e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
|
||||
10.17.
|
||||
|
||||
@ -397,17 +397,17 @@ Protocol: 2.00+
|
||||
If set to a nonzero value, contains a pointer to a NUL-terminated
|
||||
human-readable kernel version number string, less 0x200. This can
|
||||
be used to display the kernel version to the user. This value
|
||||
should be less than (0x200*setup_sects).
|
||||
should be less than (0x200 * setup_sects).
|
||||
|
||||
For example, if this value is set to 0x1c00, the kernel version
|
||||
number string can be found at offset 0x1e00 in the kernel file.
|
||||
This is a valid value if and only if the "setup_sects" field
|
||||
contains the value 15 or higher, as::
|
||||
|
||||
0x1c00 < 15*0x200 (= 0x1e00) but
|
||||
0x1c00 >= 14*0x200 (= 0x1c00)
|
||||
0x1c00 < 15 * 0x200 (= 0x1e00) but
|
||||
0x1c00 >= 14 * 0x200 (= 0x1c00)
|
||||
|
||||
0x1c00 >> 9 = 14, So the minimum value for setup_secs is 15.
|
||||
0x1c00 >> 9 = 14, so the minimum value for setup_sects is 15.
|
||||
|
||||
============ ==================
|
||||
Field name: type_of_loader
|
||||
@ -427,9 +427,9 @@ Protocol: 2.00+
|
||||
|
||||
For example, for T = 0x15, V = 0x234, write::
|
||||
|
||||
type_of_loader <- 0xE4
|
||||
ext_loader_type <- 0x05
|
||||
ext_loader_ver <- 0x23
|
||||
type_of_loader <- 0xE4
|
||||
ext_loader_type <- 0x05
|
||||
ext_loader_ver <- 0x23
|
||||
|
||||
Assigned boot loader ids (hexadecimal):
|
||||
|
||||
@ -686,7 +686,7 @@ Protocol: 2.10+
|
||||
If a boot loader makes use of this field, it should update the
|
||||
kernel_alignment field with the alignment unit desired; typically::
|
||||
|
||||
kernel_alignment = 1 << min_alignment
|
||||
kernel_alignment = 1 << min_alignment;
|
||||
|
||||
There may be a considerable performance cost with an excessively
|
||||
misaligned kernel. Therefore, a loader should typically try each
|
||||
@ -754,7 +754,7 @@ Protocol: 2.07+
|
||||
0x00000000 The default x86/PC environment
|
||||
0x00000001 lguest
|
||||
0x00000002 Xen
|
||||
0x00000003 Moorestown MID
|
||||
0x00000003 Intel MID (Moorestown, CloverTrail, Merrifield, Moorefield)
|
||||
0x00000004 CE4100 TV Platform
|
||||
========== ==============================
|
||||
|
||||
@ -808,13 +808,13 @@ Protocol: 2.09+
|
||||
parameters passing mechanism. The definition of struct setup_data is
|
||||
as follows::
|
||||
|
||||
struct setup_data {
|
||||
u64 next;
|
||||
u32 type;
|
||||
u32 len;
|
||||
u8 data[0];
|
||||
};
|
||||
|
||||
struct setup_data {
|
||||
__u64 next;
|
||||
__u32 type;
|
||||
__u32 len;
|
||||
__u8 data[];
|
||||
}
|
||||
|
||||
Where, the next is a 64-bit physical pointer to the next node of
|
||||
linked list, the next field of the last node is 0; the type is used
|
||||
to identify the contents of data; the len is the length of data
|
||||
@ -834,12 +834,12 @@ Protocol: 2.09+
|
||||
Thus setup_indirect struct and SETUP_INDIRECT type were introduced in
|
||||
protocol 2.15::
|
||||
|
||||
struct setup_indirect {
|
||||
__u32 type;
|
||||
__u32 reserved; /* Reserved, must be set to zero. */
|
||||
__u64 len;
|
||||
__u64 addr;
|
||||
};
|
||||
struct setup_indirect {
|
||||
__u32 type;
|
||||
__u32 reserved; /* Reserved, must be set to zero. */
|
||||
__u64 len;
|
||||
__u64 addr;
|
||||
};
|
||||
|
||||
The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
|
||||
SETUP_INDIRECT itself since making the setup_indirect a tree structure
|
||||
@ -849,17 +849,17 @@ Protocol: 2.09+
|
||||
Let's give an example of how to point to SETUP_E820_EXT data using setup_indirect.
|
||||
In this case setup_data and setup_indirect will look like this::
|
||||
|
||||
struct setup_data {
|
||||
__u64 next = 0 or <addr_of_next_setup_data_struct>;
|
||||
__u32 type = SETUP_INDIRECT;
|
||||
__u32 len = sizeof(setup_indirect);
|
||||
__u8 data[sizeof(setup_indirect)] = struct setup_indirect {
|
||||
__u32 type = SETUP_INDIRECT | SETUP_E820_EXT;
|
||||
__u32 reserved = 0;
|
||||
__u64 len = <len_of_SETUP_E820_EXT_data>;
|
||||
__u64 addr = <addr_of_SETUP_E820_EXT_data>;
|
||||
}
|
||||
}
|
||||
struct setup_data {
|
||||
.next = 0, /* or <addr_of_next_setup_data_struct> */
|
||||
.type = SETUP_INDIRECT,
|
||||
.len = sizeof(setup_indirect),
|
||||
.data[sizeof(setup_indirect)] = (struct setup_indirect) {
|
||||
.type = SETUP_INDIRECT | SETUP_E820_EXT,
|
||||
.reserved = 0,
|
||||
.len = <len_of_SETUP_E820_EXT_data>,
|
||||
.addr = <addr_of_SETUP_E820_EXT_data>,
|
||||
},
|
||||
}
|
||||
|
||||
.. note::
|
||||
SETUP_INDIRECT | SETUP_NONE objects cannot be properly distinguished
|
||||
@ -896,19 +896,19 @@ Offset/size: 0x260/4
|
||||
|
||||
The kernel runtime start address is determined by the following algorithm::
|
||||
|
||||
if (relocatable_kernel) {
|
||||
if (load_address < pref_address)
|
||||
load_address = pref_address;
|
||||
runtime_start = align_up(load_address, kernel_alignment);
|
||||
} else {
|
||||
runtime_start = pref_address;
|
||||
}
|
||||
if (relocatable_kernel) {
|
||||
if (load_address < pref_address)
|
||||
load_address = pref_address;
|
||||
runtime_start = align_up(load_address, kernel_alignment);
|
||||
} else {
|
||||
runtime_start = pref_address;
|
||||
}
|
||||
|
||||
Hence the necessary memory window location and size can be estimated by
|
||||
a boot loader as::
|
||||
|
||||
memory_window_start = runtime_start;
|
||||
memory_window_size = init_size;
|
||||
memory_window_start = runtime_start;
|
||||
memory_window_size = init_size;
|
||||
|
||||
============ ===============
|
||||
Field name: handover_offset
|
||||
@ -938,12 +938,12 @@ The kernel_info
|
||||
===============
|
||||
|
||||
The relationships between the headers are analogous to the various data
|
||||
sections:
|
||||
sections::
|
||||
|
||||
setup_header = .data
|
||||
boot_params/setup_data = .bss
|
||||
|
||||
What is missing from the above list? That's right:
|
||||
What is missing from the above list? That's right::
|
||||
|
||||
kernel_info = .rodata
|
||||
|
||||
@ -975,22 +975,22 @@ after kernel_info_var_len_data label. Each chunk of variable size data has to
|
||||
be prefixed with header/magic and its size, e.g.::
|
||||
|
||||
kernel_info:
|
||||
.ascii "LToP" /* Header, Linux top (structure). */
|
||||
.long kernel_info_var_len_data - kernel_info
|
||||
.long kernel_info_end - kernel_info
|
||||
.long 0x01234567 /* Some fixed size data for the bootloaders. */
|
||||
.ascii "LToP" /* Header, Linux top (structure). */
|
||||
.long kernel_info_var_len_data - kernel_info
|
||||
.long kernel_info_end - kernel_info
|
||||
.long 0x01234567 /* Some fixed size data for the bootloaders. */
|
||||
kernel_info_var_len_data:
|
||||
example_struct: /* Some variable size data for the bootloaders. */
|
||||
.ascii "0123" /* Header/Magic. */
|
||||
.long example_struct_end - example_struct
|
||||
.ascii "Struct"
|
||||
.long 0x89012345
|
||||
example_struct: /* Some variable size data for the bootloaders. */
|
||||
.ascii "0123" /* Header/Magic. */
|
||||
.long example_struct_end - example_struct
|
||||
.ascii "Struct"
|
||||
.long 0x89012345
|
||||
example_struct_end:
|
||||
example_strings: /* Some variable size data for the bootloaders. */
|
||||
.ascii "ABCD" /* Header/Magic. */
|
||||
.long example_strings_end - example_strings
|
||||
.asciz "String_0"
|
||||
.asciz "String_1"
|
||||
example_strings: /* Some variable size data for the bootloaders. */
|
||||
.ascii "ABCD" /* Header/Magic. */
|
||||
.long example_strings_end - example_strings
|
||||
.asciz "String_0"
|
||||
.asciz "String_1"
|
||||
example_strings_end:
|
||||
kernel_info_end:
|
||||
|
||||
@ -1139,67 +1139,63 @@ mode segment.
|
||||
|
||||
Such a boot loader should enter the following fields in the header::
|
||||
|
||||
unsigned long base_ptr; /* base address for real-mode segment */
|
||||
unsigned long base_ptr; /* base address for real-mode segment */
|
||||
|
||||
if ( setup_sects == 0 ) {
|
||||
setup_sects = 4;
|
||||
}
|
||||
if (setup_sects == 0)
|
||||
setup_sects = 4;
|
||||
|
||||
if ( protocol >= 0x0200 ) {
|
||||
type_of_loader = <type code>;
|
||||
if ( loading_initrd ) {
|
||||
ramdisk_image = <initrd_address>;
|
||||
ramdisk_size = <initrd_size>;
|
||||
}
|
||||
if (protocol >= 0x0200) {
|
||||
type_of_loader = <type code>;
|
||||
if (loading_initrd) {
|
||||
ramdisk_image = <initrd_address>;
|
||||
ramdisk_size = <initrd_size>;
|
||||
}
|
||||
|
||||
if ( protocol >= 0x0202 && loadflags & 0x01 )
|
||||
heap_end = 0xe000;
|
||||
else
|
||||
heap_end = 0x9800;
|
||||
if (protocol >= 0x0202 && loadflags & 0x01)
|
||||
heap_end = 0xe000;
|
||||
else
|
||||
heap_end = 0x9800;
|
||||
|
||||
if ( protocol >= 0x0201 ) {
|
||||
heap_end_ptr = heap_end - 0x200;
|
||||
loadflags |= 0x80; /* CAN_USE_HEAP */
|
||||
}
|
||||
if (protocol >= 0x0201) {
|
||||
heap_end_ptr = heap_end - 0x200;
|
||||
loadflags |= 0x80; /* CAN_USE_HEAP */
|
||||
}
|
||||
|
||||
if ( protocol >= 0x0202 ) {
|
||||
cmd_line_ptr = base_ptr + heap_end;
|
||||
strcpy(cmd_line_ptr, cmdline);
|
||||
} else {
|
||||
cmd_line_magic = 0xA33F;
|
||||
cmd_line_offset = heap_end;
|
||||
setup_move_size = heap_end + strlen(cmdline)+1;
|
||||
strcpy(base_ptr+cmd_line_offset, cmdline);
|
||||
}
|
||||
} else {
|
||||
/* Very old kernel */
|
||||
if (protocol >= 0x0202) {
|
||||
cmd_line_ptr = base_ptr + heap_end;
|
||||
strcpy(cmd_line_ptr, cmdline);
|
||||
} else {
|
||||
cmd_line_magic = 0xA33F;
|
||||
cmd_line_offset = heap_end;
|
||||
setup_move_size = heap_end + strlen(cmdline) + 1;
|
||||
strcpy(base_ptr + cmd_line_offset, cmdline);
|
||||
}
|
||||
} else {
|
||||
/* Very old kernel */
|
||||
|
||||
heap_end = 0x9800;
|
||||
heap_end = 0x9800;
|
||||
|
||||
cmd_line_magic = 0xA33F;
|
||||
cmd_line_offset = heap_end;
|
||||
cmd_line_magic = 0xA33F;
|
||||
cmd_line_offset = heap_end;
|
||||
|
||||
/* A very old kernel MUST have its real-mode code
|
||||
loaded at 0x90000 */
|
||||
/* A very old kernel MUST have its real-mode code loaded at 0x90000 */
|
||||
if (base_ptr != 0x90000) {
|
||||
/* Copy the real-mode kernel */
|
||||
memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
|
||||
base_ptr = 0x90000; /* Relocated */
|
||||
}
|
||||
|
||||
if ( base_ptr != 0x90000 ) {
|
||||
/* Copy the real-mode kernel */
|
||||
memcpy(0x90000, base_ptr, (setup_sects+1)*512);
|
||||
base_ptr = 0x90000; /* Relocated */
|
||||
}
|
||||
strcpy(0x90000 + cmd_line_offset, cmdline);
|
||||
|
||||
strcpy(0x90000+cmd_line_offset, cmdline);
|
||||
|
||||
/* It is recommended to clear memory up to the 32K mark */
|
||||
memset(0x90000 + (setup_sects+1)*512, 0,
|
||||
(64-(setup_sects+1))*512);
|
||||
}
|
||||
/* It is recommended to clear memory up to the 32K mark */
|
||||
memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
|
||||
}
|
||||
|
||||
|
||||
Loading The Rest of The Kernel
|
||||
==============================
|
||||
|
||||
The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512
|
||||
The 32-bit (non-real-mode) kernel starts at offset (setup_sects + 1) * 512
|
||||
in the kernel file (again, if setup_sects == 0 the real value is 4.)
|
||||
It should be loaded at address 0x10000 for Image/zImage kernels and
|
||||
0x100000 for bzImage kernels.
|
||||
@ -1207,13 +1203,14 @@ It should be loaded at address 0x10000 for Image/zImage kernels and
|
||||
The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01
|
||||
bit (LOAD_HIGH) in the loadflags field is set::
|
||||
|
||||
is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
|
||||
load_address = is_bzImage ? 0x100000 : 0x10000;
|
||||
is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
|
||||
load_address = is_bzImage ? 0x100000 : 0x10000;
|
||||
|
||||
Note that Image/zImage kernels can be up to 512K in size, and thus use
|
||||
the entire 0x10000-0x90000 range of memory. This means it is pretty
|
||||
much a requirement for these kernels to load the real-mode part at
|
||||
0x90000. bzImage kernels allow much more flexibility.
|
||||
.. note::
|
||||
Image/zImage kernels can be up to 512K in size, and thus use the entire
|
||||
0x10000-0x90000 range of memory. This means it is pretty much a
|
||||
requirement for these kernels to load the real-mode part at 0x90000.
|
||||
bzImage kernels allow much more flexibility.
|
||||
|
||||
Special Command Line Options
|
||||
============================
|
||||
@ -1282,19 +1279,20 @@ es = ss.
|
||||
|
||||
In our example from above, we would do::
|
||||
|
||||
/* Note: in the case of the "old" kernel protocol, base_ptr must
|
||||
be == 0x90000 at this point; see the previous sample code */
|
||||
/*
|
||||
* Note: in the case of the "old" kernel protocol, base_ptr must
|
||||
* be == 0x90000 at this point; see the previous sample code.
|
||||
*/
|
||||
seg = base_ptr >> 4;
|
||||
|
||||
seg = base_ptr >> 4;
|
||||
cli(); /* Enter with interrupts disabled! */
|
||||
|
||||
cli(); /* Enter with interrupts disabled! */
|
||||
/* Set up the real-mode kernel stack */
|
||||
_SS = seg;
|
||||
_SP = heap_end;
|
||||
|
||||
/* Set up the real-mode kernel stack */
|
||||
_SS = seg;
|
||||
_SP = heap_end;
|
||||
|
||||
_DS = _ES = _FS = _GS = seg;
|
||||
jmp_far(seg+0x20, 0); /* Run the kernel */
|
||||
_DS = _ES = _FS = _GS = seg;
|
||||
jmp_far(seg + 0x20, 0); /* Run the kernel */
|
||||
|
||||
If your boot sector accesses a floppy drive, it is recommended to
|
||||
switch off the floppy motor before running the kernel, since the
|
||||
@ -1349,7 +1347,7 @@ from offset 0x01f1 of kernel image on should be loaded into struct
|
||||
boot_params and examined. The end of setup header can be calculated as
|
||||
follows::
|
||||
|
||||
0x0202 + byte value at offset 0x0201
|
||||
0x0202 + byte value at offset 0x0201
|
||||
|
||||
In addition to read/modify/write the setup header of the struct
|
||||
boot_params as that of 16-bit boot protocol, the boot loader should
|
||||
@ -1385,7 +1383,7 @@ Then, the setup header at offset 0x01f1 of kernel image on should be
|
||||
loaded into struct boot_params and examined. The end of setup header
|
||||
can be calculated as follows::
|
||||
|
||||
0x0202 + byte value at offset 0x0201
|
||||
0x0202 + byte value at offset 0x0201
|
||||
|
||||
In addition to read/modify/write the setup header of the struct
|
||||
boot_params as that of 16-bit boot protocol, the boot loader should
|
||||
@ -1427,7 +1425,7 @@ execution context provided by the EFI firmware.
|
||||
|
||||
The function prototype for the handover entry point looks like this::
|
||||
|
||||
efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)
|
||||
void efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp);
|
||||
|
||||
'handle' is the EFI image handle passed to the boot loader by the EFI
|
||||
firmware, 'table' is the EFI system table - these are the first two
|
||||
@ -1442,12 +1440,13 @@ The boot loader *must* fill out the following fields in bp::
|
||||
|
||||
All other fields should be zero.
|
||||
|
||||
NOTE: The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF
|
||||
entry point, combined with the LINUX_EFI_INITRD_MEDIA_GUID based initrd
|
||||
loading protocol (refer to [0] for an example of the bootloader side of
|
||||
this), which removes the need for any knowledge on the part of the EFI
|
||||
bootloader regarding the internal representation of boot_params or any
|
||||
requirements/limitations regarding the placement of the command line
|
||||
and ramdisk in memory, or the placement of the kernel image itself.
|
||||
.. note::
|
||||
The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF
|
||||
entry point, combined with the LINUX_EFI_INITRD_MEDIA_GUID based initrd
|
||||
loading protocol (refer to [0] for an example of the bootloader side of
|
||||
this), which removes the need for any knowledge on the part of the EFI
|
||||
bootloader regarding the internal representation of boot_params or any
|
||||
requirements/limitations regarding the placement of the command line
|
||||
and ramdisk in memory, or the placement of the kernel image itself.
|
||||
|
||||
[0] https://github.com/u-boot/u-boot/commit/ec80b4735a593961fe701cc3a5d717d4739b0fd0
|
||||
|
@ -384,6 +384,16 @@ When monitoring is enabled all MON groups will also contain:
|
||||
Available only with debug option. The identifier used by hardware
|
||||
for the monitor group. On x86 this is the RMID.
|
||||
|
||||
When the "mba_MBps" mount option is used all CTRL_MON groups will also contain:
|
||||
|
||||
"mba_MBps_event":
|
||||
Reading this file shows which memory bandwidth event is used
|
||||
as input to the software feedback loop that keeps memory bandwidth
|
||||
below the value specified in the schemata file. Writing the
|
||||
name of one of the supported memory bandwidth events found in
|
||||
/sys/fs/resctrl/info/L3_MON/mon_features changes the input
|
||||
event.
|
||||
|
||||
Resource allocation rules
|
||||
-------------------------
|
||||
|
||||
|
@ -25,7 +25,7 @@ to cache translations for virtual addresses. The IOMMU driver uses the
|
||||
mmu_notifier() support to keep the device TLB cache and the CPU cache in
|
||||
sync. When an ATS lookup fails for a virtual address, the device should
|
||||
use the PRI in order to request the virtual address to be paged into the
|
||||
CPU page tables. The device must use ATS again in order the fetch the
|
||||
CPU page tables. The device must use ATS again in order to fetch the
|
||||
translation before use.
|
||||
|
||||
Shared Hardware Workqueues
|
||||
@ -216,7 +216,7 @@ submitting work and processing completions.
|
||||
|
||||
Single Root I/O Virtualization (SR-IOV) focuses on providing independent
|
||||
hardware interfaces for virtualizing hardware. Hence, it's required to be
|
||||
almost fully functional interface to software supporting the traditional
|
||||
an almost fully functional interface to software supporting the traditional
|
||||
BARs, space for interrupts via MSI-X, its own register layout.
|
||||
Virtual Functions (VFs) are assisted by the Physical Function (PF)
|
||||
driver.
|
||||
|
@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
|
||||
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
|
||||
"core_id."
|
||||
|
||||
- topology_logical_core_id();
|
||||
|
||||
The logical core ID to which a thread belongs.
|
||||
|
||||
|
||||
|
||||
System topology examples
|
||||
|
@ -1,312 +0,0 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================
|
||||
AMD64 Specific Boot Options
|
||||
===========================
|
||||
|
||||
There are many others (usually documented in driver documentation), but
|
||||
only the AMD64 specific ones are listed here.
|
||||
|
||||
Machine check
|
||||
=============
|
||||
Please see Documentation/arch/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
|
||||
|
||||
mce=off
|
||||
Disable machine check
|
||||
mce=no_cmci
|
||||
Disable CMCI(Corrected Machine Check Interrupt) that
|
||||
Intel processor supports. Usually this disablement is
|
||||
not recommended, but it might be handy if your hardware
|
||||
is misbehaving.
|
||||
Note that you'll get more problems without CMCI than with
|
||||
due to the shared banks, i.e. you might get duplicated
|
||||
error logs.
|
||||
mce=dont_log_ce
|
||||
Don't make logs for corrected errors. All events reported
|
||||
as corrected are silently cleared by OS.
|
||||
This option will be useful if you have no interest in any
|
||||
of corrected errors.
|
||||
mce=ignore_ce
|
||||
Disable features for corrected errors, e.g. polling timer
|
||||
and CMCI. All events reported as corrected are not cleared
|
||||
by OS and remained in its error banks.
|
||||
Usually this disablement is not recommended, however if
|
||||
there is an agent checking/clearing corrected errors
|
||||
(e.g. BIOS or hardware monitoring applications), conflicting
|
||||
with OS's error handling, and you cannot deactivate the agent,
|
||||
then this option will be a help.
|
||||
mce=no_lmce
|
||||
Do not opt-in to Local MCE delivery. Use legacy method
|
||||
to broadcast MCEs.
|
||||
mce=bootlog
|
||||
Enable logging of machine checks left over from booting.
|
||||
Disabled by default on AMD Fam10h and older because some BIOS
|
||||
leave bogus ones.
|
||||
If your BIOS doesn't do that it's a good idea to enable though
|
||||
to make sure you log even machine check events that result
|
||||
in a reboot. On Intel systems it is enabled by default.
|
||||
mce=nobootlog
|
||||
Disable boot machine check logging.
|
||||
mce=monarchtimeout (number)
|
||||
monarchtimeout:
|
||||
Sets the time in us to wait for other CPUs on machine checks. 0
|
||||
to disable.
|
||||
mce=bios_cmci_threshold
|
||||
Don't overwrite the bios-set CMCI threshold. This boot option
|
||||
prevents Linux from overwriting the CMCI threshold set by the
|
||||
bios. Without this option, Linux always sets the CMCI
|
||||
threshold to 1. Enabling this may make memory predictive failure
|
||||
analysis less effective if the bios sets thresholds for memory
|
||||
errors since we will not see details for all errors.
|
||||
mce=recovery
|
||||
Force-enable recoverable machine check code paths
|
||||
|
||||
nomce (for compatibility with i386)
|
||||
same as mce=off
|
||||
|
||||
Everything else is in sysfs now.
|
||||
|
||||
APICs
|
||||
=====
|
||||
|
||||
apic
|
||||
Use IO-APIC. Default
|
||||
|
||||
noapic
|
||||
Don't use the IO-APIC.
|
||||
|
||||
disableapic
|
||||
Don't use the local APIC
|
||||
|
||||
nolapic
|
||||
Don't use the local APIC (alias for i386 compatibility)
|
||||
|
||||
pirq=...
|
||||
See Documentation/arch/x86/i386/IO-APIC.rst
|
||||
|
||||
noapictimer
|
||||
Don't set up the APIC timer
|
||||
|
||||
no_timer_check
|
||||
Don't check the IO-APIC timer. This can work around
|
||||
problems with incorrect timer initialization on some boards.
|
||||
|
||||
apicpmtimer
|
||||
Do APIC timer calibration using the pmtimer. Implies
|
||||
apicmaintimer. Useful when your PIT timer is totally broken.
|
||||
|
||||
Timing
|
||||
======
|
||||
|
||||
notsc
|
||||
Deprecated, use tsc=unstable instead.
|
||||
|
||||
nohpet
|
||||
Don't use the HPET timer.
|
||||
|
||||
Idle loop
|
||||
=========
|
||||
|
||||
idle=poll
|
||||
Don't do power saving in the idle loop using HLT, but poll for rescheduling
|
||||
event. This will make the CPUs eat a lot more power, but may be useful
|
||||
to get slightly better performance in multiprocessor benchmarks. It also
|
||||
makes some profiling using performance counters more accurate.
|
||||
Please note that on systems with MONITOR/MWAIT support (like Intel EM64T
|
||||
CPUs) this option has no performance advantage over the normal idle loop.
|
||||
It may also interact badly with hyperthreading.
|
||||
|
||||
Rebooting
|
||||
=========
|
||||
|
||||
reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] | p[ci] [, [w]arm | [c]old]
|
||||
bios
|
||||
Use the CPU reboot vector for warm reset
|
||||
warm
|
||||
Don't set the cold reboot flag
|
||||
cold
|
||||
Set the cold reboot flag
|
||||
triple
|
||||
Force a triple fault (init)
|
||||
kbd
|
||||
Use the keyboard controller. cold reset (default)
|
||||
acpi
|
||||
Use the ACPI RESET_REG in the FADT. If ACPI is not configured or
|
||||
the ACPI reset does not work, the reboot path attempts the reset
|
||||
using the keyboard controller.
|
||||
efi
|
||||
Use efi reset_system runtime service. If EFI is not configured or
|
||||
the EFI reset does not work, the reboot path attempts the reset using
|
||||
the keyboard controller.
|
||||
pci
|
||||
Use a write to the PCI config space register 0xcf9 to trigger reboot.
|
||||
|
||||
Using warm reset will be much faster especially on big memory
|
||||
systems because the BIOS will not go through the memory check.
|
||||
Disadvantage is that not all hardware will be completely reinitialized
|
||||
on reboot so there may be boot problems on some systems.
|
||||
|
||||
reboot=force
|
||||
Don't stop other CPUs on reboot. This can make reboot more reliable
|
||||
in some cases.
|
||||
|
||||
reboot=default
|
||||
There are some built-in platform specific "quirks" - you may see:
|
||||
"reboot: <name> series board detected. Selecting <type> for reboots."
|
||||
In the case where you think the quirk is in error (e.g. you have
|
||||
newer BIOS, or newer board) using this option will ignore the built-in
|
||||
quirk table, and use the generic default reboot actions.
|
||||
|
||||
NUMA
|
||||
====
|
||||
|
||||
numa=off
|
||||
Only set up a single NUMA node spanning all memory.
|
||||
|
||||
numa=noacpi
|
||||
Don't parse the SRAT table for NUMA setup
|
||||
|
||||
numa=nohmat
|
||||
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
|
||||
partitioning.
|
||||
|
||||
ACPI
|
||||
====
|
||||
|
||||
acpi=off
|
||||
Don't enable ACPI
|
||||
acpi=ht
|
||||
Use ACPI boot table parsing, but don't enable ACPI interpreter
|
||||
acpi=force
|
||||
Force ACPI on (currently not needed)
|
||||
acpi=strict
|
||||
Disable out of spec ACPI workarounds.
|
||||
acpi_sci={edge,level,high,low}
|
||||
Set up ACPI SCI interrupt.
|
||||
acpi=noirq
|
||||
Don't route interrupts
|
||||
acpi=nocmcff
|
||||
Disable firmware first mode for corrected errors. This
|
||||
disables parsing the HEST CMC error source to check if
|
||||
firmware has set the FF flag. This may result in
|
||||
duplicate corrected error reports.
|
||||
|
||||
PCI
|
||||
===
|
||||
|
||||
pci=off
|
||||
Don't use PCI
|
||||
pci=conf1
|
||||
Use conf1 access.
|
||||
pci=conf2
|
||||
Use conf2 access.
|
||||
pci=rom
|
||||
Assign ROMs.
|
||||
pci=assign-busses
|
||||
Assign busses
|
||||
pci=irqmask=MASK
|
||||
Set PCI interrupt mask to MASK
|
||||
pci=lastbus=NUMBER
|
||||
Scan up to NUMBER busses, no matter what the mptable says.
|
||||
pci=noacpi
|
||||
Don't use ACPI to set up PCI interrupt routing.
|
||||
|
||||
IOMMU (input/output memory management unit)
|
||||
===========================================
|
||||
Multiple x86-64 PCI-DMA mapping implementations exist, for example:
|
||||
|
||||
1. <kernel/dma/direct.c>: use no hardware/software IOMMU at all
|
||||
(e.g. because you have < 3 GB memory).
|
||||
Kernel boot message: "PCI-DMA: Disabling IOMMU"
|
||||
|
||||
2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
|
||||
Kernel boot message: "PCI-DMA: using GART IOMMU"
|
||||
|
||||
3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
|
||||
e.g. if there is no hardware IOMMU in the system and it is need because
|
||||
you have >3GB memory or told the kernel to us it (iommu=soft))
|
||||
Kernel boot message: "PCI-DMA: Using software bounce buffering
|
||||
for IO (SWIOTLB)"
|
||||
|
||||
::
|
||||
|
||||
iommu=[<size>][,noagp][,off][,force][,noforce]
|
||||
[,memaper[=<order>]][,merge][,fullflush][,nomerge]
|
||||
[,noaperture]
|
||||
|
||||
General iommu options:
|
||||
|
||||
off
|
||||
Don't initialize and use any kind of IOMMU.
|
||||
noforce
|
||||
Don't force hardware IOMMU usage when it is not needed. (default).
|
||||
force
|
||||
Force the use of the hardware IOMMU even when it is
|
||||
not actually needed (e.g. because < 3 GB memory).
|
||||
soft
|
||||
Use software bounce buffering (SWIOTLB) (default for
|
||||
Intel machines). This can be used to prevent the usage
|
||||
of an available hardware IOMMU.
|
||||
|
||||
iommu options only relevant to the AMD GART hardware IOMMU:
|
||||
|
||||
<size>
|
||||
Set the size of the remapping area in bytes.
|
||||
allowed
|
||||
Overwrite iommu off workarounds for specific chipsets.
|
||||
fullflush
|
||||
Flush IOMMU on each allocation (default).
|
||||
nofullflush
|
||||
Don't use IOMMU fullflush.
|
||||
memaper[=<order>]
|
||||
Allocate an own aperture over RAM with size 32MB<<order.
|
||||
(default: order=1, i.e. 64MB)
|
||||
merge
|
||||
Do scatter-gather (SG) merging. Implies "force" (experimental).
|
||||
nomerge
|
||||
Don't do scatter-gather (SG) merging.
|
||||
noaperture
|
||||
Ask the IOMMU not to touch the aperture for AGP.
|
||||
noagp
|
||||
Don't initialize the AGP driver and use full aperture.
|
||||
panic
|
||||
Always panic when IOMMU overflows.
|
||||
|
||||
iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
|
||||
implementation:
|
||||
|
||||
swiotlb=<slots>[,force,noforce]
|
||||
<slots>
|
||||
Prereserve that many 2K slots for the software IO bounce buffering.
|
||||
force
|
||||
Force all IO through the software TLB.
|
||||
noforce
|
||||
Do not initialize the software TLB.
|
||||
|
||||
|
||||
Miscellaneous
|
||||
=============
|
||||
|
||||
nogbpages
|
||||
Do not use GB pages for kernel direct mappings.
|
||||
gbpages
|
||||
Use GB pages for kernel direct mappings.
|
||||
|
||||
|
||||
AMD SEV (Secure Encrypted Virtualization)
|
||||
=========================================
|
||||
Options relating to AMD SEV, specified via the following format:
|
||||
|
||||
::
|
||||
|
||||
sev=option1[,option2]
|
||||
|
||||
The available options are:
|
||||
|
||||
debug
|
||||
Enable debug messages.
|
||||
|
||||
nosnp
|
||||
Do not enable SEV-SNP (applies to host/hypervisor only). Setting
|
||||
'nosnp' avoids the RMP check overhead in memory accesses when
|
||||
users do not want to run SEV-SNP guests.
|
@ -18,7 +18,7 @@ For more information on the features of cpusets, see
|
||||
Documentation/admin-guide/cgroup-v1/cpusets.rst.
|
||||
There are a number of different configurations you can use for your needs. For
|
||||
more information on the numa=fake command line option and its various ways of
|
||||
configuring fake nodes, see Documentation/arch/x86/x86_64/boot-options.rst.
|
||||
configuring fake nodes, see Documentation/admin-guide/kernel-parameters.txt.
|
||||
|
||||
For the purposes of this introduction, we'll assume a very primitive NUMA
|
||||
emulation setup of "numa=fake=4*512,". This will split our system memory into
|
||||
|
@ -7,7 +7,6 @@ x86_64 Support
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
boot-options
|
||||
uefi
|
||||
mm
|
||||
5level-paging
|
||||
|
@ -12,14 +12,20 @@ with EFI firmware and specifications are listed below.
|
||||
|
||||
1. UEFI specification: http://www.uefi.org
|
||||
|
||||
2. Booting Linux kernel on UEFI x86_64 platform requires bootloader
|
||||
support. Elilo with x86_64 support can be used.
|
||||
2. Booting Linux kernel on UEFI x86_64 platform can either be
|
||||
done using the EFI stub (see <Documentation/admin-guide/efi-stub.rst>) or using a
|
||||
separate bootloader.
|
||||
|
||||
3. x86_64 platform with EFI/UEFI firmware.
|
||||
|
||||
Mechanics
|
||||
---------
|
||||
|
||||
Refer to <Documentation/admin-guide/efi-stub.rst> to learn how to use the EFI stub.
|
||||
|
||||
Below are general EFI setup guidelines on the x86_64 platform,
|
||||
regardless of whether you use the EFI stub or a separate bootloader.
|
||||
|
||||
- Build the kernel with the following configuration::
|
||||
|
||||
CONFIG_FB_EFI=y
|
||||
@ -31,16 +37,27 @@ Mechanics
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFIVAR_FS=y or m # optional
|
||||
|
||||
- Create a VFAT partition on the disk
|
||||
- Copy the following to the VFAT partition:
|
||||
- Create a VFAT partition on the disk with the EFI System flag
|
||||
You can do this with fdisk with the following commands:
|
||||
|
||||
elilo bootloader with x86_64 support, elilo configuration file,
|
||||
kernel image built in first step and corresponding
|
||||
initrd. Instructions on building elilo and its dependencies
|
||||
can be found in the elilo sourceforge project.
|
||||
1. g - initialize a GPT partition table
|
||||
2. n - create a new partition
|
||||
3. t - change the partition type to "EFI System" (number 1)
|
||||
4. w - write and save the changes
|
||||
|
||||
Afterwards, initialize the VFAT filesystem by running mkfs::
|
||||
|
||||
mkfs.fat /dev/<your-partition>
|
||||
|
||||
- Copy the boot files to the VFAT partition:
|
||||
If you use the EFI stub method, the kernel also acts as an EFI executable.
|
||||
|
||||
You can just copy the bzImage to the EFI/boot/bootx64.efi path on the partition
|
||||
so that it will automatically get booted, see the <Documentation/admin-guide/efi-stub.rst> page
|
||||
for additional instructions regarding passage of kernel parameters and initramfs.
|
||||
|
||||
If you use a custom bootloader, refer to the relevant documentation for help on this part.
|
||||
|
||||
- Boot to EFI shell and invoke elilo choosing the kernel image built
|
||||
in first step.
|
||||
- If some or all EFI runtime services don't work, you can try the following
|
||||
kernel command line parameters to turn off some or all EFI runtime
|
||||
services.
|
||||
|
@ -333,6 +333,4 @@ References
|
||||
|
||||
.. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README
|
||||
|
||||
.. [#stefan] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
|
||||
|
||||
.. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
|
||||
|
9
Documentation/core-api/cgroup.rst
Normal file
9
Documentation/core-api/cgroup.rst
Normal file
@ -0,0 +1,9 @@
|
||||
==================
|
||||
Cgroup Kernel APIs
|
||||
==================
|
||||
|
||||
Device Memory Cgroup API (dmemcg)
|
||||
=================================
|
||||
.. kernel-doc:: kernel/cgroup/dmem.c
|
||||
:export:
|
||||
|
@ -53,6 +53,7 @@ Library functionality that is used throughout the kernel.
|
||||
floating-point
|
||||
union_find
|
||||
min_heap
|
||||
parser
|
||||
|
||||
Low level entry and exit
|
||||
========================
|
||||
@ -109,6 +110,7 @@ more memory-management documentation in Documentation/mm/index.rst.
|
||||
dma-isa-lpc
|
||||
swiotlb
|
||||
mm-api
|
||||
cgroup
|
||||
genalloc
|
||||
pin_user_pages
|
||||
boot-time-mm
|
||||
|
@ -3,7 +3,7 @@ Adding reference counters (krefs) to kernel objects
|
||||
===================================================
|
||||
|
||||
:Author: Corey Minyard <minyard@acm.org>
|
||||
:Author: Thomas Hellstrom <thellstrom@vmware.com>
|
||||
:Author: Thomas Hellström <thomas.hellstrom@linux.intel.com>
|
||||
|
||||
A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
|
||||
presentation on krefs, which can be found at:
|
||||
@ -321,3 +321,8 @@ rcu grace period after release_entry_rcu was called. That can be accomplished
|
||||
by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
|
||||
before using kfree, but note that synchronize_rcu() may sleep for a
|
||||
substantial amount of time.
|
||||
|
||||
Functions and structures
|
||||
========================
|
||||
|
||||
.. kernel-doc:: include/linux/kref.h
|
||||
|
@ -4,6 +4,8 @@
|
||||
Min Heap API
|
||||
============
|
||||
|
||||
:Author: Kuan-Wei Chiu <visitorckw@gmail.com>
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
|
@ -227,11 +227,119 @@ Intended use
|
||||
|
||||
Drivers that opt to use this API first need to identify which of the above 3
|
||||
quirk combinations (for a total of 8) match what the hardware documentation
|
||||
describes. Then they should wrap the packing() function, creating a new
|
||||
xxx_packing() that calls it using the proper QUIRK_* one-hot bits set.
|
||||
describes.
|
||||
|
||||
There are 3 supported usage patterns, detailed below.
|
||||
|
||||
packing()
|
||||
^^^^^^^^^
|
||||
|
||||
This API function is deprecated.
|
||||
|
||||
The packing() function returns an int-encoded error code, which protects the
|
||||
programmer against incorrect API use. The errors are not expected to occur
|
||||
during runtime, therefore it is reasonable for xxx_packing() to return void
|
||||
and simply swallow those errors. Optionally it can dump stack or print the
|
||||
error description.
|
||||
during runtime, therefore it is reasonable to wrap packing() into a custom
|
||||
function which returns void and swallows those errors. Optionally it can
|
||||
dump stack or print the error description.
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
void my_packing(void *buf, u64 *val, int startbit, int endbit,
|
||||
size_t len, enum packing_op op)
|
||||
{
|
||||
int err;
|
||||
|
||||
/* Adjust quirks accordingly */
|
||||
err = packing(buf, val, startbit, endbit, len, op, QUIRK_LSW32_IS_FIRST);
|
||||
if (likely(!err))
|
||||
return;
|
||||
|
||||
if (err == -EINVAL) {
|
||||
pr_err("Start bit (%d) expected to be larger than end (%d)\n",
|
||||
startbit, endbit);
|
||||
} else if (err == -ERANGE) {
|
||||
if ((startbit - endbit + 1) > 64)
|
||||
pr_err("Field %d-%d too large for 64 bits!\n",
|
||||
startbit, endbit);
|
||||
else
|
||||
pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n",
|
||||
*val, startbit, endbit);
|
||||
}
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
pack() and unpack()
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
These are const-correct variants of packing(), and eliminate the last "enum
|
||||
packing_op op" argument.
|
||||
|
||||
Calling pack(...) is equivalent, and preferred, to calling packing(..., PACK).
|
||||
|
||||
Calling unpack(...) is equivalent, and preferred, to calling packing(..., UNPACK).
|
||||
|
||||
pack_fields() and unpack_fields()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The library exposes optimized functions for the scenario where there are many
|
||||
fields represented in a buffer, and it encourages consumer drivers to avoid
|
||||
repetitive calls to pack() and unpack() for each field, but instead use
|
||||
pack_fields() and unpack_fields(), which reduces the code footprint.
|
||||
|
||||
These APIs use field definitions in arrays of ``struct packed_field_u8`` or
|
||||
``struct packed_field_u16``, allowing consumer drivers to minimize the size
|
||||
of these arrays according to their custom requirements.
|
||||
|
||||
The pack_fields() and unpack_fields() API functions are actually macros which
|
||||
automatically select the appropriate function at compile time, based on the
|
||||
type of the fields array passed in.
|
||||
|
||||
An additional benefit over pack() and unpack() is that sanity checks on the
|
||||
field definitions are handled at compile time with ``BUILD_BUG_ON`` rather
|
||||
than only when the offending code is executed. These functions return void and
|
||||
wrapping them to handle unexpected errors is not necessary.
|
||||
|
||||
It is recommended, but not required, that you wrap your packed buffer into a
|
||||
structured type with a fixed size. This generally makes it easier for the
|
||||
compiler to enforce that the correct size buffer is used.
|
||||
|
||||
Here is an example of how to use the fields APIs:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
/* Ordering inside the unpacked structure is flexible and can be different
|
||||
* from the packed buffer. Here, it is optimized to reduce padding.
|
||||
*/
|
||||
struct data {
|
||||
u64 field3;
|
||||
u32 field4;
|
||||
u16 field1;
|
||||
u8 field2;
|
||||
};
|
||||
|
||||
#define SIZE 13
|
||||
|
||||
typedef struct __packed { u8 buf[SIZE]; } packed_buf_t;
|
||||
|
||||
static const struct packed_field_u8 fields[] = {
|
||||
PACKED_FIELD(100, 90, struct data, field1),
|
||||
PACKED_FIELD(89, 87, struct data, field2),
|
||||
PACKED_FIELD(86, 30, struct data, field3),
|
||||
PACKED_FIELD(29, 0, struct data, field4),
|
||||
};
|
||||
|
||||
void unpack_your_data(const packed_buf_t *buf, struct data *unpacked)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(*buf) != SIZE);
|
||||
|
||||
unpack_fields(buf, sizeof(*buf), unpacked, fields,
|
||||
QUIRK_LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
void pack_your_data(const struct data *unpacked, packed_buf_t *buf)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(*buf) != SIZE);
|
||||
|
||||
pack_fields(buf, sizeof(*buf), unpacked, fields,
|
||||
QUIRK_LITTLE_ENDIAN);
|
||||
}
|
||||
|
17
Documentation/core-api/parser.rst
Normal file
17
Documentation/core-api/parser.rst
Normal file
@ -0,0 +1,17 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
==============
|
||||
Generic parser
|
||||
==============
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
The generic parser is a simple parser for parsing mount options,
|
||||
filesystem options, driver options, subsystem options, etc.
|
||||
|
||||
Parser API
|
||||
==========
|
||||
|
||||
.. kernel-doc:: lib/parser.c
|
||||
:export:
|
@ -41,9 +41,9 @@ entries.
|
||||
In addition to the macros EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(), that allow
|
||||
exporting of kernel symbols to the kernel symbol table, variants of these are
|
||||
available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
|
||||
EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace.
|
||||
Please note that due to macro expansion that argument needs to be a
|
||||
preprocessor symbol. E.g. to export the symbol ``usb_stor_suspend`` into the
|
||||
EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace as a
|
||||
string constant. Note that this string must not contain whitespace.
|
||||
E.g. to export the symbol ``usb_stor_suspend`` into the
|
||||
namespace ``USB_STORAGE``, use::
|
||||
|
||||
EXPORT_SYMBOL_NS(usb_stor_suspend, "USB_STORAGE");
|
||||
@ -78,11 +78,10 @@ as this argument has preference over a default symbol namespace.
|
||||
A second option to define the default namespace is directly in the compilation
|
||||
unit as preprocessor statement. The above example would then read::
|
||||
|
||||
#undef DEFAULT_SYMBOL_NAMESPACE
|
||||
#define DEFAULT_SYMBOL_NAMESPACE "USB_COMMON"
|
||||
|
||||
within the corresponding compilation unit before any EXPORT_SYMBOL macro is
|
||||
used.
|
||||
within the corresponding compilation unit before the #include for
|
||||
<linux/export.h>. Typically it's placed before the first #include statement.
|
||||
|
||||
3. How to use Symbols exported in Namespaces
|
||||
============================================
|
||||
|
@ -42,8 +42,8 @@ call xa_tag_pointer() to create an entry with a tag, xa_untag_pointer()
|
||||
to turn a tagged entry back into an untagged pointer and xa_pointer_tag()
|
||||
to retrieve the tag of an entry. Tagged pointers use the same bits that
|
||||
are used to distinguish value entries from normal pointers, so you must
|
||||
decide whether they want to store value entries or tagged pointers in
|
||||
any particular XArray.
|
||||
decide whether you want to store value entries or tagged pointers in any
|
||||
particular XArray.
|
||||
|
||||
The XArray does not support storing IS_ERR() pointers as some
|
||||
conflict with value entries or internal entries.
|
||||
@ -52,8 +52,9 @@ An unusual feature of the XArray is the ability to create entries which
|
||||
occupy a range of indices. Once stored to, looking up any index in
|
||||
the range will return the same entry as looking up any other index in
|
||||
the range. Storing to any index will store to all of them. Multi-index
|
||||
entries can be explicitly split into smaller entries, or storing ``NULL``
|
||||
into any entry will cause the XArray to forget about the range.
|
||||
entries can be explicitly split into smaller entries. Unsetting (using
|
||||
xa_erase() or xa_store() with ``NULL``) any entry will cause the XArray
|
||||
to forget about the range.
|
||||
|
||||
Normal API
|
||||
==========
|
||||
@ -63,13 +64,14 @@ for statically allocated XArrays or xa_init() for dynamically
|
||||
allocated ones. A freshly-initialised XArray contains a ``NULL``
|
||||
pointer at every index.
|
||||
|
||||
You can then set entries using xa_store() and get entries
|
||||
using xa_load(). xa_store will overwrite any entry with the
|
||||
new entry and return the previous entry stored at that index. You can
|
||||
use xa_erase() instead of calling xa_store() with a
|
||||
``NULL`` entry. There is no difference between an entry that has never
|
||||
been stored to, one that has been erased and one that has most recently
|
||||
had ``NULL`` stored to it.
|
||||
You can then set entries using xa_store() and get entries using
|
||||
xa_load(). xa_store() will overwrite any entry with the new entry and
|
||||
return the previous entry stored at that index. You can unset entries
|
||||
using xa_erase() or by setting the entry to ``NULL`` using xa_store().
|
||||
There is no difference between an entry that has never been stored to
|
||||
and one that has been erased with xa_erase(); an entry that has most
|
||||
recently had ``NULL`` stored to it is also equivalent except if the
|
||||
XArray was initialized with ``XA_FLAGS_ALLOC``.
|
||||
|
||||
You can conditionally replace an entry at an index by using
|
||||
xa_cmpxchg(). Like cmpxchg(), it will only succeed if
|
||||
|
@ -10,6 +10,9 @@ whole; patches welcome!
|
||||
A brief overview of testing-specific tools can be found in
|
||||
Documentation/dev-tools/testing-overview.rst
|
||||
|
||||
Tools that are specific to debugging can be found in
|
||||
Documentation/process/debugging/index.rst
|
||||
|
||||
.. toctree::
|
||||
:caption: Table of contents
|
||||
:maxdepth: 2
|
||||
@ -27,8 +30,6 @@ Documentation/dev-tools/testing-overview.rst
|
||||
kmemleak
|
||||
kcsan
|
||||
kfence
|
||||
gdb-kernel-debugging
|
||||
kgdb
|
||||
kselftest
|
||||
kunit/index
|
||||
ktap
|
||||
|
@ -1,25 +0,0 @@
|
||||
Altera SOCFPGA System Manager
|
||||
|
||||
Required properties:
|
||||
- compatible : "altr,sys-mgr"
|
||||
- reg : Should contain 1 register ranges(address and length)
|
||||
- cpu1-start-addr : CPU1 start address in hex.
|
||||
|
||||
Example:
|
||||
sysmgr@ffd08000 {
|
||||
compatible = "altr,sys-mgr";
|
||||
reg = <0xffd08000 0x1000>;
|
||||
cpu1-start-addr = <0xffd080c4>;
|
||||
};
|
||||
|
||||
ARM64 - Stratix10
|
||||
Required properties:
|
||||
- compatible : "altr,sys-mgr-s10"
|
||||
- reg : Should contain 1 register range(address and length)
|
||||
for system manager register.
|
||||
|
||||
Example:
|
||||
sysmgr@ffd12000 {
|
||||
compatible = "altr,sys-mgr-s10";
|
||||
reg = <0xffd12000 0x228>;
|
||||
};
|
@ -38,6 +38,12 @@ properties:
|
||||
enum:
|
||||
- arm,coresight-dummy-source
|
||||
|
||||
arm,static-trace-id:
|
||||
description: If dummy source needs static id support, use this to set trace id.
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
minimum: 1
|
||||
maximum: 111
|
||||
|
||||
out-ports:
|
||||
$ref: /schemas/graph.yaml#/properties/ports
|
||||
|
||||
|
@ -45,7 +45,22 @@ properties:
|
||||
patternProperties:
|
||||
'^port@[01]$':
|
||||
description: Output connections to CoreSight Trace bus
|
||||
$ref: /schemas/graph.yaml#/properties/port
|
||||
$ref: /schemas/graph.yaml#/$defs/port-base
|
||||
unevaluatedProperties: false
|
||||
|
||||
properties:
|
||||
endpoint:
|
||||
$ref: /schemas/graph.yaml#/$defs/endpoint-base
|
||||
unevaluatedProperties: false
|
||||
|
||||
properties:
|
||||
filter-source:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description:
|
||||
phandle to the coresight trace source device matching the
|
||||
hard coded filtering for this port
|
||||
|
||||
remote-endpoint: true
|
||||
|
||||
required:
|
||||
- compatible
|
||||
@ -72,6 +87,7 @@ examples:
|
||||
reg = <0>;
|
||||
replicator_out_port0: endpoint {
|
||||
remote-endpoint = <&etb_in_port>;
|
||||
filter-source = <&tpdm_video>;
|
||||
};
|
||||
};
|
||||
|
||||
@ -79,6 +95,7 @@ examples:
|
||||
reg = <1>;
|
||||
replicator_out_port1: endpoint {
|
||||
remote-endpoint = <&tpiu_in_port>;
|
||||
filter-source = <&tpdm_mdss>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
@ -23,7 +23,7 @@ description: |
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
pattern: "^ete([0-9a-f]+)$"
|
||||
pattern: "^ete(-[0-9]+)?$"
|
||||
compatible:
|
||||
items:
|
||||
- const: arm,embedded-trace-extension
|
||||
@ -55,13 +55,13 @@ examples:
|
||||
|
||||
# An ETE node without legacy CoreSight connections
|
||||
- |
|
||||
ete0 {
|
||||
ete-0 {
|
||||
compatible = "arm,embedded-trace-extension";
|
||||
cpu = <&cpu_0>;
|
||||
};
|
||||
# An ETE node with legacy CoreSight connections
|
||||
- |
|
||||
ete1 {
|
||||
ete-1 {
|
||||
compatible = "arm,embedded-trace-extension";
|
||||
cpu = <&cpu_1>;
|
||||
|
||||
|
@ -74,6 +74,7 @@ properties:
|
||||
- description: AST2600 based boards
|
||||
items:
|
||||
- enum:
|
||||
- ampere,mtjefferson-bmc
|
||||
- ampere,mtmitchell-bmc
|
||||
- aspeed,ast2600-evb
|
||||
- aspeed,ast2600-evb-a1
|
||||
@ -91,6 +92,7 @@ properties:
|
||||
- ibm,everest-bmc
|
||||
- ibm,fuji-bmc
|
||||
- ibm,rainier-bmc
|
||||
- ibm,sbp1-bmc
|
||||
- ibm,system1-bmc
|
||||
- ibm,tacoma-bmc
|
||||
- inventec,starscream-bmc
|
||||
|
@ -180,6 +180,13 @@ properties:
|
||||
- const: atmel,sama5d4
|
||||
- const: atmel,sama5
|
||||
|
||||
- description: Microchip SAMA7D65 Curiosity Board
|
||||
items:
|
||||
- const: microchip,sama7d65-curiosity
|
||||
- const: microchip,sama7d65
|
||||
- const: microchip,sama7d6
|
||||
- const: microchip,sama7
|
||||
|
||||
- items:
|
||||
- const: microchip,sama7g5ek # SAMA7G5 Evaluation Kit
|
||||
- const: microchip,sama7g5
|
||||
|
@ -13,6 +13,7 @@ PIT Timer required properties:
|
||||
PIT64B Timer required properties:
|
||||
- compatible: Should be "microchip,sam9x60-pit64b" or
|
||||
"microchip,sam9x7-pit64b", "microchip,sam9x60-pit64b" or
|
||||
"microchip,sama7d65-pit64b", "microchip,sam9x60-pit64b"
|
||||
- reg: Should contain registers location and length
|
||||
- interrupts: Should contain interrupt for PIT64B timer
|
||||
- clocks: Should contain the available clock sources for PIT64B timer.
|
||||
@ -27,12 +28,13 @@ Its subnodes can be:
|
||||
- watchdog: compatible should be "atmel,at91rm9200-wdt"
|
||||
|
||||
RAMC SDRAM/DDR Controller required properties:
|
||||
- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
|
||||
"atmel,at91sam9260-sdramc",
|
||||
"atmel,at91sam9g45-ddramc",
|
||||
"atmel,sama5d3-ddramc",
|
||||
"microchip,sam9x60-ddramc",
|
||||
"microchip,sama7g5-uddrc",
|
||||
- compatible: Should be "atmel,at91rm9200-sdramc", "syscon" or
|
||||
"atmel,at91sam9260-sdramc" or
|
||||
"atmel,at91sam9g45-ddramc" or
|
||||
"atmel,sama5d3-ddramc" or
|
||||
"microchip,sam9x60-ddramc" or
|
||||
"microchip,sama7g5-uddrc" or
|
||||
"microchip,sama7d65-uddrc", "microchip,sama7g5-uddrc" or
|
||||
"microchip,sam9x7-ddramc", "atmel,sama5d3-ddramc".
|
||||
- reg: Should contain registers location and length
|
||||
|
||||
|
@ -34,6 +34,7 @@ properties:
|
||||
- enum:
|
||||
- netgear,r8000p
|
||||
- tplink,archer-c2300-v1
|
||||
- zyxel,ex3510b
|
||||
- const: brcm,bcm4906
|
||||
- const: brcm,bcm4908
|
||||
- const: brcm,bcmbca
|
||||
@ -115,6 +116,7 @@ properties:
|
||||
items:
|
||||
- enum:
|
||||
- brcm,bcm96846
|
||||
- genexis,xg6846b
|
||||
- const: brcm,bcm6846
|
||||
- const: brcm,bcmbca
|
||||
|
||||
|
40
Documentation/devicetree/bindings/arm/blaize.yaml
Normal file
40
Documentation/devicetree/bindings/arm/blaize.yaml
Normal file
@ -0,0 +1,40 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/blaize.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Blaize Platforms
|
||||
|
||||
maintainers:
|
||||
- James Cowgill <james.cowgill@blaize.com>
|
||||
- Matt Redfearn <matt.redfearn@blaize.com>
|
||||
- Neil Jones <neil.jones@blaize.com>
|
||||
- Nikolaos Pasaloukos <nikolaos.pasaloukos@blaize.com>
|
||||
|
||||
description: |
|
||||
Blaize Platforms using SoCs designed by Blaize Inc.
|
||||
|
||||
The products based on the BLZP1600 SoC:
|
||||
|
||||
- BLZP1600-SoM: SoM (System on Module)
|
||||
- BLZP1600-CB2: Development board CB2 based on BLZP1600-SoM
|
||||
|
||||
BLZP1600 SoC integrates a dual core ARM Cortex A53 cluster
|
||||
and a Blaize Graph Streaming Processor for AI and ML workloads,
|
||||
plus a suite of connectivity and other peripherals.
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: '/'
|
||||
compatible:
|
||||
oneOf:
|
||||
- description: Blaize BLZP1600 based boards
|
||||
items:
|
||||
- enum:
|
||||
- blaize,blzp1600-cb2
|
||||
- const: blaize,blzp1600
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
...
|
@ -1091,6 +1091,7 @@ properties:
|
||||
- dmo,imx8mp-data-modul-edm-sbc # i.MX8MP eDM SBC
|
||||
- emcraft,imx8mp-navqp # i.MX8MP Emcraft Systems NavQ+ Kit
|
||||
- fsl,imx8mp-evk # i.MX8MP EVK Board
|
||||
- fsl,imx8mp-evk-revb4 # i.MX8MP EVK Rev B4 Board
|
||||
- gateworks,imx8mp-gw71xx-2x # i.MX8MP Gateworks Board
|
||||
- gateworks,imx8mp-gw72xx-2x # i.MX8MP Gateworks Board
|
||||
- gateworks,imx8mp-gw73xx-2x # i.MX8MP Gateworks Board
|
||||
@ -1106,6 +1107,15 @@ properties:
|
||||
- ysoft,imx8mp-iota2-lumpy # Y Soft i.MX8MP IOTA2 Lumpy Board
|
||||
- const: fsl,imx8mp
|
||||
|
||||
- description: ABB Boards with i.MX8M Plus Modules from ADLink
|
||||
items:
|
||||
- enum:
|
||||
- abb,imx8mp-aristanetos3-adpismarc # i.MX8MP ABB SoM on PI SMARC Board
|
||||
- abb,imx8mp-aristanetos3-helios # i.MX8MP ABB SoM on helios Board
|
||||
- abb,imx8mp-aristanetos3-proton2s # i.MX8MP ABB SoM on proton2s Board
|
||||
- const: abb,imx8mp-aristanetos3-som # i.MX8MP ABB SoM
|
||||
- const: fsl,imx8mp
|
||||
|
||||
- description: Avnet (MSC Branded) Boards with SM2S i.MX8M Plus Modules
|
||||
items:
|
||||
- const: avnet,sm2s-imx8mp-14N0600E-ep1 # SM2S-IMX8PLUS-14N0600E on SM2-MB-EP1 Carrier Board
|
||||
@ -1262,6 +1272,7 @@ properties:
|
||||
items:
|
||||
- enum:
|
||||
- fsl,imx8qm-mek # i.MX8QM MEK Board
|
||||
- fsl,imx8qm-mek-revd # i.MX8QM MEK Rev D Board
|
||||
- toradex,apalis-imx8 # Apalis iMX8 Modules
|
||||
- toradex,apalis-imx8-v1.1 # Apalis iMX8 V1.1 Modules
|
||||
- const: fsl,imx8qm
|
||||
@ -1290,6 +1301,7 @@ properties:
|
||||
- enum:
|
||||
- einfochips,imx8qxp-ai_ml # i.MX8QXP AI_ML Board
|
||||
- fsl,imx8qxp-mek # i.MX8QXP MEK Board
|
||||
- fsl,imx8qxp-mek-wcpu # i.MX8QXP MEK WCPU Board
|
||||
- const: fsl,imx8qxp
|
||||
|
||||
- description: i.MX8DXL based Boards
|
||||
|
@ -239,6 +239,34 @@ properties:
|
||||
- enum:
|
||||
- mediatek,mt8183-pumpkin
|
||||
- const: mediatek,mt8183
|
||||
- description: Google Chinchou (Asus Chromebook CZ1104CM2A/CZ1204CM2A)
|
||||
items:
|
||||
- const: google,chinchou-sku0
|
||||
- const: google,chinchou-sku2
|
||||
- const: google,chinchou-sku4
|
||||
- const: google,chinchou-sku5
|
||||
- const: google,chinchou
|
||||
- const: mediatek,mt8186
|
||||
- description: Google Chinchou (Asus Chromebook CZ1104FM2A/CZ1204FM2A/CZ1104CM2A/CZ1204CM2A)
|
||||
items:
|
||||
- const: google,chinchou-sku1
|
||||
- const: google,chinchou-sku3
|
||||
- const: google,chinchou-sku6
|
||||
- const: google,chinchou-sku7
|
||||
- const: google,chinchou-sku17
|
||||
- const: google,chinchou-sku20
|
||||
- const: google,chinchou-sku22
|
||||
- const: google,chinchou-sku23
|
||||
- const: google,chinchou
|
||||
- const: mediatek,mt8186
|
||||
- description: Google Chinchou360 (Asus Chromebook CZ1104FM2A/CZ1204FM2A Flip)
|
||||
items:
|
||||
- const: google,chinchou-sku16
|
||||
- const: google,chinchou-sku18
|
||||
- const: google,chinchou-sku19
|
||||
- const: google,chinchou-sku21
|
||||
- const: google,chinchou
|
||||
- const: mediatek,mt8186
|
||||
- description: Google Magneton (Lenovo IdeaPad Slim 3 Chromebook (14M868))
|
||||
items:
|
||||
- const: google,steelix-sku393219
|
||||
@ -263,6 +291,19 @@ properties:
|
||||
- const: google,steelix-sku196608
|
||||
- const: google,steelix
|
||||
- const: mediatek,mt8186
|
||||
- description: Google Starmie (ASUS Chromebook Enterprise CM30 (CM3001))
|
||||
items:
|
||||
- const: google,starmie-sku0
|
||||
- const: google,starmie-sku2
|
||||
- const: google,starmie-sku3
|
||||
- const: google,starmie
|
||||
- const: mediatek,mt8186
|
||||
- description: Google Starmie (ASUS Chromebook Enterprise CM30 (CM3001))
|
||||
items:
|
||||
- const: google,starmie-sku1
|
||||
- const: google,starmie-sku4
|
||||
- const: google,starmie
|
||||
- const: mediatek,mt8186
|
||||
- description: Google Steelix (Lenovo 300e Yoga Chromebook Gen 4)
|
||||
items:
|
||||
- enum:
|
||||
@ -307,6 +348,19 @@ properties:
|
||||
- enum:
|
||||
- mediatek,mt8186-evb
|
||||
- const: mediatek,mt8186
|
||||
- description: Google Ciri (Lenovo Chromebook Duet (11", 9))
|
||||
items:
|
||||
- enum:
|
||||
- google,ciri-sku0
|
||||
- google,ciri-sku1
|
||||
- google,ciri-sku2
|
||||
- google,ciri-sku3
|
||||
- google,ciri-sku4
|
||||
- google,ciri-sku5
|
||||
- google,ciri-sku6
|
||||
- google,ciri-sku7
|
||||
- const: google,ciri
|
||||
- const: mediatek,mt8188
|
||||
- items:
|
||||
- enum:
|
||||
- mediatek,mt8188-evb
|
||||
@ -316,12 +370,6 @@ properties:
|
||||
- const: google,hayato-rev1
|
||||
- const: google,hayato
|
||||
- const: mediatek,mt8192
|
||||
- description: Google Hayato rev5
|
||||
items:
|
||||
- const: google,hayato-rev5-sku2
|
||||
- const: google,hayato-sku2
|
||||
- const: google,hayato
|
||||
- const: mediatek,mt8192
|
||||
- description: Google Spherion (Acer Chromebook 514)
|
||||
items:
|
||||
- const: google,spherion-rev3
|
||||
@ -330,11 +378,6 @@ properties:
|
||||
- const: google,spherion-rev0
|
||||
- const: google,spherion
|
||||
- const: mediatek,mt8192
|
||||
- description: Google Spherion rev4 (Acer Chromebook 514)
|
||||
items:
|
||||
- const: google,spherion-rev4
|
||||
- const: google,spherion
|
||||
- const: mediatek,mt8192
|
||||
- items:
|
||||
- enum:
|
||||
- mediatek,mt8192-evb
|
||||
|
@ -23,7 +23,7 @@ description: |
|
||||
select:
|
||||
properties:
|
||||
compatible:
|
||||
pattern: "^qcom,.*(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1e)[0-9]+.*$"
|
||||
pattern: "^qcom,.*(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sar|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
|
||||
required:
|
||||
- compatible
|
||||
|
||||
@ -31,7 +31,8 @@ properties:
|
||||
compatible:
|
||||
oneOf:
|
||||
# Preferred naming style for compatibles of SoC components:
|
||||
- pattern: "^qcom,(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1e)[0-9]+(pro)?-.*$"
|
||||
- pattern: "^qcom,(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+(pro)?-.*$"
|
||||
- pattern: "^qcom,sar[0-9]+[a-z]?-.*$"
|
||||
- pattern: "^qcom,(sa|sc)8[0-9]+[a-z][a-z]?-.*$"
|
||||
|
||||
# Legacy namings - variations of existing patterns/compatibles are OK,
|
||||
@ -39,9 +40,9 @@ properties:
|
||||
- pattern: "^qcom,[ak]pss-wdt-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
|
||||
- pattern: "^qcom,gcc-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
|
||||
- pattern: "^qcom,mmcc-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
|
||||
- pattern: "^qcom,pcie-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1e)[0-9]+.*$"
|
||||
- pattern: "^qcom,pcie-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
|
||||
- pattern: "^qcom,rpm-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm)[0-9]+.*$"
|
||||
- pattern: "^qcom,scm-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1e)[0-9]+.*$"
|
||||
- pattern: "^qcom,scm-(apq|ipq|mdm|msm|qcm|qcs|q[dr]u|sa|sc|sd[amx]|sm|x1[ep])[0-9]+.*$"
|
||||
- enum:
|
||||
- qcom,dsi-ctrl-6g-qcm2290
|
||||
- qcom,gpucc-sdm630
|
||||
|
@ -19,29 +19,42 @@ description: |
|
||||
|
||||
apq8016
|
||||
apq8026
|
||||
apq8064
|
||||
apq8074
|
||||
apq8084
|
||||
apq8094
|
||||
apq8096
|
||||
ipq4018
|
||||
ipq4019
|
||||
ipq5018
|
||||
ipq5332
|
||||
ipq5424
|
||||
ipq6018
|
||||
ipq8064
|
||||
ipq8074
|
||||
ipq9574
|
||||
mdm9615
|
||||
msm8226
|
||||
msm8660
|
||||
msm8916
|
||||
msm8917
|
||||
msm8926
|
||||
msm8929
|
||||
msm8939
|
||||
msm8953
|
||||
msm8956
|
||||
msm8960
|
||||
msm8974
|
||||
msm8974pro
|
||||
msm8976
|
||||
msm8992
|
||||
msm8994
|
||||
msm8996
|
||||
msm8996pro
|
||||
msm8998
|
||||
qcs404
|
||||
qcs615
|
||||
qcs8300
|
||||
qcs8550
|
||||
qcm2290
|
||||
qcm6490
|
||||
@ -53,6 +66,7 @@ description: |
|
||||
sa8155p
|
||||
sa8540p
|
||||
sa8775p
|
||||
sar2130p
|
||||
sc7180
|
||||
sc7280
|
||||
sc8180x
|
||||
@ -84,7 +98,10 @@ description: |
|
||||
sm8450
|
||||
sm8550
|
||||
sm8650
|
||||
sm8750
|
||||
x1e78100
|
||||
x1e80100
|
||||
x1p42100
|
||||
|
||||
There are many devices in the list below that run the standard ChromeOS
|
||||
bootloader setup and use the open source depthcharge bootloader to boot the
|
||||
@ -250,6 +267,11 @@ properties:
|
||||
- yiming,uz801-v3
|
||||
- const: qcom,msm8916
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- xiaomi,riva
|
||||
- const: qcom,msm8917
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- motorola,potter
|
||||
@ -352,6 +374,11 @@ properties:
|
||||
- qcom,ipq5332-ap-mi01.9
|
||||
- const: qcom,ipq5332
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,ipq5424-rdp466
|
||||
- const: qcom,ipq5424
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- mikrotik,rb3011
|
||||
@ -408,6 +435,12 @@ properties:
|
||||
- qcom,qru1000-idp
|
||||
- const: qcom,qru1000
|
||||
|
||||
- description: Qualcomm AR2 Gen1 platform
|
||||
items:
|
||||
- enum:
|
||||
- qcom,qar2130p
|
||||
- const: qcom,sar2130p
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- acer,aspire1
|
||||
@ -822,8 +855,10 @@ properties:
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- huawei,gaokun3
|
||||
- lenovo,thinkpad-x13s
|
||||
- microsoft,arcata
|
||||
- microsoft,blackrock
|
||||
- qcom,sc8280xp-crd
|
||||
- qcom,sc8280xp-qrd
|
||||
- const: qcom,sc8280xp
|
||||
@ -898,6 +933,16 @@ properties:
|
||||
- const: qcom,qcs404-evb
|
||||
- const: qcom,qcs404
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,qcs8300-ride
|
||||
- const: qcom,qcs8300
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,qcs615-ride
|
||||
- const: qcom,qcs615
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,sa8155p-adp
|
||||
@ -1064,6 +1109,18 @@ properties:
|
||||
- qcom,sm8650-qrd
|
||||
- const: qcom,sm8650
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,sm8750-mtp
|
||||
- qcom,sm8750-qrd
|
||||
- const: qcom,sm8750
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,x1e001de-devkit
|
||||
- const: qcom,x1e001de
|
||||
- const: qcom,x1e80100
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- lenovo,thinkpad-t14s
|
||||
@ -1074,6 +1131,7 @@ properties:
|
||||
- enum:
|
||||
- asus,vivobook-s15
|
||||
- dell,xps13-9345
|
||||
- hp,omnibook-x14
|
||||
- lenovo,yoga-slim7x
|
||||
- microsoft,romulus13
|
||||
- microsoft,romulus15
|
||||
@ -1081,6 +1139,11 @@ properties:
|
||||
- qcom,x1e80100-qcp
|
||||
- const: qcom,x1e80100
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- qcom,x1p42100-crd
|
||||
- const: qcom,x1p42100
|
||||
|
||||
# Board compatibles go above
|
||||
|
||||
qcom,msm-id:
|
||||
@ -1158,6 +1221,7 @@ allOf:
|
||||
- qcom,apq8026
|
||||
- qcom,apq8094
|
||||
- qcom,apq8096
|
||||
- qcom,msm8917
|
||||
- qcom,msm8939
|
||||
- qcom,msm8953
|
||||
- qcom,msm8956
|
||||
|
@ -81,6 +81,17 @@ properties:
|
||||
- const: azw,beelink-a1
|
||||
- const: rockchip,rk3328
|
||||
|
||||
- description: BigTreeTech CB2 Manta M4/8P
|
||||
items:
|
||||
- const: bigtreetech,cb2-manta
|
||||
- const: bigtreetech,cb2
|
||||
- const: rockchip,rk3566
|
||||
|
||||
- description: BigTreeTech Pi 2
|
||||
items:
|
||||
- const: bigtreetech,pi2
|
||||
- const: rockchip,rk3566
|
||||
|
||||
- description: bq Curie 2 tablet
|
||||
items:
|
||||
- const: mundoreader,bq-curie2
|
||||
@ -167,6 +178,13 @@ properties:
|
||||
- const: engicam,px30-core
|
||||
- const: rockchip,px30
|
||||
|
||||
- description: Firefly Core-3588J-based boards
|
||||
items:
|
||||
- enum:
|
||||
- firefly,itx-3588j
|
||||
- const: firefly,core-3588j
|
||||
- const: rockchip,rk3588
|
||||
|
||||
- description: Firefly Core-PX30-JD4 on MB-JD4-PX30 baseboard
|
||||
items:
|
||||
- const: firefly,px30-jd4-core-mb
|
||||
@ -597,6 +615,11 @@ properties:
|
||||
- const: google,veyron
|
||||
- const: rockchip,rk3288
|
||||
|
||||
- description: H96 Max V58 TV Box
|
||||
items:
|
||||
- const: haochuangyi,h96-max-v58
|
||||
- const: rockchip,rk3588
|
||||
|
||||
- description: Haoyu MarsBoard RK3066
|
||||
items:
|
||||
- const: haoyu,marsboard-rk3066
|
||||
@ -812,6 +835,12 @@ properties:
|
||||
- const: radxa,e20c
|
||||
- const: rockchip,rk3528
|
||||
|
||||
- description: Radxa E52C
|
||||
items:
|
||||
- const: radxa,e52c
|
||||
- const: rockchip,rk3582
|
||||
- const: rockchip,rk3588s
|
||||
|
||||
- description: Radxa Rock
|
||||
items:
|
||||
- const: radxa,rock
|
||||
@ -1006,6 +1035,21 @@ properties:
|
||||
- const: rockchip,rk3399-sapphire-excavator
|
||||
- const: rockchip,rk3399
|
||||
|
||||
- description: Rockchip RK3566 BOX Evaluation Demo board
|
||||
items:
|
||||
- const: rockchip,rk3566-box-demo
|
||||
- const: rockchip,rk3566
|
||||
|
||||
- description: Rockchip RK3568 Evaluation board
|
||||
items:
|
||||
- const: rockchip,rk3568-evb1-v10
|
||||
- const: rockchip,rk3568
|
||||
|
||||
- description: Rockchip RK3576 Evaluation board
|
||||
items:
|
||||
- const: rockchip,rk3576-evb1-v10
|
||||
- const: rockchip,rk3576
|
||||
|
||||
- description: Rockchip RK3588 Evaluation board
|
||||
items:
|
||||
- const: rockchip,rk3588-evb1-v10
|
||||
@ -1026,6 +1070,23 @@ properties:
|
||||
- const: rockchip,rk3588-toybrick-x0
|
||||
- const: rockchip,rk3588
|
||||
|
||||
- description: Sinovoip RK3308 Banana Pi P2 Pro
|
||||
items:
|
||||
- const: sinovoip,rk3308-bpi-p2pro
|
||||
- const: rockchip,rk3308
|
||||
|
||||
- description: Sinovoip RK3568 Banana Pi R2 Pro
|
||||
items:
|
||||
- const: sinovoip,rk3568-bpi-r2pro
|
||||
- const: rockchip,rk3568
|
||||
|
||||
- description: Sonoff iHost Smart Home Hub
|
||||
items:
|
||||
- const: itead,sonoff-ihost
|
||||
- enum:
|
||||
- rockchip,rv1126
|
||||
- rockchip,rv1109
|
||||
|
||||
- description: Theobroma Systems PX30-uQ7 with Haikou baseboard
|
||||
items:
|
||||
- const: tsd,px30-ringneck-haikou
|
||||
@ -1075,9 +1136,11 @@ properties:
|
||||
- const: xunlong,orangepi-3b
|
||||
- const: rockchip,rk3566
|
||||
|
||||
- description: Xunlong Orange Pi 5 Plus
|
||||
- description: Xunlong Orange Pi 5 Max/Plus
|
||||
items:
|
||||
- const: xunlong,orangepi-5-plus
|
||||
- enum:
|
||||
- xunlong,orangepi-5-max
|
||||
- xunlong,orangepi-5-plus
|
||||
- const: rockchip,rk3588
|
||||
|
||||
- description: Xunlong Orange Pi R1 Plus / LTS
|
||||
@ -1099,33 +1162,6 @@ properties:
|
||||
- const: zkmagic,a95x-z2
|
||||
- const: rockchip,rk3318
|
||||
|
||||
- description: Rockchip RK3566 BOX Evaluation Demo board
|
||||
items:
|
||||
- const: rockchip,rk3566-box-demo
|
||||
- const: rockchip,rk3566
|
||||
|
||||
- description: Rockchip RK3568 Evaluation board
|
||||
items:
|
||||
- const: rockchip,rk3568-evb1-v10
|
||||
- const: rockchip,rk3568
|
||||
|
||||
- description: Sinovoip RK3308 Banana Pi P2 Pro
|
||||
items:
|
||||
- const: sinovoip,rk3308-bpi-p2pro
|
||||
- const: rockchip,rk3308
|
||||
|
||||
- description: Sinovoip RK3568 Banana Pi R2 Pro
|
||||
items:
|
||||
- const: sinovoip,rk3568-bpi-r2pro
|
||||
- const: rockchip,rk3568
|
||||
|
||||
- description: Sonoff iHost Smart Home Hub
|
||||
items:
|
||||
- const: itead,sonoff-ihost
|
||||
- enum:
|
||||
- rockchip,rv1126
|
||||
- rockchip,rv1109
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
...
|
||||
|
@ -53,11 +53,17 @@ properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
power-controller:
|
||||
type: object
|
||||
|
||||
reboot-mode:
|
||||
type: object
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
additionalProperties: true
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
|
@ -240,6 +240,9 @@ properties:
|
||||
items:
|
||||
- enum:
|
||||
- samsung,c1s # Samsung Galaxy Note20 5G (SM-N981B)
|
||||
- samsung,r8s # Samsung Galaxy S20 FE (SM-G780F)
|
||||
- samsung,x1s # Samsung Galaxy S20 5G (SM-G981B)
|
||||
- samsung,x1slte # Samsung Galaxy S20 (SM-G980F)
|
||||
- const: samsung,exynos990
|
||||
|
||||
- description: Exynos Auto v9 based boards
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user