tracing and sorttable updates for 6.15:

- Implement arm64 build time sorting of the mcount location table
 
   When gcc is used to build arm64, the mcount_loc section is all zeros in
   the vmlinux elf file. The addresses are stored in the Elf_Rela location.
   To sort at build time, an array is allocated and the addresses are added
   to it via the content of the mcount_loc section as well as the Elf_Rela
   data. After sorting, the information is put back into the Elf_Rela which
   now has the section sorted.
 
 - Make sorting of mcount location table for arm64 work with clang as well
 
   When clang is used, the mcount_loc section contains the addresses, unlike
   the gcc build. An array is still created and the sorting works for both
   methods.
 
 - Remove weak functions from the mcount_loc section
 
   Have the sorttable code pass in the data of functions defined via nm -S
   which shows the functions as well as their sizes. Using this information
   the sorttable code can determine if a function in the mcount_loc section
   was weak and overridden. If the function is not found, it is set to be
   zero. On boot, when the mcount_loc section is read and the ftrace table is
   created, if the address in the mcount_loc is not in the kernel core text
   then it is removed and not added to the ftrace_filter_functions (the
   functions that can be attached by ftrace callbacks).
 
 - Update and fix the reporting of how much data is used for ftrace functions
 
   On boot, a report of how many pages were used by the ftrace table as well
   as how they were grouped (the table holds a list of sections that are
   groups of pages that were able to be allocated). The removing of the weak
   functions required the accounting to be updated.
 -----BEGIN PGP SIGNATURE-----
 
 iIoEABYIADIWIQRRSw7ePDh/lE+zeZMp5XQQmuv6qgUCZ+MnThQccm9zdGVkdEBn
 b29kbWlzLm9yZwAKCRAp5XQQmuv6qivsAQDhPOCaONai7rvHX9T1aOHGjdajZ7SI
 qoZgBOsc2ZUkoQD/U2M/m7Yof9aR4I+VFKtT5NsAwpfqPSOL/t/1j6UEOQ8=
 =45AV
 -----END PGP SIGNATURE-----

Merge tag 'trace-sorttable-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing / sorttable updates from Steven Rostedt:

 - Implement arm64 build time sorting of the mcount location table

   When gcc is used to build arm64, the mcount_loc section is all zeros
   in the vmlinux elf file. The addresses are stored in the Elf_Rela
   location.

   To sort at build time, an array is allocated and the addresses are
   added to it via the content of the mcount_loc section as well as the
   Elf_Rela data. After sorting, the information is put back into the
   Elf_Rela which now has the section sorted.

 - Make sorting of mcount location table for arm64 work with clang as
   well

   When clang is used, the mcount_loc section contains the addresses,
   unlike the gcc build. An array is still created and the sorting works
   for both methods.

 - Remove weak functions from the mcount_loc section

   Have the sorttable code pass in the data of functions defined via
   'nm -S' which shows the functions as well as their sizes. Using this
   information the sorttable code can determine if a function in the
   mcount_loc section was weak and overridden. If the function is not
   found, it is set to be zero. On boot, when the mcount_loc section is
   read and the ftrace table is created, if the address in the
   mcount_loc is not in the kernel core text then it is removed and not
   added to the ftrace_filter_functions (the functions that can be
   attached by ftrace callbacks).

 - Update and fix the reporting of how much data is used for ftrace
   functions

   On boot, a report of how many pages were used by the ftrace table as
   well as how they were grouped (the table holds a list of sections
   that are groups of pages that were able to be allocated). The
   removing of the weak functions required the accounting to be updated.

* tag 'trace-sorttable-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  scripts/sorttable: Allow matches to functions before function entry
  scripts/sorttable: Use normal sort if theres no relocs in the mcount section
  ftrace: Check against is_kernel_text() instead of kaslr_offset()
  ftrace: Test mcount_loc addr before calling ftrace_call_addr()
  ftrace: Have ftrace pages output reflect freed pages
  ftrace: Update the mcount_loc check of skipped entries
  scripts/sorttable: Zero out weak functions in mcount_loc table
  scripts/sorttable: Always use an array for the mcount_loc sorting
  scripts/sorttable: Have mcount rela sort use direct values
  arm64: scripts/sorttable: Implement sorting mcount_loc at boot for arm64
This commit is contained in:
Linus Torvalds 2025-03-27 15:44:34 -07:00
commit dd161f74f8
4 changed files with 457 additions and 14 deletions

View File

@ -218,6 +218,7 @@ config ARM64
if DYNAMIC_FTRACE_WITH_ARGS
select HAVE_SAMPLE_FTRACE_DIRECT
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_BUILDTIME_MCOUNT_SORT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
select HAVE_FTRACE_GRAPH_FUNC

View File

@ -7016,6 +7016,7 @@ static int ftrace_process_locs(struct module *mod,
unsigned long *p;
unsigned long addr;
unsigned long flags = 0; /* Shut up gcc */
unsigned long pages;
int ret = -ENOMEM;
count = end - start;
@ -7023,6 +7024,8 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);
/*
* Sorting mcount in vmlinux at build time depend on
* CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in
@ -7067,7 +7070,9 @@ static int ftrace_process_locs(struct module *mod,
pg = start_pg;
while (p < end) {
unsigned long end_offset;
addr = ftrace_call_adjust(*p++);
addr = *p++;
/*
* Some architecture linkers will pad between
* the different mcount_loc sections of different
@ -7079,6 +7084,19 @@ static int ftrace_process_locs(struct module *mod,
continue;
}
/*
* If this is core kernel, make sure the address is in core
* or inittext, as weak functions get zeroed and KASLR can
* move them to something other than zero. It just will not
* move it to an area where kernel text is.
*/
if (!mod && !(is_kernel_text(addr) || is_kernel_inittext(addr))) {
skipped++;
continue;
}
addr = ftrace_call_adjust(addr);
end_offset = (pg->index+1) * sizeof(pg->records[0]);
if (end_offset > PAGE_SIZE << pg->order) {
/* We should have allocated enough */
@ -7118,11 +7136,41 @@ static int ftrace_process_locs(struct module *mod,
/* We should have used all pages unless we skipped some */
if (pg_unuse) {
WARN_ON(!skipped);
unsigned long pg_remaining, remaining = 0;
unsigned long skip;
/* Count the number of entries unused and compare it to skipped. */
pg_remaining = (ENTRIES_PER_PAGE << pg->order) - pg->index;
if (!WARN(skipped < pg_remaining, "Extra allocated pages for ftrace")) {
skip = skipped - pg_remaining;
for (pg = pg_unuse; pg; pg = pg->next)
remaining += 1 << pg->order;
pages -= remaining;
skip = DIV_ROUND_UP(skip, ENTRIES_PER_PAGE);
/*
* Check to see if the number of pages remaining would
* just fit the number of entries skipped.
*/
WARN(skip != remaining, "Extra allocated pages for ftrace: %lu with %lu skipped",
remaining, skipped);
}
/* Need to synchronize with ftrace_location_range() */
synchronize_rcu();
ftrace_free_pages(pg_unuse);
}
if (!mod) {
count -= skipped;
pr_info("ftrace: allocating %ld entries in %ld pages\n",
count, pages);
}
return ret;
}
@ -7768,9 +7816,6 @@ void __init ftrace_init(void)
goto failed;
}
pr_info("ftrace: allocating %ld entries in %ld pages\n",
count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
ret = ftrace_process_locs(NULL,
__start_mcount_loc,
__stop_mcount_loc);

View File

@ -173,12 +173,14 @@ mksysmap()
sorttable()
{
${objtree}/scripts/sorttable ${1}
${NM} -S ${1} > .tmp_vmlinux.nm-sort
${objtree}/scripts/sorttable -s .tmp_vmlinux.nm-sort ${1}
}
cleanup()
{
rm -f .btf.*
rm -f .tmp_vmlinux.nm-sort
rm -f System.map
rm -f vmlinux
rm -f vmlinux.map

View File

@ -28,6 +28,7 @@
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
@ -79,10 +80,16 @@ typedef union {
Elf64_Sym e64;
} Elf_Sym;
typedef union {
Elf32_Rela e32;
Elf64_Rela e64;
} Elf_Rela;
static uint32_t (*r)(const uint32_t *);
static uint16_t (*r2)(const uint16_t *);
static uint64_t (*r8)(const uint64_t *);
static void (*w)(uint32_t, uint32_t *);
static void (*w8)(uint64_t, uint64_t *);
typedef void (*table_sort_t)(char *, int);
static struct elf_funcs {
@ -102,6 +109,10 @@ static struct elf_funcs {
uint32_t (*sym_name)(Elf_Sym *sym);
uint64_t (*sym_value)(Elf_Sym *sym);
uint16_t (*sym_shndx)(Elf_Sym *sym);
uint64_t (*rela_offset)(Elf_Rela *rela);
uint64_t (*rela_info)(Elf_Rela *rela);
uint64_t (*rela_addend)(Elf_Rela *rela);
void (*rela_write_addend)(Elf_Rela *rela, uint64_t val);
} e;
static uint64_t ehdr64_shoff(Elf_Ehdr *ehdr)
@ -262,6 +273,38 @@ SYM_ADDR(value)
SYM_WORD(name)
SYM_HALF(shndx)
#define __maybe_unused __attribute__((__unused__))
#define RELA_ADDR(fn_name) \
static uint64_t rela64_##fn_name(Elf_Rela *rela) \
{ \
return r8((uint64_t *)&rela->e64.r_##fn_name); \
} \
\
static uint64_t rela32_##fn_name(Elf_Rela *rela) \
{ \
return r((uint32_t *)&rela->e32.r_##fn_name); \
} \
\
static uint64_t __maybe_unused rela_##fn_name(Elf_Rela *rela) \
{ \
return e.rela_##fn_name(rela); \
}
RELA_ADDR(offset)
RELA_ADDR(info)
RELA_ADDR(addend)
static void rela64_write_addend(Elf_Rela *rela, uint64_t val)
{
w8(val, (uint64_t *)&rela->e64.r_addend);
}
static void rela32_write_addend(Elf_Rela *rela, uint64_t val)
{
w(val, (uint32_t *)&rela->e32.r_addend);
}
/*
* Get the whole file as a programming convenience in order to avoid
* malloc+lseek+read+free of many pieces. If successful, then mmap
@ -341,6 +384,16 @@ static void wle(uint32_t val, uint32_t *x)
put_unaligned_le32(val, x);
}
static void w8be(uint64_t val, uint64_t *x)
{
put_unaligned_be64(val, x);
}
static void w8le(uint64_t val, uint64_t *x)
{
put_unaligned_le64(val, x);
}
/*
* Move reserved section indices SHN_LORESERVE..SHN_HIRESERVE out of
* the way to -256..-1, to avoid conflicting with real section
@ -398,13 +451,12 @@ static inline void *get_index(void *start, int entsize, int index)
static int extable_ent_size;
static int long_size;
#define ERRSTR_MAXSZ 256
#ifdef UNWINDER_ORC_ENABLED
/* ORC unwinder only support X86_64 */
#include <asm/orc_types.h>
#define ERRSTR_MAXSZ 256
static char g_err[ERRSTR_MAXSZ];
static int *g_orc_ip_table;
static struct orc_entry *g_orc_table;
@ -499,7 +551,136 @@ static void *sort_orctable(void *arg)
#endif
#ifdef MCOUNT_SORT_ENABLED
static int compare_values_64(const void *a, const void *b)
{
uint64_t av = *(uint64_t *)a;
uint64_t bv = *(uint64_t *)b;
if (av < bv)
return -1;
return av > bv;
}
static int compare_values_32(const void *a, const void *b)
{
uint32_t av = *(uint32_t *)a;
uint32_t bv = *(uint32_t *)b;
if (av < bv)
return -1;
return av > bv;
}
static int (*compare_values)(const void *a, const void *b);
/* Only used for sorting mcount table */
static void rela_write_addend(Elf_Rela *rela, uint64_t val)
{
e.rela_write_addend(rela, val);
}
struct func_info {
uint64_t addr;
uint64_t size;
};
/* List of functions created by: nm -S vmlinux */
static struct func_info *function_list;
static int function_list_size;
/* Allocate functions in 1k blocks */
#define FUNC_BLK_SIZE 1024
#define FUNC_BLK_MASK (FUNC_BLK_SIZE - 1)
static int add_field(uint64_t addr, uint64_t size)
{
struct func_info *fi;
int fsize = function_list_size;
if (!(fsize & FUNC_BLK_MASK)) {
fsize += FUNC_BLK_SIZE;
fi = realloc(function_list, fsize * sizeof(struct func_info));
if (!fi)
return -1;
function_list = fi;
}
fi = &function_list[function_list_size++];
fi->addr = addr;
fi->size = size;
return 0;
}
/* Used for when mcount/fentry is before the function entry */
static int before_func;
/* Only return match if the address lies inside the function size */
static int cmp_func_addr(const void *K, const void *A)
{
uint64_t key = *(const uint64_t *)K;
const struct func_info *a = A;
if (key + before_func < a->addr)
return -1;
return key >= a->addr + a->size;
}
/* Find the function in function list that is bounded by the function size */
static int find_func(uint64_t key)
{
return bsearch(&key, function_list, function_list_size,
sizeof(struct func_info), cmp_func_addr) != NULL;
}
static int cmp_funcs(const void *A, const void *B)
{
const struct func_info *a = A;
const struct func_info *b = B;
if (a->addr < b->addr)
return -1;
return a->addr > b->addr;
}
static int parse_symbols(const char *fname)
{
FILE *fp;
char addr_str[20]; /* Only need 17, but round up to next int size */
char size_str[20];
char type;
fp = fopen(fname, "r");
if (!fp) {
perror(fname);
return -1;
}
while (fscanf(fp, "%16s %16s %c %*s\n", addr_str, size_str, &type) == 3) {
uint64_t addr;
uint64_t size;
/* Only care about functions */
if (type != 't' && type != 'T' && type != 'W')
continue;
addr = strtoull(addr_str, NULL, 16);
size = strtoull(size_str, NULL, 16);
if (add_field(addr, size) < 0)
return -1;
}
fclose(fp);
qsort(function_list, function_list_size, sizeof(struct func_info), cmp_funcs);
return 0;
}
static pthread_t mcount_sort_thread;
static bool sort_reloc;
static long rela_type;
static char m_err[ERRSTR_MAXSZ];
struct elf_mcount_loc {
Elf_Ehdr *ehdr;
@ -508,17 +689,197 @@ struct elf_mcount_loc {
uint64_t stop_mcount_loc;
};
/* Fill the array with the content of the relocs */
static int fill_relocs(void *ptr, uint64_t size, Elf_Ehdr *ehdr, uint64_t start_loc)
{
Elf_Shdr *shdr_start;
Elf_Rela *rel;
unsigned int shnum;
unsigned int count = 0;
int shentsize;
void *array_end = ptr + size;
shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr));
shentsize = ehdr_shentsize(ehdr);
shnum = ehdr_shnum(ehdr);
if (shnum == SHN_UNDEF)
shnum = shdr_size(shdr_start);
for (int i = 0; i < shnum; i++) {
Elf_Shdr *shdr = get_index(shdr_start, shentsize, i);
void *end;
if (shdr_type(shdr) != SHT_RELA)
continue;
rel = (void *)ehdr + shdr_offset(shdr);
end = (void *)rel + shdr_size(shdr);
for (; (void *)rel < end; rel = (void *)rel + shdr_entsize(shdr)) {
uint64_t offset = rela_offset(rel);
if (offset >= start_loc && offset < start_loc + size) {
if (ptr + long_size > array_end) {
snprintf(m_err, ERRSTR_MAXSZ,
"Too many relocations");
return -1;
}
/* Make sure this has the correct type */
if (rela_info(rel) != rela_type) {
snprintf(m_err, ERRSTR_MAXSZ,
"rela has type %lx but expected %lx\n",
(long)rela_info(rel), rela_type);
return -1;
}
if (long_size == 4)
*(uint32_t *)ptr = rela_addend(rel);
else
*(uint64_t *)ptr = rela_addend(rel);
ptr += long_size;
count++;
}
}
}
return count;
}
/* Put the sorted vals back into the relocation elements */
static void replace_relocs(void *ptr, uint64_t size, Elf_Ehdr *ehdr, uint64_t start_loc)
{
Elf_Shdr *shdr_start;
Elf_Rela *rel;
unsigned int shnum;
int shentsize;
shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr));
shentsize = ehdr_shentsize(ehdr);
shnum = ehdr_shnum(ehdr);
if (shnum == SHN_UNDEF)
shnum = shdr_size(shdr_start);
for (int i = 0; i < shnum; i++) {
Elf_Shdr *shdr = get_index(shdr_start, shentsize, i);
void *end;
if (shdr_type(shdr) != SHT_RELA)
continue;
rel = (void *)ehdr + shdr_offset(shdr);
end = (void *)rel + shdr_size(shdr);
for (; (void *)rel < end; rel = (void *)rel + shdr_entsize(shdr)) {
uint64_t offset = rela_offset(rel);
if (offset >= start_loc && offset < start_loc + size) {
if (long_size == 4)
rela_write_addend(rel, *(uint32_t *)ptr);
else
rela_write_addend(rel, *(uint64_t *)ptr);
ptr += long_size;
}
}
}
}
static int fill_addrs(void *ptr, uint64_t size, void *addrs)
{
void *end = ptr + size;
int count = 0;
for (; ptr < end; ptr += long_size, addrs += long_size, count++) {
if (long_size == 4)
*(uint32_t *)ptr = r(addrs);
else
*(uint64_t *)ptr = r8(addrs);
}
return count;
}
static void replace_addrs(void *ptr, uint64_t size, void *addrs)
{
void *end = ptr + size;
for (; ptr < end; ptr += long_size, addrs += long_size) {
if (long_size == 4)
w(*(uint32_t *)ptr, addrs);
else
w8(*(uint64_t *)ptr, addrs);
}
}
/* Sort the addresses stored between __start_mcount_loc to __stop_mcount_loc in vmlinux */
static void *sort_mcount_loc(void *arg)
{
struct elf_mcount_loc *emloc = (struct elf_mcount_loc *)arg;
uint64_t offset = emloc->start_mcount_loc - shdr_addr(emloc->init_data_sec)
+ shdr_offset(emloc->init_data_sec);
uint64_t count = emloc->stop_mcount_loc - emloc->start_mcount_loc;
uint64_t size = emloc->stop_mcount_loc - emloc->start_mcount_loc;
unsigned char *start_loc = (void *)emloc->ehdr + offset;
Elf_Ehdr *ehdr = emloc->ehdr;
void *e_msg = NULL;
void *vals;
int count;
qsort(start_loc, count/long_size, long_size, compare_extable);
return NULL;
vals = malloc(long_size * size);
if (!vals) {
snprintf(m_err, ERRSTR_MAXSZ, "Failed to allocate sort array");
pthread_exit(m_err);
}
if (sort_reloc) {
count = fill_relocs(vals, size, ehdr, emloc->start_mcount_loc);
/* gcc may use relocs to save the addresses, but clang does not. */
if (!count) {
count = fill_addrs(vals, size, start_loc);
sort_reloc = 0;
}
} else
count = fill_addrs(vals, size, start_loc);
if (count < 0) {
e_msg = m_err;
goto out;
}
if (count != size / long_size) {
snprintf(m_err, ERRSTR_MAXSZ, "Expected %u mcount elements but found %u\n",
(int)(size / long_size), count);
e_msg = m_err;
goto out;
}
/* zero out any locations not found by function list */
if (function_list_size) {
for (void *ptr = vals; ptr < vals + size; ptr += long_size) {
uint64_t key;
key = long_size == 4 ? r((uint32_t *)ptr) : r8((uint64_t *)ptr);
if (!find_func(key)) {
if (long_size == 4)
*(uint32_t *)ptr = 0;
else
*(uint64_t *)ptr = 0;
}
}
}
compare_values = long_size == 4 ? compare_values_32 : compare_values_64;
qsort(vals, count, long_size, compare_values);
if (sort_reloc)
replace_relocs(vals, size, ehdr, emloc->start_mcount_loc);
else
replace_addrs(vals, size, start_loc);
out:
free(vals);
pthread_exit(e_msg);
}
/* Get the address of __start_mcount_loc and __stop_mcount_loc in System.map */
@ -555,6 +916,8 @@ static void get_mcount_loc(struct elf_mcount_loc *emloc, Elf_Shdr *symtab_sec,
return;
}
}
#else /* MCOUNT_SORT_ENABLED */
static inline int parse_symbols(const char *fname) { return 0; }
#endif
static int do_sort(Elf_Ehdr *ehdr,
@ -866,12 +1229,14 @@ static int do_file(char const *const fname, void *addr)
r2 = r2le;
r8 = r8le;
w = wle;
w8 = w8le;
break;
case ELFDATA2MSB:
r = rbe;
r2 = r2be;
r8 = r8be;
w = wbe;
w8 = w8be;
break;
default:
fprintf(stderr, "unrecognized ELF data encoding %d: %s\n",
@ -887,8 +1252,15 @@ static int do_file(char const *const fname, void *addr)
}
switch (r2(&ehdr->e32.e_machine)) {
case EM_386:
case EM_AARCH64:
#ifdef MCOUNT_SORT_ENABLED
sort_reloc = true;
rela_type = 0x403;
/* arm64 uses patchable function entry placing before function */
before_func = 8;
#endif
/* fallthrough */
case EM_386:
case EM_LOONGARCH:
case EM_RISCV:
case EM_S390:
@ -932,6 +1304,10 @@ static int do_file(char const *const fname, void *addr)
.sym_name = sym32_name,
.sym_value = sym32_value,
.sym_shndx = sym32_shndx,
.rela_offset = rela32_offset,
.rela_info = rela32_info,
.rela_addend = rela32_addend,
.rela_write_addend = rela32_write_addend,
};
e = efuncs;
@ -965,6 +1341,10 @@ static int do_file(char const *const fname, void *addr)
.sym_name = sym64_name,
.sym_value = sym64_value,
.sym_shndx = sym64_shndx,
.rela_offset = rela64_offset,
.rela_info = rela64_info,
.rela_addend = rela64_addend,
.rela_write_addend = rela64_write_addend,
};
e = efuncs;
@ -995,14 +1375,29 @@ int main(int argc, char *argv[])
int i, n_error = 0; /* gcc-4.3.0 false positive complaint */
size_t size = 0;
void *addr = NULL;
int c;
if (argc < 2) {
while ((c = getopt(argc, argv, "s:")) >= 0) {
switch (c) {
case 's':
if (parse_symbols(optarg) < 0) {
fprintf(stderr, "Could not parse %s\n", optarg);
return -1;
}
break;
default:
fprintf(stderr, "usage: sorttable [-s nm-file] vmlinux...\n");
return 0;
}
}
if ((argc - optind) < 1) {
fprintf(stderr, "usage: sorttable vmlinux...\n");
return 0;
}
/* Process each file in turn, allowing deep failure. */
for (i = 1; i < argc; i++) {
for (i = optind; i < argc; i++) {
addr = mmap_file(argv[i], &size);
if (!addr) {
++n_error;