mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/
synced 2025-04-19 20:58:31 +09:00
kernel-6.15-rc1.tasklist_lock
-----BEGIN PGP SIGNATURE-----

iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ90q3QAKCRCRxhvAZXjc
oh2TAP9vrH7ft0TpJN2RyFNels/QaYmoiuw4TdVZhbEzvCUyYgEA1Bnx+OGmkdbT
e4W3NkWJpn8BBjHfz3z3P7SImDdcCAw=
=a6/i
-----END PGP SIGNATURE-----

Merge tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull tasklist_lock optimizations from Christian Brauner:
"According to the performance testbots this brings a 23% performance increase when creating new processes:

- Reduce tasklist_lock hold time on exit:
- Perform add_device_randomness() without tasklist_lock
- Perform free_pid() calls outside of tasklist_lock
- Drop irq disablement around pidmap_lock
- Add some tasklist_lock asserts
- Call flush_sigqueue() lockless by changing release_task()
- Don't pointlessly clear TIF_SIGPENDING in __exit_signal() -> clear_tsk_thread_flag()"

* tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  pid: drop irq disablement around pidmap_lock
  pid: perform free_pid() calls outside of tasklist_lock
  pid: sprinkle tasklist_lock asserts
  exit: hoist get_pid() in release_task() outside of tasklist_lock
  exit: perform add_device_randomness() without tasklist_lock
  exit: kill the pointless __exit_signal()->clear_tsk_thread_flag(TIF_SIGPENDING)
  exit: change the release_task() paths to call flush_sigqueue() lockless
This commit is contained in:
commit
b0cb56cbbd
@ -101,9 +101,9 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
|
||||
* these helpers must be called with the tasklist_lock write-held.
|
||||
*/
|
||||
extern void attach_pid(struct task_struct *task, enum pid_type);
|
||||
extern void detach_pid(struct task_struct *task, enum pid_type);
|
||||
extern void change_pid(struct task_struct *task, enum pid_type,
|
||||
struct pid *pid);
|
||||
void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type);
|
||||
void change_pid(struct pid **pids, struct task_struct *task, enum pid_type,
|
||||
struct pid *pid);
|
||||
extern void exchange_tids(struct task_struct *task, struct task_struct *old);
|
||||
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
|
||||
enum pid_type);
|
||||
@ -129,6 +129,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
|
||||
extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||
size_t set_tid_size);
|
||||
extern void free_pid(struct pid *pid);
|
||||
void free_pids(struct pid **pids);
|
||||
extern void disable_pid_allocation(struct pid_namespace *ns);
|
||||
|
||||
/*
|
||||
|
@ -123,14 +123,22 @@ static __init int kernel_exit_sysfs_init(void)
|
||||
late_initcall(kernel_exit_sysfs_init);
|
||||
#endif
|
||||
|
||||
static void __unhash_process(struct task_struct *p, bool group_dead)
|
||||
/*
|
||||
* For things release_task() would like to do *after* tasklist_lock is released.
|
||||
*/
|
||||
struct release_task_post {
|
||||
struct pid *pids[PIDTYPE_MAX];
|
||||
};
|
||||
|
||||
static void __unhash_process(struct release_task_post *post, struct task_struct *p,
|
||||
bool group_dead)
|
||||
{
|
||||
nr_threads--;
|
||||
detach_pid(p, PIDTYPE_PID);
|
||||
detach_pid(post->pids, p, PIDTYPE_PID);
|
||||
if (group_dead) {
|
||||
detach_pid(p, PIDTYPE_TGID);
|
||||
detach_pid(p, PIDTYPE_PGID);
|
||||
detach_pid(p, PIDTYPE_SID);
|
||||
detach_pid(post->pids, p, PIDTYPE_TGID);
|
||||
detach_pid(post->pids, p, PIDTYPE_PGID);
|
||||
detach_pid(post->pids, p, PIDTYPE_SID);
|
||||
|
||||
list_del_rcu(&p->tasks);
|
||||
list_del_init(&p->sibling);
|
||||
@ -142,7 +150,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
|
||||
/*
|
||||
* This function expects the tasklist_lock write-locked.
|
||||
*/
|
||||
static void __exit_signal(struct task_struct *tsk)
|
||||
static void __exit_signal(struct release_task_post *post, struct task_struct *tsk)
|
||||
{
|
||||
struct signal_struct *sig = tsk->signal;
|
||||
bool group_dead = thread_group_leader(tsk);
|
||||
@ -175,9 +183,6 @@ static void __exit_signal(struct task_struct *tsk)
|
||||
sig->curr_target = next_thread(tsk);
|
||||
}
|
||||
|
||||
add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
|
||||
sizeof(unsigned long long));
|
||||
|
||||
/*
|
||||
* Accumulate here the counters for all threads as they die. We could
|
||||
* skip the group leader because it is the last user of signal_struct,
|
||||
@ -198,23 +203,15 @@ static void __exit_signal(struct task_struct *tsk)
|
||||
task_io_accounting_add(&sig->ioac, &tsk->ioac);
|
||||
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
|
||||
sig->nr_threads--;
|
||||
__unhash_process(tsk, group_dead);
|
||||
__unhash_process(post, tsk, group_dead);
|
||||
write_sequnlock(&sig->stats_lock);
|
||||
|
||||
/*
|
||||
* Do this under ->siglock, we can race with another thread
|
||||
* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
|
||||
*/
|
||||
flush_sigqueue(&tsk->pending);
|
||||
tsk->sighand = NULL;
|
||||
spin_unlock(&sighand->siglock);
|
||||
|
||||
__cleanup_sighand(sighand);
|
||||
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
|
||||
if (group_dead) {
|
||||
flush_sigqueue(&sig->shared_pending);
|
||||
if (group_dead)
|
||||
tty_kref_put(tty);
|
||||
}
|
||||
}
|
||||
|
||||
static void delayed_put_task_struct(struct rcu_head *rhp)
|
||||
@ -240,10 +237,13 @@ void __weak release_thread(struct task_struct *dead_task)
|
||||
|
||||
void release_task(struct task_struct *p)
|
||||
{
|
||||
struct release_task_post post;
|
||||
struct task_struct *leader;
|
||||
struct pid *thread_pid;
|
||||
int zap_leader;
|
||||
repeat:
|
||||
memset(&post, 0, sizeof(post));
|
||||
|
||||
/* don't need to get the RCU readlock here - the process is dead and
|
||||
* can't be modifying its own credentials. But shut RCU-lockdep up */
|
||||
rcu_read_lock();
|
||||
@ -253,10 +253,11 @@ repeat:
|
||||
pidfs_exit(p);
|
||||
cgroup_release(p);
|
||||
|
||||
thread_pid = get_pid(p->thread_pid);
|
||||
|
||||
write_lock_irq(&tasklist_lock);
|
||||
ptrace_release_task(p);
|
||||
thread_pid = get_pid(p->thread_pid);
|
||||
__exit_signal(p);
|
||||
__exit_signal(&post, p);
|
||||
|
||||
/*
|
||||
* If we are the last non-leader member of the thread
|
||||
@ -280,7 +281,20 @@ repeat:
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
proc_flush_pid(thread_pid);
|
||||
put_pid(thread_pid);
|
||||
add_device_randomness(&p->se.sum_exec_runtime,
|
||||
sizeof(p->se.sum_exec_runtime));
|
||||
free_pids(post.pids);
|
||||
release_thread(p);
|
||||
/*
|
||||
* This task was already removed from the process/thread/pid lists
|
||||
* and lock_task_sighand(p) can't succeed. Nobody else can touch
|
||||
* ->pending or, if group dead, signal->shared_pending. We can call
|
||||
* flush_sigqueue() lockless.
|
||||
*/
|
||||
flush_sigqueue(&p->pending);
|
||||
if (thread_group_leader(p))
|
||||
flush_sigqueue(&p->signal->shared_pending);
|
||||
|
||||
put_task_struct_rcu_user(p);
|
||||
|
||||
p = leader;
|
||||
|
82
kernel/pid.c
82
kernel/pid.c
@ -88,20 +88,6 @@ struct pid_namespace init_pid_ns = {
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(init_pid_ns);
|
||||
|
||||
/*
|
||||
* Note: disable interrupts while the pidmap_lock is held as an
|
||||
* interrupt might come in and do read_lock(&tasklist_lock).
|
||||
*
|
||||
* If we don't disable interrupts there is a nasty deadlock between
|
||||
* detach_pid()->free_pid() and another cpu that does
|
||||
* spin_lock(&pidmap_lock) followed by an interrupt routine that does
|
||||
* read_lock(&tasklist_lock);
|
||||
*
|
||||
* After we clean up the tasklist_lock and know there are no
|
||||
* irq handlers that take it we can leave the interrupts enabled.
|
||||
* For now it is easier to be safe than to prove it can't happen.
|
||||
*/
|
||||
|
||||
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
|
||||
seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock);
|
||||
|
||||
@ -128,11 +114,11 @@ static void delayed_put_pid(struct rcu_head *rhp)
|
||||
|
||||
void free_pid(struct pid *pid)
|
||||
{
|
||||
/* We can be called with write_lock_irq(&tasklist_lock) held */
|
||||
int i;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pidmap_lock, flags);
|
||||
lockdep_assert_not_held(&tasklist_lock);
|
||||
|
||||
spin_lock(&pidmap_lock);
|
||||
for (i = 0; i <= pid->level; i++) {
|
||||
struct upid *upid = pid->numbers + i;
|
||||
struct pid_namespace *ns = upid->ns;
|
||||
@ -155,11 +141,23 @@ void free_pid(struct pid *pid)
|
||||
idr_remove(&ns->idr, upid->nr);
|
||||
}
|
||||
pidfs_remove_pid(pid);
|
||||
spin_unlock_irqrestore(&pidmap_lock, flags);
|
||||
spin_unlock(&pidmap_lock);
|
||||
|
||||
call_rcu(&pid->rcu, delayed_put_pid);
|
||||
}
|
||||
|
||||
void free_pids(struct pid **pids)
|
||||
{
|
||||
int tmp;
|
||||
|
||||
/*
|
||||
* This can batch pidmap_lock.
|
||||
*/
|
||||
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
|
||||
if (pids[tmp])
|
||||
free_pid(pids[tmp]);
|
||||
}
|
||||
|
||||
struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||
size_t set_tid_size)
|
||||
{
|
||||
@ -211,7 +209,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||
}
|
||||
|
||||
idr_preload(GFP_KERNEL);
|
||||
spin_lock_irq(&pidmap_lock);
|
||||
spin_lock(&pidmap_lock);
|
||||
|
||||
if (tid) {
|
||||
nr = idr_alloc(&tmp->idr, NULL, tid,
|
||||
@ -238,7 +236,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||
nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
|
||||
pid_max, GFP_ATOMIC);
|
||||
}
|
||||
spin_unlock_irq(&pidmap_lock);
|
||||
spin_unlock(&pidmap_lock);
|
||||
idr_preload_end();
|
||||
|
||||
if (nr < 0) {
|
||||
@ -272,7 +270,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||
|
||||
upid = pid->numbers + ns->level;
|
||||
idr_preload(GFP_KERNEL);
|
||||
spin_lock_irq(&pidmap_lock);
|
||||
spin_lock(&pidmap_lock);
|
||||
if (!(ns->pid_allocated & PIDNS_ADDING))
|
||||
goto out_unlock;
|
||||
pidfs_add_pid(pid);
|
||||
@ -281,18 +279,18 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||
idr_replace(&upid->ns->idr, pid, upid->nr);
|
||||
upid->ns->pid_allocated++;
|
||||
}
|
||||
spin_unlock_irq(&pidmap_lock);
|
||||
spin_unlock(&pidmap_lock);
|
||||
idr_preload_end();
|
||||
|
||||
return pid;
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_irq(&pidmap_lock);
|
||||
spin_unlock(&pidmap_lock);
|
||||
idr_preload_end();
|
||||
put_pid_ns(ns);
|
||||
|
||||
out_free:
|
||||
spin_lock_irq(&pidmap_lock);
|
||||
spin_lock(&pidmap_lock);
|
||||
while (++i <= ns->level) {
|
||||
upid = pid->numbers + i;
|
||||
idr_remove(&upid->ns->idr, upid->nr);
|
||||
@ -302,7 +300,7 @@ out_free:
|
||||
if (ns->pid_allocated == PIDNS_ADDING)
|
||||
idr_set_cursor(&ns->idr, 0);
|
||||
|
||||
spin_unlock_irq(&pidmap_lock);
|
||||
spin_unlock(&pidmap_lock);
|
||||
|
||||
kmem_cache_free(ns->pid_cachep, pid);
|
||||
return ERR_PTR(retval);
|
||||
@ -310,9 +308,9 @@ out_free:
|
||||
|
||||
void disable_pid_allocation(struct pid_namespace *ns)
|
||||
{
|
||||
spin_lock_irq(&pidmap_lock);
|
||||
spin_lock(&pidmap_lock);
|
||||
ns->pid_allocated &= ~PIDNS_ADDING;
|
||||
spin_unlock_irq(&pidmap_lock);
|
||||
spin_unlock(&pidmap_lock);
|
||||
}
|
||||
|
||||
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
|
||||
@ -339,17 +337,23 @@ static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
|
||||
*/
|
||||
void attach_pid(struct task_struct *task, enum pid_type type)
|
||||
{
|
||||
struct pid *pid = *task_pid_ptr(task, type);
|
||||
struct pid *pid;
|
||||
|
||||
lockdep_assert_held_write(&tasklist_lock);
|
||||
|
||||
pid = *task_pid_ptr(task, type);
|
||||
hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
|
||||
}
|
||||
|
||||
static void __change_pid(struct task_struct *task, enum pid_type type,
|
||||
struct pid *new)
|
||||
static void __change_pid(struct pid **pids, struct task_struct *task,
|
||||
enum pid_type type, struct pid *new)
|
||||
{
|
||||
struct pid **pid_ptr = task_pid_ptr(task, type);
|
||||
struct pid *pid;
|
||||
struct pid **pid_ptr, *pid;
|
||||
int tmp;
|
||||
|
||||
lockdep_assert_held_write(&tasklist_lock);
|
||||
|
||||
pid_ptr = task_pid_ptr(task, type);
|
||||
pid = *pid_ptr;
|
||||
|
||||
hlist_del_rcu(&task->pid_links[type]);
|
||||
@ -364,18 +368,19 @@ static void __change_pid(struct task_struct *task, enum pid_type type,
|
||||
if (pid_has_task(pid, tmp))
|
||||
return;
|
||||
|
||||
free_pid(pid);
|
||||
WARN_ON(pids[type]);
|
||||
pids[type] = pid;
|
||||
}
|
||||
|
||||
void detach_pid(struct task_struct *task, enum pid_type type)
|
||||
void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type type)
|
||||
{
|
||||
__change_pid(task, type, NULL);
|
||||
__change_pid(pids, task, type, NULL);
|
||||
}
|
||||
|
||||
void change_pid(struct task_struct *task, enum pid_type type,
|
||||
void change_pid(struct pid **pids, struct task_struct *task, enum pid_type type,
|
||||
struct pid *pid)
|
||||
{
|
||||
__change_pid(task, type, pid);
|
||||
__change_pid(pids, task, type, pid);
|
||||
attach_pid(task, type);
|
||||
}
|
||||
|
||||
@ -386,6 +391,8 @@ void exchange_tids(struct task_struct *left, struct task_struct *right)
|
||||
struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
|
||||
struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
|
||||
|
||||
lockdep_assert_held_write(&tasklist_lock);
|
||||
|
||||
/* Swap the single entry tid lists */
|
||||
hlists_swap_heads_rcu(head1, head2);
|
||||
|
||||
@ -403,6 +410,7 @@ void transfer_pid(struct task_struct *old, struct task_struct *new,
|
||||
enum pid_type type)
|
||||
{
|
||||
WARN_ON_ONCE(type == PIDTYPE_PID);
|
||||
lockdep_assert_held_write(&tasklist_lock);
|
||||
hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
|
||||
}
|
||||
|
||||
|
14
kernel/sys.c
14
kernel/sys.c
@ -1085,6 +1085,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
|
||||
{
|
||||
struct task_struct *p;
|
||||
struct task_struct *group_leader = current->group_leader;
|
||||
struct pid *pids[PIDTYPE_MAX] = { 0 };
|
||||
struct pid *pgrp;
|
||||
int err;
|
||||
|
||||
@ -1142,13 +1143,14 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
|
||||
goto out;
|
||||
|
||||
if (task_pgrp(p) != pgrp)
|
||||
change_pid(p, PIDTYPE_PGID, pgrp);
|
||||
change_pid(pids, p, PIDTYPE_PGID, pgrp);
|
||||
|
||||
err = 0;
|
||||
out:
|
||||
/* All paths lead to here, thus we are safe. -DaveM */
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
rcu_read_unlock();
|
||||
free_pids(pids);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1222,21 +1224,22 @@ out:
|
||||
return retval;
|
||||
}
|
||||
|
||||
static void set_special_pids(struct pid *pid)
|
||||
static void set_special_pids(struct pid **pids, struct pid *pid)
|
||||
{
|
||||
struct task_struct *curr = current->group_leader;
|
||||
|
||||
if (task_session(curr) != pid)
|
||||
change_pid(curr, PIDTYPE_SID, pid);
|
||||
change_pid(pids, curr, PIDTYPE_SID, pid);
|
||||
|
||||
if (task_pgrp(curr) != pid)
|
||||
change_pid(curr, PIDTYPE_PGID, pid);
|
||||
change_pid(pids, curr, PIDTYPE_PGID, pid);
|
||||
}
|
||||
|
||||
int ksys_setsid(void)
|
||||
{
|
||||
struct task_struct *group_leader = current->group_leader;
|
||||
struct pid *sid = task_pid(group_leader);
|
||||
struct pid *pids[PIDTYPE_MAX] = { 0 };
|
||||
pid_t session = pid_vnr(sid);
|
||||
int err = -EPERM;
|
||||
|
||||
@ -1252,13 +1255,14 @@ int ksys_setsid(void)
|
||||
goto out;
|
||||
|
||||
group_leader->signal->leader = 1;
|
||||
set_special_pids(sid);
|
||||
set_special_pids(pids, sid);
|
||||
|
||||
proc_clear_tty(group_leader);
|
||||
|
||||
err = session;
|
||||
out:
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
free_pids(pids);
|
||||
if (err > 0) {
|
||||
proc_sid_connector(group_leader);
|
||||
sched_autogroup_create_attach(group_leader);
|
||||
|
Loading…
x
Reference in New Issue
Block a user