Commit 0bb2a48e authored by наб, committed by Brian Behlendorf
Browse files

zed: protect against wait4()/fork() races to the global PID table


This can be very easily triggered by adding a sleep(1) before
the wait4() on a PID-starved system: the reaper thread would wait
for a child before its entry appeared, letting old entries accumulate:

  Invoking "all-debug.sh" eid=3021 pid=391
  Finished "(null)" eid=0 pid=391 time=0.002432s exit=0
  Invoking "all-syslog.sh" eid=3021 pid=336
  Finished "(null)" eid=0 pid=336 time=0.002432s exit=0
  Invoking "history_event-zfs-list-cacher.sh" eid=3021 pid=347
  Invoking "all-debug.sh" eid=3022 pid=349
  Finished "history_event-zfs-list-cacher.sh" eid=3021 pid=347
                                              time=0.001669s exit=0
  Finished "(null)" eid=0 pid=349 time=0.002404s exit=0
  Invoking "all-syslog.sh" eid=3022 pid=370
  Finished "(null)" eid=0 pid=370 time=0.002427s exit=0
  Invoking "history_event-zfs-list-cacher.sh" eid=3022 pid=391
  avl_find(tree, new_node, &where) == NULL
  ASSERT at ../../module/avl/avl.c:641:avl_add()
  Thread 1 "zed" received signal SIGABRT, Aborted.

By employing this wider lock, we atomise [wait, remove] and [fork, add]:
slowing down the reaper thread now just causes some zombies
to accumulate until it can get to them.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Closes #11963
Closes #11965
parent f15ec889
zfs-issue-13217 NAS-113163 NAS-117105 NAS-117807-1 NAS-117845-dbg NAS-119759-1 NAS-121790 add-check-for-cached-acl-in-zpl_permission add-ctldir-snapcount add-fd-to-zhandle add-json-output-to-zfs add-xattr-related-flag allow-path-to-zhandle-in-ctldir backport-large_block experiment-with-createtxg fix-archive-behavior fix-dosmode-behavior-linux freebsd-simplify-trivial-acl-check integrate release-test/22.02.3 release/21.06-BETA.1 release/21.08-BETA.1 release/21.08-BETA.2 release/22.02 release/22.02-RC.1 release/22.02-RC.2 release/22.02-test release/22.02.1 release/22.02.2 release/22.02.3 release/22.02.4 release/22.12 release/22.12-BETA.1 release/22.12-BETA.2 release/22.12-RC.1 release/22.12.1 release/22.12.2 release/22.12.3 release/22.12.3.3 release/22.12.4 release/22.2-RC.1 stable/angelfish stable/bluefin streaminfo_xattr test-inode-owner-change test-linux-stat-zfscltdir tmprelease/test-21.08 tmprelease/test-21.09 tmprelease/test2-21.09 tmprelease/test3-21.09 tmprelease/test4-21.09 truenas/13.0-u5.3-stable truenas/zfs-2.1-release truenas/zfs-2.1.13-upstream zfs-2.1-release zfs-2.1.2 zfs-2.1.1 zfs-2.1.0 zfs-2.1.0-rc8 zfs-2.1.0-rc7 zfs-2.1.0-rc6 zfs-2.1.0-rc5 TS-22.12.4.2 TS-22.12.4.1 TS-22.12.4 TS-22.12.3.3 TS-22.12.3.2 TS-22.12.3.1 TS-22.12.3 TS-22.12.2 TS-22.12.1 TS-22.12.0 TS-22.12-RC.1 TS-22.12-BETA.2 TS-22.12-BETA.1 TS-22.12-ALPHA.1 TS-22.02.4 TS-22.02.3 TS-22.02.2.1 TS-22.02.2 TS-22.02.1 TS-22.2.1 TS-22.02.0.1 TS-22.02.0 TS-22.2.0 TS-22.02.RELEASE.1 TS-22.02-RC.2 TS-22.02-RC.1 TS-22.02-RC.1-2 TS-22.02-RC.1-1 TS-21.08-BETA.2 TS-21.08-BETA.1 TS-21.06-BETA.1 TS-12.12.3
No related merge requests found
Showing with 7 additions and 6 deletions
+7 -6
......@@ -142,8 +142,10 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
prog, eid, strerror(ENAMETOOLONG));
return;
}
(void) pthread_mutex_lock(&_launched_processes_lock);
pid = fork();
if (pid < 0) {
(void) pthread_mutex_unlock(&_launched_processes_lock);
zed_log_msg(LOG_WARNING,
"Failed to fork \"%s\" for eid=%llu: %s",
prog, eid, strerror(errno));
......@@ -166,20 +168,19 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
/* parent process */
__atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST);
zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
prog, eid, pid);
node = calloc(1, sizeof (*node));
if (node) {
node->pid = pid;
node->eid = eid;
node->name = strdup(prog);
(void) pthread_mutex_lock(&_launched_processes_lock);
avl_add(&_launched_processes, node);
(void) pthread_mutex_unlock(&_launched_processes_lock);
}
(void) pthread_mutex_unlock(&_launched_processes_lock);
__atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST);
zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
prog, eid, pid);
}
static void
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment