From f12c93efd04991bc982a27e2fa6142538c33ca82 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Sat, 7 May 2022 19:55:24 +0800 Subject: [PATCH] Retry if pthread_create fails with EAGAIN On many Unix-like systems, pthread_create can fail spuriously even if the running machine has enough resources to spawn a new thread. Therefore, if EAGAIN is returned from pthread_create, we actually have to try again. I observed this issue when running the mold linker (https://github.com/rui314/mold) under a heavy load. mold uses OneTBB for parallelization. As another data point, Go has the same logic to retry on EAGAIN: https://go-review.googlesource.com/c/go/+/33894/ nanosleep is defined in POSIX 2001, so I believe that all Unix-like systems support it. Signed-off-by: Rui Ueyama --- src/tbb/rml_thread_monitor.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/tbb/rml_thread_monitor.h b/src/tbb/rml_thread_monitor.h index 13b556380..5b844b232 100644 --- a/src/tbb/rml_thread_monitor.h +++ b/src/tbb/rml_thread_monitor.h @@ -31,6 +31,7 @@ #include #include #include +#include #else #error Unsupported platform #endif @@ -191,8 +192,24 @@ inline thread_monitor::handle_type thread_monitor::launch( void* (*thread_routin check(pthread_attr_init( &s ), "pthread_attr_init has failed"); if( stack_size>0 ) check(pthread_attr_setstacksize( &s, stack_size ), "pthread_attr_setstack_size has failed" ); + pthread_t handle; - check( pthread_create( &handle, &s, thread_routine, arg ), "pthread_create has failed" ); + int tries = 0; + for (;;) { + int error_code = pthread_create(&handle, &s, thread_routine, arg); + if (!error_code) + break; + if (error_code != EAGAIN || tries++ > 20) { + handle_perror(error_code, "pthread_create has failed"); + break; + } + + // pthread_create can spuriously fail on many Unix-like systems. + // Retry after tries * 1 millisecond. 
+ struct timespec ts = {0, tries * 1000 * 1000}; + nanosleep(&ts, NULL); + } + check( pthread_attr_destroy( &s ), "pthread_attr_destroy has failed" ); return handle; }