Skip to content

[openmp] Segfaults/assertion errors on certain omp statements after calling omp_pause_resource_all(omp_pause_hard) #154201

@haiyanghee

Description

@haiyanghee

Hi, I hope you are having a great day.

From the OpenMP standard (https://www.openmp.org/spec-html/5.0/openmpsu113.html), although it is not explicitly stated, I believe it should be possible to use OpenMP again after calling omp_pause_resource_all(omp_pause_hard). If that is not the case, please ignore this issue.

I've encountered a couple of errors when executing certain OpenMP constructs and API calls after omp_pause_resource_all(omp_pause_hard). Below are minimal examples that reproduce the issues I found.

I can reproduce this with the latest master (with HEAD on commit 5c51a88f193a4753818b31ca186b3a1ef1a07ecf) and with llvmorg-17.0.1.

ompResetResourceMutexTest.cc (segfaults)
/*
 * ompResetResourceMutexTest.cc
 *
 * Reproducer (reported to segfault): enter two named critical sections from
 * a parallel region, hard-pause the OpenMP runtime, then enter the same
 * named critical sections again.
 */
#include <sys/wait.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

#define NUM_THREADS 3

int a_lockCtr, b_lockCtr; //both are 0 initialized

/* Each of the NUM_THREADS threads bumps both counters, one per named critical section. */
void doOmpWork()
{
    #pragma omp parallel num_threads(NUM_THREADS)
    {
        #pragma omp critical (a_lock)
        {
            ++a_lockCtr;
        }
        #pragma omp critical (b_lock)
        {
            ++b_lockCtr;
        }
    }
    printf("%s ended\n",__func__);
    fflush(stdout);
}

int main()
{
    //use omp to do some work
    doOmpWork();
    assert(a_lockCtr == NUM_THREADS && b_lockCtr == NUM_THREADS);
    a_lockCtr = b_lockCtr = 0; //reset the counters

    //omp hard pause should succeed
    int rc = omp_pause_resource_all(omp_pause_hard);
    assert(rc == 0);

    //we should not segfault inside the critical sections of doOmpWork()
    doOmpWork();
    assert(a_lockCtr == NUM_THREADS && b_lockCtr == NUM_THREADS);
    a_lockCtr = b_lockCtr = 0; //reset the counters

    printf("main ended\n");
    return 0;
}
ompResetResourceGetThreadNumTest.cc (segfaults)
/*
 * ompResetResourceGetThreadNumTest.cc
 *
 * Reproducer (reported to segfault): call omp_get_thread_num() after the
 * OpenMP runtime has been initialized and then hard-paused.
 */
#include <sys/wait.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

int main()
{
    //omp_get_thread_num() should work even if omp is not yet initialized
    //(although clang does insert `__kmpc_global_thread_num()` in front of main,
    //which actually initializes omp)
    int n = omp_get_thread_num();
    printf("omp_get_thread_num() = %d\n", n);

    //use omp to do some work, guarantees omp initialization
    #pragma omp parallel num_threads(3)
    {
    }

    //omp hard pause should succeed
    int rc = omp_pause_resource_all(omp_pause_hard);
    assert(rc == 0);

    //omp_get_thread_num() should work again with no segfault
    n = omp_get_thread_num();
    printf("omp_get_thread_num() = %d\n", n);

    printf("main ended\n");
    return 0;
}
ompResetResourceDoubleForkHandlerAttachTest.cc (deadlocks)
/*
 * ompResetResourceDoubleForkHandlerAttachTest.cc
 *
 * Reproducer (reported to deadlock): with KMP_INIT_AT_FORK=1, run a parallel
 * region, hard-pause the OpenMP runtime, run another parallel region, and
 * then fork().
 */
#include <sys/wait.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <omp.h>

/* Runs an empty work-shared loop inside a parallel region. */
void doOmpWork()
{
    #pragma omp parallel
    {
        #pragma omp for
        for (int i=0;i<10;++i){
            ;
        }
    }
}

int main()
{
    //explicitly set the KMP_INIT_AT_FORK environment variable to 1
    setenv("KMP_INIT_AT_FORK","1",true);

    //use omp to do some work
    doOmpWork();

    //omp hard pause should succeed
    int rc = omp_pause_resource_all(omp_pause_hard);
    assert(rc == 0);

    //use omp to do some work
    doOmpWork();

    printf("calling fork\n");
    //we'll fork .. this shouldn't deadlock
    int p = fork();
    printf("fork finished\n");
    if (!p) {
        printf("child ended\n");
        exit(0);
    }
    waitpid(p, NULL, 0);

    printf("main ended\n");
    return 0;
}
ompResetResourceParallelNumThreadsTest.cc (assert failure)
/*
 * ompResetResourceParallelNumThreadsTest.cc
 *
 * Reproducer (reported to hit an assert failure): open a
 * `parallel num_threads` region directly in main() after the OpenMP runtime
 * has been hard-paused.
 */
#include <sys/wait.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
   NOTE: interestingly clang inserts `__kmpc_global_thread_num()` into functions that uses omp pragma statements.
   So if we wrap the `#pragma omp parallel num_threads (3)` into a function and then call it, there will be
   no assert failure because `__kmpc_global_thread_num()` re-initializes omp properly
*/
//void doOmpWork()
//{
//    #pragma omp parallel num_threads (3)
//    {
//        printf("in parallel region 1, thread number %d\n", omp_get_thread_num());
//    }
//}

int main()
{
    #pragma omp parallel num_threads (3)
    {
        printf("in parallel region 1, thread number %d\n", omp_get_thread_num());
    }

    //omp hard pause should succeed
    int rc = omp_pause_resource_all(omp_pause_hard);
    assert(rc == 0);

    //calling `parallel num_threads` again should not trigger any omp asserts
    #pragma omp parallel num_threads (3)
    {
        printf("in parallel region 1, thread number %d\n", omp_get_thread_num());
    }

    printf("main ended\n");
    return 0;
}
ompResetResourceChildExit.cc (assert failure)
/*
 * ompResetResourceChildExit.cc
 *
 * Reproducer (reported to hit an assert failure): run a parallel region,
 * hard-pause the OpenMP runtime, fork(), and have the child exit
 * immediately.
 */
#include <sys/wait.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <omp.h>

//this example is taken from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58378#c3
int main()
{
    #pragma omp parallel num_threads (3)
    {
    }

    //omp hard pause should succeed
    int rc = omp_pause_resource_all(omp_pause_hard);
    assert(rc == 0);

    int p = fork();
    if (!p) {
        //child should be able to exit properly without omp assert failures
        exit(0);
    }
    waitpid(p, NULL, 0);

    printf("main ended\n");
    return 0;
}

I believe my examples are valid OpenMP programs that trigger errors in the runtime, but if they are not, please let me know.

Thanks.

P.S.
I discovered these errors while trying to make OpenMP work with forking without relying on implementation details (I know that LLVM's OpenMP runtime supports forking via its own atfork handlers). At least for the cases I'm concerned with, it is enough to reset OpenMP before forking and then re-initialize it after forking, because I don't have any state, such as threadprivate variables, that needs to persist across the reset.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions