#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H
enum flag_type {
    flag32,        /**< 32 bit flags */
    flag64,        /**< 64 bit flags */
    flag_oncore    /**< special 64-bit flag for on-core (hierarchical) barrier */
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P>
class kmp_flag {
    volatile P * loc;  /**< Pointer to the flag storage that is modified by another thread */
    flag_type t;       /**< "Type" of the flag in loc */
 public:
    typedef P flag_t;
    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
    /*! @result the pointer to the actual flag */
    volatile P * get() { return loc; }
    /*! @result the flag_type of the flag */
    flag_type get_type() { return t; }
};
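/* Usage model (illustrative sketch, not runtime code; "go" is a hypothetical
   caller-owned location and the runtime is assumed to be initialized).
   internal_release() on the basic flags below bumps the value by 4, stepping
   over the low-order sleep-state bits, so checker values are normally
   multiples of KMP_BARRIER_STATE_BUMP:

       volatile kmp_uint64 go = KMP_INIT_BARRIER_STATE;
       kmp_flag_64 flag(&go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
       // waiter:   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL));
       // releaser: flag.release();
*/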
/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls
   __kmp_wait_template must make certain that another thread calls __kmp_release
   to wake it back up. */
template <class C>
static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
                                       USE_ITT_BUILD_ARG(void * itt_sync_obj))
{
    volatile typename C::flag_t *spin = flag->get();
    kmp_uint32 spins, hibernate;
    int th_gtid;
    int tasks_completed = FALSE;
    KMP_FSYNC_SPIN_INIT(spin, NULL);
    if (flag->done_check()) {
        // Fast path: the flag has already been released; no need to set up waiting.
        KMP_FSYNC_SPIN_ACQUIRED(spin);
        return;
    }
    th_gtid = this_thr->th.th_info.ds.ds_gtid;
    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_status == ompt_status_track_callback) {
        if (this_thr->th.ompt_thread_info.state == ompt_state_idle) {
            if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
            }
        }
        else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
            KMP_DEBUG_ASSERT(this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_explicit);

            ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
            ompt_parallel_id_t pId;
            ompt_task_id_t tId;
            if (team) { // serialized region: ids come from the lightweight task team
                pId = team->ompt_team_info.parallel_id;
                tId = team->ompt_task_info.task_id;
            }
            else {
                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
            }
            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
        }
    }
#endif
    KMP_INIT_YIELD(spins);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Worker threads can't rely on the team struct still existing here, so
        // use the blocktime intervals cached in the thread struct.
#ifdef KMP_ADJUST_BLOCKTIME
        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
            hibernate = 0; // force immediate suspend if blocktime was zeroed
        else
            hibernate = this_thr->th.th_team_bt_intervals;
#else
        hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */
        // Spin for the whole interval (plus up to one more) before sleeping, then
        // convert the remaining interval count into an absolute deadline.
        if (hibernate != 0)
            hibernate++;
        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                      hibernate - __kmp_global.g.g_time.dt.t_value));
    }
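    /* Worked example (sketch; the exact tick length depends on the monitor
       thread's wakeup interval): t_value is a coarse clock advanced once per
       interval. With th_team_bt_intervals == 20 and t_value currently 1000,
       hibernate becomes 20 + 1 + 1000 = 1021, so this thread keeps spinning
       and yielding until the clock passes 1021, then suspends. */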
    // Main wait spin loop
    while (flag->notdone_check()) {
        int in_pool;

        /* If the task team is NULL, it means one of 3 things:
           1) A newly-created thread is first being released by __kmp_fork_barrier(), and
              its task team has not been set up yet.
           2) All tasks have been executed to completion.
           3) Tasking is off for this region (e.g., serialized region, or KMP_TASKING=0). */
        kmp_task_team_t * task_team = NULL;
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            task_team = this_thr->th.th_task_team;
            if (task_team != NULL) {
                if (!TCR_SYNC_4(task_team->tt.tt_active)) {
                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                    __kmp_unref_task_team(task_team, this_thr);
                }
                else if (KMP_TASKING_ENABLED(task_team)) {
                    flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                        USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                }
            }
        }
        KMP_FSYNC_SPIN_PREPARE(spin);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
        // If oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
        KMP_YIELD_SPIN(spins);
        // Check if this thread was transferred between a team and the thread pool
        // while spinning, and keep the active-in-pool count consistent.
        in_pool = !!TCR_4(this_thr->th.th_in_pool);
        if (in_pool != !!this_thr->th.th_active_in_pool) {
            if (in_pool) { // recently transferred from team to pool
                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                this_thr->th.th_active_in_pool = TRUE;
            }
            else { // recently transferred from pool to team
                KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
                this_thr->th.th_active_in_pool = FALSE;
            }
        }
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
            continue; // never sleep when blocktime is "infinite"
        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
            continue; // don't sleep if new tasks are likely to arrive
        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
            continue; // keep spinning until the blocktime deadline passes
        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
        flag->suspend(th_gtid);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
    } // while
#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_status == ompt_status_track_callback) {
        if (this_thr->th.ompt_thread_info.state == ompt_state_idle) {
            if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
                ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
            }
        }
        else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
            KMP_DEBUG_ASSERT(this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_explicit);

            ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
            ompt_parallel_id_t pId;
            ompt_task_id_t tId;
            if (team) {
                pId = team->ompt_team_info.parallel_id;
                tId = team->ompt_task_info.task_id;
            }
            else {
                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
            }
            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
        }
    }
#endif
    KMP_FSYNC_SPIN_ACQUIRED(spin);
}
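/* Shape of the wait loop above (sketch; thresholds are governed by
   KMP_BLOCKTIME and KMP_LIBRARY):

       spin (pause)  ->  yield when oversubscribed  ->  flag->suspend()

   A thread parked in suspend() is woken by flag->resume() from
   __kmp_release_template() below, or re-checks the flag after a spurious
   wakeup and goes back around the loop. */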
// Release the flag and wake any threads waiting on it.
template <class C>
static inline void __kmp_release_template(C *flag)
{
#ifdef KMP_DEBUG
    kmp_info_t * wait_thr = flag->get_waiter(0);
    int target_gtid = wait_thr->th.th_info.ds.ds_gtid;
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
    KF_TRACE(20, ("__kmp_release: T#%d releasing T#%d spin(%p)\n", gtid, target_gtid, flag->get()));
    KMP_DEBUG_ASSERT(flag->get());
    KMP_FSYNC_RELEASING(flag->get());
    typename C::flag_t old_spin = flag->internal_release();

    KF_TRACE(100, ("__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
                   gtid, flag->get(), old_spin, *(flag->get())));
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Only need to check sleep state if infinite block time is not set.
        if (flag->is_sleeping_val(old_spin)) {
            for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
                kmp_info_t * waiter = flag->get_waiter(i);
                int wait_gtid = waiter->th.th_info.ds.ds_gtid;
                // Wake up the thread if needed.
                KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
                              gtid, wait_gtid, flag->get()));
                flag->resume(wait_gtid);
            }
        }
        else {
            KF_TRACE(50, ("__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
                          gtid, target_gtid, flag->get()));
        }
    }
}
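/* Pairing sketch (hypothetical two-thread scenario): T#1 builds a flag over a
   shared location and calls wait(); T#0 builds a flag over the same location
   and calls release(). If T#1 set the sleep bit via set_sleeping() before
   suspending, release() sees it in old_spin (is_sleeping_val) and calls
   resume() to wake T#1; otherwise the released value alone suffices, since
   T#1 is still spinning on notdone_check(). */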
template <typename FlagType>
struct flag_traits {};
template <>
struct flag_traits<kmp_uint32> {
    typedef kmp_uint32 flag_t;
    static const flag_type t = flag32;
    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
};
template <>
struct flag_traits<kmp_uint64> {
    typedef kmp_uint64 flag_t;
    static const flag_type t = flag64;
    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
};
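/* The traits keep the wait/release templates width-agnostic. A minimal sketch
   of dispatching through them (not_done is illustrative only, not part of the
   runtime; FlagType is kmp_uint32 or kmp_uint64):

       template <typename FlagType>
       static inline bool not_done(volatile FlagType *f, FlagType checker) {
           // tcr() routes to TCR_4 or TCR_8; the test_then_* members pick the
           // matching 32- or 64-bit atomic the same way.
           return flag_traits<FlagType>::tcr(*f) != checker;
       }
*/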
template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
    typedef flag_traits<FlagType> traits_type;
    FlagType checker;                 /**< Value to compare flag against to check completion */
    kmp_info_t * waiting_threads[1];  /**< Array of threads sleeping on this thread */
    kmp_uint32 num_waiting_threads;   /**< Number of threads sleeping on this thread */
 public:
    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
        waiting_threads[0] = thr;
    }
    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
    /*! @param i index into waiting_threads
     *  @result the thread waiting at index i */
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i < num_waiting_threads);
        return waiting_threads[i];
    }
    /*! @result the number of threads sleeping on this flag */
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    /*! @param thr the thread which is now waiting; inserted at index 0 */
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    /*! @result true if the flag object has been released */
    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
    /*! @param old_loc previously read flag value
     *  @result true if that value indicates the flag was released */
    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
    /*! @result true if the flag object is not yet released;
     *  used in the __kmp_wait_template spin loop */
    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
    /*! @result original flag value before release;
     *  adds 4, stepping over the low-order state bits */
    FlagType internal_release() {
        return traits_type::test_then_add4((volatile FlagType *)this->get());
    }
    /*! @result original flag value before the sleep bit was set */
    FlagType set_sleeping() {
        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
    }
    /*! @result original flag value before the sleep bit was cleared */
    FlagType unset_sleeping() {
        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    /*! @param old_loc previously read flag value
     *  @result true if the sleep bit was set in old_loc */
    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    /*! @result true if the sleep bit is currently set */
    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
};
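/* Flag value layout for the basic flags (sketch): the payload advances in
   steps of 4 via test_then_add4 while KMP_BARRIER_SLEEP_STATE occupies a
   low-order bit, so a released value with a sleeper decomposes as

       old_spin == payload | KMP_BARRIER_SLEEP_STATE

   which is why is_sleeping_val() masks the bit instead of comparing the whole
   word for equality. */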
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
 public:
    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
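/* Example (sketch; "b_arrived" stands in for whatever 32-bit location the
   caller owns, "new_state" for its target value): wait until the location
   reaches the target, executing queued tasks while spinning if tasking is on:

       kmp_flag_32 flag(&b_arrived, (kmp_uint32)new_state);
       flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
*/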
class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
 public:
    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
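/* The fork/join path drives kmp_flag_64 over a thread's b_go word; a sketch of
   the release side as the barrier code would issue it:

       kmp_flag_64 flag(&other_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                        other_thr);
       flag.release(); // bump b_go; wake other_thr if its sleep bit was set
*/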
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;               /**< Byte value to compare against to check completion */
    kmp_info_t * waiting_threads[1];  /**< Array of threads sleeping on this thread */
    kmp_uint32 num_waiting_threads;   /**< Number of threads sleeping on this thread */
    kmp_uint32 offset;                /**< Portion of flag of interest for an operation */
    bool flag_switch;                 /**< Indicates a switch in flag location */
    enum barrier_type bt;             /**< Barrier type */
    kmp_info_t * this_thr;            /**< Thread to redirect to a different flag location */
#if USE_ITT_BUILD
    void *itt_sync_obj;               /**< ITT object to pass to new flag location */
#endif
    unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
 public:
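    /* Byte-granular layout (sketch): one 64-bit oncore flag packs one byte per
       waiting thread, so a single 64-bit store or OR can release several
       threads on the same core at once. With offset == 3, byteref(get(), 3)
       aliases the fourth byte of the flag, i.e. bits 24..31 on a
       little-endian target. */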
    kmp_flag_oncore(volatile kmp_uint64 *p)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
                    kmp_info_t * thr
#if USE_ITT_BUILD
                    , void *itt
#endif
                    )
        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
          flag_switch(false), bt(bar_t), this_thr(thr)
#if USE_ITT_BUILD
          , itt_sync_obj(itt)
#endif
        {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i < num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc, offset) == checker; }
    bool done_check() { return done_check_val(*get()); }
    bool notdone_check() {
        // Calculate flag_switch: the master may tell this thread to switch to its own b_go flag.
        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
            flag_switch = true;
        if (byteref(get(), offset) != 1 && !flag_switch)
            return true;
        else if (flag_switch) {
            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
            __kmp_wait_64(this_thr, &flag, TRUE
                          USE_ITT_BUILD_ARG(itt_sync_obj));
        }
        return false;
    }
    kmp_uint64 internal_release() {
        kmp_uint64 old_val;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
            // No thread can be sleeping, so a plain byte store suffices.
            old_val = *get();
            byteref(get(), offset) = 1;
        }
        else {
            // Atomically OR in just this thread's byte so concurrent updates
            // to other bytes and the sleep bit are preserved.
            kmp_uint64 mask = 0;
            byteref(&mask, offset) = 1;
            old_val = KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
        }
        return old_val;
    }
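    /* Mask arithmetic example (little-endian sketch): for offset == 2,

           kmp_uint64 mask = 0;
           byteref(&mask, 2) = 1;   // mask == 0x0000000000010000

       so the KMP_TEST_THEN_OR64 in internal_release() sets only this byte
       slot atomically, leaving the other byte slots and the sleep bit alone. */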
    kmp_uint64 set_sleeping() {
        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
    }
    kmp_uint64 unset_sleeping() {
        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*get()); }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
};
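/* Usage sketch (hypothetical values; "shared_go" is a caller-owned 64-bit
   location): a thread occupying byte slot 2 of its core's shared flag,
   waiting on the plain barrier until its byte becomes 1:

       kmp_flag_oncore flag(&shared_go, (kmp_uint64)1, 2, bs_plain_barrier,
                            this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
       flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
*/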
#endif // KMP_WAIT_RELEASE_H