#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h>
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif
// Minimum and maximum representable values for each supported loop index type.
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffLL;
    static const unsigned long long mn = 0x0000000000000000LL;
};
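// i_maxmin<> is used by __kmp_dist_get_bounds (near the end of this file) to clamp a
// team's computed upper bound when the per-team chunk arithmetic wraps around the index type.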
#ifdef KMP_STATIC_STEAL_ENABLED

// Replaces dispatch_private_info{32,64}_t for a loop index of type T.
template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
    T static_steal_counter;    // used only by the static_steal schedule
    struct KMP_ALIGN( 32 ) {   // the remaining fields are kept in one 32-byte-aligned block

#else /* KMP_STATIC_STEAL_ENABLED */

template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
template< typename T >
struct KMP_ALIGN_CACHE dispatch_private_info_template {
    // duplicate the alignment on the union, otherwise the structure size is not correct
    union KMP_ALIGN_CACHE private_info_tmpl {
        dispatch_private_infoXX_template< T > p;
        dispatch_private_info64_t             p64;
    } u;
    enum sched_type schedule;       /* scheduling algorithm */
    kmp_uint32      ordered;        /* ordered clause specified */
    kmp_uint32      ordered_bumped;
    kmp_int32       ordered_dummy[KMP_MAX_ORDERED-3]; // keep the structure size constant
    dispatch_private_info * next;   /* stack of buffers for nest of serial regions */
    kmp_uint32      nomerge;        /* don't merge iterations if serialized */
    kmp_uint32      type_size;
    enum cons_type  pushed_ws;
};
template< typename UT >
struct dispatch_shared_infoXX_template {
    volatile UT     iteration;
    volatile UT     num_done;
    volatile UT     ordered_iteration;
    UT              ordered_dummy[KMP_MAX_ORDERED-1]; // keep the structure size constant
};

template< typename UT >
struct dispatch_shared_info_template {
    // a union is needed here to keep the structure size
    union shared_info_tmpl {
        dispatch_shared_infoXX_template< UT >  s;
        dispatch_shared_info64_t               s64;
    } u;
    volatile kmp_uint32     buffer_index;
};
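/*
 * The templated structures above are layout-compatible with the untyped
 * dispatch_private_info and dispatch_shared_info buffers (this is checked by the
 * KMP_BUILD_ASSERTs in __kmp_dispatch_init below), which is what allows the per-thread
 * and per-team dispatch buffers to be reinterpret_cast to whichever index width the
 * loop actually uses.
 */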
#undef USE_TEST_LOCKS

// test_then_add template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_ADD32( p, d );
    return r;
}

template<>
__forceinline kmp_int64
test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_ADD64( p, d );
    return r;
}

// test_then_inc_acq template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC_ACQ32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC_ACQ64( p );
    return r;
}

// test_then_inc template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc( volatile T *p ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC64( p );
    return r;
}

// compare_and_swap template (general template should NOT be used)
template< typename T >
static __forceinline kmp_int32
compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
{
    return KMP_COMPARE_AND_STORE_REL32( p, c, s );
}

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
{
    return KMP_COMPARE_AND_STORE_REL64( p, c, s );
}
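/*
 * __kmp_wait_yield spins on *spinner until pred(*spinner, checker) becomes true,
 * yielding when the system is oversubscribed.  Below it is used to wait both for the
 * shared ordered iteration counter and for a dispatch buffer index to be released.
 */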
template< typename UT >
static UT
__kmp_wait_yield( volatile UT * spinner,
                  UT             checker,
                  kmp_uint32  (* pred)( UT, UT )
                  USE_ITT_BUILD_ARG( void * obj )   // higher-level synchronization object, or NULL
                )
{
    // note: we may not belong to a team at this point
    register volatile UT         * spin  = spinner;
    register          UT           check = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( UT, UT ) = pred;
    register          UT           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while ( !f( r = *spin, check ) )
    {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if oversubscribed, or waited long enough in throughput mode, yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
template< typename UT >
static kmp_uint32 __kmp_eq( UT value, UT checker) {
    return value == checker;
}

template< typename UT >
static kmp_uint32 __kmp_neq( UT value, UT checker) {
    return value != checker;
}

template< typename UT >
static kmp_uint32 __kmp_lt( UT value, UT checker) {
    return value < checker;
}

template< typename UT >
static kmp_uint32 __kmp_ge( UT value, UT checker) {
    return value >= checker;
}

template< typename UT >
static kmp_uint32 __kmp_le( UT value, UT checker) {
    return value <= checker;
}
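// These predicates are the ones handed to __kmp_wait_yield: __kmp_ge<UT> while waiting
// for the ordered iteration counter, __kmp_eq<kmp_uint32> while waiting for a dispatch
// buffer index to come around.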
static void
__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( gtid_ref );

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_root -> r.r_active
          && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_sh_current );
        UT lower;

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }
        lower = pr->u.p.ordered_lower;

#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ] );
            }
        }
#endif /* !defined(KMP_GOMP_COMPAT) */

#ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
#endif

        __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                USE_ITT_BUILD_ARG( NULL )
                                );
#ifdef KMP_DEBUG
        {
            const char * buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
#endif
    }
    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
}
static void
__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
            __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_sh_current );

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }

        KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped != 0 ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ] );
            }
        }
#endif /* !defined(KMP_GOMP_COMPAT) */

        pr->ordered_bumped += 1;

        KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
                        gtid, pr->ordered_bumped ) );

        test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
    }
    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
}
// Computes x**y for 0 < x < 1 and unsigned integer y, by binary exponentiation.
template< typename UT >
static __forceinline long double
__kmp_pow( long double x, UT y ) {
    long double s = 1.0L;
    KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
    while ( y ) {
        if ( y & 1 )
            s *= x;
        x *= x;
        y >>= 1;
    }
    return s;
}
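/*
 * The guided-analytical schedule models the work remaining after k chunks as
 * tc * x^k, where x = 1 - 1/(2*nproc) (see __kmp_dispatch_init below).  __kmp_pow
 * supplies that x^k factor and __kmp_dispatch_guided_remaining, next, turns it into
 * an iteration count.
 */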
/* Returns the number of iterations remaining after idx chunks have been dispatched
   under the guided-analytical schedule, i.e. ceil( tc * base**idx ). */
template< typename T >
static __inline typename traits_t< T >::unsigned_t
__kmp_dispatch_guided_remaining(
    T                                  tc,
    typename traits_t< T >::floating_t base,
    typename traits_t< T >::unsigned_t idx
) {
    typedef typename traits_t< T >::unsigned_t UT;

    long double x = tc * __kmp_pow< UT >(base, idx);
    UT r = (UT) x;
    if ( x == r )
        return r;
    return r + 1;
}
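/*
 * Tuning parameters of the guided-iterative schedule: each grab takes roughly
 * remaining * guided_flt_param / nproc iterations, and the schedule falls back to
 * plain dynamic chunks once fewer than guided_int_param * nproc * (chunk + 1)
 * iterations remain (see the kmp_sch_guided_iterative_chunked cases below).
 */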
static int    guided_int_param = 2;
static double guided_flt_param = 0.5;
// UT: unsigned flavor of T; ST: signed flavor of T; DBL: floating type wide enough for T
template< typename T >
static void
__kmp_dispatch_init(
    ident_t                        * loc,
    int                              gtid,
    enum sched_type                  schedule,
    T                                lb,
    T                                ub,
    typename traits_t< T >::signed_t st,
    typename traits_t< T >::signed_t chunk,
    int                              push_ws
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );
    int           active;
    UT            tc;
    kmp_info_t *  th;
    kmp_team_t *  team;
    kmp_uint32    my_buffer_index;
    dispatch_private_info_template< T >          * pr;
    dispatch_shared_info_template< UT > volatile * sh;

    KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
    KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
            traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
        __kmp_str_free( &buff );
    }
#endif
    th     = __kmp_threads[ gtid ];
    team   = th -> th.th_team;
    active = ! team -> t.t_serialized;
    th->th.th_ident = loc;

    kmp_uint64 cur_chunk = chunk;
    int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
        th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1;

    if ( ! active ) {
        // serialized region: use the team-private buffer (top of the stack)
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer );
    } else {
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        my_buffer_index = th->th.th_dispatch->th_disp_index ++;

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
        sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
            ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
    }
    pr->type_size = ___kmp_size_type;  // remember the size of the index variables

    if ( schedule == kmp_sch_static ) {
        schedule = __kmp_static;
    }
    if ( schedule == kmp_sch_runtime ) {
        // use the scheduling specified by OMP_SCHEDULE (or the default if not specified)
        schedule = team -> t.t_sched.r_sched_type;
        // detail the schedule if needed
        if ( schedule == kmp_sch_guided_chunked ) {
            schedule = __kmp_guided;
        } else if ( schedule == kmp_sch_static ) {
            schedule = __kmp_static;
        }
        // use the chunk size specified by OMP_SCHEDULE (or the default if not specified)
        chunk = team -> t.t_sched.chunk;
#ifdef KMP_DEBUG
        {
            const char * buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
                traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
            __kmp_str_free( &buff );
        }
#endif
    } else {
        if ( schedule == kmp_sch_guided_chunked ) {
            schedule = __kmp_guided;
        }
        if ( chunk <= 0 ) {
            chunk = KMP_DEFAULT_CHUNK;
        }
    }

    if ( schedule == kmp_sch_auto ) {
        schedule = __kmp_auto;
#ifdef KMP_DEBUG
        {
            const char * buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
                traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
            __kmp_str_free( &buff );
        }
#endif
    }

    /* guided analytical is not safe for too many threads */
    if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
        schedule = kmp_sch_guided_iterative_chunked;
        KMP_WARNING( DispatchManyThreads );
    }
    pr->u.p.parm1 = chunk;

    KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper),
                 "unknown scheduling type" );
    if ( __kmp_env_consistency_check ) {
        if ( st == 0 ) {
            __kmp_error_construct(
                kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc );
        }
    }
    /* compute trip count */
    tc = ( ub - lb + st );
    } else if ( ub < lb ) {

    pr->u.p.last_upper = ub + st;
    /* NOTE: only the active parallel region(s) has active ordered sections */
    if ( active ) {
        if ( pr->ordered == 0 ) {
            th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
            th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
        } else {
            pr->ordered_bumped = 0;
            pr->u.p.ordered_lower = 1;
            pr->u.p.ordered_upper = 0;
            th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
            th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
        }
    }

    if ( __kmp_env_consistency_check ) {
        enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
        if ( push_ws ) {
            __kmp_push_workshare( gtid, ws, loc );
            pr->pushed_ws = ws;
        } else {
            __kmp_check_workshare( gtid, ws, loc );
            pr->pushed_ws = ct_none;
        }
    }
    switch ( schedule ) {
#if  ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
    case kmp_sch_static_steal:
        {
            T nproc = team->t.t_nproc;
            T ntc, init;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );

            ntc = (tc % chunk ? 1 : 0) + tc / chunk;
            if ( nproc > 1 && ntc >= nproc ) {
                T id = __kmp_tid_from_gtid(gtid);
                T small_chunk, extras;

                small_chunk = ntc / nproc;
                extras = ntc % nproc;

                init = id * small_chunk + ( id < extras ? id : extras );
                pr->u.p.count = init;
                pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );
                break;
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
                               gtid ) );
                schedule = kmp_sch_static_balanced;
                /* too few iterations: fall through to kmp_sch_static_balanced */
            }
        } // case
#endif
    case kmp_sch_static_balanced:
        {
            T nproc = team->t.t_nproc;
            T init, limit;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                           gtid) );

            if ( nproc > 1 ) {
                T id = __kmp_tid_from_gtid(gtid);

                if ( tc < nproc ) {
                    if ( id < tc ) {
                        init = limit = id;
                        pr->u.p.parm1 = ( id == tc - 1 );  /* parm1 stores whether this thread runs the last iteration */
                    } else {
                        pr->u.p.parm1 = FALSE;
                        break;
                    }
                } else {
                    T small_chunk = tc / nproc;
                    T extras = tc % nproc;
                    init  = id * small_chunk + ( id < extras ? id : extras );
                    limit = init + small_chunk - ( id < extras ? 0 : 1 );
                    pr->u.p.parm1 = ( id == nproc - 1 );
                }
            } else {
                if ( tc > 0 ) {
                    init = 0;
                    limit = tc - 1;
                    pr->u.p.parm1 = TRUE;
                } else {
                    pr->u.p.parm1 = FALSE;
                    break;
                }
            }
            // calculate chunk for metadata report
            if ( itt_need_metadata_reporting )
                cur_chunk = limit - init + 1;

            if ( st == 1 ) {
                pr->u.p.lb = lb + init;
                pr->u.p.ub = lb + limit;
            } else {
                T ub_tmp = lb + limit * st;  // calculated upper bound; "ub" is the user-supplied one
                pr->u.p.lb = lb + init * st;
                // adjust the upper bound to "ub" if needed, so lastprivate matches it exactly
                if ( st > 0 ) {
                    pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
                } else {
                    pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
                }
            }
            if ( pr->ordered ) {
                pr->u.p.ordered_lower = init;
                pr->u.p.ordered_upper = limit;
            }
            break;
        } // case
    case kmp_sch_guided_iterative_chunked :
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    // when remaining iterations become less than parm2, switch to dynamic
                    pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
                    *(double*)&pr->u.p.parm3 = guided_flt_param / nproc;  // may occupy parm3 and parm4
                }
            } else {
                KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1: fall through to kmp_sch_static_greedy */
                KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
            }
        }
        break;
    case kmp_sch_guided_analytical_chunked:
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* use 64-bit x87 precision while computing the cross-over point */
                    unsigned int oldFpcw = _control87(0,0);
                    _control87(_PC_64,_MCW_PC);
#endif
                    /* value used for comparison in solver for cross-over point */
                    long double target = ((long double)chunk * 2 + 1) * nproc / tc;
                    UT cross;  /* first chunk index at which dynamic-style scheduling takes over */

                    /* commonly used term: (2*nproc - 1) / (2*nproc) */
                    x = (long double)1.0 - (long double)0.5 / nproc;

                    ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
                    KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );

                    /* save the term in the thread-private dispatch structure */
                    *(DBL*)&pr->u.p.parm3 = x;

                    /* solve for the cross-over point: exponential search for an upper bound, */
                    p = __kmp_pow< UT >(x,right);
                    } while ( p > target && right < (1<<27) );

                    /* ... then bisection between left and right */
                    while ( left + 1 < right ) {
                        mid = (left + right) / 2;
                        if ( __kmp_pow< UT >(x,mid) > target ) {
                            left = mid;
                        } else {
                            right = mid;
                        }
                    }
                    cross = right;

                    KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);

                    /* save the cross-over point */
                    pr->u.p.parm2 = cross;

#if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
#define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
#else
#define GUIDED_ANALYTICAL_WORKAROUND (x)
#endif
                    /* dynamic-style scheduling offset */
                    pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    _control87(oldFpcw,_MCW_PC);  /* restore FPCW */
#endif
                }
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
                               gtid ) );
                schedule = kmp_sch_static_greedy;
            }
        }
        break;
    case kmp_sch_static_greedy:
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
        pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
            ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
            tc;
        break;
    case kmp_sch_static_chunked :
    case kmp_sch_dynamic_chunked :
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
        break;
    case kmp_sch_trapezoidal :
        {
            /* TSS: trapezoid self-scheduling, minimum chunk size = parm1 */
            T parm1, parm2, parm3, parm4;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );

            parm1 = chunk;

            /* F : size of the first cycle */
            parm2 = ( tc / (2 * team->t.t_nproc) );

            /* L : size of the last cycle; make sure it is not larger than the first one */
            if ( parm1 < 1 ) {
                parm1 = 1;
            } else if ( parm1 > parm2 ) {
                parm1 = parm2;
            }

            /* N : number of cycles */
            parm3 = ( parm2 + parm1 );
            parm3 = ( 2 * tc + parm3 - 1) / parm3;

            /* sigma : decreasing increment of the trapezoid */
            parm4 = ( parm3 - 1 );
            parm4 = ( parm2 - parm1 ) / parm4;

            pr->u.p.parm1 = parm1;
            pr->u.p.parm2 = parm2;
            pr->u.p.parm3 = parm3;
            pr->u.p.parm4 = parm4;
        } // case
        break;
    default:
        {
            __kmp_msg(
                kmp_ms_fatal,                        // severity
                KMP_MSG( UnknownSchedTypeDetected ), // primary message
                KMP_HNT( GetNewerLibrary ),          // hint
                __kmp_msg_null                       // variadic argument list terminator
            );
        }
        break;
    } // switch

    pr->schedule = schedule;
    if ( active ) {
        /* the buffer named my_buffer_index is free to use when its index comes around */
        KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );
        __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
                                        USE_ITT_BUILD_ARG( NULL ) );
        KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );

        th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
        th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;
    } // if (active)

    if ( pr->ordered ) {
        __kmp_itt_ordered_init( gtid );
    }
    // report loop metadata (master of an active team at level 1 only)
    if ( itt_need_metadata_reporting ) {
        kmp_uint64 schedtype = 0; // static
        switch ( schedule ) {
        case kmp_sch_static_chunked:
        case kmp_sch_static_balanced:  // chunk was calculated in the switch above
            break;
        case kmp_sch_static_greedy:
            cur_chunk = pr->u.p.parm1;
            break;
        case kmp_sch_dynamic_chunked:
            schedtype = 1;
            break;
        case kmp_sch_guided_iterative_chunked:
        case kmp_sch_guided_analytical_chunked:
            schedtype = 2;
            break;
        }
        __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
    }
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
            " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
            " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
            traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
            traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
        KD_TRACE(10, ( buff,
                       gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
                       pr->u.p.st, pr->u.p.tc, pr->u.p.count,
                       pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
                       pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
        __kmp_str_free( &buff );
    }
#endif
#if ( KMP_STATIC_STEAL_ENABLED )
    if ( ___kmp_size_type < 8 ) {
        // mark that other threads may start stealing from this thread from now on
        volatile T * p = &pr->u.p.static_steal_counter;
        *p = *p + 1;
    }
#endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )
#if OMPT_SUPPORT && OMPT_TRACE
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif
}
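/*
 * __kmp_dispatch_finish: called when a thread finishes a chunk of an ordered loop
 * (via the __kmpc_dispatch_fini_* entry points below).  It waits until the shared
 * ordered_iteration counter reaches this thread's ordered_lower value and then bumps
 * the counter so the next thread in iteration order may proceed.
 */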
template< typename UT >
static void
__kmp_dispatch_finish( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        if ( pr->ordered_bumped ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
#endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                   );
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
#endif

            test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
}
#ifdef KMP_GOMP_COMPAT

template< typename UT >
static void
__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        UT lower = pr->u.p.ordered_lower;
        UT upper = pr->u.p.ordered_upper;
        UT inc = upper - lower + 1;

        if ( pr->ordered_bumped == inc ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
                    "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
                __kmp_str_free( &buff );
            }
#endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                   );

            KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
                    "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
                __kmp_str_free( &buff );
            }
#endif

            test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
}
#if OMPT_SUPPORT && OMPT_TRACE
#define OMPT_LOOP_END                                                       \
    if (status == 0) {                                                      \
        if ((ompt_status == ompt_status_track_callback) &&                  \
            ompt_callbacks.ompt_callback(ompt_event_loop_end)) {            \
            ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);     \
            ompt_task_info_t *task_info = __ompt_get_taskinfo(0);           \
            ompt_callbacks.ompt_callback(ompt_event_loop_end)(              \
                team_info->parallel_id, task_info->task_id);                \
        }                                                                   \
    }
#else
#define OMPT_LOOP_END // no-op
#endif
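/*
 * __kmp_dispatch_next: hands the calling thread its next chunk of the current loop.
 * Returns nonzero and fills *p_lb / *p_ub / *p_st when a chunk was assigned; returns
 * zero once the loop is exhausted, at which point the dispatch buffer is released and
 * the OMPT loop-end callback (OMPT_LOOP_END above) fires.
 */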
template< typename T >
static int
__kmp_dispatch_next(
    ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub,
    typename traits_t< T >::signed_t *p_st
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
    static const int ___kmp_size_type = sizeof( UT );
#endif

    int status;
    dispatch_private_info_template< T > * pr;
    kmp_info_t * th   = __kmp_threads[ gtid ];
    kmp_team_t * team = th -> th.th_team;

    KMP_DEBUG_ASSERT( p_lb && p_ub && p_st );
#ifdef KMP_DEBUG
    {
        const char * buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
        __kmp_str_free( &buff );
    }
#endif
    if ( team -> t.t_serialized ) {
        /* serialized regions use the team-private buffer (top of the stack) */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer );
        KMP_DEBUG_ASSERT( pr );

        if ( (status = (pr->u.p.tc != 0)) == 0 ) {
            // the loop is finished
            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }
        } else if ( pr->nomerge ) {
            kmp_int32 last;
            T         start;
            UT        limit, trip, init;
            ST        incr;
            T         chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );

            init = chunk * pr->u.p.count++;
            trip = pr->u.p.tc - 1;

            if ( (status = (init <= trip)) == 0 ) {
                if ( __kmp_env_consistency_check ) {
                    if ( pr->pushed_ws != ct_none ) {
                        pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                    }
                }
            } else {
                start = pr->u.p.lb;
                incr  = pr->u.p.st;
                limit = chunk + init - 1;

                if ( (last = (limit >= trip)) != 0 ) {
                    limit = trip;
                    pr->u.p.last_upper = pr->u.p.ub;
                }
                if ( p_last != NULL )
                    *p_last = last;
                if ( incr == 1 ) {
                    *p_lb = start + init;
                    *p_ub = start + limit;
                } else {
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;
                }

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                    {
                        const char * buff;
                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
                    }
#endif
                }
            }
        } else {
            pr->u.p.last_upper = *p_ub;
            if ( p_last != NULL )
                *p_last = TRUE;
        }
#ifdef KMP_DEBUG
        {
            const char * buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
                "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) );
            __kmp_str_free( &buff );
        }
#endif
#if INCLUDE_SSC_MARKS
        SSC_MARK_DISPATCH_NEXT();
#endif
        return status;
    } else {
        dispatch_shared_info_template< UT > *sh;
        kmp_int32 last = 0;
        T  start;
        ST incr;
        UT limit, trip, init;

        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        KMP_DEBUG_ASSERT( pr );
        sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( sh );

        if ( pr->u.p.tc == 0 ) {
            // zero trip count: nothing to do
            status = 0;
        } else {
            switch (pr->schedule) {
#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            case kmp_sch_static_steal:
                {
                    T chunk = pr->u.p.parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );

                    trip = pr->u.p.tc - 1;

                    if ( ___kmp_size_type > 4 ) {
                        // 8-byte index types: bump the local chunk counter directly
                        init   = ( pr->u.p.count )++;
                        status = ( init < (UT)pr->u.p.ub );
                    } else {
                        // 4-byte index types: update "count" and "ub" as one atomic 8-byte unit
                        union_i4 vold, vnew;
                        vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                        vnew   = vold;
                        vnew.p.count++;
                        while( ! KMP_COMPARE_AND_STORE_ACQ64(
                                    ( volatile kmp_int64* )&pr->u.p.count,
                                    *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                    *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                            vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                            vnew   = vold;
                            vnew.p.count++;
                        }
                        vnew = vold;
                        init   = vnew.p.count;
                        status = ( init < (UT)vnew.p.ub ) ;
                    }
                    kmp_info_t **other_threads = team->t.t_threads;
                    int while_limit = 10;
                    int while_index = 0;

                    while ( ( !status ) && ( while_limit != ++while_index ) ) {
                        union_i4 vold, vnew;
                        kmp_int32 remaining;
                        T victimIdx    = pr->u.p.parm4;
                        T oldVictimIdx = victimIdx;
                        dispatch_private_info_template< T > * victim;

                            victimIdx = team->t.t_nproc - 1;

                            victim = reinterpret_cast< dispatch_private_info_template< T >* >
                                ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
                        } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );

                             ( (*( volatile T * )&victim->u.p.static_steal_counter) !=
                               (*( volatile T * )&pr->u.p.static_steal_counter) ) ) {

                        if ( oldVictimIdx == victimIdx ) {

                        pr->u.p.parm4 = victimIdx;

                            vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );

                            KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
                            if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {

                            vnew.p.ub -= (remaining >> 2);
                            KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                            #pragma warning( push )
                            #pragma warning( disable: 186 )
                            KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
                            #pragma warning( pop )

                            if ( KMP_COMPARE_AND_STORE_ACQ64(
                                    ( volatile kmp_int64 * )&victim->u.p.count,
                                    *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                    *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {

                                init = vold.p.count;

                                pr->u.p.count = init + 1;
                                pr->u.p.ub = vnew.p.count;

                                vold.p.count = init + 1;

                                *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
#endif // KMP_ARCH_X86
                    if ( !status ) {
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.parm2;
                        init *= chunk;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        KMP_DEBUG_ASSERT(init <= trip);
                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;
                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    }
                } // case
                break;
#endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            case kmp_sch_static_balanced:
                {
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
                    if ( (status = !pr->u.p.count) != 0 ) {  /* check whether the thread still has a chunk to do */
                        last = pr->u.p.parm1;
                    } else {  /* no iterations to do */
                        pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
                    }
                    if ( pr->ordered ) {
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    }
                } // case
                break;
            case kmp_sch_static_greedy:
            case kmp_sch_static_chunked:
                {
                    T parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
                                   gtid ) );
                    parm1 = pr->u.p.parm1;

                    trip = pr->u.p.tc - 1;
                    init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

                    if ( (status = (init <= trip)) != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        limit = parm1 + init - 1;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        pr->u.p.count += team->t.t_nproc;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    }
                } // case
                break;
            case kmp_sch_dynamic_chunked:
                {
                    T chunk = pr->u.p.parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                                   gtid ) );

                    init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                    trip = pr->u.p.tc - 1;

                    if ( (status = (init <= trip)) == 0 ) {
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    }
                } // case
                break;
            case kmp_sch_guided_iterative_chunked:
                {
                    T chunkspec = pr->u.p.parm1;
                    KD_TRACE(100,
                        ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
                    trip = pr->u.p.tc;
                    // grab a guided chunk; fall back to dynamic-style chunks near the end
                    while(1) {
                        ST remaining;             // signed, because it can become negative
                        init = sh->u.s.iteration; // shared value
                        remaining = trip - init;
                        if ( remaining <= 0 ) {
                            status = 0;  // nothing left, do not touch the shared counter
                            break;
                        }
                        if ( (T)remaining < pr->u.p.parm2 ) {
                            // close to the end: use dynamic-style scheduling
                            init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
                            remaining = trip - init;
                            if (remaining <= 0) {
                                status = 0;  // all iterations were taken by other threads
                            } else {
                                status = 1;
                                if ( (T)remaining > chunkspec ) {
                                    limit = init + chunkspec - 1;
                                } else {
                                    last  = 1;   // the last chunk
                                    limit = init + remaining - 1;
                                }
                            }
                            break;
                        }
                        limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 );
                        if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
                            status = 1;  // claimed [init, limit)
                            --limit;
                            break;
                        }
                    } // while
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        if ( p_st != NULL )
                            *p_st = incr;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
                        }
                    }
                } // case
                break;
            case kmp_sch_guided_analytical_chunked:
                {
                    T  chunkspec = pr->u.p.parm1;
                    UT chunkIdx;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* original FPCW value, saved while 64-bit x87 precision is forced */
                    unsigned int oldFpcw;
                    unsigned int fpcwSet = 0;
#endif
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
                                   gtid ) );

                    trip = pr->u.p.tc;

                    KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
                    KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip);

                    while(1) { /* a safeguard against unexpected zero chunk sizes */
                        chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                        if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
                            --trip;
                            /* use dynamic-style scheduling */
                            init = chunkIdx * chunkspec + pr->u.p.count;
                            /* need to verify init > 0 in case of overflow in the above calculation */
                            if ( (status = (init > 0 && init <= trip)) != 0 ) {
                                limit = init + chunkspec -1;

                                if ( (last = (limit >= trip)) != 0 )
                                    limit = trip;
                            }
                            break;
                        } else {
                            /* use exponential-style scheduling */
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                            /* Windows on IA-32 defaults to 53-bit x87 precision; force 64-bit once */
                            if ( !fpcwSet ) {
                                oldFpcw = _control87(0,0);
                                _control87(_PC_64,_MCW_PC);
                                fpcwSet = 0x30000;
                            }
#endif
                            if ( chunkIdx ) {
                                init = __kmp_dispatch_guided_remaining< T >(
                                           trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
                                KMP_DEBUG_ASSERT(init);
                                init = trip - init;
                            } else
                                init = 0;
                            limit = trip - __kmp_dispatch_guided_remaining< T >(
                                               trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
                            KMP_ASSERT(init <= limit);
                            if ( init < limit ) {
                                KMP_DEBUG_ASSERT(limit <= trip);
                                --limit;
                                status = 1;
                                break;
                            }
                        }
                    } // while (1)
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* restore FPCW if it was changed (check fpcwSet first: oldFpcw may be uninitialized) */
                    if ( fpcwSet && ( oldFpcw & fpcwSet ) )
                        _control87(oldFpcw,_MCW_PC);
#endif
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        if ( p_st != NULL )
                            *p_st = incr;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
                        }
                    }
                } // case
                break;
            case kmp_sch_trapezoidal:
                {
                    UT index;
                    T parm2 = pr->u.p.parm2;
                    T parm3 = pr->u.p.parm3;
                    T parm4 = pr->u.p.parm4;
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                                   gtid ) );

                    index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );

                    init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
                    trip = pr->u.p.tc - 1;

                    if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        }
                    }
                } // case
                break;
            default:
                {
                    __kmp_msg(
                        kmp_ms_fatal,                        // severity
                        KMP_MSG( UnknownSchedTypeDetected ), // primary message
                        KMP_HNT( GetNewerLibrary ),          // hint
                        __kmp_msg_null                       // variadic argument list terminator
                    );
                }
                break;
            } // switch
        } // if (tc == 0) else

        if ( status == 0 ) {
            UT num_done;

            num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
                    traits_t< UT >::spec );
                KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
                __kmp_str_free( &buff );
            }
#endif

            if ( (ST)num_done == team->t.t_nproc-1 ) {
                /* all threads are done with this buffer: release it for reuse */
                sh->u.s.num_done = 0;
                sh->u.s.iteration = 0;

                if ( pr->ordered ) {
                    sh->u.s.ordered_iteration = 0;
                }

                sh -> buffer_index += KMP_MAX_DISP_BUF;
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                               gtid, sh->buffer_index) );
            } // if

            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }

            th -> th.th_dispatch -> th_deo_fcn = NULL;
            th -> th.th_dispatch -> th_dxo_fcn = NULL;
            th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
            th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
        } // if (status == 0)
        else if ( last ) {
            pr->u.p.last_upper = pr->u.p.ub;
        }
        if ( p_last != NULL && status != 0 )
            *p_last = last;
    } // if
#ifdef KMP_DEBUG
    {
        const char * buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d normal case: " \
            "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
        __kmp_str_free( &buff );
    }
#endif
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_END;
    return status;
}
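/*
 * __kmp_dist_get_bounds: computes this team's share of a distribute loop, narrowing
 * *plower / *pupper to the chunk assigned to the team (per __kmp_static being either
 * static_balanced or static_greedy) before the per-team worksharing loop is initialized.
 */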
template< typename T >
static void
__kmp_dist_get_bounds(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t  incr
) {
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t          *th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper );
    KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
#ifdef KMP_DEBUG
    {
        const char * buff;
        buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) );
        __kmp_str_free( &buff );
    }
#endif
    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // the loop is illegal (zero-trip loops are filtered out by the compiler)
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    team = th->th.th_team;
    nteams = th->th.th_teams_size.nteams;
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute the global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr < 0
    }
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // unknown static scheduling type
        // only some teams get a single iteration, the others get nothing
        if( team_id < trip_count ) {
            *pupper = *plower = *plower + team_id * incr;
        } else {
            *plower = *pupper + incr; // zero-trip loop
        }
        if( plastiter != NULL )
            *plastiter = ( team_id == trip_count - 1 );
    } else {
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunk = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) );
            *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
            *plower += team_id * chunk_inc_count;
            *pupper = *plower + chunk_inc_count - incr;
            // check/correct the bounds if needed
            if( incr > 0 ) {
                if( *pupper < *plower )
                    *pupper = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupper > upper - incr;
                if( *pupper > upper )
                    *pupper = upper;
            } else {
                if( *pupper > *plower )
                    *pupper = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupper < upper - incr;
                if( *pupper < upper )
                    *pupper = upper;
            }
        }
    }
}
void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                             kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                              kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                             kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                              kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void __kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                                  kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
void __kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                                   kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void __kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                                  kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void __kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                                   kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st ) {
    return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st ) {
    return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st ) {
    return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st ) {
    return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}
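/*
 * Rough usage sketch (not part of this file): for a loop such as
 *     #pragma omp for schedule(dynamic, 4)
 *     for (int i = lo; i <= hi; ++i) body(i);
 * a compiler typically emits calls along these lines (the local names are
 * illustrative only):
 *
 *     int last, lb, ub, st;
 *     __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, lo, hi, 1, 4);
 *     while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
 *         for (int i = lb; i <= ub; i += st)
 *             body(i);
 *     }
 *
 * The __kmpc_dispatch_fini_* entry points below are used with ordered loops to mark
 * the end of each chunk's ordered work (see __kmp_dispatch_finish above).
 */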
void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )  { __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); }
void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )  { __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); }
void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ) { __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); }
void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ) { __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); }
kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value == checker;
}

kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value != checker;
}

kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
    return value < checker;
}

kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
    return value >= checker;
}

kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
    return value <= checker;
}

kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value == checker;
}

kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value != checker;
}

kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
    return value < checker;
}

kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
    return value >= checker;
}

kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
    return value <= checker;
}
kmp_uint32
__kmp_wait_yield_4( volatile kmp_uint32 * spinner,
                    kmp_uint32            checker,
                    kmp_uint32         (* pred)( kmp_uint32, kmp_uint32 )
                    USE_ITT_BUILD_ARG( void * obj )   // higher-level synchronization object, or NULL
                  )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint32 * spin  = spinner;
    register kmp_uint32            check = checker;
    register kmp_uint32            spins;
    register kmp_uint32          (*f) ( kmp_uint32, kmp_uint32 ) = pred;
    register kmp_uint32            r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = TCR_4(*spin), check)) {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if oversubscribed, or waited long enough in throughput mode, yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}

kmp_uint64
__kmp_wait_yield_8( volatile kmp_uint64 * spinner,
                    kmp_uint64            checker,
                    kmp_uint32         (* pred)( kmp_uint64, kmp_uint64 )
                    USE_ITT_BUILD_ARG( void * obj )   // higher-level synchronization object, or NULL
                  )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint64 * spin  = spinner;
    register kmp_uint64            check = checker;
    register kmp_uint32            spins;
    register kmp_uint32          (*f) ( kmp_uint64, kmp_uint64 ) = pred;
    register kmp_uint64            r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = *spin, check))
    {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if oversubscribed, or waited long enough in throughput mode, yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
#ifdef KMP_GOMP_COMPAT

void
__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                           kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                            kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                           kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                            kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

#endif /* KMP_GOMP_COMPAT */