LLVM OpenMP* Runtime Library
kmp_atomic.c
1 /*
2  * kmp_atomic.c -- ATOMIC implementation routines
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp_atomic.h"
17 #include "kmp.h" // TRUE, asm routines prototypes
18 
19 typedef unsigned char uchar;
20 typedef unsigned short ushort;
21 
539 /*
540  * Global vars
541  */
542 
543 #ifndef KMP_GOMP_COMPAT
544 int __kmp_atomic_mode = 1; // Intel perf
545 #else
546 int __kmp_atomic_mode = 2; // GOMP compatibility
547 #endif /* KMP_GOMP_COMPAT */
548 
549 KMP_ALIGN(128)
550 
551 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
552 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
553 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
554 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
555 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
556 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
557 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
558 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for float complex (kmp_cmplx32) data type */
559 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
560 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
561 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
562 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
563 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
564 
565 
566 /*
567  2007-03-02:
568  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
569  bug on *_32 and *_32e. This is just a temporary workaround for the problem.
570  It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
571  routines in assembler language.
572 */
573 #define KMP_ATOMIC_VOLATILE volatile
574 
575 #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
576 
577  static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
578  static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
579  static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
580  static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
581  static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
582  static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
583 
584  static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
585  static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
586  static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
587  static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
588  static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
589  static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
590 
591  static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
592  static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
593  static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
594  static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
595 
596  static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
597  static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
598  static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
599  static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
600 
601 #endif
602 
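// Note: these overloads exist so that the generic macros below, which expand to
// expressions like "(*lhs) OP (rhs)" or "lhs OP rhs", also work when TYPE is one of
// the aligned wrapper structs from kmp_atomic.h. An illustrative sketch (not part
// of the build), assuming the wrappers expose a _Quad member named 'q' as above:
//
//     Quad_a16_t x, y;
//     x += y;               // resolves to operator+= above, i.e. x.q += y.q
//     if ( x < y ) { ... }  // resolves to operator<, used by the MIN/MAX macros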
603 /* ------------------------------------------------------------------------ */
604 /* ATOMIC implementation routines */
605 /* one routine for each operation and operand type */
606 /* ------------------------------------------------------------------------ */
607 
608 // All routine declarations look like:
609 // void __kmpc_atomic_<TYPE_ID>_<OP_ID>( ident_t*, int, TYPE *lhs, TYPE rhs );
610 // ------------------------------------------------------------------------
611 
612 #define KMP_CHECK_GTID \
613  if ( gtid == KMP_GTID_UNKNOWN ) { \
614  gtid = __kmp_entry_gtid(); \
615  } // check and get gtid when needed
616 
617 // Beginning of a definition (provides name, parameters, debug trace)
618 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
619 // OP_ID - operation identifier (add, sub, mul, ...)
620 // TYPE - operands' type
621 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
622 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
623 { \
624  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
625  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
626 
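// For illustration only (a sketch, not compiled code): ATOMIC_BEGIN( fixed4, add, kmp_int32, void )
// expands roughly to the opening of
//
//     void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs )
//     {
//         KMP_DEBUG_ASSERT( __kmp_init_serial );
//         KA_TRACE(100,("__kmpc_atomic_fixed4_add: T#%d\n", gtid ));
//
// and the body plus the closing brace are supplied by the ATOMIC_* macro that uses it.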
627 // ------------------------------------------------------------------------
628 // Lock variables used for critical sections for various size operands
629 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
630 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
631 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
632 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
633 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
634 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
635 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
636 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
637 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
638 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
639 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
640 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
641 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
642 
643 // ------------------------------------------------------------------------
644 // Operation on *lhs, rhs bound by critical section
645 // OP - operator (it's supposed to contain an assignment)
646 // LCK_ID - lock identifier
647 // Note: don't check gtid as it should always be valid
648 // 1, 2-byte - expect valid parameter, other - check before this macro
649 #define OP_CRITICAL(OP,LCK_ID) \
650  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
651  \
652  (*lhs) OP (rhs); \
653  \
654  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
655 
656 // ------------------------------------------------------------------------
657 // For GNU compatibility, we may need to use a critical section,
658 // even though it is not required by the ISA.
659 //
660 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
661 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
662 // critical section. On Intel(R) 64, all atomic operations are done with fetch
663 // and add or compare and exchange. Therefore, the FLAG parameter to this
664 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
665 // require a critical section, where we predict that they will be implemented
666 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
667 //
668 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
669 // the FLAG parameter should always be 1. If we know that we will be using
670 // a critical section, then we want to make certain that we use the generic
671 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
672 // locks that are specialized based upon the size or type of the data.
673 //
674 // If FLAG is 0, then we are relying on dead code elimination by the build
675 // compiler to get rid of the useless block of code, and save a needless
676 // branch at runtime.
677 //
678 
679 #ifdef KMP_GOMP_COMPAT
680 # define OP_GOMP_CRITICAL(OP,FLAG) \
681  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
682  KMP_CHECK_GTID; \
683  OP_CRITICAL( OP, 0 ); \
684  return; \
685  }
686 # else
687 # define OP_GOMP_CRITICAL(OP,FLAG)
688 #endif /* KMP_GOMP_COMPAT */
689 
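// For illustration only: with KMP_GOMP_COMPAT defined, OP_GOMP_CRITICAL( +=, 1 )
// expands roughly to
//
//     if ( (1) && (__kmp_atomic_mode == 2) ) {
//         if ( gtid == KMP_GTID_UNKNOWN ) { gtid = __kmp_entry_gtid(); }
//         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
//         (*lhs) += (rhs);
//         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
//         return;
//     }
//
// i.e. the generic __kmp_atomic_lock is taken (matching GOMP_atomic_start/end
// semantics) and the routine returns without reaching the lock-free path.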
690 #if KMP_MIC
691 # define KMP_DO_PAUSE _mm_delay_32( 1 )
692 #else
693 # define KMP_DO_PAUSE KMP_CPU_PAUSE()
694 #endif /* KMP_MIC */
695 
696 // ------------------------------------------------------------------------
697 // Operation on *lhs, rhs using "compare_and_store" routine
698 // TYPE - operands' type
699 // BITS - size in bits, used to distinguish low level calls
700 // OP - operator
701 #define OP_CMPXCHG(TYPE,BITS,OP) \
702  { \
703  TYPE old_value, new_value; \
704  old_value = *(TYPE volatile *)lhs; \
705  new_value = old_value OP rhs; \
706  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
707  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
708  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
709  { \
710  KMP_DO_PAUSE; \
711  \
712  old_value = *(TYPE volatile *)lhs; \
713  new_value = old_value OP rhs; \
714  } \
715  }
716 
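// For illustration only: OP_CMPXCHG( kmp_real32, 32, + ) expands roughly to
//
//     kmp_real32 old_value, new_value;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//     while ( ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
//                   *VOLATILE_CAST(kmp_int32 *) &old_value,
//                   *VOLATILE_CAST(kmp_int32 *) &new_value ) )
//     {
//         KMP_DO_PAUSE;
//         old_value = *(kmp_real32 volatile *)lhs;
//         new_value = old_value + rhs;
//     }
//
// The float bit patterns are reinterpreted as 32-bit integers for the compare-and-store;
// if another thread changed *lhs in the meantime, the value is re-read and the update retried.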
717 #if USE_CMPXCHG_FIX
718 // 2007-06-25:
719 // workaround for C78287 (complex(kind=4) data type)
720 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
721 // The compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
722 // This is a compiler problem.
723 // Related tracker is C76005, targeted to 11.0.
724 // I verified the asm of the workaround.
725 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
726  { \
727  struct _sss { \
728  TYPE cmp; \
729  kmp_int##BITS *vvv; \
730  }; \
731  struct _sss old_value, new_value; \
732  old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
733  new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
734  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
735  new_value.cmp = old_value.cmp OP rhs; \
736  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
737  *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
738  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
739  { \
740  KMP_DO_PAUSE; \
741  \
742  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
743  new_value.cmp = old_value.cmp OP rhs; \
744  } \
745  }
746 // end of the first part of the workaround for C78287
747 #endif // USE_CMPXCHG_FIX
748 
749 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
750 
751 // ------------------------------------------------------------------------
752 // X86 or X86_64: no alignment problems ====================================
753 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
754 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
755  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
756  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
757  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
758 }
759 // -------------------------------------------------------------------------
760 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
761 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
762  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
763  OP_CMPXCHG(TYPE,BITS,OP) \
764 }
765 #if USE_CMPXCHG_FIX
766 // -------------------------------------------------------------------------
767 // workaround for C78287 (complex(kind=4) data type)
768 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
769 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
770  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
771  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
772 }
773 // end of the second part of the workaround for C78287
774 #endif
775 
776 #else
777 // -------------------------------------------------------------------------
778 // Code for other architectures that don't handle unaligned accesses.
779 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
780 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
781  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
782  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
783  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
784  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
785  } else { \
786  KMP_CHECK_GTID; \
787  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
788  } \
789 }
790 // -------------------------------------------------------------------------
791 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
792 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
793  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
794  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
795  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
796  } else { \
797  KMP_CHECK_GTID; \
798  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
799  } \
800 }
801 #if USE_CMPXCHG_FIX
802 // -------------------------------------------------------------------------
803 // workaround for C78287 (complex(kind=4) data type)
804 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
805 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
806  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
807  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
808  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
809  } else { \
810  KMP_CHECK_GTID; \
811  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
812  } \
813 }
814 // end of the second part of the workaround for C78287
815 #endif // USE_CMPXCHG_FIX
816 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
817 
818 // Routines for ATOMIC 4-byte operands addition and subtraction
819 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
820 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
821 
822 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
823 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
824 
825 // Routines for ATOMIC 8-byte operands addition and subtraction
826 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
827 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
828 
829 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
830 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
831 
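// For illustration only (a hypothetical caller, not the actual compiler codegen):
// a compiler targeting this runtime may lower
//
//     #pragma omp atomic
//     x += 5;              // x is a 32-bit int
//
// into something like
//
//     __kmpc_atomic_fixed4_add( &loc, __kmpc_global_thread_num( &loc ), (kmp_int32 *)&x, 5 );
//
// where 'loc' is the source-location ident_t the compiler emits; the exact codegen
// is compiler-specific.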
832 // ------------------------------------------------------------------------
833 // Entry definitions for integer operands
834 // TYPE_ID - operands type and size (fixed4, float4)
835 // OP_ID - operation identifier (add, sub, mul, ...)
836 // TYPE - operand type
837 // BITS - size in bits, used to distinguish low level calls
838 // OP - operator (used in critical section)
839 // LCK_ID - lock identifier, used to possibly distinguish lock variable
840 // MASK - used for alignment check
841 
842 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
843 // ------------------------------------------------------------------------
844 // Routines for ATOMIC integer operands, other operators
845 // ------------------------------------------------------------------------
846 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
847 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
848 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
849 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
850 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
851 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
852 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
853 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
854 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
855 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
856 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
857 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
858 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
859 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
860 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
861 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
862 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
863 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
864 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
865 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
866 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
867 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
868 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
869 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
870 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
871 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
872 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
873 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
874 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
875 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
876 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
877 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
878 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
879 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
880 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
881 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
882 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
883 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
884 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
885 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
886 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
887 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
888 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
889 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
890 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
891 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
892 
893 
894 /* ------------------------------------------------------------------------ */
895 /* Routines for C/C++ Reduction operators && and || */
896 /* ------------------------------------------------------------------------ */
897 
898 // ------------------------------------------------------------------------
899 // Need separate macros for &&, || because there is no compound assignment operator for them
900 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
901 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
902 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
903  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
904  OP_CRITICAL( = *lhs OP, LCK_ID ) \
905 }
906 
907 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
908 
909 // ------------------------------------------------------------------------
910 // X86 or X86_64: no alignment problems ===================================
911 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
912 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
913  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
914  OP_CMPXCHG(TYPE,BITS,OP) \
915 }
916 
917 #else
918 // ------------------------------------------------------------------------
919 // Code for other architectures that don't handle unaligned accesses.
920 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
921 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
922  OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
923  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
924  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
925  } else { \
926  KMP_CHECK_GTID; \
927  OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
928  } \
929 }
930 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
931 
932 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
933 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
934 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
935 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
936 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
937 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
938 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
939 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
940 
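// Note (illustrative sketch only): since C/C++ has no "&&=" or "||=", the OP passed
// in here is plain && or ||, and the update computed by the CAS body is
//
//     new_value = old_value && rhs;   // result is 0 or 1
//
// while the critical-section fallback uses the "= *lhs OP" form, i.e.
// (*lhs) = *lhs && (rhs). Either way, __kmpc_atomic_fixed4_andl atomically
// replaces *lhs with the logical AND of its old value and rhs.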
941 
942 /* ------------------------------------------------------------------------- */
943 /* Routines for Fortran operators that have no C counterpart: */
944 /* MAX, MIN, .EQV., .NEQV. */
945 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
946 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
947 /* ------------------------------------------------------------------------- */
948 
949 // -------------------------------------------------------------------------
950 // MIN and MAX need separate macros
951 // OP - operator used to check whether any action is still needed
952 #define MIN_MAX_CRITSECT(OP,LCK_ID) \
953  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
954  \
955  if ( *lhs OP rhs ) { /* still need actions? */ \
956  *lhs = rhs; \
957  } \
958  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
959 
960 // -------------------------------------------------------------------------
961 #ifdef KMP_GOMP_COMPAT
962 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
963  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
964  KMP_CHECK_GTID; \
965  MIN_MAX_CRITSECT( OP, 0 ); \
966  return; \
967  }
968 #else
969 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
970 #endif /* KMP_GOMP_COMPAT */
971 
972 // -------------------------------------------------------------------------
973 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
974  { \
975  TYPE KMP_ATOMIC_VOLATILE temp_val; \
976  TYPE old_value; \
977  temp_val = *lhs; \
978  old_value = temp_val; \
979  while ( old_value OP rhs && /* still need actions? */ \
980  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
981  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
982  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
983  { \
984  KMP_CPU_PAUSE(); \
985  temp_val = *lhs; \
986  old_value = temp_val; \
987  } \
988  }
989 
990 // -------------------------------------------------------------------------
991 // 1-byte, 2-byte operands - use critical section
992 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
993 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
994  if ( *lhs OP rhs ) { /* need actions? */ \
995  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
996  MIN_MAX_CRITSECT(OP,LCK_ID) \
997  } \
998 }
999 
1000 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1001 
1002 // -------------------------------------------------------------------------
1003 // X86 or X86_64: no alignment problems ====================================
1004 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1005 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1006  if ( *lhs OP rhs ) { \
1007  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1008  MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1009  } \
1010 }
1011 
1012 #else
1013 // -------------------------------------------------------------------------
1014 // Code for other architectures that don't handle unaligned accesses.
1015 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1016 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1017  if ( *lhs OP rhs ) { \
1018  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1019  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1020  MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1021  } else { \
1022  KMP_CHECK_GTID; \
1023  MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1024  } \
1025  } \
1026 }
1027 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1028 
1029 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1030 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1031 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1032 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1033 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1034 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1035 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1036 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1037 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1038 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1039 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1040 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1041 #if KMP_HAVE_QUAD
1042 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1043 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1044 #if ( KMP_ARCH_X86 )
1045  MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1046  MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1047 #endif
1048 #endif
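// Note (illustrative sketch only): the OP passed to the MIN/MAX macros is the
// "still need action?" test, so for example __kmpc_atomic_fixed4_max behaves like
//
//     if ( *lhs < rhs ) {                         // only then is an update needed
//         old_value = *lhs;
//         while ( old_value < rhs &&
//                 ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
//                       *VOLATILE_CAST(kmp_int32 *) &old_value,
//                       *VOLATILE_CAST(kmp_int32 *) &rhs ) ) {
//             KMP_CPU_PAUSE();
//             old_value = *lhs;                   // re-check: another thread may have won
//         }
//     }
//
// i.e. the store of rhs is attempted only while the old value still compares smaller.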
1049 // ------------------------------------------------------------------------
1050 // Need separate macros for .EQV. because of the need for the complement (~)
1051 // OP ignored for critical sections, ^=~ used instead
1052 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1053 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1054  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1055  OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1056 }
1057 
1058 // ------------------------------------------------------------------------
1059 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1060 // ------------------------------------------------------------------------
1061 // X86 or X86_64: no alignment problems ===================================
1062 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1063 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1064  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1065  OP_CMPXCHG(TYPE,BITS,OP) \
1066 }
1067 // ------------------------------------------------------------------------
1068 #else
1069 // ------------------------------------------------------------------------
1070 // Code for other architectures that don't handle unaligned accesses.
1071 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1072 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1073  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1074  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1075  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1076  } else { \
1077  KMP_CHECK_GTID; \
1078  OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1079  } \
1080 }
1081 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1082 
1083 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1084 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1085 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1086 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1087 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1088 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1089 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1090 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1091 
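// Note (illustrative sketch only): .NEQV. is a plain bitwise XOR, so it reuses
// ATOMIC_CMPXCHG with ^. For .EQV. the OP token is "^~", which the CAS body turns into
//
//     new_value = old_value ^~ rhs;   // parses as old_value ^ (~rhs), i.e. bitwise equivalence
//
// and the critical-section fallback into (*lhs) ^=~ (rhs), i.e. *lhs ^= ~rhs.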
1092 
1093 // ------------------------------------------------------------------------
1094 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1095 // TYPE_ID, OP_ID, TYPE - detailed above
1096 // OP - operator
1097 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1098 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1099 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1100  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1101  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1102 }
1103 
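// For illustration only: ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 )
// expands roughly to
//
//     void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs )
//     {
//         KMP_DEBUG_ASSERT( __kmp_init_serial );
//         KA_TRACE(100,("__kmpc_atomic_float10_add: T#%d\n", gtid ));
//         /* GOMP-compat path (GOMP_FLAG == 1) elided: it may take __kmp_atomic_lock and return */
//         __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
//         (*lhs) += (rhs);
//         __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
//     }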
1104 /* ------------------------------------------------------------------------- */
1105 // routines for long double type
1106 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1107 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1108 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1109 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
1110 #if KMP_HAVE_QUAD
1111 // routines for _Quad type
1112 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1113 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1114 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1115 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1116 #if ( KMP_ARCH_X86 )
1117  ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1118  ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1119  ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1120  ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1121 #endif
1122 #endif
1123 // routines for complex types
1124 
1125 #if USE_CMPXCHG_FIX
1126 // workaround for C78287 (complex(kind=4) data type)
1127 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1128 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1129 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1130 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1131 // end of the workaround for C78287
1132 #else
1133 ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
1134 ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
1135 ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
1136 ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
1137 #endif // USE_CMPXCHG_FIX
1138 
1139 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1140 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1141 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1142 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1143 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1144 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1145 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1146 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1147 #if KMP_HAVE_QUAD
1148 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1149 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1150 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1151 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1152 #if ( KMP_ARCH_X86 )
1153  ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1154  ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1155  ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1156  ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1157 #endif
1158 #endif
1159 
1160 #if OMP_40_ENABLED
1161 
1162 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1163 // Supported only on IA-32 architecture and Intel(R) 64
1164 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1165 
1166 // ------------------------------------------------------------------------
1167 // Operation on *lhs, rhs bound by critical section
1168 // OP - operator (it's supposed to contain an assignment)
1169 // LCK_ID - lock identifier
1170 // Note: don't check gtid as it should always be valid
1171 // 1, 2-byte - expect valid parameter, other - check before this macro
1172 #define OP_CRITICAL_REV(OP,LCK_ID) \
1173  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1174  \
1175  (*lhs) = (rhs) OP (*lhs); \
1176  \
1177  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1178 
1179 #ifdef KMP_GOMP_COMPAT
1180 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1181  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1182  KMP_CHECK_GTID; \
1183  OP_CRITICAL_REV( OP, 0 ); \
1184  return; \
1185  }
1186 #else
1187 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1188 #endif /* KMP_GOMP_COMPAT */
1189 
1190 
1191 // Beginning of a definition (provides name, parameters, debug trace)
1192 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1193 // OP_ID - operation identifier (add, sub, mul, ...)
1194 // TYPE - operands' type
1195 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1196 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1197 { \
1198  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1199  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1200 
1201 // ------------------------------------------------------------------------
1202 // Operation on *lhs, rhs using "compare_and_store" routine
1203 // TYPE - operands' type
1204 // BITS - size in bits, used to distinguish low level calls
1205 // OP - operator
1206 // Note: temp_val introduced in order to force the compiler to read
1207 // *lhs only once (w/o it the compiler reads *lhs twice)
1208 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1209  { \
1210  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1211  TYPE old_value, new_value; \
1212  temp_val = *lhs; \
1213  old_value = temp_val; \
1214  new_value = rhs OP old_value; \
1215  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1216  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1217  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1218  { \
1219  KMP_DO_PAUSE; \
1220  \
1221  temp_val = *lhs; \
1222  old_value = temp_val; \
1223  new_value = rhs OP old_value; \
1224  } \
1225  }
1226 
1227 // -------------------------------------------------------------------------
1228 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1229 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1230  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1231  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1232 }
1233 
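// Note (illustrative sketch only): the _rev entries implement "x = expr binop x",
// so the operands are swapped relative to the forward routines. For example,
// __kmpc_atomic_float8_sub_rev atomically performs, via the CAS retry loop above,
//
//     *lhs = rhs - (*lhs);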
1234 // ------------------------------------------------------------------------
1235 // Entry definitions for integer operands
1236 // TYPE_ID - operands type and size (fixed4, float4)
1237 // OP_ID - operation identifier (add, sub, mul, ...)
1238 // TYPE - operand type
1239 // BITS - size in bits, used to distinguish low level calls
1240 // OP - operator (used in critical section)
1241 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1242 
1243 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1244 // ------------------------------------------------------------------------
1245 // Routines for ATOMIC integer operands, other operators
1246 // ------------------------------------------------------------------------
1247 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1248 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1249 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1250 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1251 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1252 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1253 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1254 
1255 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1256 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1257 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1258 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1259 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1260 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1261 
1262 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1263 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1264 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1265 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1266 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1267 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1268 
1269 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1270 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1271 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1272 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1273 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1274 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1275 
1276 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1277 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1278 
1279 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1280 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1281 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1282 
1283 // ------------------------------------------------------------------------
1284 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1285 // TYPE_ID, OP_ID, TYPE - detailed above
1286 // OP - operator
1287 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1288 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1289 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1290  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1291  OP_CRITICAL_REV(OP,LCK_ID) \
1292 }
1293 
1294 /* ------------------------------------------------------------------------- */
1295 // routines for long double type
1296 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1297 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1298 #if KMP_HAVE_QUAD
1299 // routines for _Quad type
1300 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1301 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1302 #if ( KMP_ARCH_X86 )
1303  ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1304  ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1305 #endif
1306 #endif
1307 
1308 // routines for complex types
1309 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1310 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1311 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1312 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1313 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1314 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1315 #if KMP_HAVE_QUAD
1316 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
1317 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1318 #if ( KMP_ARCH_X86 )
1319  ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1320  ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1321 #endif
1322 #endif
1323 
1324 
1325 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1326 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1327 
1328 #endif //OMP_40_ENABLED
1329 
1330 
1331 /* ------------------------------------------------------------------------ */
1332 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1333 /* Note: in order to reduce the total number of type combinations, */
1334 /* it is assumed that the compiler converts RHS to the longest floating type, */
1335 /* that is _Quad, before calling any of these routines. */
1336 /* The conversion to _Quad is done by the compiler during the calculation, */
1337 /* and the conversion back to TYPE - before the assignment, like: */
1338 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1339 /* A performance penalty is expected because of the use of SW emulation. */
1340 /* ------------------------------------------------------------------------ */
1341 
1342 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1343 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1344 { \
1345  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1346  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1347 
1348 // -------------------------------------------------------------------------
1349 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1350 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1351  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1352  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1353 }
1354 
1355 // -------------------------------------------------------------------------
1356 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1357 // -------------------------------------------------------------------------
1358 // X86 or X86_64: no alignment problems ====================================
1359 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1360 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1361  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1362  OP_CMPXCHG(TYPE,BITS,OP) \
1363 }
1364 // -------------------------------------------------------------------------
1365 #else
1366 // ------------------------------------------------------------------------
1367 // Code for other architectures that don't handle unaligned accesses.
1368 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1369 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1370  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1371  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1372  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1373  } else { \
1374  KMP_CHECK_GTID; \
1375  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1376  } \
1377 }
1378 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1379 
1380 // RHS=float8
1381 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1382 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1383 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1384 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1385 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1386 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1387 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1388 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1389 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1390 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1391 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1392 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1393 
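// Note (illustrative sketch only): for the mixed-type entries the CAS is still done
// at the width of LHS; the promotion happens in the C expression. For example,
// __kmpc_atomic_float4_add_float8 atomically performs, via the 32-bit CAS loop,
//
//     *lhs = (kmp_real32)( (kmp_real64)(*lhs) + rhs );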
1394 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1395 #if KMP_HAVE_QUAD
1396 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1397 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1398 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1399 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1400 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1401 
1402 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1403 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1404 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1405 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1406 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1407 
1408 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1409 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1410 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1411 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1412 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1413 
1414 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1415 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1416 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1417 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1418 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1419 
1420 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1421 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1422 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1423 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1424 
1425 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1426 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1427 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1428 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1429 
1430 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1431 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1432 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1433 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
1434 #endif
1435 
1436 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1437 // ------------------------------------------------------------------------
1438 // X86 or X86_64: no alignment problems ====================================
1439 #if USE_CMPXCHG_FIX
1440 // workaround for C78287 (complex(kind=4) data type)
1441 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1442 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1443  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1444  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1445 }
1446 // end of the second part of the workaround for C78287
1447 #else
1448 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1449 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1450  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1451  OP_CMPXCHG(TYPE,BITS,OP) \
1452 }
1453 #endif // USE_CMPXCHG_FIX
1454 #else
1455 // ------------------------------------------------------------------------
1456 // Code for other architectures that don't handle unaligned accesses.
1457 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1458 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1459  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1460  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1461  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1462  } else { \
1463  KMP_CHECK_GTID; \
1464  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1465  } \
1466 }
1467 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1468 
1469 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1470 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1471 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1472 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1473 
1474 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1475 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1476 
1478 // ------------------------------------------------------------------------
1479 // Atomic READ routines
1480 // ------------------------------------------------------------------------
1481 
1482 // ------------------------------------------------------------------------
1483 // Beginning of a definition (provides name, parameters, debug trace)
1484 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1485 // OP_ID - operation identifier (add, sub, mul, ...)
1486 // TYPE - operands' type
1487 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1488 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1489 { \
1490  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1491  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1492 
1493 // ------------------------------------------------------------------------
1494 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1495 // TYPE - operands' type
1496 // BITS - size in bits, used to distinguish low level calls
1497 // OP - operator
1498 // Note: temp_val introduced in order to force the compiler to read
1499 // *lhs only once (w/o it the compiler reads *lhs twice)
1500 // TODO: check if it is still necessary
1501 // Return the old value regardless of the result of the "compare & swap" operation
1502 
1503 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1504  { \
1505  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1506  union f_i_union { \
1507  TYPE f_val; \
1508  kmp_int##BITS i_val; \
1509  }; \
1510  union f_i_union old_value; \
1511  temp_val = *loc; \
1512  old_value.f_val = temp_val; \
1513  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1514  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1515  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1516  new_value = old_value.f_val; \
1517  return new_value; \
1518  }
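// Added note (illustrative, not part of the generated code): passing the same
// bits as both the "compare" and "exchange" arguments turns the
// compare_and_store_ret primitive into an atomic load of *loc. A rough sketch
// of the idea, written against a generic compare-and-swap that returns the
// observed value:
//
//     TYPE atomic_read( TYPE *loc ) {
//         TYPE snapshot = *loc;            /* plain read; may be torn for wide types */
//         /* CAS with old == new: nothing visibly changes, but the primitive
//            reports the value actually held at *loc at that instant. */
//         return compare_and_swap_ret( loc, snapshot, snapshot );
//     }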
1519 
1520 // -------------------------------------------------------------------------
1521 // Operation on *lhs, rhs bound by critical section
1522 // OP - operator (it's supposed to contain an assignment)
1523 // LCK_ID - lock identifier
1524 // Note: don't check gtid as it should always be valid
1525 // 1, 2-byte - expect valid parameter, other - check before this macro
1526 #define OP_CRITICAL_READ(OP,LCK_ID) \
1527  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1528  \
1529  new_value = (*loc); \
1530  \
1531  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1532 
1533 // -------------------------------------------------------------------------
1534 #ifdef KMP_GOMP_COMPAT
1535 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1536  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1537  KMP_CHECK_GTID; \
1538  OP_CRITICAL_READ( OP, 0 ); \
1539  return new_value; \
1540  }
1541 #else
1542 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1543 #endif /* KMP_GOMP_COMPAT */
1544 
1545 // -------------------------------------------------------------------------
1546 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1547 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1548  TYPE new_value; \
1549  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1550  new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1551  return new_value; \
1552 }
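// Added sketch (illustrative, not generated verbatim; assert and trace calls
// omitted): ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) expands to
// roughly the following routine, which reads the location atomically by
// adding zero:
//
//     kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 *loc )
//     {
//         kmp_int32 new_value;
//         /* GOMP critical path omitted: GOMP_FLAG is 0 for this entry */
//         new_value = KMP_TEST_THEN_ADD32( loc, + 0 );
//         return new_value;
//     }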
1553 // -------------------------------------------------------------------------
1554 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1555 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1556  TYPE new_value; \
1557  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1558  OP_CMPXCHG_READ(TYPE,BITS,OP) \
1559 }
1560 // ------------------------------------------------------------------------
1561 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1562 // TYPE_ID, OP_ID, TYPE - detailed above
1563 // OP - operator
1564 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1565 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1566 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1567  TYPE new_value; \
1568  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1569  OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1570  return new_value; \
1571 }
1572 
1573 // ------------------------------------------------------------------------
1574 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1575 // Let's return the read value through the additional parameter.
1576 
1577 #if ( KMP_OS_WINDOWS )
1578 
1579 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1580  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1581  \
1582  (*out) = (*loc); \
1583  \
1584  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1585 // ------------------------------------------------------------------------
1586 #ifdef KMP_GOMP_COMPAT
1587 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1588  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1589  KMP_CHECK_GTID; \
1590  OP_CRITICAL_READ_WRK( OP, 0 ); \
1591  }
1592 #else
1593 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1594 #endif /* KMP_GOMP_COMPAT */
1595 // ------------------------------------------------------------------------
1596 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1597 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1598 { \
1599  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1600  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1601 
1602 // ------------------------------------------------------------------------
1603 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1604 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1605  OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1606  OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1607 }
1608 
1609 #endif // KMP_OS_WINDOWS
1610 
1611 // ------------------------------------------------------------------------
1612 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1613 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1614 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1615 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1616 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1617 
1618 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1619 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1620 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1621 
1622 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1623 #if KMP_HAVE_QUAD
1624 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1625 #endif // KMP_HAVE_QUAD
1626 
1627 // Fix for CQ220361 on Windows* OS
1628 #if ( KMP_OS_WINDOWS )
1629  ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1630 #else
1631  ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1632 #endif
1633 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1634 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1635 #if KMP_HAVE_QUAD
1636 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1637 #if ( KMP_ARCH_X86 )
1638  ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1639  ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1640 #endif
1641 #endif
1642 
1643 
1644 // ------------------------------------------------------------------------
1645 // Atomic WRITE routines
1646 // ------------------------------------------------------------------------
1647 
1648 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1649 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1650  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1651  KMP_XCHG_FIXED##BITS( lhs, rhs ); \
1652 }
1653 // ------------------------------------------------------------------------
1654 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1655 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1656  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1657  KMP_XCHG_REAL##BITS( lhs, rhs ); \
1658 }
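// Added sketch (illustrative, not generated verbatim; assumes the usual
// ATOMIC_BEGIN signature of (ident_t *, int, TYPE *, TYPE)):
// ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) produces roughly:
//
//     void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs )
//     {
//         /* GOMP-compat critical path taken only when __kmp_atomic_mode == 2 */
//         KMP_XCHG_FIXED32( lhs, rhs );   /* atomic exchange; old value discarded */
//     }
//
// ATOMIC_XCHG_FLOAT_WR is identical except that it exchanges through the
// floating-point view of the storage via KMP_XCHG_REAL##BITS.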
1659 
1660 
1661 // ------------------------------------------------------------------------
1662 // Operation on *lhs, rhs using "compare_and_store" routine
1663 // TYPE - operands' type
1664 // BITS - size in bits, used to distinguish low level calls
1665 // OP - operator
1666 // Note: temp_val introduced in order to force the compiler to read
1667 // *lhs only once (w/o it the compiler reads *lhs twice)
1668 #define OP_CMPXCHG_WR(TYPE,BITS,OP) \
1669  { \
1670  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1671  TYPE old_value, new_value; \
1672  temp_val = *lhs; \
1673  old_value = temp_val; \
1674  new_value = rhs; \
1675  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1676  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1677  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1678  { \
1679  KMP_CPU_PAUSE(); \
1680  \
1681  temp_val = *lhs; \
1682  old_value = temp_val; \
1683  new_value = rhs; \
1684  } \
1685  }
1686 
1687 // -------------------------------------------------------------------------
1688 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1689 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1690  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1691  OP_CMPXCHG_WR(TYPE,BITS,OP) \
1692 }
1693 
1694 // ------------------------------------------------------------------------
1695 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1696 // TYPE_ID, OP_ID, TYPE - detailed above
1697 // OP - operator
1698 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1699 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1700 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1701  OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1702  OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1703 }
1704 // -------------------------------------------------------------------------
1705 
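// Added note: on 32-bit IA-32 there is no single instruction that atomically
// exchanges a 64-bit operand (cmpxchg8b is the widest lock-prefixed primitive),
// which is presumably why the 8-byte write entries below fall back to the
// OP_CMPXCHG_WR loop on KMP_ARCH_X86 while other targets use a plain exchange.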
1706 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1707 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1708 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1709 #if ( KMP_ARCH_X86 )
1710  ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1711 #else
1712  ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1713 #endif
1714 
1715 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1716 #if ( KMP_ARCH_X86 )
1717  ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1718 #else
1719  ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1720 #endif
1721 
1722 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1723 #if KMP_HAVE_QUAD
1724 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1725 #endif
1726 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1727 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1728 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1729 #if KMP_HAVE_QUAD
1730 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1731 #if ( KMP_ARCH_X86 )
1732  ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1733  ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1734 #endif
1735 #endif
1736 
1737 
1738 // ------------------------------------------------------------------------
1739 // Atomic CAPTURE routines
1740 // ------------------------------------------------------------------------
1741 
1742 // Beginning of a definition (provides name, parameters, debug trace)
1743 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1744 // OP_ID - operation identifier (add, sub, mul, ...)
1745 // TYPE - operands' type
1746 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
1747 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1748 { \
1749  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1750  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1751 
1752 // -------------------------------------------------------------------------
1753 // Operation on *lhs, rhs bound by critical section
1754 // OP - operator (it's supposed to contain an assignment)
1755 // LCK_ID - lock identifier
1756 // Note: don't check gtid as it should always be valid
1757 // 1, 2-byte - expect valid parameter, other - check before this macro
1758 #define OP_CRITICAL_CPT(OP,LCK_ID) \
1759  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1760  \
1761  if( flag ) { \
1762  (*lhs) OP rhs; \
1763  new_value = (*lhs); \
1764  } else { \
1765  new_value = (*lhs); \
1766  (*lhs) OP rhs; \
1767  } \
1768  \
1769  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1770  return new_value;
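// Added note (illustrative mapping): the "flag" argument selects which OpenMP
// capture form a *_cpt routine implements. Roughly:
//
//     #pragma omp atomic capture
//     { v = x; x += expr; }        /* capture the old value  -> flag == 0 */
//
//     #pragma omp atomic capture
//     v = x += expr;               /* capture the new value  -> flag == 1 */
//
// In both cases the compiler uses the routine's return value as v.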
1771 
1772 // ------------------------------------------------------------------------
1773 #ifdef KMP_GOMP_COMPAT
1774 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
1775  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1776  KMP_CHECK_GTID; \
1777  OP_CRITICAL_CPT( OP##=, 0 ); \
1778  }
1779 #else
1780 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1781 #endif /* KMP_GOMP_COMPAT */
1782 
1783 // ------------------------------------------------------------------------
1784 // Operation on *lhs, rhs using "compare_and_store" routine
1785 // TYPE - operands' type
1786 // BITS - size in bits, used to distinguish low level calls
1787 // OP - operator
1788 // Note: temp_val introduced in order to force the compiler to read
1789 // *lhs only once (w/o it the compiler reads *lhs twice)
1790 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1791  { \
1792  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1793  TYPE old_value, new_value; \
1794  temp_val = *lhs; \
1795  old_value = temp_val; \
1796  new_value = old_value OP rhs; \
1797  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1798  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1799  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1800  { \
1801  KMP_CPU_PAUSE(); \
1802  \
1803  temp_val = *lhs; \
1804  old_value = temp_val; \
1805  new_value = old_value OP rhs; \
1806  } \
1807  if( flag ) { \
1808  return new_value; \
1809  } else \
1810  return old_value; \
1811  }
1812 
1813 // -------------------------------------------------------------------------
1814 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1815 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1816  TYPE new_value; \
1817  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1818  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1819 }
1820 
1821 // -------------------------------------------------------------------------
1822 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1823 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1824  TYPE old_value, new_value; \
1825  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1826  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1827  old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
1828  if( flag ) { \
1829  return old_value OP rhs; \
1830  } else \
1831  return old_value; \
1832 }
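// Added sketch (illustrative, not generated verbatim; assert and trace calls
// omitted): ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 )
// expands to roughly:
//
//     kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid,
//                                             kmp_int32 *lhs, kmp_int32 rhs, int flag )
//     {
//         kmp_int32 old_value = KMP_TEST_THEN_ADD32( lhs, + rhs );  /* fetch-and-add */
//         return flag ? old_value + rhs   /* captured new value */
//                     : old_value;        /* captured old value */
//     }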
1833 // -------------------------------------------------------------------------
1834 
1835 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1836 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1837 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1838 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1839 
1840 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1841 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1842 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1843 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1844 
1845 // ------------------------------------------------------------------------
1846 // Entries definition for integer operands
1847 // TYPE_ID - operands type and size (fixed4, float4)
1848 // OP_ID - operation identifier (add, sub, mul, ...)
1849 // TYPE - operand type
1850 // BITS - size in bits, used to distinguish low level calls
1851 // OP - operator (used in critical section)
1852 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1853 // ------------------------------------------------------------------------
1854 // Routines for ATOMIC integer operands, other operators
1855 // ------------------------------------------------------------------------
1856 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1857 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1858 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1859 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1860 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1861 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1862 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1863 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1864 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1865 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1866 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1867 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1868 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1869 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1870 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1871 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1872 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1873 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1874 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1875 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1876 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1877 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1878 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1879 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1880 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1881 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1882 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1883 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1884 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1885 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1886 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1887 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1888 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1889 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1890 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1891 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1892 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1893 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1894 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1895 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1896 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1897 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1898 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1899 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1900 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1901 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1902 
1903 // ------------------------------------------------------------------------
1904 // Routines for C/C++ Reduction operators && and ||
1905 // ------------------------------------------------------------------------
1906 
1907 // -------------------------------------------------------------------------
1908 // Operation on *lhs, rhs bound by critical section
1909 // OP - operator (it's supposed to contain an assignment)
1910 // LCK_ID - lock identifier
1911 // Note: don't check gtid as it should always be valid
1912 // 1, 2-byte - expect valid parameter, other - check before this macro
1913 #define OP_CRITICAL_L_CPT(OP,LCK_ID) \
1914  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1915  \
1916  if( flag ) { \
1917  new_value OP rhs; \
1918  } else \
1919  new_value = (*lhs); \
1920  \
1921  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1922 
1923 // ------------------------------------------------------------------------
1924 #ifdef KMP_GOMP_COMPAT
1925 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
1926  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1927  KMP_CHECK_GTID; \
1928  OP_CRITICAL_L_CPT( OP, 0 ); \
1929  return new_value; \
1930  }
1931 #else
1932 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
1933 #endif /* KMP_GOMP_COMPAT */
1934 
1935 // ------------------------------------------------------------------------
1936 // Need separate macros for && and || because there is no combined-assignment operator for them
1937 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1938 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1939  TYPE new_value; \
1940  OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
1941  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1942 }
1943 
1944 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
1945 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
1946 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
1947 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
1948 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
1949 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
1950 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
1951 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
1952 
1953 
1954 // -------------------------------------------------------------------------
1955 // Routines for Fortran operators that have no C counterpart:
1956 // MAX, MIN, .EQV., .NEQV.
1957 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
1958 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
1959 // -------------------------------------------------------------------------
1960 
1961 // -------------------------------------------------------------------------
1962 // MIN and MAX need separate macros
1963 // OP - operator used to check whether any action is still needed
1964 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
1965  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1966  \
1967  if ( *lhs OP rhs ) { /* still need actions? */ \
1968  old_value = *lhs; \
1969  *lhs = rhs; \
1970  if ( flag ) \
1971  new_value = rhs; \
1972  else \
1973  new_value = old_value; \
1974  } \
1975  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1976  return new_value; \
1977 
1978 // -------------------------------------------------------------------------
1979 #ifdef KMP_GOMP_COMPAT
1980 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
1981  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
1982  KMP_CHECK_GTID; \
1983  MIN_MAX_CRITSECT_CPT( OP, 0 ); \
1984  }
1985 #else
1986 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
1987 #endif /* KMP_GOMP_COMPAT */
1988 
1989 // -------------------------------------------------------------------------
1990 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
1991  { \
1992  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1993  /*TYPE old_value; */ \
1994  temp_val = *lhs; \
1995  old_value = temp_val; \
1996  while ( old_value OP rhs && /* still need actions? */ \
1997  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1998  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1999  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2000  { \
2001  KMP_CPU_PAUSE(); \
2002  temp_val = *lhs; \
2003  old_value = temp_val; \
2004  } \
2005  if( flag ) \
2006  return rhs; \
2007  else \
2008  return old_value; \
2009  }
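// Added note: the loop above keeps retrying only while "old_value OP rhs"
// still holds, i.e. while rhs would still replace the current extremum; if
// another thread installs a better value first, the routine stops without
// storing. A rough sketch of the control flow for max_cpt (OP is '<'):
//
//     do {
//         old_value = *lhs;
//         if ( !(old_value < rhs) ) break;   /* rhs no longer larger */
//     } while ( !compare_and_swap( lhs, old_value, rhs ) );
//     return flag ? rhs : old_value;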
2010 
2011 // -------------------------------------------------------------------------
2012 // 1-byte, 2-byte operands - use critical section
2013 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2014 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2015  TYPE new_value, old_value; \
2016  if ( *lhs OP rhs ) { /* need actions? */ \
2017  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2018  MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2019  } \
2020  return *lhs; \
2021 }
2022 
2023 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2024 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2025  TYPE new_value, old_value; \
2026  if ( *lhs OP rhs ) { \
2027  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2028  MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2029  } \
2030  return *lhs; \
2031 }
2032 
2033 
2034 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2035 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2036 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2037 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2038 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2039 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2040 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2041 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2042 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2043 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2044 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2045 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2046 #if KMP_HAVE_QUAD
2047 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2048 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2049 #if ( KMP_ARCH_X86 )
2050  MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2051  MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2052 #endif
2053 #endif
2054 
2055 // ------------------------------------------------------------------------
2056 #ifdef KMP_GOMP_COMPAT
2057 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
2058  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2059  KMP_CHECK_GTID; \
2060  OP_CRITICAL_CPT( OP, 0 ); \
2061  }
2062 #else
2063 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2064 #endif /* KMP_GOMP_COMPAT */
2065 // ------------------------------------------------------------------------
2066 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2067 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2068  TYPE new_value; \
2069  OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
2070  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2071 }
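// Added note: .NEQV. is bitwise exclusive-or, so it reuses the plain '^'
// capture entries below. .EQV. (bitwise equivalence) is expressed here as
// "x = x ^ ~rhs" via the '^~' operator token, which equals ~(x ^ rhs); the
// matching '^=~' form is what gets routed to the GOMP critical-section path.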
2072 
2073 // ------------------------------------------------------------------------
2074 
2075 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2076 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2077 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2078 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2079 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2080 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2081 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2082 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
2083 
2084 // ------------------------------------------------------------------------
2085 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2086 // TYPE_ID, OP_ID, TYPE - detailed above
2087 // OP - operator
2088 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2089 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2090 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2091  TYPE new_value; \
2092  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2093  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2094 }
2095 
2096 // ------------------------------------------------------------------------
2097 
2098 // Workaround for cmplx4. Regular routines with return value don't work
2099 // on Win_32e. Let's return captured values through the additional parameter.
2100 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
2101  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2102  \
2103  if( flag ) { \
2104  (*lhs) OP rhs; \
2105  (*out) = (*lhs); \
2106  } else { \
2107  (*out) = (*lhs); \
2108  (*lhs) OP rhs; \
2109  } \
2110  \
2111  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2112  return;
2113 // ------------------------------------------------------------------------
2114 
2115 #ifdef KMP_GOMP_COMPAT
2116 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
2117  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2118  KMP_CHECK_GTID; \
2119  OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
2120  }
2121 #else
2122 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2123 #endif /* KMP_GOMP_COMPAT */
2124 // ------------------------------------------------------------------------
2125 
2126 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2127 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2128 { \
2129  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2130  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2131 // ------------------------------------------------------------------------
2132 
2133 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2134 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2135  OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
2136  OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
2137 }
2138 // The end of workaround for cmplx4
2139 
2140 /* ------------------------------------------------------------------------- */
2141 // routines for long double type
2142 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2143 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2144 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2145 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2146 #if KMP_HAVE_QUAD
2147 // routines for _Quad type
2148 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2149 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2150 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2151 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2152 #if ( KMP_ARCH_X86 )
2153  ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2154  ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2155  ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2156  ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2157 #endif
2158 #endif
2159 
2160 // routines for complex types
2161 
2162 // cmplx4 routines to return void
2163 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2164 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2165 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2166 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2167 
2168 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2169 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2170 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2171 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2172 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2173 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2174 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2175 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2176 #if KMP_HAVE_QUAD
2177 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2178 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2179 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2180 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2181 #if ( KMP_ARCH_X86 )
2182  ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2183  ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2184  ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2185  ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2186 #endif
2187 #endif
2188 
2189 #if OMP_40_ENABLED
2190 
2191 // OpenMP 4.0 capture with reversed operands: v = x = expr binop x;  { v = x; x = expr binop x; }  { x = expr binop x; v = x; } (needed for non-commutative operations).
2192 // Supported only on IA-32 architecture and Intel(R) 64
2193 
2194 // -------------------------------------------------------------------------
2195 // Operation on *lhs, rhs bound by critical section
2196 // OP - operator (it's supposed to contain an assignment)
2197 // LCK_ID - lock identifier
2198 // Note: don't check gtid as it should always be valid
2199 // 1, 2-byte - expect valid parameter, other - check before this macro
2200 #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2201  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2202  \
2203  if( flag ) { \
2204  /*temp_val = (*lhs);*/\
2205  (*lhs) = (rhs) OP (*lhs); \
2206  new_value = (*lhs); \
2207  } else { \
2208  new_value = (*lhs);\
2209  (*lhs) = (rhs) OP (*lhs); \
2210  } \
2211  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2212  return new_value;
2213 
2214 // ------------------------------------------------------------------------
2215 #ifdef KMP_GOMP_COMPAT
2216 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
2217  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2218  KMP_CHECK_GTID; \
2219  OP_CRITICAL_CPT_REV( OP, 0 ); \
2220  }
2221 #else
2222 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
2223 #endif /* KMP_GOMP_COMPAT */
2224 
2225 // ------------------------------------------------------------------------
2226 // Operation on *lhs, rhs using "compare_and_store" routine
2227 // TYPE - operands' type
2228 // BITS - size in bits, used to distinguish low level calls
2229 // OP - operator
2230 // Note: temp_val introduced in order to force the compiler to read
2231 // *lhs only once (w/o it the compiler reads *lhs twice)
2232 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2233  { \
2234  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2235  TYPE old_value, new_value; \
2236  temp_val = *lhs; \
2237  old_value = temp_val; \
2238  new_value = rhs OP old_value; \
2239  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2240  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2241  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2242  { \
2243  KMP_CPU_PAUSE(); \
2244  \
2245  temp_val = *lhs; \
2246  old_value = temp_val; \
2247  new_value = rhs OP old_value; \
2248  } \
2249  if( flag ) { \
2250  return new_value; \
2251  } else \
2252  return old_value; \
2253  }
2254 
2255 // -------------------------------------------------------------------------
2256 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2257 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2258  TYPE new_value; \
2259  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2260  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2261  OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2262 }
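// Added note (illustrative mapping; id_ref/gtid are the usual bookkeeping
// arguments): the *_cpt_rev entries implement the reversed-operand capture
// forms, where the shared variable is the right operand. Roughly, for
//
//     #pragma omp atomic capture
//     v = x = expr - x;
//
// the compiler would emit something like
//
//     v = __kmpc_atomic_fixed4_sub_cpt_rev( id_ref, gtid, &x, expr, 1 );
//
// which atomically performs x = expr - x and returns the new value (flag==1)
// or the value x held beforehand (flag==0).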
2263 
2264 
2265 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
2266 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
2267 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
2268 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
2269 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
2270 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
2271 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
2272 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
2273 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
2274 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
2275 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
2276 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
2277 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
2278 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
2279 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
2280 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
2281 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
2282 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
2283 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
2284 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
2285 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
2286 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
2287 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
2288 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
2289 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
2290 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
2291 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
2292 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
2293 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2294 
2295 
2296 // ------------------------------------------------------------------------
2297 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2298 // TYPE_ID, OP_ID, TYPE - detailed above
2299 // OP - operator
2300 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2301 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2302 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2303  TYPE new_value; \
2304  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2305  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
2306  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2307  OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2308 }
2309 
2310 
2311 /* ------------------------------------------------------------------------- */
2312 // routines for long double type
2313 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
2314 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
2315 #if KMP_HAVE_QUAD
2316 // routines for _Quad type
2317 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
2318 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
2319 #if ( KMP_ARCH_X86 )
2320  ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
2321  ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
2322 #endif
2323 #endif
2324 
2325 // routines for complex types
2326 
2327 // ------------------------------------------------------------------------
2328 
2329 // Workaround for cmplx4. Regular routines with return value don't work
2330 // on Win_32e. Let's return captured values through the additional parameter.
2331 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2332  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2333  \
2334  if( flag ) { \
2335  (*lhs) = (rhs) OP (*lhs); \
2336  (*out) = (*lhs); \
2337  } else { \
2338  (*out) = (*lhs); \
2339  (*lhs) = (rhs) OP (*lhs); \
2340  } \
2341  \
2342  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2343  return;
2344 // ------------------------------------------------------------------------
2345 
2346 #ifdef KMP_GOMP_COMPAT
2347 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
2348  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2349  KMP_CHECK_GTID; \
2350  OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
2351  }
2352 #else
2353 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
2354 #endif /* KMP_GOMP_COMPAT */
2355 // ------------------------------------------------------------------------
2356 
2357 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2358 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2359  OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
2360  OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2361 }
2362 // The end of workaround for cmplx4
2363 
2364 
2365 // !!! TODO: check if we need to return void for cmplx4 routines
2366 // cmplx4 routines to return void
2367 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
2368 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
2369 
2370 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
2371 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
2372 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
2373 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
2374 #if KMP_HAVE_QUAD
2375 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
2376 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
2377 #if ( KMP_ARCH_X86 )
2378  ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2379  ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2380 #endif
2381 #endif
2382 
2383 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
2384 
2385 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2386 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
2387 { \
2388  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2389  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2390 
2391 #define CRITICAL_SWP(LCK_ID) \
2392  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2393  \
2394  old_value = (*lhs); \
2395  (*lhs) = rhs; \
2396  \
2397  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2398  return old_value;
2399 
2400 // ------------------------------------------------------------------------
2401 #ifdef KMP_GOMP_COMPAT
2402 #define GOMP_CRITICAL_SWP(FLAG) \
2403  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2404  KMP_CHECK_GTID; \
2405  CRITICAL_SWP( 0 ); \
2406  }
2407 #else
2408 #define GOMP_CRITICAL_SWP(FLAG)
2409 #endif /* KMP_GOMP_COMPAT */
2410 
2411 
2412 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2413 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2414  TYPE old_value; \
2415  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2416  old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
2417  return old_value; \
2418 }
2419 // ------------------------------------------------------------------------
2420 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2421 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2422  TYPE old_value; \
2423  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2424  old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
2425  return old_value; \
2426 }
2427 
2428 // ------------------------------------------------------------------------
2429 #define CMPXCHG_SWP(TYPE,BITS) \
2430  { \
2431  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2432  TYPE old_value, new_value; \
2433  temp_val = *lhs; \
2434  old_value = temp_val; \
2435  new_value = rhs; \
2436  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2437  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2438  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2439  { \
2440  KMP_CPU_PAUSE(); \
2441  \
2442  temp_val = *lhs; \
2443  old_value = temp_val; \
2444  new_value = rhs; \
2445  } \
2446  return old_value; \
2447  }
2448 
2449 // -------------------------------------------------------------------------
2450 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2451 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2452  TYPE old_value; \
2453  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2454  CMPXCHG_SWP(TYPE,BITS) \
2455 }
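// Added note (illustrative mapping): the *_swp entries implement the OpenMP
// 4.0 capture-write form { v = x; x = expr; }. For a 4-byte integer the
// compiler would emit roughly
//
//     v = __kmpc_atomic_fixed4_swp( id_ref, gtid, &x, expr );
//
// which atomically stores expr into x and returns the value x held before.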
2456 
2457 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
2458 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
2459 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
2460 
2461 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
2462 
2463 #if ( KMP_ARCH_X86 )
2464  ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2465  ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2466 #else
2467  ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2468  ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2469 #endif
2470 
2471 // ------------------------------------------------------------------------
2472 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2473 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
2474 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2475  TYPE old_value; \
2476  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2477  CRITICAL_SWP(LCK_ID) \
2478 }
2479 
2480 // ------------------------------------------------------------------------
2481 
2482 // !!! TODO: check if we need to return void for cmplx4 routines
2483 // Workaround for cmplx4. Regular routines with return value don't work
2484 // on Win_32e. Let's return captured values through the additional parameter.
2485 
2486 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2487 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
2488 { \
2489  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2490  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2491 
2492 
2493 #define CRITICAL_SWP_WRK(LCK_ID) \
2494  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2495  \
2496  tmp = (*lhs); \
2497  (*lhs) = (rhs); \
2498  (*out) = tmp; \
2499  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2500  return;
2501 
2502 // ------------------------------------------------------------------------
2503 
2504 #ifdef KMP_GOMP_COMPAT
2505 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
2506  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2507  KMP_CHECK_GTID; \
2508  CRITICAL_SWP_WRK( 0 ); \
2509  }
2510 #else
2511 #define GOMP_CRITICAL_SWP_WRK(FLAG)
2512 #endif /* KMP_GOMP_COMPAT */
2513 // ------------------------------------------------------------------------
2514 
2515 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
2516 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2517  TYPE tmp; \
2518  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
2519  CRITICAL_SWP_WRK(LCK_ID) \
2520 }
2521 // The end of workaround for cmplx4
2522 
2523 
2524 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
2525 #if KMP_HAVE_QUAD
2526 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
2527 #endif
2528 // cmplx4 routine to return void
2529 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2530 
2531 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2532 
2533 
2534 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
2535 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
2536 #if KMP_HAVE_QUAD
2537 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
2538 #if ( KMP_ARCH_X86 )
2539  ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
2540  ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
2541 #endif
2542 #endif
2543 
2544 
2545 // End of OpenMP 4.0 Capture
2546 
2547 #endif //OMP_40_ENABLED
2548 
2549 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2550 
2551 
2552 #undef OP_CRITICAL
2553 
2554 /* ------------------------------------------------------------------------ */
2555 /* Generic atomic routines */
2556 /* ------------------------------------------------------------------------ */
2557 
2558 void
2559 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2560 {
2561  KMP_DEBUG_ASSERT( __kmp_init_serial );
2562 
2563  if (
2564 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2565  FALSE /* must use lock */
2566 #else
2567  TRUE
2568 #endif
2569  )
2570  {
2571  kmp_int8 old_value, new_value;
2572 
2573  old_value = *(kmp_int8 *) lhs;
2574  (*f)( &new_value, &old_value, rhs );
2575 
2576  /* TODO: Should this be acquire or release? */
2577  while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2578  *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2579  {
2580  KMP_CPU_PAUSE();
2581 
2582  old_value = *(kmp_int8 *) lhs;
2583  (*f)( &new_value, &old_value, rhs );
2584  }
2585 
2586  return;
2587  }
2588  else {
2589  //
2590  // All 1-byte data is of integer data type.
2591  //
2592 
2593 #ifdef KMP_GOMP_COMPAT
2594  if ( __kmp_atomic_mode == 2 ) {
2595  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2596  }
2597  else
2598 #endif /* KMP_GOMP_COMPAT */
2599  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2600 
2601  (*f)( lhs, lhs, rhs );
2602 
2603 #ifdef KMP_GOMP_COMPAT
2604  if ( __kmp_atomic_mode == 2 ) {
2605  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2606  }
2607  else
2608 #endif /* KMP_GOMP_COMPAT */
2609  __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2610  }
2611 }
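// Added usage sketch (hypothetical helper name): the generic __kmpc_atomic_N
// routines take a callback of the form f( result, left, right ) so a compiler
// can handle operator/type combinations that have no specialized entry point.
// For a 1-byte operand it might emit something along these lines:
//
//     static void my_add_1( void *out, void *a, void *b )     /* hypothetical */
//     {
//         *(char *) out = *(char *) a + *(char *) b;
//     }
//     ...
//     __kmpc_atomic_1( id_ref, gtid, &x, &expr, my_add_1 );
//
// The routine then either retries the callback inside the compare-and-store
// loop above or serializes through __kmp_atomic_lock_1i.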
2612 
2613 void
2614 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2615 {
2616  if (
2617 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2618  FALSE /* must use lock */
2619 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2620  TRUE /* no alignment problems */
2621 #else
2622  ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
2623 #endif
2624  )
2625  {
2626  kmp_int16 old_value, new_value;
2627 
2628  old_value = *(kmp_int16 *) lhs;
2629  (*f)( &new_value, &old_value, rhs );
2630 
2631  /* TODO: Should this be acquire or release? */
2632  while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2633  *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2634  {
2635  KMP_CPU_PAUSE();
2636 
2637  old_value = *(kmp_int16 *) lhs;
2638  (*f)( &new_value, &old_value, rhs );
2639  }
2640 
2641  return;
2642  }
2643  else {
2644  //
2645  // All 2-byte data is of integer data type.
2646  //
2647 
2648 #ifdef KMP_GOMP_COMPAT
2649  if ( __kmp_atomic_mode == 2 ) {
2650  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2651  }
2652  else
2653 #endif /* KMP_GOMP_COMPAT */
2654  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2655 
2656  (*f)( lhs, lhs, rhs );
2657 
2658 #ifdef KMP_GOMP_COMPAT
2659  if ( __kmp_atomic_mode == 2 ) {
2660  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2661  }
2662  else
2663 #endif /* KMP_GOMP_COMPAT */
2664  __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2665  }
2666 }
2667 
2668 void
2669 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2670 {
2671  KMP_DEBUG_ASSERT( __kmp_init_serial );
2672 
2673  if (
2674  //
2675  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
2676  // Gomp compatibility is broken if this routine is called for floats.
2677  //
2678 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2679  TRUE /* no alignment problems */
2680 #else
2681  ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
2682 #endif
2683  )
2684  {
2685  kmp_int32 old_value, new_value;
2686 
2687  old_value = *(kmp_int32 *) lhs;
2688  (*f)( &new_value, &old_value, rhs );
2689 
2690  /* TODO: Should this be acquire or release? */
2691  while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2692  *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2693  {
2694  KMP_CPU_PAUSE();
2695 
2696  old_value = *(kmp_int32 *) lhs;
2697  (*f)( &new_value, &old_value, rhs );
2698  }
2699 
2700  return;
2701  }
2702  else {
2703  //
2704  // Use __kmp_atomic_lock_4i for all 4-byte data,
2705  // even if it isn't of integer data type.
2706  //
2707 
2708 #ifdef KMP_GOMP_COMPAT
2709  if ( __kmp_atomic_mode == 2 ) {
2710  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2711  }
2712  else
2713 #endif /* KMP_GOMP_COMPAT */
2714  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2715 
2716  (*f)( lhs, lhs, rhs );
2717 
2718 #ifdef KMP_GOMP_COMPAT
2719  if ( __kmp_atomic_mode == 2 ) {
2720  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2721  }
2722  else
2723 #endif /* KMP_GOMP_COMPAT */
2724  __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2725  }
2726 }
2727 
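//
// Note on the compare-and-store path above: the destination is read, compared
// and stored as a raw kmp_int32, while the callback is free to interpret the
// same four bytes as any 4-byte type; the CAS only verifies that the bit
// pattern is unchanged.  Subject to the GOMP-compatibility caveat in the
// FIXME above, a float update can therefore go through this routine.  The
// sketch below is illustrative only, excluded from compilation, and the
// example_* names are hypothetical.
//
#if 0
// Hypothetical callback for "x += y" on kmp_real32 operands.
static void example_float_add( void *out, void *in_lhs, void *in_rhs )
{
    *(kmp_real32 *) out = *(kmp_real32 *) in_lhs + *(kmp_real32 *) in_rhs;
}

static void example_call_4( ident_t *loc, kmp_real32 *x, kmp_real32 y )
{
    __kmpc_atomic_4( loc, __kmp_entry_gtid(), x, &y, example_float_add );
}
#endif
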
2728 void
2729 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2730 {
2731  KMP_DEBUG_ASSERT( __kmp_init_serial );
2732  if (
2733 
2734 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2735  FALSE /* must use lock */
2736 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2737  TRUE /* no alignment problems */
2738 #else
2739  ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
2740 #endif
2741  )
2742  {
2743  kmp_int64 old_value, new_value;
2744 
2745  old_value = *(kmp_int64 *) lhs;
2746  (*f)( &new_value, &old_value, rhs );
2747  /* TODO: Should this be acquire or release? */
2748  while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2749  *(kmp_int64 *) &old_value,
2750  *(kmp_int64 *) &new_value ) )
2751  {
2752  KMP_CPU_PAUSE();
2753 
2754  old_value = *(kmp_int64 *) lhs;
2755  (*f)( &new_value, &old_value, rhs );
2756  }
2757 
2758  return;
2759  } else {
2760  //
2761  // Use __kmp_atomic_lock_8i for all 8-byte data,
2762  // even if it isn't of integer data type.
2763  //
2764 
2765 #ifdef KMP_GOMP_COMPAT
2766  if ( __kmp_atomic_mode == 2 ) {
2767  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2768  }
2769  else
2770 #endif /* KMP_GOMP_COMPAT */
2771  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2772 
2773  (*f)( lhs, lhs, rhs );
2774 
2775 #ifdef KMP_GOMP_COMPAT
2776  if ( __kmp_atomic_mode == 2 ) {
2777  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2778  }
2779  else
2780 #endif /* KMP_GOMP_COMPAT */
2781  __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2782  }
2783 }
2784 
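//
// The same transparency holds for 8-byte data: the call site always passes
// the same callback, and the routine above decides internally whether to run
// the 8-byte compare-and-store loop or take __kmp_atomic_lock_8i (when built
// for IA-32 with KMP_GOMP_COMPAT, the compare-and-store path is compiled out
// and the lock path is always used).  Illustrative sketch only, excluded from
// compilation; the example_* names are hypothetical.
//
#if 0
// Hypothetical callback for "x *= y" on kmp_real64 operands.
static void example_double_mul( void *out, void *in_lhs, void *in_rhs )
{
    *(kmp_real64 *) out = *(kmp_real64 *) in_lhs * *(kmp_real64 *) in_rhs;
}

static void example_call_8( ident_t *loc, kmp_real64 *x, kmp_real64 y )
{
    __kmpc_atomic_8( loc, __kmp_entry_gtid(), x, &y, example_double_mul );
}
#endif
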
2785 void
2786 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2787 {
2788  KMP_DEBUG_ASSERT( __kmp_init_serial );
2789 
2790 #ifdef KMP_GOMP_COMPAT
2791  if ( __kmp_atomic_mode == 2 ) {
2792  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2793  }
2794  else
2795 #endif /* KMP_GOMP_COMPAT */
2796  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2797 
2798  (*f)( lhs, lhs, rhs );
2799 
2800 #ifdef KMP_GOMP_COMPAT
2801  if ( __kmp_atomic_mode == 2 ) {
2802  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2803  }
2804  else
2805 #endif /* KMP_GOMP_COMPAT */
2806  __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2807 }
2808 
2809 void
2810 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2811 {
2812  KMP_DEBUG_ASSERT( __kmp_init_serial );
2813 
2814 #ifdef KMP_GOMP_COMPAT
2815  if ( __kmp_atomic_mode == 2 ) {
2816  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2817  }
2818  else
2819 #endif /* KMP_GOMP_COMPAT */
2820  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2821 
2822  (*f)( lhs, lhs, rhs );
2823 
2824 #ifdef KMP_GOMP_COMPAT
2825  if ( __kmp_atomic_mode == 2 ) {
2826  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2827  }
2828  else
2829 #endif /* KMP_GOMP_COMPAT */
2830  __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2831 }
2832 
2833 void
2834 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2835 {
2836  KMP_DEBUG_ASSERT( __kmp_init_serial );
2837 
2838 #ifdef KMP_GOMP_COMPAT
2839  if ( __kmp_atomic_mode == 2 ) {
2840  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2841  }
2842  else
2843 #endif /* KMP_GOMP_COMPAT */
2844  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2845 
2846  (*f)( lhs, lhs, rhs );
2847 
2848 #ifdef KMP_GOMP_COMPAT
2849  if ( __kmp_atomic_mode == 2 ) {
2850  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2851  }
2852  else
2853 #endif /* KMP_GOMP_COMPAT */
2854  __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2855 }
2856 
2857 void
2858 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2859 {
2860  KMP_DEBUG_ASSERT( __kmp_init_serial );
2861 
2862 #ifdef KMP_GOMP_COMPAT
2863  if ( __kmp_atomic_mode == 2 ) {
2864  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2865  }
2866  else
2867 #endif /* KMP_GOMP_COMPAT */
2868  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2869 
2870  (*f)( lhs, lhs, rhs );
2871 
2872 #ifdef KMP_GOMP_COMPAT
2873  if ( __kmp_atomic_mode == 2 ) {
2874  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2875  }
2876  else
2877 #endif /* KMP_GOMP_COMPAT */
2878  __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2879 }
2880 
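//
// Summary of the lock used by each generic routine when its lock path runs;
// in GOMP compatibility mode (__kmp_atomic_mode == 2) every routine falls
// back to the single __kmp_atomic_lock instead:
//
//   __kmpc_atomic_1   ->  __kmp_atomic_lock_1i
//   __kmpc_atomic_2   ->  __kmp_atomic_lock_2i
//   __kmpc_atomic_4   ->  __kmp_atomic_lock_4i
//   __kmpc_atomic_8   ->  __kmp_atomic_lock_8i
//   __kmpc_atomic_10  ->  __kmp_atomic_lock_10r
//   __kmpc_atomic_16  ->  __kmp_atomic_lock_16c
//   __kmpc_atomic_20  ->  __kmp_atomic_lock_20c
//   __kmpc_atomic_32  ->  __kmp_atomic_lock_32c
//
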
2881 // AC: the following two routines are the same as GOMP_atomic_start/end, but they are called by our compiler;
2882 // they are duplicated so that pure Intel code does not use third-party names.
2883 // TODO: consider adding a GTID parameter after consultation with Ernesto/Xinmin.
2884 void
2885 __kmpc_atomic_start(void)
2886 {
2887  int gtid = __kmp_entry_gtid();
2888  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
2889  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
2890 }
2891 
2892 
2893 void
2894 __kmpc_atomic_end(void)
2895 {
2896  int gtid = __kmp_get_gtid();
2897  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
2898  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
2899 }
2900 
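//
// Usage sketch for the two routines above: the compiler brackets an update it
// does not map to any specialized or generic __kmpc_atomic_* entry point with
// __kmpc_atomic_start / __kmpc_atomic_end, which acquire and release the
// global __kmp_atomic_lock.  Illustrative only, excluded from compilation;
// the example_* names are hypothetical.
//
#if 0
static double example_sum = 0.0;

// Hypothetical expansion of an atomic update guarded by the global lock.
static void example_guarded_add( double addend )
{
    __kmpc_atomic_start();       // acquires __kmp_atomic_lock
    example_sum += addend;       // plain update, now mutually exclusive
    __kmpc_atomic_end();         // releases __kmp_atomic_lock
}
#endif
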
2901 /* ------------------------------------------------------------------------ */
2902 /* ------------------------------------------------------------------------ */
2907 // end of file