tbb_machine.h

/*
    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.

    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors.  Title to the Material remains with Intel
    Corporation or its suppliers and licensors.  The Material is protected
    by worldwide copyright laws and treaty provisions.  No part of the
    Material may be used, copied, reproduced, modified, published, uploaded,
    posted, transmitted, distributed, or disclosed in any way without
    Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise.  Any license under such
    intellectual property rights must be express and approved by Intel in
    writing.
*/

#ifndef __TBB_machine_H
#define __TBB_machine_H

#include "tbb_stddef.h"

#if _WIN32||_WIN64

#ifdef _MANAGED
#pragma managed(push, off)
#endif

#if __MINGW64__
#include "machine/linux_intel64.h"
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield()  SwitchToThread()
#elif __MINGW32__
#include "machine/linux_ia32.h"
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield()  SwitchToThread()
#elif defined(_M_IX86)
#include "machine/windows_ia32.h"
#elif defined(_M_AMD64)
#include "machine/windows_intel64.h"
#elif _XBOX
#include "machine/xbox360_ppc.h"
#endif

#ifdef _MANAGED
#pragma managed(pop)
#endif

#elif __linux__ || __FreeBSD__

#if __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __ia64__
#include "machine/linux_ia64.h"
#elif __powerpc__
#include "machine/mac_ppc.h"
#endif
#include "machine/linux_common.h"

#elif __APPLE__

#if __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __POWERPC__
#include "machine/mac_ppc.h"
#endif
#include "machine/macos_common.h"

#elif _AIX

#include "machine/ibm_aix51.h"

#elif __sun || __SUNPRO_CC

#define __asm__ asm
#define __volatile__ volatile
#if __i386  || __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __sparc
#include "machine/sunos_sparc.h"
#endif
#include <sched.h>
#define __TBB_Yield() sched_yield()

#endif

#if    !defined(__TBB_CompareAndSwap4) \
    || !defined(__TBB_CompareAndSwap8) \
    || !defined(__TBB_Yield)           \
    || !defined(__TBB_full_memory_fence)    \
    || !defined(__TBB_release_consistency_helper)
#error Minimal requirements for tbb_machine.h not satisfied; platform is not supported.
#endif

#ifndef __TBB_Pause
    inline void __TBB_Pause(int32_t) {
        __TBB_Yield();
    }
#endif

namespace tbb {
namespace internal {

//! Class that implements exponential backoff.
/** See the implementation of spin_wait_while_eq for an example. */
class atomic_backoff : no_copy {
    //! Time delay, in units of "pause" instructions.
    /** Should be roughly the number of "pause" instructions that take the
        same time as a context switch. */
    static const int32_t LOOPS_BEFORE_YIELD = 16;
    int32_t count;
public:
    atomic_backoff() : count(1) {}

    //! Pause for a while.
    void pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
        } else {
            // Pause is so long that we might as well yield the CPU to the scheduler.
            __TBB_Yield();
        }
    }

    //! Pause while the backoff is still short; return false once the limit is reached.
    bool bounded_pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
            return true;
        } else {
            return false;
        }
    }

    void reset() {
        count = 1;
    }
};
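
// A minimal usage sketch (illustrative, not part of the original header): a
// compare-and-swap retry loop built around atomic_backoff, the same pattern
// the generic atomics below use.  The function and variable names are
// hypothetical.
//
//     inline void example_atomic_increment( volatile uint32_t& counter ) {
//         atomic_backoff backoff;
//         for(;;) {
//             uint32_t snapshot = counter;
//             if( __TBB_CompareAndSwap4( &counter, snapshot+1, snapshot )==snapshot )
//                 break;           // CAS succeeded
//             backoff.pause();     // contention: back off before retrying
//         }
//     }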

//! Spin WHILE the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

//! Spin UNTIL the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}
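
// Usage sketch (illustrative only): waiting until another thread publishes a
// flag.  The flag name is hypothetical.
//
//     volatile int initialization_done = 0;
//     ...
//     spin_wait_until_eq( initialization_done, 1 );  // spins, backing off, until the flag becomes 1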

// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
// S should be either 1 or 2, for the mask calculation to work correctly.
// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the base value, which might change during the pause
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        // __TBB_CompareAndSwap4 is presumed to have a full fence.
        result = __TBB_CompareAndSwap4( base, new_value, old_value );
        if(  result==old_value               // CAS succeeded
          || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
            break;
        else                                 // CAS failed but the bits of interest were left unchanged
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
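
// Worked example (illustrative): on a little-endian machine, a uint16_t at
// byte offset 2 within its aligned 4-byte word gets bitoffset 8*2 = 16 and
// mask ((1<<16)-1)<<16 = 0xFFFF0000.  The loop above then CASes the whole
// 32-bit word, leaving the other 16 bits untouched, and retries only when
// those unrelated bits changed underneath it.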

template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) {
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}

template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}

template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}

template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}

template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}

template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric is presumed to have a full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
        b.pause();
    }
    return result;
}
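
// Usage sketch (illustrative only): the generic fetch-and-add turns a bare
// compare-and-swap into an atomic counter.  The counter name is hypothetical.
//
//     volatile uint32_t hit_count = 0;
//     ...
//     uint32_t previous = __TBB_FetchAndAddGeneric<4,uint32_t>( &hit_count, 1 );
//     // `previous` is the value observed just before the increment.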

template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric is presumed to have a full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
        b.pause();
    }
    return result;
}
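
// Usage sketch (illustrative only): fetch-and-store is an unconditional
// atomic exchange, e.g. for taking a value out of a slot while marking the
// slot empty.  The names are hypothetical.
//
//     volatile uint32_t slot = 42;
//     ...
//     uint32_t taken = __TBB_FetchAndStoreGeneric<4,uint32_t>( &slot, 0 );
//     // `taken` holds the old contents; `slot` is now 0 regardless of what it held.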

// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
// strict as type T.  The type should have a trivial default constructor and destructor, so that
// arrays of that type can be declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 16.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __GNUC__ || __SUNPRO_CC
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2
//! Work-around for a bug in the GNU 3.2 and MSVC compilers.
/** The bug is that the compiler sometimes returns 0 for __alignof(T) when T
    has not yet been instantiated; forcing computation of sizeof(T) first
    works around it. */
template<size_t Size, typename T>
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif
#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
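
// Usage sketch (illustrative only): declaring raw storage whose alignment is
// at least as strict as that of some type, to be initialized later via
// placement new.  The struct name is hypothetical.
//
//     struct double_storage_example {
//         union {
//             __TBB_TypeWithAlignmentAtLeastAsStrict(double) aligner; // enforces alignment
//             char bytes[sizeof(double)];                             // raw, uninitialized bytes
//         } storage;
//     };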

// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
    static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the
// bit-reversed values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};

} // namespace internal
} // namespace tbb

#ifndef __TBB_CompareAndSwap1
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#endif

#ifndef __TBB_CompareAndSwap2
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#if __TBB_DECL_FENCED_ATOMICS

#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif

#endif // __TBB_DECL_FENCED_ATOMICS

// Special atomic functions
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif

#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif

#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif
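
// Usage sketch (illustrative only): the acquire/release pair above matches
// the classic reference-counting protocol.  The counter name is hypothetical.
//
//     volatile ptrdiff_t ref_count = 1;
//     ...
//     __TBB_FetchAndIncrementWacquire( &ref_count );         // take a reference
//     ...
//     if( __TBB_FetchAndDecrementWrelease( &ref_count )==1 )
//         ; // the old value was 1, so this was the last reference: safe to destroy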

template <typename T, size_t S>
struct __TBB_machine_load_store {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = location;
        __TBB_release_consistency_helper();
        return to_return;
    }

    static inline void store_with_release(volatile T &location, T value) {
        __TBB_release_consistency_helper();
        location = value;
    }
};

#if __TBB_WORDSIZE==4
#if _MSC_VER
using tbb::internal::int64_t;
#endif
// On 32-bit platforms, there should be definitions of __TBB_Store8 and __TBB_Load8.
#ifndef __TBB_Store8
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
    for(;;) {
        int64_t result = *(int64_t *)ptr;
        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
    }
}
#endif

#ifndef __TBB_Load8
inline int64_t __TBB_Load8 (const volatile void *ptr) {
    const int64_t anyvalue = 3264; // could be anything; must just be the same for comparand and new value
    return __TBB_CompareAndSwap8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
}
#endif

template <typename T>
struct __TBB_machine_load_store<T,8> {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = (T)__TBB_Load8((const volatile void*)&location);
        __TBB_release_consistency_helper();
        return to_return;
    }

    static inline void store_with_release(volatile T& location, T value) {
        __TBB_release_consistency_helper();
        __TBB_Store8((volatile void *)&location,(int64_t)value);
    }
};
#endif /* __TBB_WORDSIZE==4 */

#ifndef __TBB_load_with_acquire
template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
    return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location);
}
#endif

#ifndef __TBB_store_with_release
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,T(value));
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
    __TBB_machine_load_store<size_t,sizeof(size_t)>::store_with_release(location,value);
}
#endif
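
// Usage sketch (illustrative only): the canonical pairing of the two
// primitives above.  A producer writes data, then releases a flag; a consumer
// acquires the flag before reading the data.  Names are hypothetical.
//
//     volatile int payload = 0;   // ordinary data
//     volatile int ready   = 0;   // synchronization flag
//
//     // producer thread:
//     payload = 42;                           // write the data first
//     __TBB_store_with_release( ready, 1 );   // then release the flag
//
//     // consumer thread:
//     while( !__TBB_load_with_acquire(ready) )
//         __TBB_Yield();
//     int value = payload;                    // guaranteed to observe 42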

#ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
    uintptr_t tmp;
#if __TBB_WORDSIZE>=8
    if( (tmp = x>>32) ) { x=tmp; result += 32; }
#endif
    if( (tmp = x>>16) ) { x=tmp; result += 16; }
    if( (tmp = x>>8) )  { x=tmp; result += 8; }
    if( (tmp = x>>4) )  { x=tmp; result += 4; }
    if( (tmp = x>>2) )  { x=tmp; result += 2; }
    return (x&2)? result+1: result;
}
#endif
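
// Worked example (illustrative): __TBB_Log2(40).  40 is 101000 in binary, so
// only the x>>4 test fires (x becomes 2, result becomes 4); the final (x&2)
// check then adds 1, giving 5 = floor(log2(40)).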

#ifndef __TBB_AtomicOR
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t mask ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|mask, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_AtomicAND
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t mask ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&mask, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif
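
// Usage sketch (illustrative only): maintaining a word of independent flag
// bits that several threads set and clear concurrently.  The names and bit
// assignment are hypothetical.
//
//     volatile uintptr_t state_flags = 0;
//     const uintptr_t BUSY = 0x1;
//     ...
//     __TBB_AtomicOR( &state_flags, BUSY );    // set the BUSY bit
//     ...
//     __TBB_AtomicAND( &state_flags, ~BUSY );  // clear the BUSY bit, keeping the rest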

#ifndef __TBB_TryLockByte
inline bool __TBB_TryLockByte( unsigned char &flag ) {
    return __TBB_CompareAndSwap1(&flag,1,0)==0;
}
#endif

#ifndef __TBB_LockByte
inline uintptr_t __TBB_LockByte( unsigned char& flag ) {
    if ( !__TBB_TryLockByte(flag) ) {
        tbb::internal::atomic_backoff b;
        do {
            b.pause();
        } while ( !__TBB_TryLockByte(flag) );
    }
    return 0;
}
#endif
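
// Usage sketch (illustrative only): the two functions above form a minimal
// test-and-set spin lock over a single byte; unlocking is a release store of
// zero.  The flag name is hypothetical.
//
//     unsigned char lock_flag = 0;
//     ...
//     __TBB_LockByte( lock_flag );                 // spin (with backoff) until acquired
//     /* ...critical section... */
//     __TBB_store_with_release( lock_flag, 0 );    // release the lock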

#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif

template<typename T>
T __TBB_ReverseBits(T src)
{
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;

    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );

    return dst;
}
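
// Worked example (illustrative): for a uint16_t src = 0x0001, the table maps
// byte 0x01 to 0x80 and byte 0x00 to 0x00, and the loop also swaps the byte
// order, so __TBB_ReverseBits<uint16_t>(0x0001) yields 0x8000 regardless of
// endianness.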

#endif /* __TBB_machine_H */
