/*
 * lud_omp - in-place elimination over the flat float array `a`.
 *
 * Parameters:
 *   a    - array that is read and overwritten; indexed as a[col + size * row]
 *          (presumably a row-major size x size matrix - TODO confirm with
 *          the caller).
 *   size - stride and loop bound used for every index computed here.
 *
 * Side effects: stores `a` and `size` into the file-scope variables a_g and
 * size_g and uses the file-scope variable `i` as the outer loop counter; all
 * three are left modified on return (i == size_g when size_g >= 0).
 *
 * For each outer step i, two loops run one after the other:
 *   1. for j in [i, size_g):
 *        a_g[j + size_g*i] -= sum over k < i of
 *                             a_g[j + k*size_g] * a_g[k + size_g*i]
 *   2. for j in (i, size_g):
 *        a_g[i + j*size_g] = (a_g[i + j*size_g] - sum over k < i of
 *                             a_g[k + j*size_g] * a_g[i + k*size_g])
 *                            / a_g[i * (size_g + 1)]
 * The function name suggests this is an LU decomposition; the divisor in
 * loop 2 is not checked against zero.
 *
 * Each loop is handed to __ompc_fork() with an outlined body
 * (__ompdo_lud_omp_1 / __ompdo_lud_omp_2, defined elsewhere) when it has more
 * than one iteration and __ompc_can_fork() returns non-zero; otherwise the
 * serial copy below runs between __ompc_serialized_parallel() and
 * __ompc_end_serialized_parallel().
 */
extern void lud_omp(_IEEE32 * a, _INT32 size)
{
  register _INT32 row_trips;
  register _INT32 row_fork_ok;
  /*
   * Third argument of __ompc_fork().  The generated code passed this
   * register without ever assigning it, i.e. it read an indeterminate
   * value; it is zeroed here so the call is well defined.
   * NOTE(review): confirm the outlined bodies do not rely on this argument.
   */
  register _UINT64 _w2c_reg3 = 0;
  register _INT32 col_trips;
  register _INT32 col_fork_ok;
  _INT32 col;
  _IEEE32 acc;
  _INT32 term;
  _INT32 __ompv_gtid_s1;

  /*Begin_of_nested_PU(s)*/

  a_g = a;
  size_g = size;

  for (i = 0; i < size_g; i = i + 1) {
    /* Loop 1: update the entries a_g[j + size_g*i] for j >= i. */
    row_trips = size_g - i;
    row_fork_ok = row_trips > 1;
    if (row_fork_ok) {
      row_fork_ok = __ompc_can_fork();
    }
    if (row_fork_ok) {
      __ompc_fork(0, &__ompdo_lud_omp_1, _w2c_reg3);
    } else {
      /* The thread number is fetched but not used further in this function. */
      __ompv_gtid_s1 = __ompc_get_local_thread_num();
      __ompc_serialized_parallel();
      for (col = i; col < size_g; col = col + 1) {
        acc = a_g[(_INT64)(col + (size_g * i))];
        for (term = 0; term < i; term = term + 1) {
          acc = acc - (a_g[(_INT64)(col + (term * size_g))] *
                       a_g[(_INT64)(term + (size_g * i))]);
        }
        a_g[(_INT64)(col + (size_g * i))] = acc;
      }
      __ompc_end_serialized_parallel();
    }

    /* Loop 2: update the entries a_g[i + j*size_g] for j > i. */
    col_trips = (size_g - i) + -1;
    col_fork_ok = col_trips > 1;
    if (col_fork_ok) {
      col_fork_ok = __ompc_can_fork();
    }
    if (col_fork_ok) {
      __ompc_fork(0, &__ompdo_lud_omp_2, _w2c_reg3);
    } else {
      __ompv_gtid_s1 = __ompc_get_local_thread_num();
      __ompc_serialized_parallel();
      for (col = i + 1; col < size_g; col = col + 1) {
        acc = a_g[(_INT64)(i + (col * size_g))];
        for (term = 0; term < i; term = term + 1) {
          acc = acc - (a_g[(_INT64)(term + (col * size_g))] *
                       a_g[(_INT64)(i + (term * size_g))]);
        }
        /* i * (size_g + 1) == i + i*size_g: the entry at position (i, i). */
        a_g[(_INT64)(i + (col * size_g))] =
            acc / a_g[(_INT64)(i * (size_g + 1))];
      }
      __ompc_end_serialized_parallel();
    }
  }
  return;
} /* lud_omp */
/*
 * ompt_get_thread_id - returns __ompc_get_local_thread_num() plus one.
 *
 * NOTE(review): the +1 presumably makes the reported ids start at 1 rather
 * than 0 - confirm against the users of this id.
 */
OMPT_API ompt_thread_id_t ompt_get_thread_id(void)
{
  /* Earlier variant, kept for reference: return __omp_myid + 1; */
  return __ompc_get_local_thread_num() + 1;
}
/*
 * fft_float - Fourier transform driver working on file-scope state.
 *
 * Parameters:
 *   __NumSamples       - number of samples; IsPowerOfTwo() must accept it,
 *                        otherwise an error is printed and exit(1) is called.
 *   __InverseTransform - non-zero negates angle_numerator before the
 *                        transform and divides every output element by
 *                        NumSamples afterwards.
 *   __RealIn, __ImagIn, __RealOut, __ImagOut
 *                      - only the first element of each is read here (see the
 *                        review note in the body); all four are dereferenced
 *                        unconditionally, so none may be NULL.
 *
 * Side effects: writes the file-scope variables NumSamples, InverseTransform,
 * NumBits, NumIter, m, BlockSize, BlockEnd, sm1, sm2, cm1, cm2, w, k, tr, ti,
 * i and possibly angle_numerator, plus the storage behind RealIn, ImagIn,
 * RealOut and ImagOut.
 *
 * Steps:
 *   1. Copy RealIn/ImagIn into RealOut/ImagOut at the index returned by
 *      ReverseBits(index, NumBits); the imaginary part is 0 when ImagIn
 *      compares equal to 0.
 *   2. For m = 1 .. NumIter, process blocks of BlockSize elements, combining
 *      element j with element j + BlockEnd using factors generated by the
 *      three-term recurrences held in ar[] / ai[].
 *   3. When InverseTransform is non-zero, divide the outputs by NumSamples.
 *
 * Steps 1 and 2 are handed to __ompc_fork() with an outlined body defined
 * elsewhere when __ompc_can_fork() allows it; otherwise the serial copies
 * below run between __ompc_serialized_parallel() and
 * __ompc_end_serialized_parallel().
 */
extern void fft_float(
  _UINT32 __NumSamples,
  _INT32 __InverseTransform,
  _IEEE32 * __RealIn,
  _IEEE32 * __ImagIn,
  _IEEE32 * __RealOut,
  _IEEE32 * __ImagOut)
{
  register _INT32 is_pow2;
  register _UINT32 bits_needed;
  register _INT32 copy_fork_ok;
  /*
   * Third argument of __ompc_fork().  The generated code passed this
   * register without ever assigning it, i.e. it read an indeterminate
   * value; it is zeroed here so the calls are well defined.
   * NOTE(review): confirm the outlined bodies do not rely on this argument.
   */
  register _UINT64 _w2c_reg3 = 0;
  register _UINT32 reversed;
  register _IEEE32 imag_val;
  register _IEEE64 log2_samples;
  register _IEEE64 pow_size;
  register _IEEE64 pow_end;
  register _INT32 block_trips;
  register _INT32 block_fork_ok;
  _IEEE64 delta_angle;
  _IEEE64 denom;
  _INT32 src;
  _INT32 dst;
  _INT32 block_start;
  _IEEE64 ar[3LL];
  _IEEE64 ai[3LL];
  _INT32 lo;
  _INT32 n;
  _INT32 __ompv_gtid_s1;

  /*Begin_of_nested_PU(s)*/

  NumSamples = __NumSamples;
  InverseTransform = __InverseTransform;
  /*
   * NOTE(review): these four statements copy only element 0 of each argument
   * into whatever RealIn/ImagIn/RealOut/ImagOut already refer to; they do not
   * rebind those names to the caller's buffers.  lud_omp in this file uses a
   * plain pointer assignment (a_g = a) for the same purpose, so this may be
   * unintended - the declarations are not visible here, verify before
   * changing.
   */
  *RealIn = *__RealIn;
  *ImagIn = *__ImagIn;
  *RealOut = *__RealOut;
  *ImagOut = *__ImagOut;

  is_pow2 = IsPowerOfTwo(NumSamples);
  if (is_pow2 == 0) {
    printf("Error in fft(): NumSamples=%u is not power of two\n", NumSamples);
    exit(1);
  }

  /*
   * NOTE(review): angle_numerator is not declared in this function, so this
   * sign flip appears to persist after the call returns - confirm that is
   * intended.
   */
  if (InverseTransform != 0) {
    angle_numerator = -angle_numerator;
  }

  CheckPointer(RealIn, "RealIn");
  CheckPointer(RealOut, "RealOut");
  CheckPointer(ImagOut, "ImagOut");

  bits_needed = NumberOfBitsNeeded(NumSamples);
  NumBits = bits_needed;

  /* Step 1: scatter the input into the output in bit-reversed index order. */
  copy_fork_ok = 1;
  if (copy_fork_ok) {
    copy_fork_ok = __ompc_can_fork();
  }
  if (copy_fork_ok) {
    __ompc_fork(0, &__omprg_fft_float_1, _w2c_reg3);
  } else {
    /* The thread number is fetched but not used further in this function. */
    __ompv_gtid_s1 = __ompc_get_local_thread_num();
    __ompc_serialized_parallel();
    for (src = 0; src < (_INT32) NumSamples; src = src + 1) {
      reversed = ReverseBits((_UINT32) src, NumBits);
      dst = (_INT32)(reversed);
      RealOut[(_UINT64) dst] = RealIn[(_UINT64) src];
      if ((_UINT64)(ImagIn) != 0ULL) {
        imag_val = ImagIn[(_UINT64) src];
      } else {
        imag_val = 0.0F;
      }
      ImagOut[(_UINT64) dst] = imag_val;
    }
    __ompc_end_serialized_parallel();
  }

  log2_samples = log2((_IEEE64)(NumSamples));
  NumIter = _U4F8TRUNC(log2_samples);

  /* Step 2: one pass per m, with BlockSize = 2^m and BlockEnd = 2^(m-1). */
  for (m = 1; NumIter >= (_UINT32) m; m = m + 1) {
    pow_size = pow(2.0, (_IEEE64)(m));
    BlockSize = _U4F8TRUNC(pow_size);
    pow_end = pow(2.0, (_IEEE64)(m + -1));
    BlockEnd = _U4F8TRUNC(pow_end);

    delta_angle = angle_numerator / (_IEEE64)(BlockSize);
    sm2 = sin(delta_angle * -2.0);
    sm1 = sin(-delta_angle);
    cm2 = cos(delta_angle * -2.0);
    cm1 = cos(delta_angle);
    w = cm1 * 2.0;

    /* Number of blocks, rounded up. */
    block_trips =
        (((_INT32) NumSamples + (_INT32) BlockSize) + -1) / (_INT32) BlockSize;
    block_fork_ok = block_trips > 1;
    if (block_fork_ok) {
      block_fork_ok = __ompc_can_fork();
    }
    if (block_fork_ok) {
      __ompc_fork(0, &__ompdo_fft_float_11, _w2c_reg3);
    } else {
      __ompv_gtid_s1 = __ompc_get_local_thread_num();
      __ompc_serialized_parallel();
      for (block_start = 0; block_start < (_INT32) NumSamples;
           block_start = block_start + (_INT32) BlockSize) {
        /* Seed the recurrences: slots [2] and [1] hold the two previous terms. */
        ar[2] = cm2;
        ar[1] = cm1;
        ai[2] = sm2;
        ai[1] = sm1;

        lo = block_start;
        for (n = 0; (_UINT32) n < BlockEnd; n = n + 1) {
          /* Next term: x[0] = w * x[1] - x[2], then shift the history. */
          ar[0] = (ar[1] * w) - ar[2];
          ar[2] = ar[1];
          ar[1] = ar[0];
          ai[0] = (ai[1] * w) - ai[2];
          ai[2] = ai[1];
          ai[1] = ai[0];

          /* k, tr and ti are file-scope variables, not locals. */
          k = (_INT32)((_UINT32) lo + BlockEnd);
          tr = ((_IEEE64)(RealOut[(_UINT64) k]) * ar[0]) -
               ((_IEEE64)(ImagOut[(_UINT64) k]) * ai[0]);
          ti = ((_IEEE64)(RealOut[(_UINT64) k]) * ai[0]) +
               ((_IEEE64)(ImagOut[(_UINT64) k]) * ar[0]);

          /* Element k must be written before element lo is overwritten. */
          RealOut[(_UINT64) k] =
              (_IEEE32)((_IEEE64)(RealOut[(_UINT64) lo]) - tr);
          ImagOut[(_UINT64) k] =
              (_IEEE32)((_IEEE64)(ImagOut[(_UINT64) lo]) - ti);
          RealOut[(_UINT64) lo] =
              (_IEEE32)((_IEEE64)(RealOut[(_UINT64) lo]) + tr);
          ImagOut[(_UINT64) lo] =
              (_IEEE32)((_IEEE64)(ImagOut[(_UINT64) lo]) + ti);

          lo = lo + 1;
        }
      }
      __ompc_end_serialized_parallel();
    }
  }

  /* Step 3: scale by 1/NumSamples for the inverse transform. */
  if (InverseTransform != 0) {
    denom = (_IEEE64)(NumSamples);
    for (i = 0; NumSamples > (_UINT32) i; i = i + 1) {
      RealOut[(_UINT64) i] =
          (_IEEE32)((_IEEE64)(RealOut[(_UINT64) i]) / denom);
      ImagOut[(_UINT64) i] =
          (_IEEE32)((_IEEE64)(ImagOut[(_UINT64) i]) / denom);
    }
  }
  return;
} /* fft_float */