/*
 * lud_omp - update the matrix `a` (size x size floats, flat storage) in place.
 *
 * The arguments are first published through the names a_g and size_g, and
 * `i` (not declared in this function) is the outer loop counter.  For every
 * i in [0, size_g) two sweeps run one after the other:
 *
 *   sweep 1: for each j in [i, size_g)
 *              a[i*size + j] -= sum over k < i of a[k*size + j] * a[i*size + k]
 *   sweep 2: for each j in (i, size_g)
 *              a[j*size + i]  = (a[j*size + i]
 *                                - sum over k < i of a[j*size + k] * a[k*size + i])
 *                               / a[i*size + i]
 *
 * A sweep is handed to __ompc_fork() (outlined bodies __ompdo_lud_omp_1 and
 * __ompdo_lud_omp_2) when it has more than one iteration and
 * __ompc_can_fork() returns non-zero.  Otherwise it runs inline, bracketed
 * by __ompc_serialized_parallel() / __ompc_end_serialized_parallel().
 *
 * The divisor a[i*size + i] in sweep 2 is not checked against zero.
 *
 * NOTE(review): the update pattern looks like an LU decomposition without
 * pivoting - confirm against the original (pre-translation) source.
 */
extern void lud_omp(
  _IEEE32 * a,
  _INT32 size)
{
  /* NOTE(review): never assigned before being handed to __ompc_fork();
   * presumably a placeholder for a frame/context value - confirm. */
  _UINT64 fork_frame;
  _INT32 remaining;   /* iteration count of the sweep about to run */
  _INT32 may_fork;
  _INT32 col;         /* sweep 1 index */
  _INT32 row;         /* sweep 2 index */
  _INT32 t;           /* running index of the dot product */
  _INT64 cell;        /* flat index of the element being updated */
  _IEEE32 acc;
  _INT32 tid;         /* fetched on the serial path; not read afterwards */

  a_g = a;
  size_g = size;

  for(i = 0; size_g > i; i = i + 1)
  {
    /* ---- sweep 1: elements (i, j) for j >= i ---- */
    remaining = size_g - i;
    may_fork = (remaining > 1) ? __ompc_can_fork() : 0;
    if(may_fork)
    {
      __ompc_fork(0, &__ompdo_lud_omp_1, fork_frame);
    }
    else
    {
      tid = __ompc_get_local_thread_num();
      __ompc_serialized_parallel();
      for(col = i; col < size_g; col = col + 1)
      {
        cell = (_INT64)(col + (size_g * i));
        acc = a_g[cell];
        for(t = 0; t < i; t = t + 1)
        {
          acc = acc - (a_g[(_INT64)(col + (t * size_g))] * a_g[(_INT64)(t + (size_g * i))]);
        }
        a_g[cell] = acc;
      }
      __ompc_end_serialized_parallel();
    }

    /* ---- sweep 2: elements (j, i) for j > i, scaled by element (i, i) ---- */
    remaining = (size_g - i) - 1;
    may_fork = (remaining > 1) ? __ompc_can_fork() : 0;
    if(may_fork)
    {
      __ompc_fork(0, &__ompdo_lud_omp_2, fork_frame);
    }
    else
    {
      tid = __ompc_get_local_thread_num();
      __ompc_serialized_parallel();
      for(row = i + 1; row < size_g; row = row + 1)
      {
        cell = (_INT64)(i + (row * size_g));
        acc = a_g[cell];
        for(t = 0; t < i; t = t + 1)
        {
          acc = acc - (a_g[(_INT64)(t + (row * size_g))] * a_g[(_INT64)(i + (t * size_g))]);
        }
        a_g[cell] = acc / a_g[(_INT64)(i * (size_g + 1))];
      }
      __ompc_end_serialized_parallel();
    }
  }
  return;
} /* lud_omp */
/* Example #2 */
/* 0 */
/*
 * Return the OMPT thread identifier of the calling thread.
 *
 * The identifier is the value reported by __ompc_get_local_thread_num()
 * plus one.
 */
OMPT_API ompt_thread_id_t ompt_get_thread_id(void)
{
	return 1 + __ompc_get_local_thread_num();
}
/*
 * fft_float - radix-2 style transform over NumSamples points, working through
 * names that are not declared in this function (NumSamples, InverseTransform,
 * RealIn, ImagIn, RealOut, ImagOut, NumBits, NumIter, BlockSize, BlockEnd,
 * angle_numerator, sm1, sm2, cm1, cm2, w, k, tr, ti, m, i).
 *
 * Steps, in order:
 *   1. Copy the scalar arguments into NumSamples / InverseTransform, and copy
 *      the FIRST element of each array argument through the corresponding
 *      RealIn / ImagIn / RealOut / ImagOut pointer.
 *      NOTE(review): only one element is copied and the pointers themselves
 *      are not reassigned; __ImagIn is dereferenced unconditionally even
 *      though ImagIn is later allowed to be null - confirm this is intended.
 *   2. Print an error and exit(1) when IsPowerOfTwo(NumSamples) returns 0.
 *   3. Negate angle_numerator when InverseTransform is non-zero.
 *      NOTE(review): the negation is not undone before returning - confirm
 *      callers expect that.
 *   4. CheckPointer() RealIn, RealOut and ImagOut (ImagIn is not checked).
 *   5. Bit-reversal copy of the input into RealOut / ImagOut; a null ImagIn
 *      is treated as all zeros.
 *   6. For m = 1 .. NumIter, combine pairs of elements BlockEnd apart inside
 *      each block of BlockSize elements, using a sine/cosine recurrence.
 *   7. When InverseTransform is non-zero, divide every output by NumSamples.
 *
 * Steps 5 and 6 are handed to __ompc_fork() (outlined bodies
 * __omprg_fft_float_1 and __ompdo_fft_float_11) when __ompc_can_fork()
 * allows it; step 6 additionally requires more than one block.  Otherwise
 * they run inline between __ompc_serialized_parallel() and
 * __ompc_end_serialized_parallel().
 */
extern void fft_float(
  _UINT32 __NumSamples,
  _INT32 __InverseTransform,
  _IEEE32 * __RealIn,
  _IEEE32 * __ImagIn,
  _IEEE32 * __RealOut,
  _IEEE32 * __ImagOut)
{
  _INT32 pow2_ok;
  _UINT32 bits_needed;
  _INT32 may_fork;
  /* NOTE(review): never assigned before being handed to __ompc_fork();
   * presumably a placeholder for a frame/context value - confirm. */
  _UINT64 fork_frame;
  _UINT32 reversed;
  _IEEE32 imag_src;
  _IEEE64 log2_samples;
  _IEEE64 block_size_f;
  _IEEE64 block_end_f;
  _INT32 block_count;
  _IEEE64 delta_angle;
  _IEEE64 denom;
  _INT32 src;            /* index into the input arrays */
  _INT32 dst;            /* bit-reversed index into the output arrays */
  _INT32 block_start;
  _IEEE64 re[3LL];       /* cosine recurrence: [0] current, [1] previous, [2] one before */
  _IEEE64 im[3LL];       /* sine recurrence, same layout */
  _INT32 lo;             /* lower element of the pair being combined */
  _INT32 step;
  _INT32 tid;            /* fetched on the serial path; not read afterwards */

  /* Step 1: publish the arguments. */
  NumSamples = __NumSamples;
  InverseTransform = __InverseTransform;
  * RealIn = *__RealIn;
  * ImagIn = *__ImagIn;
  * RealOut = *__RealOut;
  * ImagOut = *__ImagOut;

  /* Step 2: reject sample counts that are not a power of two. */
  pow2_ok = IsPowerOfTwo(NumSamples);
  if(pow2_ok == 0)
  {
    printf("Error in fft():  NumSamples=%u is not power of two\n", NumSamples);
    exit(1);
  }

  /* Step 3: flip the rotation direction for the inverse transform. */
  if(InverseTransform != 0)
  {
    angle_numerator = -angle_numerator;
  }

  /* Step 4: pointer validation (ImagIn is deliberately left out here). */
  CheckPointer(RealIn, "RealIn");
  CheckPointer(RealOut, "RealOut");
  CheckPointer(ImagOut, "ImagOut");

  bits_needed = NumberOfBitsNeeded(NumSamples);
  NumBits = bits_needed;

  /* Step 5: bit-reversal reordering of the input into the output arrays. */
  may_fork = __ompc_can_fork();
  if(may_fork)
  {
    __ompc_fork(0, &__omprg_fft_float_1, fork_frame);
  }
  else
  {
    tid = __ompc_get_local_thread_num();
    __ompc_serialized_parallel();
    for(src = 0; src < (_INT32) NumSamples; src = src + 1)
    {
      reversed = ReverseBits((_UINT32) src, NumBits);
      dst = (_INT32)(reversed);
      RealOut[(_UINT64) dst] = RealIn[(_UINT64) src];
      /* A null ImagIn means "purely real input". */
      imag_src = ((_UINT64)(ImagIn) != 0ULL) ? ImagIn[(_UINT64) src] : 0.0F;
      ImagOut[(_UINT64) dst] = imag_src;
    }
    __ompc_end_serialized_parallel();
  }

  /* Step 6: one pass per m, with the block size doubling each time. */
  log2_samples = log2((_IEEE64)(NumSamples));
  NumIter = _U4F8TRUNC(log2_samples);
  for(m = 1; NumIter >= (_UINT32) m; m = m + 1)
  {
    block_size_f = pow(2.0, (_IEEE64)(m));
    BlockSize = _U4F8TRUNC(block_size_f);
    block_end_f = pow(2.0, (_IEEE64)(m - 1));
    BlockEnd = _U4F8TRUNC(block_end_f);

    /* Seed values for the sine/cosine recurrence of this pass. */
    delta_angle = angle_numerator / (_IEEE64)(BlockSize);
    sm2 = sin(delta_angle * -2.0);
    sm1 = sin(-delta_angle);
    cm2 = cos(delta_angle * -2.0);
    cm1 = cos(delta_angle);
    w = cm1 * 2.0;

    /* Number of blocks, rounded up. */
    block_count = (((_INT32) NumSamples + (_INT32) BlockSize) - 1) / (_INT32) BlockSize;
    may_fork = (block_count > 1) ? __ompc_can_fork() : 0;
    if(may_fork)
    {
      __ompc_fork(0, &__ompdo_fft_float_11, fork_frame);
    }
    else
    {
      tid = __ompc_get_local_thread_num();
      __ompc_serialized_parallel();
      for(block_start = 0; block_start < (_INT32) NumSamples; block_start = block_start + (_INT32) BlockSize)
      {
        re[2] = cm2;
        re[1] = cm1;
        im[2] = sm2;
        im[1] = sm1;
        lo = block_start;
        for(step = 0; (_UINT32) step < BlockEnd; step = step + 1)
        {
          /* Advance the recurrence: next = previous * w - one_before. */
          re[0] = (re[1] * w) - re[2];
          re[2] = re[1];
          re[1] = re[0];
          im[0] = (im[1] * w) - im[2];
          im[2] = im[1];
          im[1] = im[0];

          /* k is the partner element, BlockEnd above lo. */
          k = (_INT32)((_UINT32) lo + BlockEnd);
          tr = ((_IEEE64)(RealOut[(_UINT64) k]) * re[0]) - ((_IEEE64)(ImagOut[(_UINT64) k]) * im[0]);
          ti = ((_IEEE64)(RealOut[(_UINT64) k]) * im[0]) + ((_IEEE64)(ImagOut[(_UINT64) k]) * re[0]);

          /* The partner must be written first: it still needs the old value at lo. */
          RealOut[(_UINT64) k] = (_IEEE32)((_IEEE64)(RealOut[(_UINT64) lo]) - tr);
          ImagOut[(_UINT64) k] = (_IEEE32)((_IEEE64)(ImagOut[(_UINT64) lo]) - ti);
          RealOut[(_UINT64) lo] = (_IEEE32)((_IEEE64)(RealOut[(_UINT64) lo]) + tr);
          ImagOut[(_UINT64) lo] = (_IEEE32)((_IEEE64)(ImagOut[(_UINT64) lo]) + ti);
          lo = lo + 1;
        }
      }
      __ompc_end_serialized_parallel();
    }
  }

  /* Step 7: only the inverse transform is normalised. */
  if(InverseTransform == 0)
  {
    return;
  }
  denom = (_IEEE64)(NumSamples);
  for(i = 0; NumSamples > (_UINT32) i; i = i + 1)
  {
    RealOut[(_UINT64) i] = (_IEEE32)((_IEEE64)(RealOut[(_UINT64) i]) / denom);
    ImagOut[(_UINT64) i] = (_IEEE32)((_IEEE64)(ImagOut[(_UINT64) i]) / denom);
  }
  return;
} /* fft_float */