// Small array-arithmetic driver (1-D version).
//
// Fills aa and bb from the loop index, derives cc, dd and ee from them one
// element at a time, then prints cc[2] and the sum of each derived array.
// N, add_internal() and add_external() are supplied from outside this
// function. Each stage is kept in a loop of its own.
int main(int argc, char *argv[]){

    double aa[N], bb[N], cc[N], dd[N], ee[N];
    double total;
    int k;

    printf("Hello World!\n");

    // aa[k] = k
    for (k = 0; k < N; ++k) {
        aa[k] = (double) k;
    }

    // bb[k] = 2k (the doubling is done on the integer index, then converted)
    for (k = 0; k < N; ++k) {
        bb[k] = (double) (2 * k);
    }

    // cc is first filled with ones ...
    for (k = 0; k < N; ++k) {
        cc[k] = 1.0;
    }

    // ... and then overwritten with the element-wise sum of aa and bb.
    for (k = 0; k < N; ++k) {
        cc[k] = aa[k] + bb[k];
    }

    printf("cc = %f\n", cc[2]);

    // dd[k] = add_internal(2 * aa[k], bb[k])
    for (k = 0; k < N; ++k) {
        const double twice_a = aa[k] * 2;
        dd[k] = add_internal(twice_a, bb[k]);
    }

    // ee[k] = add_external(2 * aa[k], bb[k] / 3)
    for (k = 0; k < N; ++k) {
        const double twice_a = aa[k] * 2;
        const double third_b = bb[k] / 3;
        ee[k] = add_external(twice_a, third_b);
    }

    // Report the sum of every derived array, reusing one accumulator.
    total = 0.0;
    for (k = 0; k < N; ++k) {
        total += cc[k];
    }
    printf("sum cc = %f\n", total);

    total = 0.0;
    for (k = 0; k < N; ++k) {
        total += dd[k];
    }
    printf("sum dd = %f\n", total);

    total = 0.0;
    for (k = 0; k < N; ++k) {
        total += ee[k];
    }
    printf("sum ee = %f\n", total);

    return 0;
}
示例#2
0
/**
 * Adds str to the link set ls and counts it in ls->population when
 * add_internal() accepts it.
 *
 * Only the pointer is stored (per the original author's note).
 *
 * @return false when add_internal() returns NULL (described by the original
 *         author as "already there"), true when a new entry was counted.
 */
bool pp_linkset_add(pp_linkset *ls, const char *str)
{
	assert(ls != NULL, "pp_linkset internal error: Trying to add to a null set");

	const bool is_new = (add_internal(ls, str) != NULL);
	if (is_new)
	{
		/* Only entries reported as new bump the population counter. */
		ls->population++;
	}
	return is_new;
}
示例#3
0
/*
 * Adds str to the link set ls. Only the pointer is stored (per the original
 * author's note).
 *
 * Returns 1 and increments ls->population when add_internal() yields a
 * non-NULL result; returns 0 otherwise (described by the original author as
 * "already there"). A NULL set is reported through error().
 */
int pp_linkset_add(pp_linkset *ls, wchar_t *str)
{
  if (NULL == ls) error(L"pp_linkset internal error: Trying to add to a null set");

  if (add_internal(ls, str) != NULL)
  {
    ls->population++;
    return 1;
  }
  return 0;
}
示例#4
0
// Registers the grid cell (pos_x, pos_y).
//
// While grid_spacing is negative, the call flips it to its positive value and
// seeds anker and the min/max bounds from this cell. Otherwise the bounds are
// widened to include the cell. Returns whatever add_internal() reports for it.
BOOL LASoccupancyGrid::add(I32 pos_x, I32 pos_y)
{
  if (grid_spacing < 0)
  {
    // Negative spacing: make it positive and start the bounds at this cell.
    grid_spacing = -grid_spacing;
    min_x = max_x = pos_x;
    min_y = max_y = pos_y;
    anker = pos_y;
  }
  else
  {
    // Widen the x range; a value can only extend one side at a time.
    if (pos_x < min_x)
    {
      min_x = pos_x;
    }
    else if (pos_x > max_x)
    {
      max_x = pos_x;
    }

    // Same for the y range.
    if (pos_y < min_y)
    {
      min_y = pos_y;
    }
    else if (pos_y > max_y)
    {
      max_y = pos_y;
    }
  }
  return add_internal(pos_x, pos_y);
}
示例#5
0
// Appends hr at the tail end of this list: hr becomes the new _tail, and
// also the _head when the list had no tail before.
inline void HeapRegionLinkedList::add_as_tail(HeapRegion* hr) {
  hrl_assert_mt_safety_ok(this);
  // Either the list is empty with both ends unset, or it is non-empty with
  // both ends set.
  assert((length() == 0 && _head == NULL && _tail == NULL) ||
         (length() >  0 && _head != NULL && _tail != NULL),
         hrl_ext_msg(this, "invariant"));

  // Per the original note, add_internal() will verify the region.
  add_internal(hr);

  // Hook the region in behind the current tail, or make it the head when
  // there is no tail yet.
  if (_tail == NULL) {
    _head = hr;
  } else {
    _tail->set_next(hr);
  }
  _tail = hr;
}
示例#6
0
// Registers the grid cell that contains the given point.
//
// The cell indices are the point's x and y divided by the (positive)
// grid_spacing and passed through I32_FLOOR. While grid_spacing is negative,
// the call first flips it to its positive value and then seeds anker and the
// min/max bounds from the cell; otherwise the bounds are widened to include
// the cell. Returns whatever add_internal() reports for the cell.
BOOL LASoccupancyGrid::add(const LASpoint* point)
{
  // Remember whether this call has to seed the bounds before the sign flip
  // hides that fact.
  const bool seeding = (grid_spacing < 0);
  if (seeding)
  {
    grid_spacing = -grid_spacing;
  }

  // From here on grid_spacing is the value both original branches divided by.
  const I32 cell_x = I32_FLOOR(point->get_x() / grid_spacing);
  const I32 cell_y = I32_FLOOR(point->get_y() / grid_spacing);

  if (seeding)
  {
    anker = cell_y;
    min_x = max_x = cell_x;
    min_y = max_y = cell_y;
  }
  else
  {
    if (cell_x < min_x)
    {
      min_x = cell_x;
    }
    else if (cell_x > max_x)
    {
      max_x = cell_x;
    }

    if (cell_y < min_y)
    {
      min_y = cell_y;
    }
    else if (cell_y > max_y)
    {
      max_y = cell_y;
    }
  }
  return add_internal(cell_x, cell_y);
}
// Array-arithmetic driver (2-D version) used to experiment with compiler
// loop vectorisation.
//
// Fills aa and bb from the row index, derives cc, dd and ee from them one
// element at a time, then prints cc[2][2] and the sum of each derived array.
// N, P, add_internal() and add_external() are supplied from outside this
// function. Each stage is kept in a loop nest of its own.
int main(int argc, char *argv[]){

    double aa[N][P], bb[N][P], cc[N][P], dd[N][P], ee[N][P];
    double total;
    int row, col;

    printf("Hello World!\n");

    // aa[row][col] = row
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            aa[row][col] = (double) row;
        }
    }

    // bb[row][col] = 2 * row (doubled as an integer, then converted)
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            bb[row][col] = (double) (2 * row);
        }
    }

    // cc is first filled with ones ...
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            cc[row][col] = 1.0;
        }
    }

    // ... and then overwritten with the element-wise sum of aa and bb.
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            cc[row][col] = aa[row][col] + bb[row][col];
        }
    }

    // Sample element; the original author's only remark here: "don't ask why!"
    printf("cc = %f\n", cc[2][2]);

    // dd = add_internal(2 * aa, bb), element by element
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            dd[row][col] = add_internal((aa[row][col] * 2), bb[row][col]);
        }
    }

    // ---------------------------------------------------------------------
    // Author's notes on the add_external() loop below
    // ---------------------------------------------------------------------
    // Compiler report for this loop when building with aggressive unrolling
    // (the author wrote the flag as "-unroll-agressive"):
    //
    //     loop was not vectorized: vector dependence prevents vectorization
    //
    // A possible solution may be found at
    // https://software.intel.com/en-us/articles/fdiag15344
    //
    // The call used to be
    //
    //     ee[i][x] = add_external(aa[i][x]*2, bb[i][x]/3);
    //
    // and was reduced to plain arguments in the hope that removing the
    // computation from the argument list would cure the vectorisation
    // failure, as per the Intel documentation mentioned. The author remarks
    // that they obviously do not understand this well enough yet.
    //
    // Requirements for loop vectorisation (from the Intel documentation):
    //
    // * The loop must contain straight-line code (a single basic block).
    //   There should be no jumps or branches, but masked assignments are
    //   allowed, including if-then-else constructs that can be interpreted
    //   as masked assignments.
    //
    // * The loop must be countable, i.e. the number of iterations must be
    //   known before the loop starts to execute, though it need not be known
    //   at compile time. Consequently, there must be no data-dependent exit
    //   conditions.
    //
    // * There should be no backward loop-carried dependencies. For example,
    //   the loop must not require statement 2 of iteration 1 to be executed
    //   before statement 1 of iteration 2 for correct results. This allows
    //   consecutive iterations of the original loop to be executed
    //   simultaneously in a single iteration of the unrolled, vectorised
    //   loop.
    //
    //   OK (vectorisable): a[i-1] is always computed before it is used:
    //
    //       for (i=1; i<MAX; i++) {
    //           a[i] = b[i] + c[i]
    //           d[i] = e[i] - a[i-1]
    //       }
    //
    //   Not OK (unvectorisable): a[i-1] might be needed before it has been
    //   computed:
    //
    //       for (i=1; i<MAX; i++) {
    //           d[i] = e[i] - a[i-1]
    //           a[i] = b[i] + c[i]
    //       }
    //
    // * There should be no special operators and no function or subroutine
    //   calls, unless these are inlined, either manually or automatically by
    //   the compiler, or they are SIMD (vectorised) functions. Intrinsic
    //   math functions such as sin(), log(), fmax(), etc. are allowed, since
    //   the compiler runtime library contains SIMD (vectorised) versions of
    //   these functions. See the comments section of the Intel article for a
    //   more extensive list.
    //
    // * If a loop is part of a loop nest, it should normally be the inner
    //   loop. Outer loops can be parallelised using OpenMP or
    //   autoparallelisation (-parallel), but they can only rarely be
    //   auto-vectorised, unless the compiler is able either to fully unroll
    //   the inner loop, or to interchange the inner and outer loops.
    //   (Additional high-level loop transformations such as these may
    //   require -O3. This option is available for both Intel and non-Intel
    //   microprocessors but it may result in more optimisations for Intel
    //   microprocessors than for non-Intel microprocessors.) The SIMD pragma
    //   or directive can be used to ask the compiler to vectorise an outer
    //   loop. See
    //   http://software.intel.com/en-us/articles/requirements-for-vectorizing-loops-with-pragma-simd
    //   for more information about what sort of loops can be vectorised
    //   using #pragma simd, !DIR$ SIMD or the OpenMP 4.0 equivalents.
    // ---------------------------------------------------------------------
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            ee[row][col] = add_external(aa[row][col], bb[row][col]);
        }
    }

    // Report the sum of every derived array, reusing one accumulator.
    total = 0.0;
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            total += cc[row][col];
        }
    }
    printf("sum cc = %f\n", total);

    total = 0.0;
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            total += dd[row][col];
        }
    }
    printf("sum dd = %f\n", total);

    total = 0.0;
    for (row = 0; row < N; ++row) {
        for (col = 0; col < P; ++col) {
            total += ee[row][col];
        }
    }
    printf("sum ee = %f\n", total);

    return 0;
}
示例#8
0
// Adds the region hr to this set by delegating to add_internal(); no other
// state is touched in this function.
inline void HeapRegionSet::add(HeapRegion* hr) {
  // Assertion hook taking this set; presumably checks that the caller may
  // safely modify the set from the current thread -- TODO confirm against
  // its definition.
  hrl_assert_mt_safety_ok(this);
  // add_internal() will verify the region.
  add_internal(hr);
}