示例#1
0
/* Issues a prefetch hint on p once for every combination of the second
   argument (0 or 1) and the third argument (0 through 3).  All arguments
   after the address are literal constants.  */
void foo (char *p)
{
  /* Second argument 0, third argument 0..3.  */
  __builtin_prefetch (p, 0, 0);
  __builtin_prefetch (p, 0, 1);
  __builtin_prefetch (p, 0, 2);
  __builtin_prefetch (p, 0, 3);
  /* Second argument 1, third argument 0..3.  */
  __builtin_prefetch (p, 1, 0);
  __builtin_prefetch (p, 1, 1);
  __builtin_prefetch (p, 1, 2);
  __builtin_prefetch (p, 1, 3);
}
示例#2
0
// Steps a map iteration forward by one item. The list-get-next operation is a macro that calls
// this function for maps and simply follows Node->Next for plain lists.
//
// Returns the item that follows CurrItem, or NULL when CurrItem is NULL or nothing follows it.
ListNode *MapGetNext(ListNode *CurrItem)
{
    ListNode *Bucket;

    if (CurrItem == NULL) return(NULL);

    if (CurrItem->Next != NULL)
    {
        ListNode *Ahead = CurrItem->Next->Next;

        if (Ahead != NULL)
        {
            // Maps keep a separate chain per bucket, so the chance of walking the same chain twice
            // in a row is low. The item two steps ahead (and its Tag) is therefore requested with
            // locality 0: wanted soon, but with no need to keep it cached afterwards.
            //
            // This reasoning DOES NOT hold for plain lists: a list has a single chain that is
            // walked again on every search, so this hint would work against it. That is why only
            // the map path comes through here.
            __builtin_prefetch (Ahead, 0, 0);
            if (Ahead->Tag) __builtin_prefetch (Ahead->Tag, 0, 0);
        }

        return(CurrItem->Next);
    }

    // A map head node: continue from the node stored in its Item field.
    if (CurrItem->Flags & LIST_FLAG_MAP_HEAD)
    {
        CurrItem=(ListNode *) CurrItem->Item;
        if (CurrItem->Next) return(CurrItem->Next);
    }

    // Bucket points at a BUCKET HEADER. Every bucket header except the last carries
    // LIST_FLAG_MAP_CHAIN, which is how the end of the bucket array is recognised.
    for (Bucket=ListGetHead(CurrItem); Bucket->Flags & LIST_FLAG_MAP_CHAIN; )
    {
        Bucket++;
        if (Bucket->Next) return(Bucket->Next);
    }

    return(NULL);
}
示例#3
0
// Exercises argument checking of __builtin_prefetch. The first three calls carry
// no diagnostic annotation; each remaining call is annotated on its own line with
// the diagnostic the compiler is expected to report for it (wrong argument count,
// non-constant argument, or a constant outside the accepted range).
int foo() {
  int a;
  __builtin_prefetch(&a);
  __builtin_prefetch(&a, 1);
  __builtin_prefetch(&a, 1, 2);
  __builtin_prefetch(&a, 1, 9, 3); // expected-error{{too many arguments to function}}
  __builtin_prefetch(&a, "hello", 2); // expected-error{{argument to __builtin_prefetch must be a constant integer}}
  __builtin_prefetch(&a, 2); // expected-error{{argument should be a value from 0 to 1}}
  __builtin_prefetch(&a, 0, 4); // expected-error{{argument should be a value from 0 to 3}}
  __builtin_prefetch(&a, -1, 4); // expected-error{{argument should be a value from 0 to 1}}
}
示例#4
0
文件: dmx.c 项目: vanvught/rpidmx512
/**
 * Copies length bytes from data into dmx_data[0].data and then records the
 * new length through dmx_set_send_data_length().
 *
 * Before touching the buffer it spins, issuing dmb() on every pass, until
 * dmx_send_state is either IDLE or DMXINTER.
 *
 * NOTE(review): length is not checked against the capacity of
 * dmx_data[0].data in this function - confirm that callers bound it.
 */
void dmx_set_send_data(const uint8_t *data, uint16_t length) {
	for (;;) {
		dmb();
		if (dmx_send_state == IDLE || dmx_send_state == DMXINTER) {
			break;
		}
	}

	__builtin_prefetch(data);
	memcpy(dmx_data[0].data, data, (size_t)length);

	dmx_set_send_data_length(length);
}
示例#5
0
文件: prefetch.hpp 项目: KWMalik/nt2
  // Issues a prefetch hint for pointer when BOOST_SIMD_ARCH_X86 is defined:
  //  - with __GNUC__, through __builtin_prefetch with both trailing arguments 0;
  //  - otherwise, when BOOST_SIMD_HAS_SSE_SUPPORT is defined, through _mm_prefetch
  //    with the hint Strategy.
  // In every other configuration the body is empty.
  void prefetch(void const* pointer)
  {
#if defined(BOOST_SIMD_ARCH_X86) && defined(__GNUC__)
    __builtin_prefetch(pointer, 0, 0);
#elif defined(BOOST_SIMD_ARCH_X86) && defined(BOOST_SIMD_HAS_SSE_SUPPORT)
    _mm_prefetch(static_cast<char const *>(pointer), Strategy);
#endif
  }
示例#6
0
/* Issues a prefetch hint on p for every pairing of {read, write} with
   {none, low, moderate, high}.  Those identifiers are declared outside this
   function (presumably as enumerators, going by the function name - confirm
   against the enclosing file).  */
void
good_enum (const int *p)
{
    /* Second argument: read.  */
    __builtin_prefetch (p, read, none);
    __builtin_prefetch (p, read, low);
    __builtin_prefetch (p, read, moderate);
    __builtin_prefetch (p, read, high);
    /* Second argument: write.  */
    __builtin_prefetch (p, write, none);
    __builtin_prefetch (p, write, low);
    __builtin_prefetch (p, write, moderate);
    __builtin_prefetch (p, write, high);
}
示例#7
0
/* Issues a prefetch hint on p once for every combination of the second
   argument (0 or 1) and the third argument (0 through 3), all written as
   integer literals.  */
void
good (int *p)
{
  /* Second argument 0, third argument 0..3.  */
  __builtin_prefetch (p, 0, 0);
  __builtin_prefetch (p, 0, 1);
  __builtin_prefetch (p, 0, 2);
  __builtin_prefetch (p, 0, 3);
  /* Second argument 1, third argument 0..3.  */
  __builtin_prefetch (p, 1, 0);
  __builtin_prefetch (p, 1, 1);
  __builtin_prefetch (p, 1, 2);
  __builtin_prefetch (p, 1, 3);
}
示例#8
0
/* Issues prefetch hints on a pointer-to-const, mixing integer literals with
   the named constants READ_ACCESS, WRITE_ACCESS and *_TEMPORAL_LOCALITY in
   the second and third argument positions.  The named constants are defined
   outside this function.  */
void
good_const (const int *p)
{
  /* Literals only.  */
  __builtin_prefetch (p, 0, 0);
  __builtin_prefetch (p, 0, 1);
  __builtin_prefetch (p, 0, 2);
  /* Named constant as the second argument.  */
  __builtin_prefetch (p, READ_ACCESS, 3);
  /* Named constant as the third argument.  */
  __builtin_prefetch (p, 1, NO_TEMPORAL_LOCALITY);
  __builtin_prefetch (p, 1, LOW_TEMPORAL_LOCALITY);
  __builtin_prefetch (p, 1, MODERATE_TEMPORAL_LOCALITY);
  /* Named constants in both positions.  */
  __builtin_prefetch (p, WRITE_ACCESS, HIGH_TEMPORAL_LOCALITY);
}
示例#9
0
// Transforms num packed (x, y, z) vertices read from v_arr by this matrix and submits
// each result to OpenGL.
//
//  - With GLMATRIX_USE_SSE: the four columns are loaded from m, m+4, m+8 and m+12, each
//    vertex is expanded to x*col0 + y*col1 + z*col2 + col3, and all four components are
//    handed to glVertex4fv (no division by the fourth component is done here).
//  - Otherwise: the same sums are formed from m0..m15, the first three components are
//    scaled by the reciprocal of the fourth, and the result is handed to glVertex3fv.
//
// The `register` storage class used previously has been dropped: it is deprecated since
// C++11 and no longer valid in C++17, and it has no effect on the generated results.
void GLMatrix<GLfloat>::glVertex3v(int num, const GLfloat* v_arr)
{
#ifdef GLMATRIX_USE_SSE
    __builtin_prefetch(v_arr);

    sse_vector r0,r1,r2;
    sse_v4sf m_col0,m_col1,m_col2,m_col3;

    m_col0 = __builtin_ia32_loadaps(m);
    m_col1 = __builtin_ia32_loadaps(m+4);
    m_col2 = __builtin_ia32_loadaps(m+8);
    m_col3 = __builtin_ia32_loadaps(m+12);

    for(int k = 0; k < num; ++k)
    {
        //load x,y,z
        r0.v4sf = __builtin_ia32_loadss(v_arr);
        r1.v4sf = __builtin_ia32_loadss(v_arr+1);
        r2.v4sf = __builtin_ia32_loadss(v_arr+2);
        //broadcast each scalar into all 4 lanes
        r0.v4sf = __builtin_ia32_shufps(r0.v4sf,r0.v4sf,0x00);
        r1.v4sf = __builtin_ia32_shufps(r1.v4sf,r1.v4sf,0x00);
        r2.v4sf = __builtin_ia32_shufps(r2.v4sf,r2.v4sf,0x00);

        //multiply by the columns; the pointer bump is interleaved with the arithmetic
        r0.v4sf = __builtin_ia32_mulps(r0.v4sf,m_col0);
        v_arr+=3;
        r1.v4sf = __builtin_ia32_mulps(r1.v4sf,m_col1);
        //sum the three products and the fourth column
        r2.v4sf = __builtin_ia32_mulps(r2.v4sf,m_col2);
        r0.v4sf = __builtin_ia32_addps(r0.v4sf,r1.v4sf);
        r2.v4sf = __builtin_ia32_addps(r2.v4sf,m_col3);
        r0.v4sf = __builtin_ia32_addps(r0.v4sf,r2.v4sf);

        ::glVertex4fv(r0.f);
    }

#else
    GLfloat ret[3];
    for(int k = 0; k < num; ++k)
    {
        // Read the vertex once instead of re-indexing v_arr in every expression.
        const GLfloat x = v_arr[k*3];
        const GLfloat y = v_arr[1+k*3];
        const GLfloat z = v_arr[2+k*3];

        ret[0] = x*m0 + y*m4 + z*m8 + m12;
        ret[1] = x*m1 + y*m5 + z*m9 + m13;
        ret[2] = x*m2 + y*m6 + z*m10 + m14;

        // Reciprocal of the fourth component, applied to the other three.
        const GLfloat recip = 1/(x*m3 + y*m7 + z*m11 + m15);

        ret[0] *= recip;
        ret[1] *= recip;
        ret[2] *= recip;
        ::glVertex3fv(ret);
    }
#endif
}
示例#10
0
/* Stores the next element of the iteration in *addr.
   Returns 1 when an element was stored.
   Returns 0, with *addr set to NULL, when the endpoint is reached. */
int iter_next (iterator_t *itr, void **addr)
{
    assert (itr);

    /* Fast path: the current chunk still has unread slots. */
    if (itr->current_index < itr->val->next_insert_pos)
    {
        *addr = itr->val->array[itr->current_index++];
        return 1;
    }

    /* The current chunk is used up: pick the chunk to continue with. */
    if (itr->val->next_val)
    {
        /* Next chunk on the same list. */
        itr->val = itr->val->next_val;
    }
    else if (itr->current_list + 1 < itr->next_insert_pos)
    {
        /* First chunk of the next list. */
        itr->current_list += 1;
        itr->val = itr->list_array[itr->current_list]->vals;
    }
    else
    {
        /* No chunk and no list left. */
        *addr = NULL;
        return 0;
    }

    itr->current_index = 0;

    /* Hint at the chunk that follows the one just selected, if any. */
    if (itr->val->next_val)
    {
        __builtin_prefetch (itr->val->next_val->array, 0, 0);
    }

    *addr = itr->val->array[itr->current_index++];

    return 1;
}
示例#11
0
// Issues a prefetch hint for the byte located offset bytes past ptr
// (offset defaults to 32*10).
//  - __GNUC__:                    uses __builtin_prefetch
//  - _MSC_VER with CAROTENE_NEON: uses __prefetch
//  - anything else:               does nothing
inline void prefetch(const void *ptr, size_t offset = 32*10)
{
#if defined __GNUC__
    const char *target = static_cast<const char*>(ptr) + offset;
    __builtin_prefetch(target);
#elif defined _MSC_VER && defined CAROTENE_NEON
    const char *target = static_cast<const char*>(ptr) + offset;
    __prefetch(target);
#else
    (void)ptr;
    (void)offset;
#endif
}
/*
 * Computes y = alpha*A*x + beta*y for a symmetric matrix A, reading only the
 * elements on and above the diagonal.
 *
 * Limited implementation - the following are asserted:
 *   Order == CblasRowMajor, Uplo == CblasUpper, N == lda.
 *
 * Order, Uplo  storage order / referenced triangle (see above).
 * N            dimension of A and number of elements in X and Y.
 * alpha, beta  scalar factors.
 * A            matrix data; row i starts at A + i*lda.
 * lda          row stride of A.
 * X, incX      input vector; element i is X[i*incX].
 * Y, incY      in/out vector; element i is Y[i*incY].
 *
 * Strides are applied forward from element 0; negative incX/incY are not
 * given any special meaning here.
 *
 * Fix: X used to be walked with a pointer that was advanced by one element
 * after the diagonal term instead of by incX, so every off-diagonal read of
 * X was wrong whenever incX != 1. All vector accesses are now indexed.
 */
void mypp_dsymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
	      const int N, const double alpha, const double *A,
	      const int lda, const double *X, const int incX,
	      const double beta, double *Y, const int incY)
{
	// limited implementation
	assert(Order==CblasRowMajor);
	assert(Uplo==CblasUpper);
	assert(N==lda);

	__builtin_prefetch (Y, 1, 3);	// Y is read and written
	__builtin_prefetch (X, 0, 3);	// X is only read

	int i,j;
	const double *row = A;

	// y = beta*y
	for(i=0;i<N;i++)
		Y[i*incY] = beta * Y[i*incY];

	// One pass per row of the upper triangle. Element a(i,j), j > i, contributes
	// to y(i) through x(j) and, by symmetry, to y(j) through x(i).
	for(i=0;i<N;i++)
	{
		const double xi = X[i*incX];
		double temp = 0.0;

		// diagonal term
		Y[i*incY] += alpha * row[i] * xi;

		for(j=i+1;j<N;j++)
		{
			const double aij = alpha * row[j];
			temp += aij * X[j*incX];
			Y[j*incY] += aij * xi;
		}
		Y[i*incY] += temp;

		// Skip the stride on the last row so row never points past the matrix.
		if (i+1 < N)
			row += lda;
	}
}
示例#13
0
// Looks key up in ht, inserting it when it is absent.
//
// Up to REHASH_LIMIT buckets are tried; attempt number i uses
// binary_kmer_hash(key, ht->seed+i) & ht->hash_mask as the bucket index.
// On return *found is true when the key was already present and false when it
// was inserted by this call. The return value is the offset of the key's slot
// from ht->table. When no attempt succeeds, rehash_error_exit(ht) is called.
//
// With HASH_PREFETCH defined, the bucket for the first attempt is prefetched
// up front, and the bucket for the following attempt is hashed and prefetched
// whenever the current bucket's HT_BSIZE entry equals ht->bucket_size.
hkey_t hash_table_find_or_insert(HashTable *ht, const BinaryKmer key,
                                 bool *found)
{
  size_t attempt;

  #ifdef HASH_PREFETCH
    uint_fast32_t upcoming = binary_kmer_hash(key,ht->seed+0) & ht->hash_mask;
    __builtin_prefetch(ht_bckt_ptr(ht, upcoming), 0, 1);
  #endif

  for(attempt = 0; attempt < REHASH_LIMIT; attempt++)
  {
    uint_fast32_t bucket;

    #ifdef HASH_PREFETCH
      bucket = upcoming;
      if(ht->buckets[bucket][HT_BSIZE] == ht->bucket_size) {
        upcoming = binary_kmer_hash(key,ht->seed+attempt+1) & ht->hash_mask;
        __builtin_prefetch(ht_bckt_ptr(ht, upcoming), 0, 1);
      }
    #else
      bucket = binary_kmer_hash(key,ht->seed+attempt) & ht->hash_mask;
    #endif

    const BinaryKmer *slot = hash_table_find_in_bucket_mt(ht, bucket, key);

    // Already present in this bucket.
    if(slot != NULL) {
      *found = true;
      return (hkey_t)(slot - ht->table);
    }

    // Absent, and this bucket still has room: insert here.
    if(ht->buckets[bucket][HT_BITEMS] < ht->bucket_size) {
      *found = false;
      slot = hash_table_insert_in_bucket(ht, bucket, key);
      ht->collisions[attempt]++; // only increment collisions when inserting
      ht->num_kmers++;
      return (hkey_t)(slot - ht->table);
    }
  }

  rehash_error_exit(ht);
}
示例#14
0
            void prefetch() const
            {
#if defined(__x86_64__)
                HPX_ASSERT(sizeof(void*) == 8);
#else
                HPX_ASSERT(sizeof(void*) == 4);
#endif

                __builtin_prefetch(m_sp, 1, 3);
                __builtin_prefetch(m_sp, 0, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) + 64 / sizeof(void*), 1, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) + 64 / sizeof(void*), 0, 3);
#if !defined(__x86_64__)
                __builtin_prefetch(static_cast<void**>(m_sp) + 32 / sizeof(void*), 1, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) + 32 / sizeof(void*), 0, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) - 32 / sizeof(void*), 1, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) - 32 / sizeof(void*), 0, 3);
#endif
                __builtin_prefetch(static_cast<void**>(m_sp) - 64 / sizeof(void*), 1, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) - 64 / sizeof(void*), 0, 3);
            }
// Tries to pull the container's word array into cache by issuing one prefetch
// hint per cache line across its BITSET_CONTAINER_SIZE_IN_WORDS 64-bit words.
//
// The cache-line size comes from computecacheline() when IS_X64 is defined and
// is taken to be 64 bytes otherwise.
//
// The loop is compiled out for MSVC without clang, which does not provide
// __builtin_prefetch; array_cache_prefetch() uses the same guard.
void bitset_cache_prefetch(bitset_container_t* B) {
#ifdef IS_X64
    const int32_t CACHELINESIZE =
        computecacheline();  // 64 bytes per cache line
#else
    const int32_t CACHELINESIZE = 64;
#endif
#if !(defined(_MSC_VER) && !defined(__clang__))
    for (int32_t k = 0; k < BITSET_CONTAINER_SIZE_IN_WORDS;
         k += CACHELINESIZE / (int32_t)sizeof(uint64_t)) {
        __builtin_prefetch(B->array + k);
    }
#endif
}
示例#16
0
/**
 * trie_lookup:
 * @trie: A #Trie.
 * @key: The key to look up.
 *
 * Walks @trie from its root, following one node per character of @key, and
 * returns the value stored on the node reached when @key is exhausted.
 * Both arguments are prefetched before they are validated.
 *
 * Returns: (transfer none): The value of the node reached, or %NULL when
 *   @trie or @key is %NULL or the walk runs out of nodes.
 */
gpointer
trie_lookup (Trie        *trie,
             const gchar *key)
{
   const gchar *cursor;
   TrieNode *node;

   __builtin_prefetch(trie);
   __builtin_prefetch(key);

   g_return_val_if_fail(trie, NULL);
   g_return_val_if_fail(key, NULL);

   node = trie->root;

   for (cursor = key; *cursor && node; cursor++) {
      node = trie_find_node(trie, node, *cursor);
   }

   if (!node) {
      return NULL;
   }

   return node->value;
}
示例#17
0
// Calls a series of compiler builtins (byte swap, constant_p, string functions,
// expect, offsetof, ctzl, prefetch) and stores most results in a or b.
// struct point and varargsfn are declared outside this function.
// Note: a is passed to __builtin_bswap32 before it has been assigned a value.
int main(int argc, char **argv) {
  int a;
  a = __builtin_bswap32(a);
  a = __builtin_bswap64(a);
  a = __builtin_constant_p(1);
  a = __builtin_constant_p("string");
  char *b = __builtin_strchr("string", 's');
  a = __builtin_expect(1, a);
  a = __builtin_strlen("string");
  a = __builtin_strcmp("string1", "string2");
  a = __builtin_offsetof(struct point, y);
  char c[100];
  b = __builtin_strcpy(c, "a");
  b = __builtin_strncpy(c, "a", 1);
  a = __builtin_ctzl(a);
  varargsfn(0);
  // __builtin_prefetch with one, two and three arguments.
  __builtin_prefetch(b);
  __builtin_prefetch(b, 1);
  __builtin_prefetch(b, 1, 1);
  return a;
}
示例#18
0
文件: dmx.c 项目: vanvught/rpidmx512
/**
 * Writes DMX512_START_CODE into dmx_data[0].data[0], copies length bytes from
 * data into the bytes that follow it, and records a length of length + 1
 * through dmx_set_send_data_length().
 *
 * Before touching the buffer it spins, issuing dmb() on every pass, until
 * dmx_send_state is either IDLE or DMXINTER.
 *
 * NOTE(review): length is not checked against the capacity of
 * dmx_data[0].data in this function - confirm that callers bound it.
 */
void dmx_set_send_data_without_sc(const uint8_t *data, uint16_t length) {
	for (;;) {
		dmb();
		if (dmx_send_state == IDLE || dmx_send_state == DMXINTER) {
			break;
		}
	}

	dmx_data[0].data[0] = DMX512_START_CODE;

	__builtin_prefetch(data);
	memcpy(&dmx_data[0].data[1], data, (size_t) length);

	dmx_set_send_data_length(length + 1);
}
示例#19
0
文件: chain.hpp 项目: bnels/ctl
	// Advances f1 and f2 in lockstep while the elements they point at compare
	// equivalent under c, subtracting 2 from n for every pair stepped over.
	// At the first pair where one element orders before the other, a prefetch
	// hint is issued for the last entry (rbegin) of
	// d.cascade_boundary_map[cell] for the cell of the smaller element, and the
	// walk stops with f1 and f2 left on that pair. The walk also stops when
	// either range is exhausted. The tag argument t is not referenced.
	inline void
	mismatch_prefetch( Iterator& f1, Iterator& e1,
			   Iterator& f2, Iterator& e2,
			   Compare & c,
			   Persistence_data & d,
			   std::size_t& n, ctl::detail::term_z2_tag t) const {
		for( ; f1 != e1 && f2 != e2; ++f1, ++f2, n -= 2){
			if( c( *f1, *f2)){
				// *f1 orders first: hint at the tail of its cell's chain.
				const auto tail = d.cascade_boundary_map[ f1->cell()].rbegin();
				__builtin_prefetch( std::addressof( *tail));
				break;
			}
			if( c( *f2, *f1)){
				// *f2 orders first: hint at the tail of its cell's chain.
				const auto tail = d.cascade_boundary_map[ f2->cell()].rbegin();
				__builtin_prefetch( std::addressof( *tail));
				break;
			}
		}
	}
示例#20
0
文件: fp.c 项目: 13Homer/TurboPFor
// Decodes n 64-bit values from the byte stream in into out and returns the input
// position reached after the last p4dec64() call.
// Residuals are unpacked by p4dec64() into the scratch buffer _p; each output value is
// then rebuilt by DEC64() from its residual and the prediction held in htab[h], after
// which htab[h] and the running hash h are updated from the decoded value.
// The start parameter is not referenced in this function.
unsigned char *fpfcmdec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t start) {
  uint64_t      *op, htab[1<<HBITS] = {0}, h = 0, _p[VSIZE+32],*p; 
  unsigned char *ip = in;

  // FD64(i): decode element i of the current group, store it, then update htab[h] and h.
  #define FD64(i) { uint64_t u = DEC64(p[i], htab[h]); op[i] = u; htab[h] = u; h = HASH64(h,u); }
  // Full blocks of VSIZE values, reconstructed four at a time. Each pass first hints at
  // the input 512 bytes past the current cursor.
  // NOTE(review): n&~(VSIZE-1) rounds n down to a multiple of VSIZE only if VSIZE is a power of two - confirm.
  for(op = (uint64_t*)out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(ip+512, 0);
	for(ip = p4dec64(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); }
  }
  // Tail: n is reassigned to the number of values still missing; when non-zero they are
  // unpacked in one call and reconstructed one at a time.
  if(n = ((uint64_t *)out+n) - op)
	for(ip = p4dec64(ip, n, _p), p = _p; p != &_p[n]; p++,op++) FD64(0);
  return ip;
}
示例#21
0
文件: fp.c 项目: 13Homer/TurboPFor
// Decodes n 64-bit values from the byte stream in into out and returns the input
// position reached after the last p4dec64() call.
// Here the table holds differences: each value is rebuilt by DEC64() from its residual
// and the prediction htab[h]+start, where start carries the previously decoded value
// (initially the start argument). htab[0] is seeded with start before decoding begins.
unsigned char *fpdfcmdec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t start) {
  unsigned char *ip = in;
  uint64_t      _p[VSIZE+32], *op, h = 0, *p, htab[1<<HBITS] = {0}; htab[0] = start;

  // DD64(i): decode element i against htab[h]+start, store it, record the difference
  // u-start in htab[h], hash that difference into h, then make u the new start.
  #define DD64(i) { uint64_t u = DEC64(p[i], (htab[h]+start)); op[i] = u; htab[h] = start = u-start; h = HASH64(h,start); start = u; }
  // Full blocks of VSIZE values, reconstructed four at a time. Each pass first hints at
  // the input 512 bytes past the current cursor.
  // NOTE(review): n&~(VSIZE-1) rounds n down to a multiple of VSIZE only if VSIZE is a power of two - confirm.
  for(op = (uint64_t*)out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(ip+512, 0);
	for(ip = p4dec64(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { DD64(0); DD64(1); DD64(2); DD64(3); }
  }
  // Tail: n is reassigned to the number of values still missing; when non-zero they are
  // unpacked in one call and reconstructed one at a time.
  if(n = ((uint64_t *)out+n) - op)
	for(ip = p4dec64(ip, n, _p), p = _p; p != &_p[n]; p++,op++) DD64(0);
  return ip;
}
示例#22
0
// Returns the rating of board.
//
// When board.safe is set, the rating cached in board.rating is returned directly.
// Otherwise the rating is computed, stored back into board.rating with board.safe
// set, and returned:
//  - a won board rates RATING_P1_WON / RATING_P2_WON depending on the two bits
//    selected by wonState & 0x6, or 0 when neither pattern matches;
//  - any other board rates chance_bonus times the difference in open chances
//    (each player's chancesTable entry minus the squares the opponent holds),
//    plus player one's singleRatingTable entry, minus player two's, limited to
//    the range [-minmaxscore, minmaxscore].
//
// NOTE(review): the function is declared gnu::pure yet writes board.safe and
// board.rating; confirm that no caller relies on observing those writes.
[[gnu::hot, gnu::pure]]
Rating rate(const TicTacBoard& board){
	if(__builtin_expect(board.safe, true))
		return board.rating;

	if(board.isWon()){
		const auto winnerBits = board.wonState & 0x6;
		if(winnerBits == 0x2) return Ratings::RATING_P1_WON;
		if(winnerBits == 0x4) return Ratings::RATING_P2_WON;
		return 0;
	}

	const FieldBits p1 = board.setPlayerOne.bitsUsed;
	const FieldBits p2 = board.setPlayerTwo.bitsUsed;

	// Request both rating-table entries early; they are read further down.
	__builtin_prefetch(singleRatingTable+p1);
	__builtin_prefetch(singleRatingTable+p2);

	const FieldBits openP1 = chancesTable[p1];
	const FieldBits openP2 = chancesTable[p2];
	signed chancesDiff = __builtin_popcount(openP1 & ~p2) - __builtin_popcount(openP2 & ~p1);

	Rating score = 0;
	score += chancesDiff * chance_bonus;
	score += singleRatingTable[p1];
	score -= singleRatingTable[p2];

	score = std::max(-minmaxscore, std::min(minmaxscore, score));

	// Cache the result on the board for subsequent calls.
	board.safe = true;
	board.rating = score;
	return score;
}
示例#23
0
文件: goto.c 项目: carriercomm/fastpp
/**
 * Thread entry point; ptr points to the thread's int id. Runs forever.
 *
 * Each round first draws BATCH_SIZE node ids (fastrand() is called COMPUTE
 * times per slot, the last draw wins), derives the lock id of each from its
 * node id and issues a prefetch hint for that lock. It then visits the slots
 * in order: takes the slot's spin lock, increments fields a and b of the
 * slot's node, and releases the lock.
 *
 * Once num_iters reaches ITERS_PER_MEASUREMENT the achieved rate is printed
 * and the measurement window restarts.
 */
void *reader( void *ptr)
{
	struct timespec start, end;
	const int tid = *((int *) ptr);
	uint64_t seed = 0xdeadbeef + tid;
	int sum = 0;

	/** < Node and lock chosen for every slot of the current batch */
	int node_id[BATCH_SIZE], lock_id[BATCH_SIZE];
	int slot, draw;

	/** < Iterations completed in the current measurement window */
	int num_iters = 0;

	clock_gettime(CLOCK_REALTIME, &start);

	for(;;) {
		/** < Report and reset once a full window has elapsed */
		if(num_iters >= ITERS_PER_MEASUREMENT) {
			double seconds;

			clock_gettime(CLOCK_REALTIME, &end);
			seconds = (end.tv_sec - start.tv_sec) +
				(double) (end.tv_nsec - start.tv_nsec) / GHZ_CPS;

			printf("Reader thread %d: rate = %.2f M/s. Sum = %d\n", tid,
				num_iters / (1000000 * seconds), sum);

			num_iters = 0;
			clock_gettime(CLOCK_REALTIME, &start);
		}

		/** < Stage 1: choose targets and hint at their locks */
		for(slot = 0; slot < BATCH_SIZE; slot ++) {
			for(draw = 0; draw < COMPUTE; draw ++) {
				node_id[slot] = fastrand(&seed) & NUM_NODES_;
			}
			lock_id[slot] = node_id[slot] & NUM_LOCKS_;
			__builtin_prefetch(&locks[lock_id[slot]], 0, 0);
		}

		/** < Stage 2: do the locked updates */
		for(slot = 0; slot < BATCH_SIZE; slot ++) {
			pthread_spin_lock(&locks[lock_id[slot]].lock);

			/** < Critical section begin */
			nodes[node_id[slot]].a ++;
			nodes[node_id[slot]].b ++;
			/** < Critical section end */

			pthread_spin_unlock(&locks[lock_id[slot]].lock);

			num_iters ++;
		}
	}
}
示例#24
0
/* Walks the hardware receive descriptor ring starting at index.
 * Every descriptor whose status has bit 0 set is handed to firehose_packet()
 * together with its packet buffer and length, and its status is then cleared
 * so the descriptor can be reused. The buffer of the following slot is
 * prefetched before the current one is processed.
 * Returns the index of the first descriptor that was not ready.
 *
 * Wrap-around is done with (ring_size - 1) as a bit mask.
 */
int firehose_callback_v1(const char *pciaddr,
                         char **packets,
                         struct firehose_rdesc *rxring,
                         int ring_size,
                         int index) {
  const int wrap_mask = ring_size - 1;
  int current = index;

  while (rxring[current].status & 1) {
    const int upcoming = (current + 1) & wrap_mask;

    __builtin_prefetch(packets[upcoming]);
    firehose_packet(pciaddr, packets[current], rxring[current].length);

    rxring[current].status = 0; /* reset descriptor for reuse */
    current = upcoming;
  }

  return current;
}
// Tries to pull the container's array into cache by issuing one prefetch hint
// per cache line across its first B->cardinality 16-bit entries.
//
// The cache-line size comes from computecacheline() when IS_X64 is defined and
// is taken to be 64 bytes otherwise. The loop is compiled out for MSVC without
// clang.
void array_cache_prefetch(array_container_t* B) {
#ifdef IS_X64
    const int32_t line_bytes = computecacheline();
#else
    const int32_t line_bytes = 64;
#endif
#if !(defined(_MSC_VER) && !defined(__clang__))
    int32_t k = 0;
    while (k < B->cardinality) {
        __builtin_prefetch(B->array + k);
        // advance by the number of 16-bit entries that fit in one cache line
        k += line_bytes / (int32_t)sizeof(uint16_t);
    }
#endif
}
示例#26
0
// Sums data[i] * A * B + C - D * E over the array while issuing a regular
// prefetch hint 32 elements ahead of the current position on every pass.
// Each pass handles two consecutive elements and then advances by `unroll`.
static double prefetchSumm(const double * data)
{
    const int lookahead = 32;
    double total = 0;
    int i = 0;

    while (i < ARR_SIZE)
    {
        __builtin_prefetch(data + (i + lookahead), 0, 0);

        total += data[i] * A * B + C - D * E;
        total += data[i + 1] * A * B + C - D * E;

        i += unroll;
    }

    return total;
}
示例#27
0
文件: fp.c 项目: 13Homer/TurboPFor
//---- FCM: Finite Context Method Predictor 
// Encodes n 64-bit values from in into the byte stream out and returns the output
// position reached after the last p4enc64() call.
// Each input value is turned into a residual by ENC64() against the prediction held in
// htab[h]; htab[h] and the running hash h are then updated from the input value. The
// residuals are collected in the scratch buffer _p and packed by p4enc64().
// The start parameter is not referenced in this function.
unsigned char *fpfcmenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t start) {
  uint64_t      *ip, htab[1<<HBITS] = {0}, h = 0, _p[VSIZE], *p;
  unsigned char *op = out;
  
  // FE64(i): residual for element i of the current group, then update htab[h] and h.
  #define FE64(i) { uint64_t u = ip[i]; p[i] = ENC64(u, htab[h]); htab[h] = u; h = HASH64(h,u); } 
  // Full blocks of VSIZE values, predicted four at a time and packed per block.
  // ip is a uint64_t pointer here, so the hint targets 512 elements past the cursor.
  // NOTE(review): n&~(VSIZE-1) rounds n down to a multiple of VSIZE only if VSIZE is a power of two - confirm.
  for(ip = (uint64_t *)in; ip != in + (n&~(VSIZE-1)); ) {   		
    for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE64(0); FE64(1); FE64(2); FE64(3); }  
	op = p4enc64(_p, VSIZE, op); 													__builtin_prefetch(ip+512, 0);
  }   
  // Tail: n is reassigned to the number of values still to encode; when non-zero they
  // are predicted one at a time and packed in one call.
  if(n = ((uint64_t *)in+n)-ip) { 					
    for(p = _p; p != &_p[n]; p++,ip++) FE64(0);
    op = p4enc64(_p, n, op);
  }																	
  return op;
}
示例#28
0
/*
 * Coroutine body: walks this coroutine's share of the global head[] lists and
 * sums their data.
 *
 * arg carries the coroutine index. The TOTAL_LISTS lists are split across
 * CORO_NUM coroutines; the first TOTAL_LISTS % CORO_NUM coroutines take one
 * extra list each. For every node, the first LOCAL_NUM entries of its data
 * are summed REPEAT_TIMES times. With DATA_PREFETCH defined, a prefetch hint
 * for the node's data is issued and yield() is called before the node is read.
 *
 * The sum and the number of visited nodes are added to the globals
 * total_accum and tra_times. The me argument is not referenced.
 */
void tracingTask(Worker *me, void *arg) {
	const intptr_t idx = (intptr_t)arg;
	const int listsPerCoro = TOTAL_LISTS/CORO_NUM;
	const int remainder = TOTAL_LISTS%CORO_NUM;

	/* Half-open range [firstList, endList) of lists owned by this coroutine. */
	const int firstList = idx*listsPerCoro + (idx>=remainder ? remainder : idx);
	const int endList = firstList + listsPerCoro + (idx>=remainder ? 0 : 1);

	int64_t accum = 0;
	int64_t times = 0;

	for (int j = firstList; j < endList; j++) {
		for (List *node = head[j]; node != NULL; node = node->next) {
#ifdef DATA_PREFETCH
			__builtin_prefetch(node->data, PREFETCH_MODE, PREFETCH_LOCALITY);
			yield();
#endif
			for (int rep = 0; rep < REPEAT_TIMES; rep++) {
				for (int k = 0; k < LOCAL_NUM; k++) {
					accum += node->data[k];
				}
			}
			times++;
		}
	}

	total_accum += accum;
	tra_times += times;
}
示例#29
0
/* Calls __builtin_prefetch with an unacceptable second or third argument:
   an out-of-range constant (-1, 2, 4) or the identifier `bogus`, which is
   declared outside this function.  Every call is annotated on its own line
   with the warning the compiler is expected to issue for it.  */
void
bad (int *p)
{
  __builtin_prefetch (p, -1, 0);  /* { dg-warning "invalid second arg to __builtin_prefetch; using zero" } */
  __builtin_prefetch (p, 2, 0);   /* { dg-warning "invalid second arg to __builtin_prefetch; using zero" } */
  __builtin_prefetch (p, bogus, 0);   /* { dg-warning "invalid second arg to __builtin_prefetch; using zero" } */
  __builtin_prefetch (p, 0, -1);  /* { dg-warning "invalid third arg to __builtin_prefetch; using zero" } */
  __builtin_prefetch (p, 0, 4);   /* { dg-warning "invalid third arg to __builtin_prefetch; using zero" } */
  __builtin_prefetch (p, 0, bogus);   /* { dg-warning "invalid third arg to __builtin_prefetch; using zero" } */
}
示例#30
0
/*
 * Issues a prefetch hint at byte offset 14 of the buffer of each of the n
 * packets in pq. Packets whose len is below 14 are skipped.
 *
 * NOTE(review): this was previously described as prefetching "the
 * ethertype"; in an Ethernet II frame the EtherType occupies bytes 12-13 and
 * offset 14 is the first byte after the header - confirm which is intended.
 */
static void
pkt_prefetch_etherhdr(struct pkt *pq, int n)
{
	int idx;

	for (idx = 0; idx < n; idx++) {
		const char *frame = pq[idx].buf;
		const int frame_len = pq[idx].len;

		if (frame_len >= 14) {
			__builtin_prefetch(&frame[14]);
		}
	}
}