コード例 #1
0
ファイル: checksum.c プロジェクト: RyanTaker/tb
/*
 * Compute the checksum of the file `name` and, if the file carries a stored
 * checksum, compare the two.
 *
 * The file is mapped with map_file().  A size that is exactly 16 bytes past a
 * multiple of 64 is treated as "data followed by a 16-byte stored checksum":
 * the trailer is copied into checksum1 and checksum_found is set to 1.  Any
 * other size that is not a multiple of 64 is rejected and the process exits
 * with status 1.
 *
 * The data is split into CHUNK-sized pieces; 32 bytes of result storage are
 * reserved per chunk, the work is dispatched through run_threaded(), and the
 * result buffer is hashed with CityHashCrc128() into checksum2.
 *
 * Outputs are written to file-level variables: checksum_found, checksum1,
 * checksum2 and (only when a stored checksum was found) checksum_match.
 */
static void calc_checksum(char *name)
{
  long64 orig_size;

  /* The work description is allocated once and reused across calls. */
  if (!work) work = alloc_work(total_work);

  data = map_file(name, 0, &size);
  orig_size = size;  /* remember the mapped length; size may be trimmed below */
  if ((size & 0x3f) == 0x10) {
    /* Strip the 16-byte trailer from the data size and keep it for comparison. */
    size &= ~0x3fULL;
    memcpy(checksum1, data + size, 16);
    checksum_found = 1;
  } else {
    if (size & 0x3f) {
      printf("Size of %s is not a multiple of 64.\n", name);
      exit(1);
    }
    checksum_found = 0;
  }

  int chunks = (size + CHUNK - 1) / CHUNK;
  /* Compute the byte count in size_t so the multiplication is not done in int. */
  size_t results_bytes = (size_t)32 * (size_t)chunks;
  results = (uint64 *)malloc(results_bytes);
  /* malloc(0) may legitimately return a null pointer, so only a failed
   * non-empty request is treated as an error. */
  if (!results && chunks > 0) {
    printf("Could not allocate memory for checksum of %s.\n", name);
    exit(1);
  }
  fill_work(total_work, chunks, 0, work);
  /* NOTE(review): checksum_worker presumably fills `results`, one 32-byte
   * entry per chunk -- confirm against its definition. */
  run_threaded(checksum_worker, work, 0);
  CityHashCrc128((char *)results, results_bytes, checksum2);
  unmap_file(data, orig_size);
  free(results);

  if (checksum_found)
    checksum_match = (checksum1[0] == checksum2[0]
			&& checksum1[1] == checksum2[1]);
}
コード例 #2
0
ファイル: proof_of_work.cpp プロジェクト: bitasset/bitshares
/**
 *  This proof-of-work is computationally difficult even for a single hash,
 *  but must be so to prevent optimizations to the required memory footprint.
 *
 *  The maximum level of parallelism achievable per GB of RAM is 8, and the highest
 *  end GPUs now have 4 GB of ram which means they could in theory support 32 
 *  parallel execution of this proof-of-work.     
 *
 *  On GPU's you only tend to get 1 instruction per 4 clock cycles in a single
 *  thread context.   Modern super-scalar CPU's can get more than 1 instruction
 *  per block and CityHash is specifically optimized to take advantage of this.
 *  In addition to getting more done per-cycle, CPU's have close to 4x the clock
 *  frequency.
 *
 *  Based upon these characteristics alone, I estimate that a CPU can execute the
 *  serial portions of this algorithm at least 16x faster than a GPU which means
 *  that an 8-core CPU should easily compete with a 128 core GPU. Fortunately,
 *  a 128 core GPU would require 16 GB of RAM.  Note also, that most GPUs have 
 *  less than 128 'real' cores that are able to handle conditionals. 
 *
 *  Furthermore, GPU's are not well suited for branch misprediction and code
 *  must be optimized to avoid branches as much as possible.  
 *
 *  Lastly this algorithm takes advantage of a hardware instruction that is
 *  unlikely to be included in GPUs (Intel CRC32).  The lack of this hardware
 *  instruction alone is likely to give the CPU an order of magnitude advantage
 *  over the GPUs.
 */
/**
 *  Fills the caller-supplied work buffer from 8 threads, each seeded by one
 *  32-bit word of @p in, then hashes the whole buffer.
 *
 *  @param in           input hash; its storage is read as eight 32-bit seeds.
 *  @param buffer_128m  scratch buffer that is overwritten; MB128 bytes of it
 *                      are written and hashed (NOTE(review): presumably the
 *                      caller must supply at least MB128 bytes - confirm).
 *  @return the fc::sha1 hash of the CityHashCrc128 digest of the filled buffer.
 */
fc::sha1 proof_of_work( const fc::sha256& in, unsigned char* buffer_128m )
{
   // I use 8 threads to reduce latency in the calculation, it may be possible to
   // get higher-throughput by going to a single thread and performing multiple
   // proofs in parallel.   Latency is important in some cases (verification) while
   // throughput is important in mining.  Two variations of this method may eventually
   // need to be created.

   static fc::thread _threads[8]; 
   // View the input hash as an array of 32-bit words; word i seeds thread i.
   // NOTE(review): assumes fc::sha256 stores at least 8 contiguous 32-bit words - confirm.
   uint32_t* _seeds = (uint32_t*)&in;

   fc::future<void> ready[8];
   for( uint32_t i = 0; i < 8; ++i )
   {
     ready[i] = _threads[i].async( [=]()
     {
       boost::random::mt11213b gen( _seeds[i] ); // this generator does not optimize well on GPU, but is fast on CPU
       // Each thread owns one eighth of the buffer, addressed as 32-bit words.
       uint32_t* start = (uint32_t*)(buffer_128m + i * MB128/8);
       uint32_t* end = start + MB128/8/4;
       for( uint32_t* pos = start;  pos < end; ++pos )
       {
          *pos = gen();
          if( *pos % 17 > 15 ) // hardware branch misprediction, foils GPU data-parallel instructions
          {
             // Only look back once more than 32 bytes of this slice have been written.
             if( pos > (start +32/4) )
                 *pos =  CityHashCrc128( (char*)(pos-32/4), 32 ).first; // CRC is fast on Intel
          }
          else if ( *pos > gen() )  // unpredictable branch foils GPU and CPU
          {
             // Hash 32 bytes taken from 256 bytes behind the current position.
             if( (*pos % 2) && pos > (start +256/4) )
                 *pos =  CityHashCrc128( (char*)(pos-256/4), 32 ).first; // CRC is fast on Intel
          }
       }
     });
   }
   // Block until every slice is filled; the lambdas use `in` and the buffer
   // through copied pointers, so they must finish before this function returns.
   for( uint32_t i = 0; i < 8; ++i )
   {
    ready[i].wait();
   }
   // require full 128 MB to complete sequential step
   auto     out  = CityHashCrc128( (char*)buffer_128m, MB128 ); 
   return fc::sha1::hash( (char*)&out, sizeof(out) );
}