コード例 #1
0
ファイル: ta_sparse.cpp プロジェクト: mapleyustat/tiledarray
int main(int argc, char** argv) {
  int rc = 0;

  try {
    // Initialize runtime
    madness::World& world = madness::initialize(argc, argv);

    // Get command line arguments
    if(argc < 2) {
      std::cout << "Usage: ta_sparse matrix_size block_size [repetitions]\n";
      return 0;
    }
    const long matrix_size = atol(argv[1]);
    const long block_size = atol(argv[2]);
    if(matrix_size <= 0) {
      std::cerr << "Error: matrix size must greater than zero.\n";
      return 1;
    }
    if(block_size <= 0) {
      std::cerr << "Error: block size must greater than zero.\n";
      return 1;
    }
    if((matrix_size % block_size) != 0ul) {
      std::cerr << "Error: matrix size must be evenly divisible by block size.\n";
      return 1;
    }
    const long repeat = (argc >= 4 ? atol(argv[3]) : 4);
    if(repeat <= 0) {
      std::cerr << "Error: number of repetitions must greater than zero.\n";
      return 1;
    }

    // Print information about the test
    const std::size_t num_blocks = matrix_size / block_size;
    std::vector<std::vector<double> > gflops;
    std::vector<std::vector<double> > times;

    std::cout << "TiledArray: block-sparse matrix multiply test...\n"
              << "Number of nodes    = " << world.size()
              << "\nMatrix size        = " << matrix_size << "x" << matrix_size
              << "\nBlock size         = " << block_size << "x" << block_size;

    for(unsigned int left_sparsity = 10; left_sparsity <= 100; left_sparsity += 10){
      std::vector<double> inner_gflops;
      std::vector<double> inner_times;
      for(unsigned int right_sparsity = 10; right_sparsity <= left_sparsity; right_sparsity += 10){
        const long l_block_count = (double(left_sparsity) / 100.0) * double(num_blocks * num_blocks);
        const long r_block_count = (double(right_sparsity) / 100.0) * double(num_blocks * num_blocks);
        if(world.rank() == 0)
                    std::cout << "\nMemory per left matrix  = " << double(l_block_count * block_size * block_size * sizeof(double)) / 1.0e9 << " GB"
                    << "\nMemory per right matrix  = " << double(r_block_count * block_size * block_size * sizeof(double)) / 1.0e9 << " GB"
                    << "\nNumber of left blocks   = " << l_block_count
                    << " " << left_sparsity << " percent"
                    << "\nNumber of right blocks   = " << r_block_count
                    << " " << right_sparsity << " percent"
                    << "\nAverage left blocks/node = " << double(l_block_count) / double(world.size())
                    << "\nAverage right blocks/node = " << double(r_block_count) / double(world.size()) << "\n";

        // Construct TiledRange
        std::vector<unsigned int> blocking;
        blocking.reserve(num_blocks + 1);
        for(long i = 0l; i <= matrix_size; i += block_size)
          blocking.push_back(i);

        std::vector<TiledArray::TiledRange1> blocking2(2,
            TiledArray::TiledRange1(blocking.begin(), blocking.end()));

        TiledArray::TiledRange
          trange(blocking2.begin(), blocking2.end());

        // Construct shape
        TiledArray::Tensor<float>
            a_shape_tensor(trange.tiles(), 0.0),
            b_shape_tensor(trange.tiles(), 0.0),
            c_shape_tensor(trange.tiles(), 0.0);
        if(world.rank() == 0) {
          world.srand(time(NULL));
          const long l_process_block_count = l_block_count / world.size() +
              (world.rank() < (l_block_count / world.size()) ? 1 : 0);
          const long r_process_block_count = r_block_count / world.size() +
              (world.rank() < (r_block_count / world.size()) ? 1 : 0);
          for(long i = 0; i < l_process_block_count; ++i)
            a_shape_tensor.data()[world.rand() % trange.tiles().volume()] = 1.0;

          for(long i = 0; i < r_process_block_count; ++i)
            b_shape_tensor.data()[world.rand() % trange.tiles().volume()] = 1.0;

        }
        TiledArray::SparseShape<float>
            a_shape(world, a_shape_tensor, trange),
            b_shape(world, b_shape_tensor, trange),
            c_shape(world, c_shape_tensor, trange);


        typedef TiledArray::Array<double, 2, TiledArray::Tensor<double>,
            TiledArray::SparsePolicy > SpTArray2;

        // Construct and initialize arrays
        SpTArray2 a(world, trange, a_shape);
        SpTArray2 b(world, trange, b_shape);
        SpTArray2 c(world, trange, c_shape);
        a.set_all_local(1.0);
        b.set_all_local(1.0);

        // Start clock
        world.gop.fence();
        const double wall_time_start = madness::wall_time();

        // Do matrix multiplication
        for(int i = 0; i < repeat; ++i) {
          c("m,n") = a("m,k") * b("k,n");
          world.gop.fence();
          if(world.rank() == 0)
            std::cout << "Iteration " << i + 1 << "\n";
        }

        // Stop clock
        const double wall_time_stop = madness::wall_time();
        const long flop = 2.0 * c("m,n").sum();
        inner_gflops.push_back(double(repeat) * double(flop) / (wall_time_stop - wall_time_start) / 1.0e9);
        inner_times.push_back((wall_time_stop - wall_time_start)/double(repeat));

        // Print results
        if(world.rank() == 0) {
          std::cout << "Average wall time = " << (wall_time_stop - wall_time_start) / double(repeat)
              << "\nAverage GFLOPS = " << double(repeat) * double(flop) / (wall_time_stop - wall_time_start) / 1.0e9 << "\n";
        }
      }
      gflops.push_back(inner_gflops);
      times.push_back(inner_times);
      world.gop.fence();
    }
    if(world.rank() == 0){
      for(unsigned int i = 0; i < gflops.size(); ++i){
        if(i == 0){
          std::cout << std::defaultfloat;
          std::cout << "   ";
          for(unsigned int j = 10; j <= 100; j+=10){
            std::cout << "        " << j;
          }
          std::cout << std::endl;
        }
        for(unsigned int j = 0; j < gflops[i].size(); ++j){
          if(j == 0){
            std::cout << std::defaultfloat;
            int num = (i+1) * 10;
            if(num < 100){
              std::cout << num << " |";
            } else { std::cout << num << "|"; }
          }

          std::cout << std::setprecision(3) << std::scientific;
          std::cout << double(gflops[i][j]) << " ";
        }
        std::cout << std::endl;
      }
    }

    if(world.rank() == 0){
      for(unsigned int i = 0; i < times.size(); ++i){
        if(i == 0){
          std::cout << std::defaultfloat;
          std::cout << "   ";
          for(unsigned int j = 10; j <= 100; j+=10){
            std::cout << "        " << j;
          }
          std::cout << std::endl;
        }
        for(unsigned int j = 0; j < times[i].size(); ++j){
          if(j == 0){
            std::cout << std::defaultfloat;
            int num = (i+1) * 10;
            if(num < 100){
              std::cout << num << " |";
            } else { std::cout << num << "|"; }
          }

          std::cout << std::setprecision(3) << std::scientific;
          std::cout << double(times[i][j]) << " ";
        }
        std::cout << std::endl;
      }
    }

    madness::finalize();

  } catch(TiledArray::Exception& e) {
    std::cerr << "!!ERROR TiledArray: " << e.what() << "\n";
    rc = 1;
  } catch(madness::MadnessException& e) {
    std::cerr << "!!ERROR MADNESS: " << e.what() << "\n";
    rc = 1;
  } catch(SafeMPI::Exception& e) {
    std::cerr << "!!ERROR SafeMPI: " << e.what() << "\n";
    rc = 1;
  } catch(std::exception& e) {
    std::cerr << "!!ERROR std: " << e.what() << "\n";
    rc = 1;
  } catch(...) {
    std::cerr << "!!ERROR: unknown exception\n";
    rc = 1;
  }


  return rc;
}
コード例 #2
0
ファイル: ta_sparse.cpp プロジェクト: evaleev/tiledarray
int main(int argc, char** argv) {
  int rc = 0;

  try {
    // Initialize runtime
    TiledArray::World& world = TiledArray::initialize(argc, argv);

    // Get command line arguments
    if(argc < 2) {
      std::cout << "Usage: " << argv[0] << " matrix_size block_size [repetitions]\n";
      return 0;
    }
    const long matrix_size = atol(argv[1]);
    const long block_size = atol(argv[2]);
    if(matrix_size <= 0) {
      std::cerr << "Error: matrix size must be greater than zero.\n";
      return 1;
    }
    if(block_size <= 0) {
      std::cerr << "Error: block size must be greater than zero.\n";
      return 1;
    }
    if((matrix_size % block_size) != 0ul) {
      std::cerr << "Error: matrix size must be evenly divisible by block size.\n";
      return 1;
    }
    const long repeat = (argc >= 4 ? atol(argv[3]) : 4);
    if(repeat <= 0) {
      std::cerr << "Error: number of repetitions must be greater than zero.\n";
      return 1;
    }

    // Print information about the test
    const std::size_t num_blocks = matrix_size / block_size;
    const double app_flop = 2.0 * matrix_size * matrix_size * matrix_size;
    std::vector<std::vector<double> > gflops;
    std::vector<std::vector<double> > times;
    std::vector<std::vector<double> > app_gflops;

    if(world.rank() == 0)
      std::cout << "TiledArray: block-sparse matrix multiply test..."
                << "\nGit HASH: " << TILEDARRAY_REVISION
                << "\nNumber of nodes    = " << world.size()
                << "\nMatrix size        = " << matrix_size << "x" << matrix_size
                << "\nBlock size         = " << block_size << "x" << block_size;


    // Construct TiledRange
    std::vector<unsigned int> blocking;
    blocking.reserve(num_blocks + 1);
    for(long i = 0l; i <= matrix_size; i += block_size)
      blocking.push_back(i);

    std::vector<TiledArray::TiledRange1> blocking2(2,
        TiledArray::TiledRange1(blocking.begin(), blocking.end()));

    TiledArray::TiledRange
      trange(blocking2.begin(), blocking2.end());

    TiledArray::SparseShape<float> forced_shape;
    for(unsigned int left_sparsity = 10; left_sparsity <= 100; left_sparsity += 10){
      std::vector<double> inner_gflops, inner_times, inner_app_gflops;
      for(unsigned int right_sparsity = 10; right_sparsity <= left_sparsity; right_sparsity += 10){
        const long l_block_count = (double(left_sparsity) / 100.0) * double(num_blocks * num_blocks);
        const long r_block_count = (double(right_sparsity) / 100.0) * double(num_blocks * num_blocks);
        if(world.rank() == 0)
                    std::cout << "\nMemory per left matrix  = " << double(l_block_count * block_size * block_size * sizeof(double)) / 1.0e9 << " GB"
                    << "\nMemory per right matrix  = " << double(r_block_count * block_size * block_size * sizeof(double)) / 1.0e9 << " GB"
                    << "\nNumber of left blocks   = " << l_block_count << "   " << 100.0 * double(l_block_count) / double(num_blocks * num_blocks) << "%"
                    << "\nNumber of right blocks   = " << r_block_count << "   " << 100.0 * double(r_block_count) / double(num_blocks * num_blocks) << "%"
                    << "\nAverage left blocks/node = " << double(l_block_count) / double(world.size())
                    << "\nAverage right blocks/node = " << double(r_block_count) / double(world.size()) << "\n";

        // Construct shape
        TiledArray::Tensor<float>
            a_tile_norms(trange.tiles_range(), 0.0),
            b_tile_norms(trange.tiles_range(), 0.0);
        if(world.rank() == 0) {
          world.srand(time(NULL));
          for(long count = 0l; count < l_block_count; ++count) {
            std::size_t index = world.rand() % trange.tiles_range().volume();

            // Avoid setting the same tile to non-zero.
            while(a_tile_norms[index] > TiledArray::SparseShape<float>::threshold())
              index = world.rand() % trange.tiles_range().volume();

            a_tile_norms[index] = std::sqrt(float(block_size * block_size));
          }
          for(long count = 0l; count < r_block_count; ++count) {
            std::size_t index = world.rand() % trange.tiles_range().volume();

            // Avoid setting the same tile to non-zero.
            while(b_tile_norms[index] > TiledArray::SparseShape<float>::threshold())
              index = world.rand() % trange.tiles_range().volume();

            b_tile_norms[index] = std::sqrt(float(block_size * block_size));
          }

        }
        TiledArray::SparseShape<float>
            a_shape(world, a_tile_norms, trange),
            b_shape(world, b_tile_norms, trange);

        if(left_sparsity == 10){
            forced_shape = a_shape;
        }


        // Construct and initialize arrays
        TiledArray::TSpArrayD a(world, trange, a_shape);
        TiledArray::TSpArrayD b(world, trange, b_shape);
        TiledArray::TSpArrayD c;
        a.fill(1.0);
        b.fill(1.0);

        // Start clock
        TiledArray::TSpArrayD::wait_for_lazy_cleanup(world);
        world.gop.fence();
        if(world.rank() == 0)
          std::cout << "Starting iterations:\n";

        double total_time = 0.0, flop = 0.0;

        // Do matrix multiplication
        try {
          for(int i = 0; i < repeat; ++i) {
            const double start = madness::wall_time();
            c("m,n") = (a("m,k") * b("k,n")).set_shape(forced_shape);
            const double time = madness::wall_time() - start;
            total_time += time;
            if(flop < 1.0)
              flop = 2.0 * c("m,n").sum();
            if(world.rank() == 0)
              std::cout << "Iteration " << i + 1 << "   time=" << time << "   GFLOPS="
                  << flop / time / 1.0e9 << "   apparent GFLOPS=" << app_flop / time / 1.0e9 << "\n";
            std::cout << "C sparsity = " << c.shape().sparsity() << "\n";

          }
        } catch(...) {
          if(world.rank() == 0) {
            std::stringstream ss;
            ss << "left shape  = " << a.shape().data() << "\n"
               << "right shape = " << b.shape().data() << "\n";
            std::cout << ss.str();
          }
          throw;
        }

        // Stop clock
        inner_gflops.push_back(double(repeat) * flop / total_time / 1.0e9);
        inner_times.push_back(total_time / repeat);
        inner_app_gflops.push_back(double(repeat) * app_flop / total_time / 1.0e9);

        // Print results
        if(world.rank() == 0) {
          std::cout << "Average wall time = " << total_time / double(repeat)
              << "\nAverage GFLOPS = " << double(repeat) * double(flop) / total_time / 1.0e9
              << "\nAverage apparent GFLOPS = " << double(repeat) * double(app_flop) / total_time / 1.0e9 << "\n";
        }
      }
      gflops.push_back(inner_gflops);
      times.push_back(inner_times);
      app_gflops.push_back(inner_app_gflops);
    }

    if(world.rank() == 0) {
      std::cout << "\n--------------------------------------------------------------------------------------------------------\nGFLOPS\n";
      print_results(world, gflops);
      std::cout << "\n--------------------------------------------------------------------------------------------------------\nAverage wall times\n";
      print_results(world, times);
      std::cout << "\n--------------------------------------------------------------------------------------------------------\nApparent GFLOPS\n";
      print_results(world, app_gflops);
    }


    TiledArray::finalize();

  } catch(TiledArray::Exception& e) {
    std::cerr << "!! TiledArray exception: " << e.what() << "\n";
    rc = 1;
  } catch(madness::MadnessException& e) {
    std::cerr << "!! MADNESS exception: " << e.what() << "\n";
    rc = 1;
  } catch(SafeMPI::Exception& e) {
    std::cerr << "!! SafeMPI exception: " << e.what() << "\n";
    rc = 1;
  } catch(std::exception& e) {
    std::cerr << "!! std exception: " << e.what() << "\n";
    rc = 1;
  } catch(...) {
    std::cerr << "!! exception: unknown exception\n";
    rc = 1;
  }

  return rc;
}