コード例 #1
0
ファイル: CUnit_pthpool_func.c プロジェクト: vitrone/FEM1D
/*
 * Entry point of the CUnit runner for the "Parallel Evaluation of Expressions"
 * suite.
 *
 * Restricts MKL's BLAS domain to one thread, registers a single suite that
 * contains the "Parallel Gaussian" test (test_pfunc) with init_suite /
 * clean_suite as its setup / teardown hooks, and runs it through the CUnit
 * Basic interface in verbose mode.
 *
 * Returns CUE_SUCCESS (0) when the framework reported no error, otherwise the
 * CUnit error code of the step that failed.
 *
 * NOTE(review): only framework errors are propagated; the number of failed
 * test assertions is not queried here, so it does not influence the exit
 * status - confirm whether that is intended.
 */
int main(void)
{
    //mkl_set_num_threads(1);
    mkl_domain_set_num_threads(1, MKL_BLAS);

    /* initialize the CUnit test registry */
    if (CUE_SUCCESS != CU_initialize_registry())
    {
        return CU_get_error();
    }

    /* Tests belonging to the suite (terminated by the CUnit sentinel) */
    CU_TestInfo test_array[] =
    {
        { "Parallel Gaussian"      , test_pfunc },
        CU_TEST_INFO_NULL,
    };

    /* Suites to register (terminated by the CUnit sentinel) */
    CU_SuiteInfo suites[] =
    {
        { "Parallel Evaluation of Expressions", init_suite, clean_suite, NULL, NULL, test_array },
        CU_SUITE_INFO_NULL,
    };

    /* Register test suites */
    CU_ErrorCode CU_error = CU_register_suites(suites);
    if (CU_error != CUE_SUCCESS)
    {
        /* Log while the framework still holds the error message. */
        debug_body("%s", CU_get_error_msg());
        CU_cleanup_registry();
        /*
         * Return the code captured above rather than calling CU_get_error()
         * again: registry cleanup resets the framework error state, which
         * would turn this failure into a successful exit status.
         */
        return CU_error;
    }

    /* Run all tests using the CUnit Basic interface */
    CU_basic_set_mode(CU_BRM_VERBOSE);
    CU_basic_run_tests();

    /* Read the framework error before cleanup resets it. */
    CU_error = CU_get_error();
    CU_cleanup_registry();
    return CU_error;
}
コード例 #2
0
/**
 * Benchmark driver for an in-place LU factorisation of a square matrix.
 *
 * Depending on the command line, the matrix is factorised either by
 * runSequentialLU() or by an HTGS task graph built from the Gaussian
 * elimination, factor-upper/lower and block matrix-multiply tasks. Each try
 * prints one result line to stdout and appends one line to the runtime file.
 *
 * Command-line options (all optional):
 *   --size <#>                  matrix dimension (default 16384)
 *   --block-size <#>            block dimension for the HTGS grid (default 128)
 *   --num-retry <#>             number of timed tries (default 1)
 *   --runtime-file <filename>   file the result lines are appended to
 *   --validate-results          compare the result against the original matrix
 *   --run-sequential            use runSequentialLU() instead of the task graph
 *   --num-threads-factor-l <#>  threads for the factor-lower task
 *   --num-threads-factor-u <#>  threads for the factor-upper task
 *   --num-threads-gaus <#>      threads for the Gaussian-elimination task
 *   --num-threads-gemm <#>      threads for the matrix-multiply task
 *   --num-threads-blas <#>      thread count passed to MKL
 *   --help                      print usage and exit
 *
 * The four task thread options are also accepted without the leading "--",
 * which is the only spelling earlier versions of this parser recognised.
 *
 * @return 0 on completion, 1 when the matrix/block size is unusable.
 */
int main(int argc, char *argv[]) {
  long matrixSize = 16384;
  int blockSize = 128;
  bool runSequential = false;
  bool validate = false;

  int numBlasThreads = 40;

  int numGausElimThreads = 2;
  int numFactorLowerThreads = 4;
  int numFactorUpperThreads = 4;
  int numMatrixMulThreads = 30;

  std::string runtimeFileStr("runtimes");

  int numRetry = 1;

  for (int arg = 1; arg < argc; arg++) {
    const std::string argvs(argv[arg]);

    // Consumes and returns the argument following the current option.
    // When the option is the last argument there is nothing to read: warn and
    // return nullptr so the caller keeps its default instead of touching
    // argv[argc].
    auto takeValue = [&]() -> const char * {
      if (arg + 1 >= argc) {
        std::cerr << "Warning: option '" << argvs
                  << "' expects a value; keeping the default" << std::endl;
        return nullptr;
      }
      return argv[++arg];
    };

    // Parses the following argument into an int setting, if present.
    auto takeInt = [&](int &target) {
      if (const char *value = takeValue())
        target = atoi(value);
    };

    if (argvs == "--size") {
      if (const char *value = takeValue())
        matrixSize = atoi(value);
    }
    else if (argvs == "--num-threads-blas") {
      takeInt(numBlasThreads);
    }
    // The undashed spellings are kept for backward compatibility; the dashed
    // ones are what --help documents.
    else if (argvs == "--num-threads-factor-l" || argvs == "num-threads-factor-l") {
      takeInt(numFactorLowerThreads);
    }
    else if (argvs == "--num-threads-factor-u" || argvs == "num-threads-factor-u") {
      takeInt(numFactorUpperThreads);
    }
    else if (argvs == "--num-threads-gaus" || argvs == "num-threads-gaus") {
      takeInt(numGausElimThreads);
    }
    else if (argvs == "--num-threads-gemm" || argvs == "num-threads-gemm") {
      takeInt(numMatrixMulThreads);
    }
    else if (argvs == "--run-sequential") {
      runSequential = true;
    }
    else if (argvs == "--num-retry") {
      takeInt(numRetry);
    }
    else if (argvs == "--block-size") {
      takeInt(blockSize);
    }
    else if (argvs == "--runtime-file") {
      if (const char *value = takeValue())
        runtimeFileStr = value;
    }
    else if (argvs == "--validate-results") {
      validate = true;
    }
    else if (argvs == "--help") {
      std::cout << argv[0]
                << " args: [--size <#>] [--block-size <#>] [--num-retry <#>] [--runtime-file <filename>] [--validate-results] [--run-sequential] [--num-threads-factor-l <#>] [--num-threads-factor-u <#>] [--num-threads-gaus <#>] [--num-threads-gemm <#>] [--num-threads-blas <#>] [--help]"
                << std::endl;
      exit(0);
    }
  }

  // Reject sizes that cannot be allocated or partitioned before doing any work.
  if (matrixSize <= 0) {
    std::cerr << "Error: --size must be a positive integer" << std::endl;
    return 1;
  }

  if (!runSequential) {
    // The block grid is matrixSize / blockSize wide: a zero block size would
    // divide by zero and an oversized one would yield an empty grid.
    if (blockSize <= 0 || blockSize > matrixSize) {
      std::cerr << "Error: --block-size must be between 1 and the matrix size" << std::endl;
      return 1;
    }

    if (matrixSize % blockSize != 0) {
      std::cerr << "Warning: matrix size " << matrixSize
                << " is not a multiple of block size " << blockSize
                << "; the trailing rows/columns are not covered by the block grid"
                << std::endl;
    }
  }

  const long numElements = matrixSize * matrixSize;

  std::ofstream runtimeFile(runtimeFileStr, std::ios::app);
  double *matrix = new double[numElements];
  double *matrixTest = nullptr;

  // TODO: Ensure diagonally dominant
  initMatrixDiagDom(matrix, matrixSize, matrixSize, true);

  if (validate) {
    // Keep a pristine copy of the input for validateResults(). The index is a
    // long so it cannot overflow before reaching numElements.
    matrixTest = new double[numElements];
    for (long i = 0; i < numElements; i++)
      matrixTest[i] = matrix[i];
  }

  // NOTE(review): the matrix is not re-initialised between tries, so every try
  // after the first appears to operate on the output of the previous one -
  // confirm this is intended when --num-retry > 1.
  for (int numTry = 0; numTry < numRetry; numTry++) {
    SimpleClock clk;       // times the factorisation itself
    SimpleClock endToEnd;  // additionally covers MKL setup and graph construction

    if (runSequential) {
      endToEnd.start();
      mkl_domain_set_num_threads(numBlasThreads, MKL_DOMAIN_ALL);
//      mkl_set_num_threads(40);

      clk.start();
      runSequentialLU(matrix, matrixSize);
//      computeSequentialMatMul(matrixA, matrixB, matrixC, matrixAHeight, sharedDim, matrixBWidth);
      clk.stopAndIncrement();
      endToEnd.stopAndIncrement();
    }
    else {
      endToEnd.start();
      mkl_domain_set_num_threads(numBlasThreads, MKL_DOMAIN_ALL);

      int gridHeight = static_cast<int>(matrixSize / blockSize);
      int gridWidth = static_cast<int>(matrixSize / blockSize);

      // TODO: Build graph and runtime
      // NOTE(review): matrixBlocks is never deleted in this function; whether
      // one of the rules takes ownership is not visible here - confirm.
      htgs::StateContainer<std::shared_ptr<MatrixBlockData<double *>>> *matrixBlocks = new htgs::StateContainer<std::shared_ptr<MatrixBlockData<double *>>>(gridHeight, gridWidth, nullptr);

      for (int r = 0; r < gridHeight; r++)
      {
        for (int c = 0; c < gridWidth; c++)
        {
          // Store pointer locations for all blocks
          double *ptr = &matrix[IDX2C(r * blockSize, c *blockSize, matrixSize)];

          std::shared_ptr<MatrixRequestData> request(new MatrixRequestData(r, c, MatrixType::MatrixA));
          std::shared_ptr<MatrixBlockData<double *>> data(new MatrixBlockData<double *>(request, ptr, blockSize, blockSize));

          matrixBlocks->set(r, c, data);
        }
      }

      GausElimTask *gausElimTask = new GausElimTask(numGausElimThreads, matrixSize, matrixSize);

      auto gausElimBk = new htgs::Bookkeeper<MatrixBlockData<double *>>();

      GausElimRuleUpper *gausElimRuleUpper = new GausElimRuleUpper(matrixBlocks, gridHeight, gridWidth);
      GausElimRuleLower *gausElimRuleLower = new GausElimRuleLower(matrixBlocks, gridHeight, gridWidth);

      FactorUpperTask *factorUpperTask = new FactorUpperTask(numFactorUpperThreads, matrixSize, matrixSize);
      FactorLowerTask *factorLowerTask = new FactorLowerTask(numFactorLowerThreads, matrixSize, matrixSize);

      auto matrixMulBk = new htgs::Bookkeeper<MatrixBlockData<double *>>();
      MatrixMulRule *matrixMulRule = new MatrixMulRule(matrixBlocks, gridHeight, gridWidth);

      MatrixMulBlkTask *matrixMulTask = new MatrixMulBlkTask(numMatrixMulThreads, matrixSize, matrixSize, matrixSize, matrixSize, blockSize);


      auto matrixMulResultBk = new htgs::Bookkeeper<MatrixBlockData<double *>>();

      int numDiagonals = gridWidth - 1;
      GausElimRule *gausElimRule = new GausElimRule(numDiagonals, gridHeight, gridWidth);

      // Number of updates excluding the diagonal and the top/left row/column:
      // n * (2n^2 - 3n + 1) / 6 == n * (n - 1) * (2n - 1) / 6 with n = gridWidth.
      // Evaluated in integer arithmetic (the product is always divisible by 6)
      // so that no floating-point rounding can be truncated into an
      // off-by-one count.
      const long gridDim = gridWidth;
      int numUpdates = static_cast<int>(gridDim * (gridDim - 1) * (2 * gridDim - 1) / 6);

      UpdateRule *updateRule = new UpdateRule(numUpdates);
      UpdateRule *updateRule2 = new UpdateRule(numUpdates);

      auto taskGraph = new htgs::TaskGraph<MatrixBlockData<double *>, htgs::VoidData>();
      taskGraph->addGraphInputConsumer(gausElimTask);

      // Gaussian elimination output is dispatched to the two factor tasks.
      taskGraph->addEdge(gausElimTask, gausElimBk);
      taskGraph->addRule(gausElimBk, factorUpperTask, gausElimRuleUpper);
      taskGraph->addRule(gausElimBk, factorLowerTask, gausElimRuleLower);

      // Both factor tasks feed the bookkeeper in front of the matrix multiply.
      taskGraph->addEdge(factorUpperTask, matrixMulBk);
      taskGraph->addEdge(factorLowerTask, matrixMulBk);

      taskGraph->addRule(matrixMulBk, matrixMulTask, matrixMulRule);
      taskGraph->addEdge(matrixMulTask, matrixMulResultBk);

      // Multiply results loop back into the graph; the feedback rules are only
      // attached when the grid is large enough for them to have work.
      if (numDiagonals > 0)
        taskGraph->addRule(matrixMulResultBk, gausElimTask, gausElimRule);

      if (numUpdates > 0)
        taskGraph->addRule(matrixMulResultBk, matrixMulBk, updateRule);

      if (numUpdates > 0)
        taskGraph->addRule(matrixMulResultBk, gausElimBk, updateRule2);

      taskGraph->incrementGraphInputProducer();

      taskGraph->writeDotToFile("lud-graph.dot");

      htgs::Runtime *runtime = new htgs::Runtime(taskGraph);

      clk.start();

      runtime->executeRuntime();

      // Block (0, 0) is the only input produced from here.
      taskGraph->produceData(matrixBlocks->get(0, 0));
      taskGraph->finishedProducingData();

      runtime->waitForRuntime();

      clk.stopAndIncrement();


      delete runtime;
      endToEnd.stopAndIncrement();
    }

    // 2n^3 / 3 operations; evaluated in double so the cube cannot overflow an
    // integer type. Note that the "giga" divisor used below is 2^30.
    const double dim = static_cast<double>(matrixSize);
    double operations = (2.0 * (dim * dim * dim)) / 3.0;
    double flops = operations / clk.getAverageTime(TimeVal::SEC);
    double gflops = flops / 1073741824.0;

    std::cout << (runSequential ? "sequential" : "htgs")
              << ", matrix-size: " << matrixSize
              << ", " << "blockSize: " << (runSequential ? 0 : blockSize)
              << ", blasThreads: " << numBlasThreads
              << ", gausThreads: " << numGausElimThreads
              << ", factorUpperThreads: " << numFactorUpperThreads
              << ", factorLowerThreads: " << numFactorLowerThreads
              << ", gemmThreads: " << numMatrixMulThreads
              << ", time:" << clk.getAverageTime(TimeVal::MILLI)
              << ", end-to-end:" << endToEnd.getAverageTime(TimeVal::MILLI)
              << ", gflops: " << gflops
        << std::endl;

    runtimeFile << (runSequential ? "sequential" : "htgs")
                << ", " << matrixSize
                << ", " << blockSize
                << ", " << numBlasThreads
                << ", " << numGausElimThreads
                << ", " << numFactorUpperThreads
                << ", " << numFactorLowerThreads
                << ", " << numMatrixMulThreads
                << ", " << clk.getAverageTime(TimeVal::MILLI)
                << ", " << endToEnd.getAverageTime(TimeVal::MILLI)
                << ", " << gflops
                << std::endl;



    if (validate)
    {
      int res = validateResults(matrix, matrixTest, matrixSize);
      std::cout << (res == 0 ? "PASSED" : "FAILED") << std::endl;
    }


  }

  delete[] matrix;
  delete[] matrixTest;  // deleting nullptr is a no-op when validation is off

}