//************************************************************** // ilMatTestFun // performs tests of matrix functions: // ilEuclidDist, ilEuclidNorm, ilMinAtDim //*************************************************************** void ilMatFunTest() { #if 0 //*** testing ilEuclidNorm (comparison with matlab) std::string matfnamein = "C:/temp/matdata3.xml"; std::string matfnameout = "C:/temp/matdata4.xml"; pFileStorage fsr = OpenFileStorage(matfnamein, RWM_Read); pMat mat1 = ReadMatrixFromFile(fsr, "matrix1"); //*** normalize pMat mat2 = CreateMat(mat1->rows, mat1->cols, mat1->type); ilEuclidNorm(mat1, mat2); //*** Save result in xml pFileStorage fsw = OpenFileStorage(matfnameout, RWM_Write); WriteMatrixToFile(fsw, "matrix2", mat2); /* % Matlab test code m1=rand(10,5); xmlsavematrices('C:/temp/matdata3.xml',{m1},{'matrix1'}); %%% run ilMatFunTest(); [matval,matname]=xmlreadmatrices('c:/temp/matdata4.xml'); enorm=euclidnorm(m1); err=mean(abs(enorm(:)-matval{1}(:))) */ #endif #if 0 //*** testing ilEuclidDist (comparison with matlab) std::string matfnamein = "C:/temp/matdata1.xml"; std::string matfnameout = "C:/temp/matdata2.xml"; // we don't own the matrices below -- file storage does pFileStorage fsr = OpenFileStorage(matfnamein, RWM_Read); pMat mat1 = ReadMatrixFromFile(fsr, "matrix1"); pMat mat2 = ReadMatrixFromFile(fsr, "matrix2"); //*** compute Euclidean distance pMat mat3 = CreateMat(mat1->rows, mat2->rows, mat1->type); ilEuclidDist(mat1, mat2, mat3); //*** Save result in xml pFileStorage fsw = OpenFileStorage(matfnameout, RWM_Write); WriteMatrixToFile(fsw, "matrix3", mat3); // cvReleaseMat(&mat1); // TODO do we have to? // cvReleaseMat(&mat2); // TODO do we have to? /* % Matlab test code m1=rand(10,5); m2=rand(20,5); xmlsavematrices('C:/temp/matdata1.xml',{m1,m2},{'matrix1','matrix2'}); %%% run ilMatFunTest(); [matval,matname]=xmlreadmatrices('c:/temp/matdata2.xml'); edist=eucliddist(m1,m2); err=mean(abs(edist(:)-matval{1}(:))) */ #endif }
//------------------------------------------------------------------------------ // main //------------------------------------------------------------------------------ int main (int argc, char * const argv[]) { std::cout.setf(std::ios::right); std::cout << std::setprecision(4) << std::fixed << std::showpoint; std::cout << std::setfill(' '); if (argc == 4 && !strncmp(argv[1], "-c", 2)) { // Compress Matrixf M; ReadMatrixFromFile(M, std::string(argv[2])); Scale(M, 2.0f, M); ZerotreeEncode(std::string(argv[3]), M); } else if (argc == 4 && !strncmp(argv[1], "-d", 2)) { // Decompress Matrixf M; ZerotreeDecode(M, std::string(argv[2])); WriteMatrixToFile(M, std::string(argv[3])); } else { printf("[-c|-d] [input] [output]\n"); } return 0; }
/**
 * @brief Load a kernel described by a command line option string
 * @param Kernel pointer to the kernel image to fill
 * @param String command line option string describing the kernel
 * @return 1 on success, 0 on failure
 *
 * Any data already held by Kernel is freed first.  The string is then tried
 * as a named kernel, "disk:<radius>" or "gaussian:<sigma>"; if that does not
 * succeed, the string is taken to be a file name and the kernel is read from
 * that file.
 */
int ReadKernel(image *Kernel, const char *String)
{
    /* Release a previously loaded kernel before replacing it. */
    if(Kernel->Data)
    {
        FreeImageObj(*Kernel);
        *Kernel = NullImage;
    }

    /* A named kernel takes precedence; the file is only consulted when
       the named-kernel attempt fails. */
    if(MakeNamedKernel(Kernel, String))
        return 1;

    return ReadMatrixFromFile(Kernel, String, KernelRescale) ? 1 : 0;
}
void DoTest( const char* timerDesc, ResultDatabase& resultDB, OptionParser& opts ) { StencilFactory<T>* stdStencilFactory = NULL; Stencil<T>* stdStencil = NULL; StencilFactory<T>* testStencilFactory = NULL; Stencil<T>* testStencil = NULL; try { #if defined(PARALLEL) stdStencilFactory = new MPIHostStencilFactory<T>; testStencilFactory = new MPICUDAStencilFactory<T>; #else stdStencilFactory = new HostStencilFactory<T>; testStencilFactory = new CUDAStencilFactory<T>; #endif // defined(PARALLEL) assert( (stdStencilFactory != NULL) && (testStencilFactory != NULL) ); // do a sanity check on option values CheckOptions( opts ); stdStencilFactory->CheckOptions( opts ); testStencilFactory->CheckOptions( opts ); // extract and validate options std::vector<long long> arrayDims = opts.getOptionVecInt( "customSize" ); if( arrayDims.size() != 2 ) { cerr << "Dim size: " << arrayDims.size() << "\n"; throw InvalidArgValue( "all overall dimensions must be positive" ); } if (arrayDims[0] == 0) // User has not specified a custom size { int sizeClass = opts.getOptionInt("size"); arrayDims = StencilFactory<T>::GetStandardProblemSize( sizeClass ); } long int seed = (long)opts.getOptionInt( "seed" ); bool beVerbose = opts.getOptionBool( "verbose" ); unsigned int nIters = (unsigned int)opts.getOptionInt( "num-iters" ); double valErrThreshold = (double)opts.getOptionFloat( "val-threshold" ); unsigned int nValErrsToPrint = (unsigned int)opts.getOptionInt( "val-print-limit" ); #if defined(PARALLEL) unsigned int haloWidth = (unsigned int)opts.getOptionInt( "iters-per-exchange" ); #else unsigned int haloWidth = 1; #endif // defined(PARALLEL) float haloVal = (float)opts.getOptionFloat( "haloVal" ); // build a description of this experiment std::vector<long long> lDims = opts.getOptionVecInt( "lsize" ); assert( lDims.size() == 2 ); std::ostringstream experimentDescriptionStr; experimentDescriptionStr << nIters << ':' << arrayDims[0] << 'x' << arrayDims[1] << ':' << lDims[0] << 'x' << lDims[1]; 
unsigned int nPasses = (unsigned int)opts.getOptionInt( "passes" ); unsigned int nWarmupPasses = (unsigned int)opts.getOptionInt( "warmupPasses" ); // compute the expected result on the host // or read it from a pre-existing file std::string matrixFilenameBase = (std::string)opts.getOptionString( "expMatrixFile" ); #if defined(PARALLEL) int cwrank; MPI_Comm_rank( MPI_COMM_WORLD, &cwrank ); if( cwrank == 0 ) { #endif // defined(PARALLEL) if( !matrixFilenameBase.empty() ) { std::cout << "\nReading expected stencil operation result from file for later comparison with CUDA output\n" << std::endl; } else { std::cout << "\nPerforming stencil operation on host for later comparison with CUDA output\n" << "Depending on host capabilities, this may take a while." << std::endl; } #if defined(PARALLEL) } #endif // defined(PARALLEL) Matrix2D<T> expected( arrayDims[0] + 2*haloWidth, arrayDims[1] + 2*haloWidth ); Initialize<T> init( seed, haloWidth, haloVal ); bool haveExpectedData = false; if( ! matrixFilenameBase.empty() ) { bool readOK = ReadMatrixFromFile( expected, GetMatrixFileName<T>( matrixFilenameBase ) ); if( readOK ) { if( (expected.GetNumRows() != arrayDims[0] + 2*haloWidth) || (expected.GetNumColumns() != arrayDims[1] + 2*haloWidth) ) { std::cerr << "The matrix read from file \'" << GetMatrixFileName<T>( matrixFilenameBase ) << "\' does not match the matrix size specified on the command line.\n"; expected.Reset( arrayDims[0] + 2*haloWidth, arrayDims[1] + 2*haloWidth ); } else { haveExpectedData = true; } } if( !haveExpectedData ) { std::cout << "\nSince we could not read the expected matrix values,\nperforming stencil operation on host for later comparison with CUDA output.\n" << "Depending on host capabilities, this may take a while." 
<< std::endl; } } if( !haveExpectedData ) { init( expected ); haveExpectedData = true; if( beVerbose ) { std::cout << "initial state:\n" << expected << std::endl; } stdStencil = stdStencilFactory->BuildStencil( opts ); (*stdStencil)( expected, nIters ); } if( beVerbose ) { std::cout << "expected result:\n" << expected << std::endl; } // determine whether we are to save the expected matrix values to a file // to speed up future runs matrixFilenameBase = (std::string)opts.getOptionString( "saveExpMatrixFile" ); if( !matrixFilenameBase.empty() ) { SaveMatrixToFile( expected, GetMatrixFileName<T>( matrixFilenameBase ) ); } assert( haveExpectedData ); // compute the result on the CUDA device Matrix2D<T> data( arrayDims[0] + 2*haloWidth, arrayDims[1] + 2*haloWidth ); Stencil<T>* testStencil = testStencilFactory->BuildStencil( opts ); // Compute the number of floating point operations we will perform. // // Note: in the truly-parallel case, we count flops for redundant // work due to the need for a halo. // But we do not add to the count for the local 1-wide halo since // we aren't computing new values for those items. unsigned long npts = (arrayDims[0] + 2*haloWidth - 2) * (arrayDims[1] + 2*haloWidth - 2); #if defined(PARALLEL) MPICUDAStencil<T>* mpiTestStencil = static_cast<MPICUDAStencil<T>*>( testStencil ); assert( mpiTestStencil != NULL ); int participating = mpiTestStencil->ParticipatingInProgram() ? 
1 : 0; int numParticipating = 0; MPI_Allreduce( &participating, // src &numParticipating, // dest 1, // count MPI_INT, // type MPI_SUM, // op MPI_COMM_WORLD ); // communicator npts *= numParticipating; #endif // defined(PARALLEL) // In our 9-point stencil, there are 11 floating point operations // per point (3 multiplies and 11 adds): // // newval = weight_center * centerval + // weight_cardinal * (northval + southval + eastval + westval) + // weight_diagnoal * (neval + nwval + seval + swval) // // we do this stencil operation 'nIters' times unsigned long nflops = npts * 11 * nIters; #if defined(PARALLEL) if( cwrank == 0 ) { #endif // defined(PARALLEL) std::cout << "Performing " << nWarmupPasses << " warmup passes..."; #if defined(PARALLEL) } #endif // defined(PARALLEL) for( unsigned int pass = 0; pass < nWarmupPasses; pass++ ) { init(data); (*testStencil)( data, nIters ); } #if defined(PARALLEL) if( cwrank == 0 ) { #endif // defined(PARALLEL) std::cout << "done." << std::endl; #if defined(PARALLEL) } #endif // defined(PARALLEL) #if defined(PARALLEL) MPI_Comm_rank( MPI_COMM_WORLD, &cwrank ); if( cwrank == 0 ) { #endif // defined(PARALLEL) std::cout << "\nPerforming stencil operation on chosen device, " << nPasses << " passes.\n" << "Depending on chosen device, this may take a while." << std::endl; #if defined(PARALLEL) } #endif // defined(PARALLEL) #if !defined(PARALLEL) std::cout << "At the end of each pass the number of validation\nerrors observed will be printed to the standard output." 
<< std::endl; #endif // !defined(PARALLEL) for( unsigned int pass = 0; pass < nPasses; pass++ ) { #if !defined(PARALLEL) std::cout << "pass " << pass << ": "; #endif // !defined(PARALLEL) init( data ); int timerHandle = Timer::Start(); (*testStencil)( data, nIters ); double elapsedTime = Timer::Stop( timerHandle, "CUDA stencil" ); // find and report the computation rate double gflops = (nflops / elapsedTime) / 1e9; resultDB.AddResult( timerDesc, experimentDescriptionStr.str(), "GFLOPS", gflops ); if( beVerbose ) { std::cout << "observed result, pass " << pass << ":\n" << data << std::endl; } // validate the result #if defined(PARALLEL) StencilValidater<T>* validater = new MPIStencilValidater<T>; #else StencilValidater<T>* validater = new SerialStencilValidater<T>; #endif // defined(PARALLEL) validater->ValidateResult( expected, data, valErrThreshold, nValErrsToPrint ); } } catch( ... ) { // clean up - abnormal termination // wish we didn't have to do this, but C++ exceptions do not // support a try-catch-finally approach delete stdStencil; delete stdStencilFactory; delete testStencil; delete testStencilFactory; throw; } // clean up - normal termination delete stdStencil; delete stdStencilFactory; delete testStencil; delete testStencilFactory; }