int resize_array(const MKL_INT length, T*& x) { x = (T*)mkl_realloc(x, length*sizeof(T)); if (x == nullptr) { return OUTOFMEMORY; } return 0; }
ocrGuid_t mainEdt(u32 paramc, u64 *paramv, u32 depc, ocrEdtDep_t depv[]) { u32 matrixSize = -1; u32 tileSize = -1; u32 numTiles = -1; u32 i, j, k, c; u32 outSelLevel = 2; double **matrix, ** temp; u64 argc; void *programArg = depv[0].ptr; argc = getArgc(programArg); char *nparamv[argc]; char *fileNameIn, *fileNameOut = "ocr_mkl_cholesky.out"; for (i=0; i< argc; i++) { nparamv[i] = getArgv(programArg, i); } if ( argc == 1) { PRINTF("OCR-MKL Cholesky\n"); PRINTF("__________________________________________________________________________________________________\n"); PRINTF("Solves an OCR version of a Tiled Cholesky Decomposition with all math kernels using Intel MKL only\n\n"); PRINTF("Usage:\n"); PRINTF("\tocr_mkl_cholesky.exe {Arguments}\n\n"); PRINTF("Arguments:\n"); PRINTF("\t--ds -- Specify the Size of the Input Matrix\n"); PRINTF("\t--ts -- Specify the Tile Size\n"); PRINTF("\t--fi -- Specify the Input File Name of the Matrix\n"); // PRINTF("\t--fo -- Specify an Output File Name (default: ocr_mkl_cholesky.out)\n"); PRINTF("\t--ol -- Output Selection Level:\n"); PRINTF("\t\t0: Print solution to stdout\n"); PRINTF("\t\t1: Write solution to text file\n"); PRINTF("\t\t2: Write solution to binary file (default)\n"); PRINTF("\t\t3: Write solution to text file and print to stdout\n"); PRINTF("\t\t4: Write solution to binary file and print to stdout\n"); PRINTF("\t\t5: Write algorithm timing data to ocr_mkl_cholesky_stats.csv and write solution to binary file\n"); ocrShutdown(); return NULL_GUID; } else { // Reads in 4 arguments, input matrix file name, output matrix filename, datasize, and tilesize while (1) { static struct option long_options[] = { {"ds", required_argument, 0, 'a'}, {"ts", required_argument, 0, 'b'}, {"fi", required_argument, 0, 'c'}, {"fo", required_argument, 0, 'd'}, {"ol", required_argument, 0, 'e'}, {0, 0, 0, 0} }; u32 option_index = 0; c = getopt_long(argc, nparamv, "a:b:c:d:e", long_options, &option_index); if (c == -1) // Detect the end of the options break; switch (c) { case 'a': //PRINTF("Option a: matrixSize with value '%s'\n", optarg); matrixSize = (u32) atoi(optarg); break; case 'b': //PRINTF("Option b: tileSize with value '%s'\n", optarg); tileSize = (u32) atoi(optarg); break; case 'c': //PRINTF("Option c: fileNameIn with value '%s'\n", optarg); fileNameIn = optarg; break; case 'd': //PRINTF("Option d: fileNameOut with value '%s'\n", optarg); fileNameOut = (char*) mkl_realloc(fileNameOut, sizeof(optarg)); strcpy(fileNameOut, optarg); break; case 'e': //PRINTF("Option e: outSelLevel with value '%s'\n", optarg); outSelLevel = (u32) atoi(optarg); break; default: PRINTF("ERROR: Invalid argument switch\n\n"); PRINTF("OCR-MKL Cholesky\n"); PRINTF("__________________________________________________________________________________________________\n"); PRINTF("Solves an OCR version of a Tiled Cholesky Decomposition with all math kernels using Intel MKL only\n\n"); PRINTF("Usage:\n"); PRINTF("\tocr_mkl_cholesky.exe {Arguments}\n\n"); PRINTF("Arguments:\n"); PRINTF("\t--ds -- Specify the Size of the Input Matrix\n"); PRINTF("\t--ts -- Specify the Tile Size\n"); PRINTF("\t--fi -- Specify the Input File Name of the Matrix\n"); // PRINTF("\t--fo -- Specify an Output File Name (default: ocr_mkl_cholesky.out)\n"); PRINTF("\t--ol -- Output Selection Level:\n"); PRINTF("\t\t0: Print solution to stdout\n"); PRINTF("\t\t1: Write solution to text file\n"); PRINTF("\t\t2: Write solution to binary file (default)\n"); PRINTF("\t\t3: Write solution to text file and print to stdout\n"); PRINTF("\t\t4: Write solution to binary file and print to stdout\n"); PRINTF("\t\t5: Write algorithm timing data to ocr_mkl_cholesky_stats.csv and write solution to binary file\n"); ocrShutdown(); return NULL_GUID; } } } if(matrixSize == -1 || tileSize == -1) { PRINTF("Must specify matrix size and tile size\n"); ocrShutdown(); return NULL_GUID; } else if(matrixSize % tileSize != 0) { PRINTF("Incorrect tile size %d for the matrix of size %d \n", tileSize, matrixSize); ocrShutdown(); return NULL_GUID; } numTiles = matrixSize/tileSize; PRINTF("Matrixsize %d, tileSize %d\n", matrixSize, tileSize); #ifndef TG_ARCH struct timeval a; if(outSelLevel == 5) { FILE* outCSV = fopen("ocr_mkl_cholesky_stats.csv", "r"); if( !outCSV ) { outCSV = fopen("ocr_mkl_cholesky_stats.csv", "w"); if( !outCSV ) { PRINTF("Cannot find file: %s\n", "ocr_mkl_cholesky_stats.csv"); ocrShutdown(); return NULL_GUID; } fprintf(outCSV, "MatrixSize,TileSize,NumTile,PreAllocTime,PreAlgorithmTime,PostAlgorithmTime\n"); } else { outCSV = fopen("ocr_mkl_cholesky_stats.csv", "a+"); } fprintf(outCSV, "%d,%d,%d,", matrixSize, tileSize, numTiles); gettimeofday(&a, 0); fprintf(outCSV, "%f,", (a.tv_sec*1000000+a.tv_usec)*1.0/1000000); fclose(outCSV); } FILE *in; in = fopen(fileNameIn, "r"); if( !in ) { PRINTF("Cannot find file: %s\n", fileNameIn); ocrShutdown(); return NULL_GUID; } matrix = readMatrix(matrixSize, in); if(outSelLevel == 5) { FILE* outCSV = fopen("ocr_mkl_cholesky_stats.csv", "a"); if( !outCSV ) { PRINTF("Cannot find file: %s\n", "ocr_mkl_cholesky_stats.csv"); ocrShutdown(); return NULL_GUID; } gettimeofday(&a, 0); fprintf(outCSV, "%f,", (a.tv_sec*1000000+a.tv_usec)*1.0/1000000); fclose(outCSV); } #endif ocrGuid_t*** lkji_event_guids = allocateCreateEvents(numTiles); ocrGuid_t templateSeq, templateTrisolve, templateUpdateNonDiag, templateUpdate, templateWrap; ocrEdtTemplateCreate(&templateSeq, lapacke_dpotrf_task, 3, 1); ocrEdtTemplateCreate(&templateTrisolve, cblas_dtrsm_task, 4, 2); ocrEdtTemplateCreate(&templateUpdateNonDiag, cblas_dgemm_task, 5, 3); ocrEdtTemplateCreate(&templateUpdate, cblas_dsyrk_task, 5, 2); ocrEdtTemplateCreate(&templateWrap, wrap_up_task, 3, (numTiles+1)*numTiles/2); // PRINTF("Going to satisfy initial tiles\n"); #ifdef TG_ARCH satisfyInitialTiles(numTiles, tileSize, lkji_event_guids); #else satisfyInitialTiles(numTiles, tileSize, matrix, lkji_event_guids); #endif for ( k = 0; k < numTiles; ++k ) { // PRINTF("Prescribing sequential task %d\n", k); lapacke_dpotrf_task_prescriber(templateSeq, k, tileSize, lkji_event_guids); for( j = k + 1 ; j < numTiles ; ++j ) { cblas_dtrsm_task_prescriber (templateTrisolve, k, j, tileSize, lkji_event_guids); for( i = k + 1 ; i < j ; ++i ) { cblas_dgemm_task_prescriber (templateUpdateNonDiag, k, j, i, tileSize, lkji_event_guids); } cblas_dsyrk_task_prescriber (templateUpdate, k, j, i, tileSize, lkji_event_guids); } } wrap_up_task_prescriber (templateWrap, numTiles, tileSize, outSelLevel, lkji_event_guids); // PRINTF("Wrapping up mainEdt\n"); return NULL_GUID; }