// Entry EDT of the basicIO example.
//
// Expects the program arguments "offset size fileName" in depv[0]. It reads
// up to 'size' unsigned integers from the file into a freshly created
// datablock, then spawns add_edt (paramv = {offset, size}) and hands it the
// datablock through a sticky event.
//
// Returns NULL_GUID on every path; on a usage error or an unreadable file the
// runtime is shut down before returning.
ocrGuid_t mainEdt ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    u64 size = -1;
    u64 offset = -1;
    u64 i = 0;
    u64 argc;

    void *programArg = depv[0].ptr;
    argc = getArgc(programArg);
    if ( argc != 4 ) {
        PRINTF("Usage: ./basicIO offset size fileName \n");
        ocrShutdown();
        // An EDT returns a GUID, not a process exit status.
        return NULL_GUID;
    }

    offset = atoi(getArgv(programArg, 1));
    size = atoi(getArgv(programArg, 2));

    u64 nparamv[2];
    nparamv[0] = offset;
    nparamv[1] = size;

    FILE *in;
    in = fopen(getArgv(programArg, 3), "r");
    if( !in ) {
        PRINTF("Cannot find file: %s\n", getArgv(programArg, 3));
        ocrShutdown();
        return NULL_GUID;
    }

    ocrGuid_t dataGuid;
    // The data could also be passed as a parameter; a datablock is not
    // strictly needed here and is created for demonstration purposes.
    // Create a datablock to hold a block of 'size' elements.
    u64 *inputarr;
    ocrDbCreate(&dataGuid, (void**)&inputarr, sizeof(u64)*size,0,NULL_GUID, NO_ALLOC);

#ifndef TG_ARCH
    // Stop at 'size' elements so a longer file cannot overflow the datablock,
    // and stop on the first token that does not parse so a malformed file
    // cannot make the loop spin forever.
    while (i < size && fscanf(in,"%llu\n",&inputarr[i]) == 1) {
        i++;
    }
#else
    fread(inputarr, sizeof(u64),size , in);
#endif
    fclose(in);

    ocrGuid_t addEdtTemplateGuid;
    ocrEdtTemplateCreate(&addEdtTemplateGuid, add_edt, 2 /*paramc*/, 1 /*depc*/);

    ocrGuid_t add_edt_guid;
    // Create the EDT without specifying the dependence vector at creation
    ocrEdtCreate(&add_edt_guid, addEdtTemplateGuid, EDT_PARAM_DEF, nparamv,1,NULL ,EDT_PROP_FINISH , NULL_GUID, NULL);

    // The datablock is delivered to slot 0 of add_edt through a sticky event.
    ocrGuid_t triggerEventGuid;
    ocrEventCreate(&triggerEventGuid, OCR_EVENT_STICKY_T, true);
    ocrAddDependence(triggerEventGuid, add_edt_guid, 0, DB_MODE_EW);
    ocrEventSatisfy(triggerEventGuid, dataGuid);

    return NULL_GUID;
}
// Final EDT of the computation: optionally prints the input and the result,
// then shuts the runtime down.
//
// paramv[0] = N (number of elements), paramv[1] = verbose flag,
// paramv[2] = printResults flag.
// depv[1] is read as one block of floats laid out as three consecutive
// N-element arrays: the input, the real part, and the imaginary part.
//
// Returns NULL_GUID.
ocrGuid_t finalPrintEdt(u32 paramc, u64 *paramv, u32 depc, ocrEdtDep_t depv[]) {
    int i;
    float *data = (float*)depv[1].ptr;
    u64 N = paramv[0];
    bool verbose = paramv[1];
    bool printResults = paramv[2];

    // Slice the contiguous block into its three N-element sections.
    float *x_in = (float*)data;
    float *X_real = (float*)(data + N);
    float *X_imag = (float*)(data + 2*N);

    if(verbose) {
        PRINTF("Final print EDT\n");
    }

    if(printResults) {
        PRINTF("Starting values:\n");
        for(i=0;i<N;i++) {
            PRINTF("%d [ %f ]\n",i,x_in[i]);
        }
        PRINTF("\n");

        PRINTF("Final result:\n");
        for(i=0;i<N;i++) {
            PRINTF("%d [%f + %fi]\n",i,X_real[i],X_imag[i]);
        }
    }

    ocrShutdown();
    // The function is declared to return ocrGuid_t, so it must not fall off
    // the end without a value.
    return NULL_GUID;
}
// Smoke test for ocrCurrentWorkerGuid(): queries the GUID of the current
// worker, prints it, and shuts the runtime down. Returns NULL_GUID.
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    // The value itself is not validated; the only expectation is that the
    // call completes without crashing.
    ocrGuid_t currentWorker;
    currentWorker = ocrCurrentWorkerGuid();

    PRINTF("Current worker GUID is "GUIDF"\n", GUIDA(currentWorker));

    ocrShutdown();
    return NULL_GUID;
}
// This task computes the Cholesky factorization of a symmetric positive definite matrix... // This is the first step in the tile Cholesky factorization. ocrGuid_t lapacke_dpotrf_task ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { u32 info; u64 *func_args = paramv; u32 k = (u32) func_args[0]; u32 tileSize = (u32) func_args[1]; ocrGuid_t out_lkji_kkkp1_event_guid = (ocrGuid_t) func_args[2]; double* aBlock = (double*) (depv[0].ptr); // PRINTF("RUNNING sequential_cholesky %d with 0x%llx to satisfy\n", k, (u64)(out_lkji_kkkp1_event_guid)); ocrGuid_t out_lkji_kkkp1_db_guid; ocrGuid_t out_lkji_kkkp1_db_affinity = NULL_GUID; info = LAPACKE_dpotrf(LAPACK_ROW_MAJOR, 'L', tileSize, aBlock, tileSize ); if (info != 0) { if (info > 0) PRINTF("Matrix A is not Symmetric Positive Definite (SPD)"); else PRINTF("i-th parameter had an illegal value."); ASSERT(0); ocrShutdown(); return NULL_GUID; } ocrEventSatisfy(out_lkji_kkkp1_event_guid, depv[0].guid); return NULL_GUID; }
// Prints the final result of the computation. Called as the last EDT. ocrGuid_t finalPrintEdt(u32 paramc, u64 *paramv, u32 depc, ocrEdtDep_t depv[]) { int i; u64 N = paramv[0]; bool verbose = paramv[1]; bool printResults = paramv[2]; float *data_in = (float*)depv[1].ptr; float *data_real = (float*)depv[2].ptr; float *data_imag = (float*)depv[3].ptr; float *x_in = (float*)data_in; float *X_real = (float*)(data_real); float *X_imag = (float*)(data_imag); double *startTime = (double*)(depv[4].ptr); if(verbose) { PRINTF("Final print EDT\n"); } double endTime = mysecond(); PRINTF("%f\n",endTime-*startTime); if(printResults) { PRINTF("Starting values:\n"); for(i=0;i<N;i++) { PRINTF("%d [ %f ]\n",i,x_in[i]); } PRINTF("\n"); PRINTF("Final result:\n"); for(i=0;i<N;i++) { PRINTF("%d [%f + %fi]\n",i,X_real[i],X_imag[i]); } } ocrShutdown(); }
// EDT that requests runtime shutdown and then logs a marker line.
// None of the EDT arguments are read. Returns NULL_GUID.
ocrGuid_t mapCreateFunc(u32 paramc, u64 *paramv, u32 depc, ocrEdtDep_t depv[]) {
    // The marker is emitted after the shutdown request, not before it.
    ocrShutdown();
    PRINTF("==Shutdown\n");

    return NULL_GUID;
}
// Checks that the first element of the datablock in slot 0 is 1 and the first
// element of the datablock in slot 1 is 2, then shuts the runtime down.
// Returns NULL_GUID.
ocrGuid_t otherEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    u64 *firstDb = depv[0].ptr;
    u64 *secondDb = depv[1].ptr;

    ASSERT(firstDb[0] == 1);
    ASSERT(secondDb[0] == 2);

    ocrShutdown();
    return NULL_GUID;
}
// Last EDT of the test: prints the integer carried by the datablock in
// slot 0, asserts that it is 42, and terminates the runtime.
// Returns NULL_GUID.
ocrGuid_t taskForEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    int *payload = (int*)depv[0].ptr;

    printf("In the taskForEdt with value %d\n", (*payload));
    assert(*payload == 42);

    // Nothing runs after this EDT, so request shutdown here.
    ocrShutdown();
    return NULL_GUID;
}
// Verifies the parameters this EDT was created with: exactly two values,
// 333 followed by 555. Logs success and shuts the runtime down.
// Returns NULL_GUID.
ocrGuid_t remoteEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    // Check the count before looking at the individual entries.
    ASSERT(paramc == 2);
    ASSERT(paramv[0] == 333);
    ASSERT(paramv[1] == 555);

    PRINTF("[remote] RemoteEdt: paramv checked\n");

    ocrShutdown();
    return NULL_GUID;
}
// Final EDT of the benchmark: reports throughput from the info_t record in
// slot 0 (max * NB_SATISFY operations over the recorded timer value),
// releases and destroys that datablock, and shuts the runtime down.
// Returns NULL_GUID.
ocrGuid_t endEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    info_t *stats = (info_t *) depv[0].ptr;
    ocrGuid_t statsGuid = depv[0].guid;

    print_throughput("TEST", (stats->max * NB_SATISFY), usec_to_sec(stats->timer));

    // The record is no longer needed once the numbers are printed.
    ocrDbRelease(statsGuid);
    ocrDbDestroy(statsGuid);

    ocrShutdown();
    return NULL_GUID;
}
// Last EDT of the test: prints paramv[0], asserts that exactly one parameter
// with value 32 was received and that the datablock in slot 0 holds the
// integer 42, then terminates the runtime. Returns NULL_GUID.
ocrGuid_t taskForEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    PRINTF("In the taskForEdt with value %"PRId32"\n", (int)paramv[0]);

    ASSERT(paramc == 1);
    ASSERT(paramv[0] == 32);

    void *payload = depv[0].ptr;
    ASSERT(*((int*)payload) == 42);

    // Nothing runs after this EDT, so request shutdown here.
    ocrShutdown();
    return NULL_GUID;
}
// Prints the integers held by the datablocks in slots 0 and 1, destroys both
// datablocks, and shuts the runtime down. Returns NULL_GUID.
ocrGuid_t shutdownEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    PRINTF("Hello from shutdownEdt\n");

    int *first = (int*) depv[0].ptr;
    int *second = (int*) depv[1].ptr;
    PRINTF("Received data1 = %"PRId32", data2 = %"PRId32"\n", *first, *second);

    // Both inputs are consumed here; destroy them in slot order.
    ocrDbDestroy(depv[0].guid);
    ocrDbDestroy(depv[1].guid);

    ocrShutdown();
    return NULL_GUID;
}
// Creates a datablock holding the integer 42. When TEST_OCR_ELS is enabled,
// stores the datablock GUID in the ELS slot ELS_OFFSET and passes it to
// someUserFunction(). Shuts the runtime down and returns NULL_GUID.
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    ocrGuid_t dbGuid;
    int *value;

    ocrDbCreate(&dbGuid,(void **) &value, sizeof(int), DB_PROP_NONE, NULL_HINT, NO_ALLOC);
    *value = 42;

    if (TEST_OCR_ELS) {
        ocrElsUserSet(ELS_OFFSET, dbGuid);
        someUserFunction(dbGuid);
    }

    ocrShutdown();
    return NULL_GUID;
}
// Creates a datablock of NB_ELEM_DB elements of TYPE_ELEM_DB, destroys it
// right away, and shuts the runtime down. Returns NULL_GUID.
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    ocrGuid_t dbGuid;
    void *dbPtr;

    // Create the datablock ...
    ocrDbCreate(&dbGuid, &dbPtr, sizeof(TYPE_ELEM_DB) * NB_ELEM_DB, 0, NULL_HINT, NO_ALLOC);
    // ... and destroy it again without touching its contents.
    ocrDbDestroy(dbGuid);

    ocrShutdown();
    return NULL_GUID;
}
// Step B: increments each of the first paramv[0] elements of the u64 array in
// slot 0 by one, then shuts the runtime down. Returns NULL_GUID.
ocrGuid_t stepB_edt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    u64 *values = depv[0].ptr;
    u64 count = paramv[0];
    u64 idx;

    // Step B processing
    for (idx = 0; idx < count; idx++) {
        values[idx] += 1;
    }

    // This is the last EDT to execute
    ocrShutdown();
    return NULL_GUID;
}
// This edt is triggered when the output event of the other edt is satisfied by the runtime ocrGuid_t terminateEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { // TODO shouldn't be doing that... but need more support from output events to get a 'return' value from an edt assert(depv[0].guid != NULL_GUID); u64 * array = (u64*)depv[0].ptr; u64 i = 0; while (i < N) { assert(array[i] == i); i++; } printf("Everything went OK\n"); ocrShutdown(); // This is the last EDT to execute, terminate return NULL_GUID; }
// Checks that the datablock in slot 0 holds the sequence 1, 2, 3, ... over
// its first NB_ELEM_DB elements, logs the outcome, and shuts the runtime
// down. Returns NULL_GUID.
ocrGuid_t checkerEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    ocrGuid_t dbGuid = (ocrGuid_t) depv[0].guid;
    PRINTF("[remote] checkerEdt: executing, depends on remote DB guid "GUIDF" \n", GUIDA(dbGuid));

    TYPE_ELEM_DB * data = (TYPE_ELEM_DB *) depv[0].ptr;
    TYPE_ELEM_DB v = 1;
    int i;
    for (i = 0; i < NB_ELEM_DB; i++) {
        // The expected value advances inside the assertion expression.
        ASSERT (data[i] == v++);
    }

    PRINTF("[remote] checkerEdt: DB/SA copy checked\n");

    ocrShutdown();
    return NULL_GUID;
}
// Adds paramv[0] (offset) to each of the first paramv[1] (size) elements of
// the u64 array in slot 0 and writes the result to "basicIO_output.txt":
// one decimal value per line normally, raw binary when TG_ARCH is defined.
//
// This is the last EDT to execute, so it shuts the runtime down on every
// path. Returns NULL_GUID.
ocrGuid_t add_edt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    // Keep the full 64-bit parameter values; narrowing them would silently
    // truncate large offsets and sizes.
    u64 offset = paramv[0];
    u64 size = paramv[1];
    u64 * data = depv[0].ptr;
    u64 i;

    for(i=0;i<size;i++) {
        data[i]+=offset;
    }

    FILE *out = fopen("basicIO_output.txt","w");
    if( !out ) {
        // Writing through a NULL stream would crash; report and stop instead.
        PRINTF("Cannot open file: %s\n", "basicIO_output.txt");
        ocrShutdown();
        return NULL_GUID;
    }

#ifndef TG_ARCH
    for(i=0;i<size;i++) {
        fprintf(out,"%llu\n",data[i]);
    }
#else
    // Write exactly 'size' elements of sizeof(u64) bytes each.
    fwrite(data,sizeof(u64),size,out);
#endif
    fclose(out);

    ocrShutdown(); // This is the last EDT to execute
    return NULL_GUID;
}
// Demonstrates arena allocation inside OCR datablocks:
//   1. creates a datablock, initializes it as an arena and allocates a
//      10x10 Grid2D in it;
//   2. copies the whole arena byte-for-byte into a second datablock and
//      fetches the grid from the copy via GetArenaRoot;
//   3. zeroes the original arena and reads the element from both places;
//   4. switches the current arena to the copy and allocates an array of
//      five Grid2D objects there.
// Shuts the runtime down and returns NULL_GUID.
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    // Set up arena datablock
    void *arenaPtr;
    ocrGuid_t arenaGuid;
    ocrDbCreate(&arenaGuid, &arenaPtr, ARENA_SIZE, DB_PROP_NONE, NULL_GUID, NO_ALLOC);
    Ocr::InitializeArena(arenaPtr, ARENA_SIZE);

    // Use arena as current allocator backing-datablock
    Ocr::SetCurrentArena(arenaPtr);

    // Allocate a grid in the datablock
    Grid2D &grid = *Ocr::New<Grid2D>(10,10);

    // Is the grid in the datablock?
    void *gridAddr = &grid;
    void *arenaEnd = (char*)arenaPtr + ARENA_SIZE;
    assert(arenaPtr <= gridAddr && gridAddr <= arenaEnd);

    // Read current grid
    double *data = &grid.at(5,6);
    PRINTF("Item at orig (5, 6) = %.1f (@ %p)\n", *data, data);

    // Copy current grid
    void *copyPtr;
    ocrGuid_t copyGuid;
    ocrDbCreate(&copyGuid, &copyPtr, ARENA_SIZE, DB_PROP_NONE, NULL_GUID, NO_ALLOC);
    memcpy(copyPtr, arenaPtr, ARENA_SIZE);
    Grid2D &grid2 = Ocr::GetArenaRoot<Grid2D>(copyPtr);

    // Wipe old grid; 'data' still points into the original arena, so this
    // read shows the zeroed storage.
    memset(arenaPtr, 0, ARENA_SIZE);
    PRINTF("Wiped original: val = %.1f (@ %p)\n", *data, data);

    // Read new grid
    data = &grid2.at(5,6);
    PRINTF("Item at orig (5, 6) = %.1f (@ %p)\n", *data, data);

    // Update current arena to the copy (since we clobbered the original)
    Ocr::SetCurrentArena(copyPtr);

    // Try a non-scalar array
    Grid2D *grids = Ocr::NewArray<Grid2D>(5);
    PRINTF("Grid of grids (1,2,3) = %.1f\n", grids[1].at(2,3));
    PRINTF("Grid of grids (2,3,4) = %.1f\n", grids[2].at(3,4));

    // Done
    ocrShutdown();
    return NULL_GUID;
}
// Logs the GUID of the datablock received in slot 0 and shuts the runtime
// down. The datablock contents are not read. Returns NULL_GUID.
ocrGuid_t readerEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    ocrGuid_t receivedDbGuid;
    receivedDbGuid = (ocrGuid_t) depv[0].guid;

    PRINTF("[remote] readerEdt: executing, depends on remote DB guid "GUIDF" \n", GUIDA(receivedDbGuid));

    ocrShutdown();
    return NULL_GUID;
}
// Placeholder entry EDT: prints a fixed notice and shuts the runtime down.
// None of the EDT arguments are read. Returns NULL_GUID.
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    (void) paramc;
    (void) paramv;
    (void) depc;
    (void) depv;

    PRINTF("No RT API\n");

    ocrShutdown();
    return NULL_GUID;
}
// Entry EDT of the FFT example that keeps x_in, X_real and X_imag in one
// contiguous datablock.
//
// Parses the program arguments carried by depv[0], creates the EDT templates
// and a datablock of 3*N floats (the first N zeroed, with x[1] = 1), then
// creates 'iterations' finish-EDTs from the iteration template. The wiring
// loop gives each iteration EDT the datablock in slot 0 and, in slot 1, the
// output event of the iteration created just before it (NULL_GUID for the
// first one), so the iterations are chained one after another. The print EDT
// depends on the output event of the last iteration, or on the event returned
// by setUpVerify() when verification is requested.
//
// Returns NULL_GUID. On an option-parsing failure, or when zero iterations
// are requested, the runtime is shut down before returning.
extern "C" ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    u64 argc = getArgc(depv[0].ptr);
    int i;
    char *argv[argc];
    for(i=0;i<argc;i++) {
        argv[i] = getArgv(depv[0].ptr,i);
    }

    u64 N;
    u64 iterations;
    bool verify;
    bool verbose;
    bool printResults;
    u64 serialBlockSize = SERIAL_BLOCK_SIZE_DEFAULT;
    if(!parseOptions(argc,argv,&N,&verify,&iterations,&verbose,&printResults,&serialBlockSize)) {
        printHelp(argv,true);
        ocrShutdown();
        return NULL_GUID;
    }
    if(iterations == 0) {
        // With no iteration EDTs the event stack below would be empty and
        // calling top() on it is undefined behavior.
        PRINTF("Number of iterations must be at least 1\n");
        ocrShutdown();
        return NULL_GUID;
    }
    if(verbose) {
        for(i=0;i<argc;i++) {
            PRINTF("argv[%d]: %s\n",i,argv[i]);
        }
        // 'iterations' is a u64; print it with the same conversion this
        // function already uses for N.
        PRINTF("Running %lu iterations\n",iterations);
    }

    ocrGuid_t iterationTempGuid,startTempGuid,endTempGuid,printTempGuid,endSlaveTempGuid;
    ocrEdtTemplateCreate(&iterationTempGuid, &fftIterationEdt, 6, 2);
    ocrEdtTemplateCreate(&startTempGuid, &fftStartEdt, 9, 1);
    ocrEdtTemplateCreate(&endTempGuid, &fftEndEdt, 9, 3);
    ocrEdtTemplateCreate(&endSlaveTempGuid, &fftEndSlaveEdt, 5, 1);
    ocrEdtTemplateCreate(&printTempGuid, &finalPrintEdt, 3, 2);

    // x_in, X_real, and X_imag in a contiguous block
    float *x;
    ocrGuid_t dataGuid;
    // TODO: OCR cannot handle large datablocks
    DBCREATE(&dataGuid, (void **) &x, sizeof(float) * N * 3, 0, NULL_GUID, NO_ALLOC);
    if(verbose) {
        PRINTF("Datablock of size %lu (N=%lu) created\n",sizeof(float)*N*3,N);
    }

    for(i=0;i<N;i++) {
        x[i] = 0;
    }
    x[1] = 1;

    std::stack<ocrGuid_t> edtStack;
    std::stack<ocrGuid_t> eventStack;
    u64 edtParamv[6] = { startTempGuid, endTempGuid, endSlaveTempGuid, N, verbose, serialBlockSize };

    ocrGuid_t edtGuid, printEdtGuid, edtEventGuid;

    for(i=1;i<=iterations;i++) {
        ocrEdtCreate(&edtGuid, iterationTempGuid, EDT_PARAM_DEF, edtParamv, EDT_PARAM_DEF, NULL_GUID, EDT_PROP_FINISH, NULL_GUID, &edtEventGuid);
        edtStack.push(edtGuid);
        eventStack.push(edtEventGuid);
    }

    // Output event of the last iteration EDT.
    edtEventGuid = eventStack.top();
    if(verify) {
        edtEventGuid = setUpVerify(dataGuid, NULL_GUID, NULL_GUID, N, edtEventGuid);
    }

    u64 printParamv[3] = { N, verbose, printResults };
    ocrGuid_t finishDependencies[2] = { edtEventGuid, dataGuid };
    ocrEdtCreate(&printEdtGuid, printTempGuid, EDT_PARAM_DEF, printParamv, EDT_PARAM_DEF, finishDependencies, EDT_PROP_NONE, NULL_GUID, NULL);

    // Drop the last iteration's event: each EDT waits on the event of the
    // iteration created before it, not on its own.
    eventStack.pop();
    while(!edtStack.empty()) {
        edtGuid = edtStack.top();
        if(!eventStack.empty()) {
            edtEventGuid = eventStack.top();
            // Pop only when there is something to pop; the event stack is one
            // entry shorter than the EDT stack, and pop() on an empty
            // std::stack is undefined behavior.
            eventStack.pop();
        } else {
            // First iteration: no predecessor to wait on.
            edtEventGuid = NULL_GUID;
        }
        ocrAddDependence(dataGuid, edtGuid, 0, DB_MODE_ITW);
        ocrAddDependence(edtEventGuid, edtGuid, 1, DB_MODE_RO);
        edtStack.pop();
    }

    return NULL_GUID;
}
// Logs that it is running and shuts the runtime down.
// None of the EDT arguments are read. Returns NULL_GUID.
ocrGuid_t remoteEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    (void) paramc;
    (void) paramv;
    (void) depc;
    (void) depv;

    PRINTF("[remote] RemoteEdt: executing\n");

    ocrShutdown();
    return NULL_GUID;
}
// Minimal entry EDT: shuts the runtime down immediately.
// None of the EDT arguments are read. Returns NULL_GUID.
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    (void) paramc;
    (void) paramv;
    (void) depc;
    (void) depv;

    ocrShutdown();

    return NULL_GUID;
}
// Last EDT to execute: its only job is to terminate the runtime.
// None of the EDT arguments are read. Returns NULL_GUID.
ocrGuid_t taskForEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    (void) paramc;
    (void) paramv;
    (void) depc;
    (void) depv;

    ocrShutdown();

    return NULL_GUID;
}
// Last EDT to execute: reports success and terminates the runtime.
// None of the EDT arguments are read. Returns NULL_GUID.
ocrGuid_t terminateEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    (void) paramc;
    (void) paramv;
    (void) depc;
    (void) depv;

    PRINTF("Everything went OK\n");

    ocrShutdown();
    return NULL_GUID;
}
// Entry EDT of the FFT example that keeps the input, the real output and the
// imaginary output in three separate datablocks.
//
// Parses the program arguments carried by depv[0], creates the EDT templates
// and three zeroed N-float datablocks (with x_in[1] = 1), then creates
// 'iterations' finish-EDTs from the iteration template; each one receives its
// 1-based iteration number as its last parameter. The wiring loop gives each
// iteration EDT the three datablocks in slots 0-2 and, in slot 3, the output
// event of the iteration created just before it (NULL_GUID for the first
// one). The print EDT depends on the output event of the last iteration (or
// on the event returned by setUpVerify() when verification is requested),
// the three datablocks, and a datablock holding the start time.
//
// Returns NULL_GUID. On an option-parsing failure, or when zero iterations
// are requested, the runtime is shut down before returning.
extern "C" ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    u64 argc = getArgc(depv[0].ptr);
    int i;
    char *argv[argc];
    for(i=0;i<argc;i++) {
        argv[i] = getArgv(depv[0].ptr,i);
    }

    u64 N;
    u64 iterations;
    bool verify;
    bool verbose;
    bool printResults;
    u64 serialBlockSize = SERIAL_BLOCK_SIZE_DEFAULT;
    if(!parseOptions(argc,argv,&N,&verify,&iterations,&verbose,&printResults,&serialBlockSize)) {
        printHelp(argv,true);
        ocrShutdown();
        return NULL_GUID;
    }
    if(iterations == 0) {
        // With no iteration EDTs the event stack below would be empty and
        // calling top() on it is undefined behavior.
        PRINTF("Number of iterations must be at least 1\n");
        ocrShutdown();
        return NULL_GUID;
    }
    if(verbose) {
        for(i=0;i<argc;i++) {
            PRINTF("argv[%d]: %s\n",i,argv[i]);
        }
    }
    if(iterations > 1 && verbose) {
        // 'iterations' is a u64; print it with the same conversion this
        // function already uses for N.
        PRINTF("Running %lu iterations\n",iterations);
    }

    ocrGuid_t startTempGuid,endTempGuid,printTempGuid,endSlaveTempGuid,iterationTempGuid;
    ocrEdtTemplateCreate(&iterationTempGuid, &fftIterationEdt, 7, 4);
    ocrEdtTemplateCreate(&startTempGuid, &fftStartEdt, 9, 3);
    ocrEdtTemplateCreate(&endTempGuid, &fftEndEdt, 9, 5);
    ocrEdtTemplateCreate(&endSlaveTempGuid, &fftEndSlaveEdt, 5, 3);
    ocrEdtTemplateCreate(&printTempGuid, &finalPrintEdt, 3, 5);

    float *x_in;
    // Output for the FFT
    float *X_real;
    float *X_imag;
    ocrGuid_t dataInGuid,dataRealGuid,dataImagGuid,timeDataGuid;
    // TODO: OCR cannot handle large datablocks
    DBCREATE(&dataInGuid, (void **) &x_in, sizeof(float) * N, 0, NULL_GUID, NO_ALLOC);
    DBCREATE(&dataRealGuid, (void **) &X_real, sizeof(float) * N, 0, NULL_GUID, NO_ALLOC);
    DBCREATE(&dataImagGuid, (void **) &X_imag, sizeof(float) * N, 0, NULL_GUID, NO_ALLOC);
    if(verbose) {
        PRINTF("3 Datablocks of size %lu (N=%lu) created\n",sizeof(float)*N,N);
    }

    for(i=0;i<N;i++) {
        x_in[i] = 0;
        X_real[i] = 0;
        X_imag[i] = 0;
    }
    x_in[1] = 1;

    ocrGuid_t edtGuid, printEdtGuid, edtEventGuid;

    std::stack<ocrGuid_t> edtStack;
    std::stack<ocrGuid_t> eventStack;
    edtEventGuid = NULL_GUID;

    for(i=1;i<=iterations;i++) {
        u64 edtParamv[7] = { startTempGuid, endTempGuid, endSlaveTempGuid, N, verbose, serialBlockSize, i };
        ocrEdtCreate(&edtGuid, iterationTempGuid, EDT_PARAM_DEF, edtParamv, EDT_PARAM_DEF, NULL_GUID, EDT_PROP_FINISH, NULL_GUID, &edtEventGuid);
        edtStack.push(edtGuid);
        eventStack.push(edtEventGuid);
    }

    // Output event of the last iteration EDT.
    edtEventGuid = eventStack.top();
    if(verify) {
        edtEventGuid = setUpVerify(dataInGuid, dataRealGuid, dataImagGuid, N, edtEventGuid);
    }

    // Record the start time in a datablock so the print EDT can compute the
    // elapsed time.
    double *startTime;
    DBCREATE(&timeDataGuid, (void **) &startTime, sizeof(double), 0, NULL_GUID, NO_ALLOC);
    *startTime = mysecond();

    u64 printParamv[3] = { N, verbose, printResults };
    // Create finish EDT, with dependence on last EDT
    ocrGuid_t finishDependencies[5] = { edtEventGuid, dataInGuid, dataRealGuid, dataImagGuid, timeDataGuid };
    ocrEdtCreate(&printEdtGuid, printTempGuid, EDT_PARAM_DEF, printParamv, EDT_PARAM_DEF, finishDependencies, EDT_PROP_NONE, NULL_GUID, NULL);

    // Drop the last iteration's event: each EDT waits on the event of the
    // iteration created before it, not on its own.
    eventStack.pop();
    while(!edtStack.empty()) {
        edtGuid = edtStack.top();
        if(!eventStack.empty()) {
            edtEventGuid = eventStack.top();
            // Pop only when there is something to pop; the event stack is one
            // entry shorter than the EDT stack, and pop() on an empty
            // std::stack is undefined behavior.
            eventStack.pop();
        } else {
            // First iteration: no predecessor to wait on.
            edtEventGuid = NULL_GUID;
        }
        ocrAddDependence(dataInGuid, edtGuid, 0, DB_MODE_RO);
        ocrAddDependence(dataRealGuid, edtGuid, 1, DB_MODE_ITW);
        ocrAddDependence(dataImagGuid, edtGuid, 2, DB_MODE_ITW);
        ocrAddDependence(edtEventGuid, edtGuid, 3, DB_MODE_RO);
        edtStack.pop();
    }

    return NULL_GUID;
}
// Emits one matrix element according to the output selection level:
//   0: text to stdout            1: text to 'out'
//   2: binary to 'out'           3: text to 'out' and stdout
//   4: binary to 'out' and text to stdout
// Any other level emits nothing.
static void wrapUpEmitValue(FILE *out, u32 outSelLevel, const double *value) {
    switch(outSelLevel) {
    case 0:
        printf("%lf ", *value);
        break;
    case 1:
        fprintf(out, "%lf ", *value);
        break;
    case 2:
        fwrite(value, sizeof(double), 1, out);
        break;
    case 3:
        fprintf(out, "%lf ", *value);
        printf("%lf ", *value);
        break;
    case 4:
        fwrite(value, sizeof(double), 1, out);
        printf("%lf ", *value);
        break;
    default:
        break;
    }
}

// Final task of the tiled Cholesky factorization: writes the lower-triangular
// result and shuts the runtime down.
//
// paramv[0] = numTiles, paramv[1] = tileSize, paramv[2] = outSelLevel.
// depv holds the tiles of the lower triangle; tile (i, j) with j <= i is at
// index i*(i+1)/2 + j.
//
// When outSelLevel is 5, a timestamp is first appended to
// "ocr_mkl_cholesky_stats.csv" and the level is then treated as 2 (binary
// file). The result is written row by row; within a diagonal tile only the
// elements up to and including the diagonal are emitted.
//
// Returns NULL_GUID. If either file cannot be opened, an error is printed and
// the runtime is shut down without writing the result.
ocrGuid_t wrap_up_task ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    u32 i, j, i_b, j_b;
    double* temp;

    u64 *func_args = paramv;
    u32 numTiles = (u32) func_args[0];
    u32 tileSize = (u32) func_args[1];
    u32 outSelLevel = (u32) func_args[2];

    struct timeval a;
    if(outSelLevel == 5) {
        FILE* outCSV = fopen("ocr_mkl_cholesky_stats.csv", "a");
        if( !outCSV ) {
            PRINTF("Cannot find file: %s\n", "ocr_mkl_cholesky_stats.csv");
            ocrShutdown();
            return NULL_GUID;
        }

        gettimeofday(&a, 0);
        fprintf(outCSV, "%f\n", (a.tv_sec*1000000+a.tv_usec)*1.0/1000000);
        fclose(outCSV);
        // Level 5 = timing data plus the binary-file output of level 2.
        outSelLevel = 2;
    }

    FILE* out = fopen("ocr_mkl_cholesky.out", "w");
    if( !out ) {
        // Every later fprintf/fwrite/fclose would dereference a NULL stream.
        PRINTF("Cannot open file: %s\n", "ocr_mkl_cholesky.out");
        ocrShutdown();
        return NULL_GUID;
    }

    for ( i = 0; i < numTiles; ++i ) {
        for( i_b = 0; i_b < tileSize; ++i_b) {
            for ( j = 0; j <= i; ++j ) {
                temp = (double*) (depv[i*(i+1)/2+j].ptr);
                // Off-diagonal tiles contribute a full row; the diagonal tile
                // contributes only columns 0..i_b of row i_b.
                u32 rowLen = (i != j) ? tileSize : i_b + 1;
                for(j_b = 0; j_b < rowLen; ++j_b) {
                    wrapUpEmitValue(out, outSelLevel, &temp[i_b*tileSize+j_b]);
                }
            }
        }
    }
    fclose(out);

    ocrShutdown();

    return NULL_GUID;
}
ocrGuid_t mainEdt(u32 paramc, u64 *paramv, u32 depc, ocrEdtDep_t depv[]) { u32 matrixSize = -1; u32 tileSize = -1; u32 numTiles = -1; u32 i, j, k, c; u32 outSelLevel = 2; double **matrix, ** temp; u64 argc; void *programArg = depv[0].ptr; argc = getArgc(programArg); char *nparamv[argc]; char *fileNameIn, *fileNameOut = "ocr_mkl_cholesky.out"; for (i=0; i< argc; i++) { nparamv[i] = getArgv(programArg, i); } if ( argc == 1) { PRINTF("OCR-MKL Cholesky\n"); PRINTF("__________________________________________________________________________________________________\n"); PRINTF("Solves an OCR version of a Tiled Cholesky Decomposition with all math kernels using Intel MKL only\n\n"); PRINTF("Usage:\n"); PRINTF("\tocr_mkl_cholesky.exe {Arguments}\n\n"); PRINTF("Arguments:\n"); PRINTF("\t--ds -- Specify the Size of the Input Matrix\n"); PRINTF("\t--ts -- Specify the Tile Size\n"); PRINTF("\t--fi -- Specify the Input File Name of the Matrix\n"); // PRINTF("\t--fo -- Specify an Output File Name (default: ocr_mkl_cholesky.out)\n"); PRINTF("\t--ol -- Output Selection Level:\n"); PRINTF("\t\t0: Print solution to stdout\n"); PRINTF("\t\t1: Write solution to text file\n"); PRINTF("\t\t2: Write solution to binary file (default)\n"); PRINTF("\t\t3: Write solution to text file and print to stdout\n"); PRINTF("\t\t4: Write solution to binary file and print to stdout\n"); PRINTF("\t\t5: Write algorithm timing data to ocr_mkl_cholesky_stats.csv and write solution to binary file\n"); ocrShutdown(); return NULL_GUID; } else { // Reads in 4 arguments, input matrix file name, output matrix filename, datasize, and tilesize while (1) { static struct option long_options[] = { {"ds", required_argument, 0, 'a'}, {"ts", required_argument, 0, 'b'}, {"fi", required_argument, 0, 'c'}, {"fo", required_argument, 0, 'd'}, {"ol", required_argument, 0, 'e'}, {0, 0, 0, 0} }; u32 option_index = 0; c = getopt_long(argc, nparamv, "a:b:c:d:e", long_options, &option_index); if (c == -1) // Detect the end of the 
options break; switch (c) { case 'a': //PRINTF("Option a: matrixSize with value '%s'\n", optarg); matrixSize = (u32) atoi(optarg); break; case 'b': //PRINTF("Option b: tileSize with value '%s'\n", optarg); tileSize = (u32) atoi(optarg); break; case 'c': //PRINTF("Option c: fileNameIn with value '%s'\n", optarg); fileNameIn = optarg; break; case 'd': //PRINTF("Option d: fileNameOut with value '%s'\n", optarg); fileNameOut = (char*) mkl_realloc(fileNameOut, sizeof(optarg)); strcpy(fileNameOut, optarg); break; case 'e': //PRINTF("Option e: outSelLevel with value '%s'\n", optarg); outSelLevel = (u32) atoi(optarg); break; default: PRINTF("ERROR: Invalid argument switch\n\n"); PRINTF("OCR-MKL Cholesky\n"); PRINTF("__________________________________________________________________________________________________\n"); PRINTF("Solves an OCR version of a Tiled Cholesky Decomposition with all math kernels using Intel MKL only\n\n"); PRINTF("Usage:\n"); PRINTF("\tocr_mkl_cholesky.exe {Arguments}\n\n"); PRINTF("Arguments:\n"); PRINTF("\t--ds -- Specify the Size of the Input Matrix\n"); PRINTF("\t--ts -- Specify the Tile Size\n"); PRINTF("\t--fi -- Specify the Input File Name of the Matrix\n"); // PRINTF("\t--fo -- Specify an Output File Name (default: ocr_mkl_cholesky.out)\n"); PRINTF("\t--ol -- Output Selection Level:\n"); PRINTF("\t\t0: Print solution to stdout\n"); PRINTF("\t\t1: Write solution to text file\n"); PRINTF("\t\t2: Write solution to binary file (default)\n"); PRINTF("\t\t3: Write solution to text file and print to stdout\n"); PRINTF("\t\t4: Write solution to binary file and print to stdout\n"); PRINTF("\t\t5: Write algorithm timing data to ocr_mkl_cholesky_stats.csv and write solution to binary file\n"); ocrShutdown(); return NULL_GUID; } } } if(matrixSize == -1 || tileSize == -1) { PRINTF("Must specify matrix size and tile size\n"); ocrShutdown(); return NULL_GUID; } else if(matrixSize % tileSize != 0) { PRINTF("Incorrect tile size %d for the matrix of size %d 
\n", tileSize, matrixSize); ocrShutdown(); return NULL_GUID; } numTiles = matrixSize/tileSize; PRINTF("Matrixsize %d, tileSize %d\n", matrixSize, tileSize); #ifndef TG_ARCH struct timeval a; if(outSelLevel == 5) { FILE* outCSV = fopen("ocr_mkl_cholesky_stats.csv", "r"); if( !outCSV ) { outCSV = fopen("ocr_mkl_cholesky_stats.csv", "w"); if( !outCSV ) { PRINTF("Cannot find file: %s\n", "ocr_mkl_cholesky_stats.csv"); ocrShutdown(); return NULL_GUID; } fprintf(outCSV, "MatrixSize,TileSize,NumTile,PreAllocTime,PreAlgorithmTime,PostAlgorithmTime\n"); } else { outCSV = fopen("ocr_mkl_cholesky_stats.csv", "a+"); } fprintf(outCSV, "%d,%d,%d,", matrixSize, tileSize, numTiles); gettimeofday(&a, 0); fprintf(outCSV, "%f,", (a.tv_sec*1000000+a.tv_usec)*1.0/1000000); fclose(outCSV); } FILE *in; in = fopen(fileNameIn, "r"); if( !in ) { PRINTF("Cannot find file: %s\n", fileNameIn); ocrShutdown(); return NULL_GUID; } matrix = readMatrix(matrixSize, in); if(outSelLevel == 5) { FILE* outCSV = fopen("ocr_mkl_cholesky_stats.csv", "a"); if( !outCSV ) { PRINTF("Cannot find file: %s\n", "ocr_mkl_cholesky_stats.csv"); ocrShutdown(); return NULL_GUID; } gettimeofday(&a, 0); fprintf(outCSV, "%f,", (a.tv_sec*1000000+a.tv_usec)*1.0/1000000); fclose(outCSV); } #endif ocrGuid_t*** lkji_event_guids = allocateCreateEvents(numTiles); ocrGuid_t templateSeq, templateTrisolve, templateUpdateNonDiag, templateUpdate, templateWrap; ocrEdtTemplateCreate(&templateSeq, lapacke_dpotrf_task, 3, 1); ocrEdtTemplateCreate(&templateTrisolve, cblas_dtrsm_task, 4, 2); ocrEdtTemplateCreate(&templateUpdateNonDiag, cblas_dgemm_task, 5, 3); ocrEdtTemplateCreate(&templateUpdate, cblas_dsyrk_task, 5, 2); ocrEdtTemplateCreate(&templateWrap, wrap_up_task, 3, (numTiles+1)*numTiles/2); // PRINTF("Going to satisfy initial tiles\n"); #ifdef TG_ARCH satisfyInitialTiles(numTiles, tileSize, lkji_event_guids); #else satisfyInitialTiles(numTiles, tileSize, matrix, lkji_event_guids); #endif for ( k = 0; k < numTiles; ++k ) { // 
PRINTF("Prescribing sequential task %d\n", k); lapacke_dpotrf_task_prescriber(templateSeq, k, tileSize, lkji_event_guids); for( j = k + 1 ; j < numTiles ; ++j ) { cblas_dtrsm_task_prescriber (templateTrisolve, k, j, tileSize, lkji_event_guids); for( i = k + 1 ; i < j ; ++i ) { cblas_dgemm_task_prescriber (templateUpdateNonDiag, k, j, i, tileSize, lkji_event_guids); } cblas_dsyrk_task_prescriber (templateUpdate, k, j, i, tileSize, lkji_event_guids); } } wrap_up_task_prescriber (templateWrap, numTiles, tileSize, outSelLevel, lkji_event_guids); // PRINTF("Wrapping up mainEdt\n"); return NULL_GUID; }
ocrGuid_t wrapupEdt(){ ocrShutdown(); }