/*
 * Publish the boundary values of the tiled score matrix.
 *
 * For tile [0][0] the bottom-right event is satisfied with a single 0.
 * For every tile [0][col], col in 1..n_tiles_width, the bottom-row and
 * bottom-right events are satisfied; for every tile [row][0], row in
 * 1..n_tiles_height, the right-column and bottom-right events are satisfied.
 * Each value is a multiple of GAP_PENALTY proportional to the 1-based cell
 * position along the border.
 *
 * tile_matrix    tile descriptors holding the event GUIDs to satisfy
 * n_tiles_width  number of tile columns (border column 0 excluded)
 * n_tiles_height number of tile rows (border row 0 excluded)
 * tile_width     cells per tile row
 * tile_height    cells per tile column
 */
static void initialize_border_values( Tile_t** tile_matrix, int n_tiles_width, int n_tiles_height, int tile_width, int tile_height ) {
    ocrGuid_t block_guid;
    void* block_ptr;
    int* cells;
    int row, col, k;

    /* Corner tile [0][0]: a one-int datablock holding 0. */
    ocrDbCreate( &block_guid, &block_ptr, sizeof(int), FLAGS, NULL, NO_ALLOC );
    cells = (int*)block_ptr;
    cells[0] = 0;
    ocrEventSatisfy(tile_matrix[0][0].bottom_right_event_guid, block_guid);

    /* Top border: tiles [0][1 .. n_tiles_width]. */
    for ( col = 1; col <= n_tiles_width; ++col ) {
        /* Bottom row of the border tile. */
        ocrDbCreate( &block_guid, &block_ptr, sizeof(int)*tile_width, FLAGS, NULL, NO_ALLOC );
        cells = (int*)block_ptr;
        for ( k = 0; k < tile_width; ++k ) {
            cells[k] = GAP_PENALTY*((col-1)*tile_width+k+1);
        }
        ocrEventSatisfy(tile_matrix[0][col].bottom_row_event_guid, block_guid);

        /* Bottom-right element (original note: needed to handle tile size 2). */
        ocrDbCreate( &block_guid, &block_ptr, sizeof(int), FLAGS, NULL, NO_ALLOC );
        cells = (int*)block_ptr;
        cells[0] = GAP_PENALTY*(col*tile_width);
        ocrEventSatisfy(tile_matrix[0][col].bottom_right_event_guid, block_guid);
    }

    /* Left border: tiles [1 .. n_tiles_height][0]. */
    for ( row = 1; row <= n_tiles_height; ++row ) {
        /* Right column of the border tile. */
        ocrDbCreate( &block_guid, &block_ptr, sizeof(int)*tile_height, FLAGS, NULL, NO_ALLOC );
        cells = (int*)block_ptr;
        for ( k = 0; k < tile_height; ++k ) {
            cells[k] = GAP_PENALTY*((row-1)*tile_height+k+1);
        }
        ocrEventSatisfy(tile_matrix[row][0].right_column_event_guid, block_guid);

        /* Bottom-right element (original note: needed to handle tile size 2). */
        ocrDbCreate( &block_guid, &block_ptr, sizeof(int), FLAGS, NULL, NO_ALLOC );
        cells = (int*)block_ptr;
        cells[0] = GAP_PENALTY*(row*tile_height);
        ocrEventSatisfy(tile_matrix[row][0].bottom_right_event_guid, block_guid);
    }
}
/*
 * Entry EDT: creates a sticky event, spawns taskForEdt with that event as its
 * only dependence (supplied through the dependence vector at creation time,
 * so no ocrAddDependence call is needed), then satisfies the event with a
 * datablock holding the integer 42.
 */
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    /* Event the child EDT waits on. */
    ocrGuid_t triggerEvt;
    ocrEventCreate(&triggerEvt, OCR_EVENT_STICKY_T, true);

    ocrGuid_t childDeps[1];
    childDeps[0] = triggerEvt;

    /* One parameter (value 32) and one dependence slot. */
    u32 childParamc = 1;
    u64 childParam = 32;

    ocrGuid_t childTpl;
    ocrGuid_t childEdt;
    ocrEdtTemplateCreate(&childTpl, taskForEdt, childParamc, 1 /*depc*/);
    ocrEdtCreate(&childEdt, childTpl,
                 EDT_PARAM_DEF, &childParam,
                 EDT_PARAM_DEF, /*depv=*/childDeps,
                 /*properties=*/0, NULL_HINT, /*outEvent=*/NULL);

    /* Payload delivered through the event. */
    ocrGuid_t payloadGuid;
    int *payload;
    ocrDbCreate(&payloadGuid, (void **) &payload, sizeof(int),
                /*flags=*/DB_PROP_NONE, /*location=*/NULL_HINT, NO_ALLOC);
    *payload = 42;

    ocrEventSatisfy(triggerEvt, payloadGuid);
    return NULL_GUID;
}
// This task solves a triangular matrix equation, which is the // second step in the tiled Cholesky factorization ocrGuid_t cblas_dtrsm_task ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { u64 *func_args = paramv; u32 k = (u32) func_args[0]; u32 j = (u32) func_args[1]; u32 tileSize = (u32) func_args[2]; ocrGuid_t out_lkji_jkkp1_event_guid = (ocrGuid_t) func_args[3]; // PRINTF("RUNNING trisolve (%d, %d)\n", k, j); double* aBlock = (double*) (depv[0].ptr); double* liBlock = (double*) (depv[1].ptr); ocrGuid_t out_lkji_jkkp1_db_guid; ocrGuid_t out_lkji_jkkp1_db_affinity = NULL_GUID; // Solve Ax = alpha B where alpha = 1 and A is liBlock triangular matrix output from // dpotrf and B is the next block to solve in factorization cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, tileSize, tileSize, 1.0, liBlock, tileSize, // A matrix to solve for x in A x = B aBlock, tileSize); // B matrix, solution put here ocrEventSatisfy(out_lkji_jkkp1_event_guid, depv[0].guid); return NULL_GUID; }
/*
 * Copy the lower-triangular tiles of `matrix` into freshly created datablocks
 * and satisfy lkji_event_guids[i][j][0] with each one.
 *
 * numTiles          number of tiles per matrix dimension
 * tileSize          edge length of a square tile
 * matrix            source matrix, addressed as matrix[row][col]
 * lkji_event_guids  events indexed [tileRow][tileCol][0] to satisfy
 */
inline static void satisfyInitialTiles(u32 numTiles, u32 tileSize, double** matrix, ocrGuid_t*** lkji_event_guids) {
    u32 tileRow, tileCol, r, c;

    for ( tileRow = 0; tileRow < numTiles; ++tileRow ) {
        for ( tileCol = 0; tileCol <= tileRow; ++tileCol ) {
            /* Datablock that will carry the tile, stored row after row. */
            ocrGuid_t tileGuid;
            ocrGuid_t tileAffinity = NULL_GUID;
            void* tileRaw;
            ocrDbCreate(&tileGuid, &tileRaw, sizeof(double)*tileSize*tileSize, FLAGS, tileAffinity, NO_ALLOC);
            double* tileData = (double*) tileRaw;

            /* Scratch datablock holding one pointer per tile row (2D view). */
            ocrGuid_t rowTableGuid;
            double** rowTable;
            ocrDbCreate(&rowTableGuid, (void *)&rowTable, sizeof(double*)*tileSize, FLAGS, NULL_GUID, NO_ALLOC);
            for ( r = 0; r < tileSize; ++r ) {
                rowTable[r] = tileData + r*tileSize;
            }

            /* Split the matrix into tiles: copy tile (tileRow, tileCol). */
            for ( r = 0; r < tileSize; ++r ) {
                for ( c = 0; c < tileSize; ++c ) {
                    rowTable[r][c] = matrix[tileRow*tileSize + r][tileCol*tileSize + c];
                }
            }

            ocrEventSatisfy(lkji_event_guids[tileRow][tileCol][0], tileGuid);
            ocrDbDestroy(rowTableGuid);
        }
    }
}
/*
 * Read the lower-triangular tiles from the file "inputfile" (raw doubles,
 * tileSize*tileSize per tile, tiles in (i, j<=i) order) into new datablocks
 * and satisfy lkji_event_guids[i][j][0] with each one.
 *
 * On a failed open or a short read an error is printed and the function
 * returns early; the events of the tiles not yet read stay unsatisfied.
 *
 * numTiles          number of tiles per matrix dimension
 * tileSize          edge length of a square tile
 * lkji_event_guids  events indexed [i][j][0] to satisfy
 */
inline static void satisfyInitialTiles(u32 numTiles, u32 tileSize, ocrGuid_t*** lkji_event_guids) {
    u32 i, j;
    ocrGuid_t db_guid;
    ocrGuid_t db_affinity = NULL_GUID; /* was passed uninitialized */
    void* temp_db;
    size_t tileBytes = sizeof(double)*tileSize*tileSize;

    /* The file holds raw doubles, so open it in binary mode. */
    FILE *fin = fopen("inputfile", "rb");
    if (fin == NULL) {
        PRINTF("Error opening input file\n");
        return; /* nothing to read; do not touch a NULL stream */
    }

    for ( i = 0 ; i < numTiles ; ++i ) {
        for ( j = 0 ; j <= i ; ++j ) {
            ocrDbCreate(&db_guid, &temp_db, tileBytes, FLAGS, db_affinity, NO_ALLOC);
            if (fread(temp_db, tileBytes, 1, fin) != 1) {
                /* Do not publish a partially filled tile. */
                PRINTF("Error reading tile (%u, %u) from input file\n", i, j);
                ocrDbRelease(db_guid);
                fclose(fin);
                return;
            }
            /* NOTE(review): the block is released after the satisfy, as in the
             * original; confirm against the OCR memory model whether the
             * release should come first. */
            ocrEventSatisfy(lkji_event_guids[i][j][0], db_guid);
            ocrDbRelease(db_guid);
        }
    }

    hal_fence();
    fclose(fin);
}
// This task computes the Cholesky factorization of a symmetric positive definite matrix... // This is the first step in the tile Cholesky factorization. ocrGuid_t lapacke_dpotrf_task ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { u32 info; u64 *func_args = paramv; u32 k = (u32) func_args[0]; u32 tileSize = (u32) func_args[1]; ocrGuid_t out_lkji_kkkp1_event_guid = (ocrGuid_t) func_args[2]; double* aBlock = (double*) (depv[0].ptr); // PRINTF("RUNNING sequential_cholesky %d with 0x%llx to satisfy\n", k, (u64)(out_lkji_kkkp1_event_guid)); ocrGuid_t out_lkji_kkkp1_db_guid; ocrGuid_t out_lkji_kkkp1_db_affinity = NULL_GUID; info = LAPACKE_dpotrf(LAPACK_ROW_MAJOR, 'L', tileSize, aBlock, tileSize ); if (info != 0) { if (info > 0) PRINTF("Matrix A is not Symmetric Positive Definite (SPD)"); else PRINTF("i-th parameter had an illegal value."); ASSERT(0); ocrShutdown(); return NULL_GUID; } ocrEventSatisfy(out_lkji_kkkp1_event_guid, depv[0].guid); return NULL_GUID; }
/*
 * Entry EDT: wires a single ONCE event to both dependence slots of one
 * taskForEdt instance, then satisfies that event with a datablock holding 42.
 */
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    u32 slot;

    /* Event feeding both slots of the child EDT. */
    ocrGuid_t sharedEvt;
    ocrEventCreate(&sharedEvt, OCR_EVENT_ONCE_T, EVT_PROP_TAKES_ARG);

    /* Child EDT: no parameters, two dependences, none given at creation. */
    ocrGuid_t childTpl;
    ocrGuid_t childEdt;
    ocrEdtTemplateCreate(&childTpl, taskForEdt, 0 /*paramc*/, 2 /*depc*/);
    ocrEdtCreate(&childEdt, childTpl,
                 EDT_PARAM_DEF, /*paramv=*/NULL,
                 EDT_PARAM_DEF, /*depv=*/NULL,
                 /*properties=*/0, NULL_HINT, /*outEvent=*/NULL);

    /* Register the same event on slot 0 and slot 1. */
    for (slot = 0; slot < 2; ++slot) {
        ocrAddDependence(sharedEvt, childEdt, slot, DB_MODE_CONST);
    }

    ocrGuid_t payloadGuid;
    int *payload;
    ocrDbCreate(&payloadGuid, (void **) &payload, sizeof(int),
                /*flags=*/DB_PROP_NONE, /*location=*/NULL_HINT, NO_ALLOC);
    *payload = 42;

    /* Fire the event; the datablock reaches both slots. */
    ocrEventSatisfy(sharedEvt, payloadGuid);
    return NULL_GUID;
}
// Takes two events and satisfy a result event //depv[0] db for domainSetup_t //depv[1] db for domainKernel_t ocrGuid_t combineKernelEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { // timestamp_t stopTimer; // get_time(&stopTimer); ocrGuid_t kernelEdtDoneEvt; // Result event kernelEdtDoneEvt.guid = paramv[0]; // Impl-specific here, we know to expect two DBs ocrGuid_t setupDb = depv[0].guid; domainSetup_t * setupDbPtr = depv[0].ptr; ocrGuid_t kernelDb = depv[1].guid; domainKernel_t * kernelDbPtr = depv[1].ptr; // kernelDbPtr->stopTimer=stopTimer; ocrGuid_t timeDbGuid; long * timeDbPtr; ocrDbCreate(&timeDbGuid, (void**) &timeDbPtr, sizeof(long), 0, NULL_HINT, NO_ALLOC); domainKernelCombine(setupDbPtr, kernelDbPtr, timeDbPtr); // PRINTF("combineEdt timeDbGuid=0x%lx\n", timeDbGuid); ocrDbRelease(timeDbGuid); ocrDbRelease(kernelDb); ocrDbRelease(setupDb); ocrDbDestroy(kernelDb); ocrDbDestroy(setupDb); ocrEventSatisfy(kernelEdtDoneEvt, timeDbGuid); return NULL_GUID; }
void test () { // Current thread is '0' and goes on with user code. ocrGuid_t eventGuid; ocrEventCreate(&eventGuid, OCR_EVENT_STICKY_T, true); // Creates the EDT ocrGuid_t edtGuid; ocrGuid_t taskForEdtTemplateGuid; ocrEdtTemplateCreate(&taskForEdtTemplateGuid, taskForEdt, 0 /*paramc*/, 1 /*depc*/); ocrEdtCreate(&edtGuid, taskForEdtTemplateGuid, EDT_PARAM_DEF, /*paramv=*/NULL, EDT_PARAM_DEF, /*depv=*/NULL, 0, NULL_GUID, NULL); // Register a dependence between an event and an edt ocrAddDependence(eventGuid, edtGuid, 0, DB_MODE_RO); int *k; ocrGuid_t db_guid; ocrDbCreate(&db_guid,(void **) &k, sizeof(int), /*flags=*/FLAGS, /*location=*/NULL_GUID, NO_ALLOC); *k = 42; ocrEventSatisfy(eventGuid, db_guid); }
// paramv[0]: event to satisfy when kernel is done // depv[0]: setupEdt completed (may carry a DB) ocrGuid_t kernelEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { // PRINTF("kernelEdt\n"); ocrGuid_t kernelEdtDoneEvt; kernelEdtDoneEvt.guid = paramv[0]; ocrGuid_t setupDb = depv[0].guid; domainSetup_t * setupDbPtr = depv[0].ptr; // The sub kernel done event ocrGuid_t subKernelDoneEvt; ocrEventCreate(&subKernelDoneEvt, OCR_EVENT_ONCE_T, true); // This EDT done event ocrGuid_t selfDoneEvt; ocrEventCreate(&selfDoneEvt, OCR_EVENT_ONCE_T, true); // Combine those in a combine EDT that satisfies kernelEdtDoneEvt //TODO same issue of allocating tpl every iteration ocrGuid_t curAffGuid; ocrAffinityGetCurrent(&curAffGuid); ocrGuid_t combEdtTplGuid; ocrEdtTemplateCreate(&combEdtTplGuid, combineKernelEdt, 1, 2); ocrGuid_t combineEdtGuid; combine(&combineEdtGuid, combEdtTplGuid, curAffGuid, selfDoneEvt, subKernelDoneEvt, kernelEdtDoneEvt); ocrEdtTemplateDestroy(combEdtTplGuid); timestamp_t timer; domainKernel(subKernelDoneEvt, setupDbPtr, &timer); // Satisfy self event with the timer information ocrEventSatisfy(selfDoneEvt, setupDb); return NULL_GUID; }
/*
 * Adds two integers and forwards the sum.
 *
 * depv[0], depv[1]: datablocks each holding one int operand
 * depv[2]:          datablock holding the GUID of the event to satisfy
 *
 * The sum is stored in a new datablock that is passed through the event;
 * all three input datablocks are destroyed afterwards. Returns 0.
 */
u8 summer(u32 paramc, u64 * params, void* paramv[], u32 depc, ocrEdtDep_t depv[]) {
    u32 slot;
    int *lhs = (int*)depv[0].ptr;
    int *rhs = (int*)depv[1].ptr;
    ocrGuid_t *doneEvt = (ocrGuid_t*)depv[2].ptr;

    /* Datablock for the result. */
    ocrGuid_t sumGuid;
    int *sum;
    ocrDbCreate(&sumGuid, (void**)&sum, sizeof(int), /*flags=*/0,
                /*location=*/NULL, NO_ALLOC);
    *sum = *lhs + *rhs;

    printf("I am summing %d (GUID: 0x%lx) and %d (GUID: 0x%lx) and passing along %d (GUID: 0x%lx)\n",
           *lhs, (u64)depv[0].guid,
           *rhs, (u64)depv[1].guid,
           *sum, (u64)sumGuid);

    /* Wake whoever waits on the result. */
    ocrEventSatisfy(*doneEvt, sumGuid);

    /* The inputs are no longer needed. */
    for (slot = 0; slot < 3; ++slot) {
        ocrDbDestroy(depv[slot].guid);
    }
    return 0;
}
/*
 * Completion step of the Fibonacci recursion.
 *
 * paramv[0]: event to satisfy once the sum is stored
 * paramv[1]: identifier used in log messages
 * depv[0], depv[1]: datablocks holding the two partial results
 * depv[2]: in/out datablock; its value on entry is logged, then it is
 *          overwritten with the sum and passed through the event
 */
ocrGuid_t complete(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    ocrGuid_t parentEvt = (ocrGuid_t)(u64)paramv[0];
    int my_ID = paramv[1];

    u32 *resultSlot = (u32*)depv[2].ptr;
    u32 first = *(u32*)depv[0].ptr;
    u32 second = *(u32*)depv[1].ptr;
    u32 requested = *resultSlot;

    PRINTF("r%d Done with %d (%d + %d)\n", my_ID, requested, first, second);

    /* The answer is returned in the third datablock. */
    *resultSlot = first + second;

    /* The two partial results are no longer needed. */
    ocrDbDestroy(depv[0].guid);
    ocrDbDestroy(depv[1].guid);

    /* Tell the parent's completion that this level is finished. */
    ocrEventSatisfy(parentEvt, depv[2].guid);
    return NULL_GUID;
}
int main (int argc, char ** argv) { ocrEdt_t fctPtrArray [1]; fctPtrArray[0] = &task_for_edt; ocrInit(&argc, argv, 1, fctPtrArray); // Current thread is '0' and goes on with user code. ocrGuid_t event_guid; ocrEventCreate(&event_guid, OCR_EVENT_STICKY_T, true); // Creates the EDT ocrGuid_t edt_guid; ocrEdtCreate(&edt_guid, task_for_edt, /*paramc=*/0, /*params=*/ NULL, /*paramv=*/NULL, /*properties=*/0, /*depc=*/1, /*depv=*/NULL); // Register a dependence between an event and an edt ocrAddDependence(event_guid, edt_guid, 0); // Schedule the EDT (will run when dependences satisfied) ocrEdtSchedule(edt_guid); int *k; ocrGuid_t db_guid; ocrDbCreate(&db_guid, (void **) &k, sizeof(int), /*flags=*/FLAGS, /*location=*/NULL, NO_ALLOC); *k = 42; ocrEventSatisfy(event_guid, db_guid); ocrCleanup(); return 0; }
// paramv[0]: continuation after iterations // depv[0]: info // depv[1]: done event for the work spawned by the iteration, carries the time DB ocrGuid_t iterationEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { ocrGuid_t iterationsDoneEvt; iterationsDoneEvt.guid = paramv[0]; ocrGuid_t infoGuid = depv[0].guid; info_t * info = (info_t *) depv[0].ptr; ocrGuid_t timeDbGuid = depv[1].guid; long * timePrevIt = (long *) depv[1].ptr; // PRINTF("iteration %d\n", info->i); if (timePrevIt != NULL) { info->timer += (*timePrevIt); ocrDbRelease(timeDbGuid); ocrDbDestroy(timeDbGuid); } if (info->i < info->max) { //TODO do a pipeline EDT u64 affCount = 0; ocrAffinityCount(AFFINITY_PD, &affCount); ocrGuid_t affinities[affCount]; ocrAffinityGet(AFFINITY_PD, &affCount, affinities); ocrGuid_t stageInit; ocrEventCreate(&stageInit, OCR_EVENT_ONCE_T, false); ocrGuid_t stagePrev = stageInit; u32 i = PIPE_START; while(i < (PIPE_START+PIPE_SZ)) { ocrGuid_t stageEdtDoneEvt; ocrEventCreate(&stageEdtDoneEvt, OCR_EVENT_ONCE_T, false); ocrGuid_t stageEdtGuid; //TODO I wonder if we shouldn't give the whole info to a functor and invoke that chain(&stageEdtGuid, info->edtTemplGuids[i], affinities[info->edtAffinities[i]], stagePrev, stageEdtDoneEvt); stagePrev = stageEdtDoneEvt; i++; } info->i+=1; ocrDbRelease(infoGuid); ocrGuid_t nextItEdtGuid; // 'iterationsDoneEvt' is passed on and on til the last iteration iterate(&nextItEdtGuid, info->edtTemplGuids[ITER_IDX], /*prev*/stagePrev, /*data*/infoGuid, /*next*/iterationsDoneEvt); // Start the pipeline ocrEventSatisfy(stageInit, NULL_GUID); } else { ocrEventSatisfy(iterationsDoneEvt, infoGuid); } return NULL_GUID; }
// For control dependence to sync up framework setup and user setup ocrGuid_t combineSetupEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { ocrGuid_t doneEvt; doneEvt.guid = paramv[0]; ocrGuid_t setupDbGuid = depv[0].guid; ocrDbRelease(setupDbGuid); ocrEventSatisfy(doneEvt, setupDbGuid); return NULL_GUID; }
/*
 * Logs that it is running, then satisfies (without data) the event whose GUID
 * is stored in the first slot of the datablock received on depv[0].
 */
ocrGuid_t remoteEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    PRINTF("[remote] RemoteEdt: executing\n");

    ocrGuid_t *payload = (ocrGuid_t *) depv[0].ptr;
    ocrGuid_t doneEvt = payload[0];

    // Disabled affinity sanity check, kept for reference:
    // ocrGuid_t expectedAffinityGuid = payload[1];
    // ocrGuid_t currentAffinity;
    // ocrAffinityGetCurrent(&currentAffinity);
    // ASSERT(expectedAffinityGuid == currentAffinity);

    ocrEventSatisfy(doneEvt, NULL_GUID);
    return NULL_GUID;
}
/*
 * Entry EDT of the basicIO example.
 *
 * Expects three program arguments: offset, size and a file name. Reads up to
 * `size` unsigned 64-bit values from the file into a new datablock, then
 * spawns add_edt (a FINISH EDT) with paramv = {offset, size} and the
 * datablock on dependence slot 0.
 *
 * Wrong argument count, a negative offset, a non-positive size or an
 * unreadable file print a message, shut the runtime down and return.
 * Elements that could not be read from the file are set to 0.
 */
ocrGuid_t mainEdt ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    void *programArg = depv[0].ptr;
    u64 argc = getArgc(programArg);
    if ( argc != 4 ) {
        PRINTF("Usage: ./basicIO offset size fileName \n");
        ocrShutdown();
        return NULL_GUID;
    }

    /* atoi yields int; reject values that would wrap when stored as u64. */
    int offsetArg = atoi(getArgv(programArg, 1));
    int sizeArg = atoi(getArgv(programArg, 2));
    if ( offsetArg < 0 || sizeArg <= 0 ) {
        PRINTF("Usage: ./basicIO offset size fileName \n");
        ocrShutdown();
        return NULL_GUID;
    }
    u64 offset = (u64) offsetArg;
    u64 size = (u64) sizeArg;

    u64 nparamv[2];
    nparamv[0] = offset;
    nparamv[1] = size;

    FILE *in = fopen(getArgv(programArg, 3), "r");
    if( !in ) {
        PRINTF("Cannot find file: %s\n", getArgv(programArg, 3));
        ocrShutdown();
        return NULL_GUID;
    }

    // The data could also be passed as a parameter; a datablock is used
    // here for demonstration purposes.
    // Create datablock to hold a block of 'size' elements.
    ocrGuid_t dataGuid;
    u64 *inputarr;
    ocrDbCreate(&dataGuid, (void**)&inputarr, sizeof(u64)*size,0,NULL_GUID, NO_ALLOC);

    u64 nRead = 0;
#ifndef TG_ARCH
    /* Stop at 'size' elements, at end of file, or at the first token that
     * is not a number (fscanf then returns 0, not EOF). */
    while ( nRead < size && fscanf(in,"%llu\n",&inputarr[nRead]) == 1 ) {
        ++nRead;
    }
#else
    nRead = fread(inputarr, sizeof(u64),size , in);
#endif
    /* Give the unread tail a defined value. */
    for ( ; nRead < size; ++nRead ) {
        inputarr[nRead] = 0;
    }
    fclose(in);

    ocrGuid_t addEdtTemplateGuid;
    ocrEdtTemplateCreate(&addEdtTemplateGuid, add_edt, 2 /*paramc*/, 1 /*depc*/);

    // Create the EDT not specifying the dependence vector at creation
    ocrGuid_t add_edt_guid;
    ocrEdtCreate(&add_edt_guid, addEdtTemplateGuid, EDT_PARAM_DEF, nparamv,1,NULL ,EDT_PROP_FINISH , NULL_GUID, NULL);

    ocrGuid_t triggerEventGuid;
    ocrEventCreate(&triggerEventGuid, OCR_EVENT_STICKY_T, true);
    ocrAddDependence(triggerEventGuid, add_edt_guid, 0, DB_MODE_EW);
    ocrEventSatisfy(triggerEventGuid, dataGuid);
    return NULL_GUID;
}
/*
 * Entry EDT: creates a sticky event and a FINISH instance of terminateEdt
 * with one dependence slot, connects the event to slot 0 and satisfies it
 * without data.
 */
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    ocrGuid_t e0;
    ocrEventCreate(&e0, OCR_EVENT_STICKY_T, false);

    /* The template leaves depc unknown; the instance fixes it to 1. */
    ocrGuid_t terminateEdtGuid;
    ocrGuid_t terminateEdtTemplateGuid;
    ocrEdtTemplateCreate(&terminateEdtTemplateGuid, terminateEdt, 0 /*paramc*/, EDT_PARAM_UNK /*depc*/);
    /* No dependence vector at creation: pass a null pointer, not a GUID. */
    ocrEdtCreate(&terminateEdtGuid, terminateEdtTemplateGuid,
                 EDT_PARAM_DEF, /*paramv=*/NULL,
                 1, /*depv=*/NULL,
                 /*properties=*/EDT_PROP_FINISH, NULL_GUID, /*outEvent=*/ NULL);

    ocrAddDependence(e0, terminateEdtGuid, 0, DB_MODE_RO);
    ocrEventSatisfy(e0, NULL_GUID);
    return NULL_GUID;
}
/*
 * Post-processing step: releases the datablock received on depv[0] and
 * forwards it through the event whose GUID value is in paramv[0].
 */
ocrGuid_t postEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    ocrGuid_t timeGuid = depv[0].guid;

    ocrGuid_t doneEvt;
    doneEvt.guid = paramv[0];

    //TODO (from the original author): possible issue with this EDT doing an
    //implicit release at its end concurrently with iterationEdt destroying
    //the datablock.
    ocrDbRelease(depv[0].guid);

    ocrEventSatisfy(doneEvt, timeGuid);
    return NULL_GUID;
}
/*
 * Creates a latch event counting NB_SATISFY satisfactions, stores its GUID in
 * the setup datablock (depv[0], domainSetup_t) as remoteLatchEvent, connects
 * the latch to dsetup->stopTimerEvt, releases the datablock and finally
 * satisfies dsetup->userSetupDoneEvt without data.
 */
ocrGuid_t remoteSetupUserEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    ocrGuid_t dsetupGuid = depv[0].guid;
    domainSetup_t * dsetup = (domainSetup_t *) depv[0].ptr;

    // Create 'remote' (local to here) latch event
    ocrGuid_t evtGuid;
    ocrEventParams_t params;
    params.EVENT_LATCH.counter = NB_SATISFY;
    // The parameter block is passed by address (the '&' had been lost to a
    // character-encoding error).
    ocrEventCreateParams(&evtGuid, OCR_EVENT_LATCH_T, false, &params);
    dsetup->remoteLatchEvent = evtGuid;

    // Setup callback for when the latch event fires
    ocrAddDependence(evtGuid, dsetup->stopTimerEvt, 0, DB_MODE_NULL);

    // Copy the event out before the release; dsetup is not read afterwards.
    ocrGuid_t userSetupDoneEvt = dsetup->userSetupDoneEvt;
    ocrDbRelease(dsetupGuid);

    // Global setup is done
    ocrEventSatisfy(userSetupDoneEvt, NULL_GUID);
    return NULL_GUID;
}
// This task performs a symmetric rank-k update, which is the // third step in the tiled Cholesky factorization ocrGuid_t cblas_dsyrk_task ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { u64 *func_args = paramv; u32 k = (u32) func_args[0]; u32 j = (u32) func_args[1]; u32 i = (u32) func_args[2]; u32 tileSize = (u32) func_args[3]; ocrGuid_t out_lkji_jjkp1_event_guid = (ocrGuid_t) func_args[4]; // PRINTF("RUNNING update_diagonal (%d, %d, %d)\n", k, j, i); double* aBlock = (double*) (depv[0].ptr); double* l2Block = (double*) (depv[1].ptr); // Symmetric Rank-k Update C = alpha AA' + beta C, where alpha = -1, beta = 1 cblas_dsyrk(CblasRowMajor, CblasLower, CblasNoTrans, tileSize, tileSize, -1.0, l2Block, tileSize, 1.0, // A matrix aBlock, tileSize); // C matrix, solution put here in lower triangle ocrEventSatisfy(out_lkji_jjkp1_event_guid, depv[0].guid); return NULL_GUID; }
int main (int argc, char ** argv) { ocrEdt_t fctPtrArray [1]; fctPtrArray[0] = &task_for_edt; ocrInit(&argc, argv, 1, fctPtrArray); // Current thread is '0' and goes on with user code. ocrGuid_t event_guid; ocrEventCreate(&event_guid, OCR_EVENT_STICKY_T, true); // Creates the EDT u32 paramc = 1; u64 params[1]; params[0] = sizeof(int); int * paramv = (int *) malloc(sizeof(int)); paramv[0] = 32; ocrGuid_t edt_guid; ocrEdtCreate(&edt_guid, task_for_edt, paramc, params, (void**) ¶mv, 0, 1, NULL); // Register a dependence between an event and an edt ocrAddDependence(event_guid, edt_guid, 0); int *k; ocrGuid_t db_guid; ocrDbCreate(&db_guid,(void **) &k, sizeof(int), /*flags=*/FLAGS, /*location=*/NULL, NO_ALLOC); *k = 42; ocrEventSatisfy(event_guid, db_guid); ocrEdtSchedule(edt_guid); ocrCleanup(); return 0; }
// This computes a matrix-matrix product to complete the last step in // the Cholesky factorization prior to the next iteration. ocrGuid_t cblas_dgemm_task ( u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { u64 *func_args = paramv; u32 k = (u32) func_args[0]; u32 j = (u32) func_args[1]; u32 i = (u32) func_args[2]; u32 tileSize = (u32) func_args[3]; ocrGuid_t out_lkji_jikp1_event_guid = (ocrGuid_t) func_args[4]; // PRINTF("RUNNING update_nondiagonal (%d, %d, %d)\n", k, j, i); double* aBlock = (double*) (depv[0].ptr); double* l1Block = (double*) (depv[1].ptr); double* l2Block = (double*) (depv[2].ptr); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, tileSize, tileSize, tileSize, -1.0, l1Block, tileSize, // A matrix l2Block, tileSize, 1.0, // B matrix aBlock, tileSize); // C matrix, solution put here ocrEventSatisfy(out_lkji_jikp1_event_guid, depv[0].guid); return NULL_GUID; }
/*
 * Entry EDT: allocates a datablock of 100 u64 elements, creates stepA_edt
 * with the element count as its single parameter, and triggers it through a
 * sticky event carrying the datablock on dependence slot 0.
 */
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    u64 elementCount = 100;

    // Parameter vector for stepA: just the array size.
    u64 stepAParams[1];
    stepAParams[0] = elementCount;

    // Datablock holding 'elementCount' u64 elements.
    ocrGuid_t arrayGuid;
    u64 * arrayPtr;
    ocrDbCreate(&arrayGuid, (void **) &arrayPtr, sizeof(u64)*elementCount,
                /*flags=*/0, /*loc=*/NULL_GUID, NO_ALLOC);

    ocrGuid_t stepATpl;
    ocrEdtTemplateCreate(&stepATpl, stepA_edt, 1 /*paramc*/, 1 /*depc*/);

    // The dependence vector is not given at creation time.
    ocrGuid_t stepAGuid;
    ocrEdtCreate(&stepAGuid, stepATpl, EDT_PARAM_DEF, stepAParams, 1, NULL,
                 /*prop=*/EDT_PROP_NONE, NULL_GUID, NULL);

    // Event used to trigger stepA.
    ocrGuid_t startEvt;
    ocrEventCreate(&startEvt, OCR_EVENT_STICKY_T, EVT_PROP_TAKES_ARG);

    // Event feeds slot 0 of stepA.
    ocrAddDependence(startEvt, stepAGuid, 0, DB_MODE_EW);

    // Hand the datablock over.
    ocrEventSatisfy(startEvt, arrayGuid);
    return NULL_GUID;
}
// Input // - Completion event to be satisfied when setup is done (paramv[0]) ocrGuid_t setupEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { // Setup all done event ocrGuid_t setupEdtDoneEvt; setupEdtDoneEvt.guid = paramv[0]; ocrGuid_t curAffGuid; ocrAffinityGetCurrent(&curAffGuid); ocrGuid_t setupDbGuid; domainSetup_t * setupDbPtr; ocrHint_t dbHint; ocrHintInit(&dbHint, OCR_HINT_DB_T); ocrSetHintValue(&dbHint, OCR_HINT_DB_AFFINITY, ocrAffinityToHintValue(curAffGuid)); ocrDbCreate(&setupDbGuid, (void**) &setupDbPtr, sizeof(domainSetup_t), 0, &dbHint, NO_ALLOC); setupDbPtr->self = setupDbGuid; // This EDT done event ocrGuid_t selfDoneEvt; ocrEventCreate(&selfDoneEvt, OCR_EVENT_ONCE_T, true); // Create a done event for the user code ocrGuid_t subSetupDoneEvt; ocrEventCreate(&subSetupDoneEvt, OCR_EVENT_ONCE_T, true); ocrGuid_t combEdtTplGuid; ocrEdtTemplateCreate(&combEdtTplGuid, combineSetupEdt, 1, 2); ocrGuid_t combineEdtGuid; combine(&combineEdtGuid, combEdtTplGuid, curAffGuid, selfDoneEvt, subSetupDoneEvt, setupEdtDoneEvt); ocrEdtTemplateDestroy(combEdtTplGuid); domainSetup(subSetupDoneEvt, setupDbPtr); ocrDbRelease(setupDbGuid); ocrEventSatisfy(selfDoneEvt, setupDbGuid); return NULL_GUID; }
u8 smith_waterman_task ( u32 paramc, u64 * params, void* paramv[], u32 depc, ocrEdtDep_t depv[]) { int index, ii, jj; /* Unbox parameters */ intptr_t* typed_paramv = *paramv; int i = (int) typed_paramv[0]; int j = (int) typed_paramv[1]; int tile_width = (int) typed_paramv[2]; int tile_height = (int) typed_paramv[3]; Tile_t** tile_matrix = (Tile_t**) typed_paramv[4]; signed char* string_1 = (signed char* ) typed_paramv[5]; signed char* string_2 = (signed char* ) typed_paramv[6]; int n_tiles_height = (int) typed_paramv[7]; int n_tiles_width = (int)typed_paramv[8]; /* Get the input datablock data pointers acquired from dependences */ int* left_tile_right_column = (int *) depv[0].ptr; int* above_tile_bottom_row = (int *) depv[1].ptr; int* diagonal_tile_bottom_right = (int *) depv[2].ptr; /* Allocate a haloed local matrix for calculating 'this' tile*/ int * curr_tile_tmp = (int*)malloc(sizeof(int)*(1+tile_width)*(1+tile_height)); /* 2D-ify it for readability */ int ** curr_tile = (int**)malloc(sizeof(int*)*(1+tile_height)); for (index = 0; index < tile_height+1; ++index) { curr_tile[index] = &curr_tile_tmp[index*(1+tile_width)]; } /* Initialize halo from neighbouring tiles */ /* Set local_tile[0][0] (top left) from the bottom right of the northwest tile */ curr_tile[0][0] = diagonal_tile_bottom_right[0]; /* Set local_tile[i+1][0] (left column) from the right column of the left tile */ for ( index = 1; index < tile_height+1; ++index ) { curr_tile[index][0] = left_tile_right_column[index-1]; } /* Set local_tile[0][j+1] (top row) from the bottom row of the above tile */ for ( index = 1; index < tile_width+1; ++index ) { curr_tile[0][index] = above_tile_bottom_row[index-1]; } /* Run a smith-waterman on the local tile */ for ( ii = 1; ii < tile_height+1; ++ii ) { for ( jj = 1; jj < tile_width+1; ++jj ) { signed char char_from_1 = string_1[(j-1)*tile_width+(jj-1)]; signed char char_from_2 = string_2[(i-1)*tile_height+(ii-1)]; /* Get score from northwest, north and west */ int 
diag_score = curr_tile[ii-1][jj-1] + alignment_score_matrix[char_from_2][char_from_1]; int left_score = curr_tile[ii ][jj-1] + alignment_score_matrix[char_from_1][GAP]; int top_score = curr_tile[ii-1][jj ] + alignment_score_matrix[GAP][char_from_2]; int bigger_of_left_top = (left_score > top_score) ? left_score : top_score; /* Set the local tile[i][j] to the maximum value of northwest, north and west */ curr_tile[ii][jj] = (bigger_of_left_top > diag_score) ? bigger_of_left_top : diag_score; } } /* Allocate datablock for bottom right of the local tile */ ocrGuid_t db_guid_i_j_br; void* db_guid_i_j_br_data; ocrDbCreate( &db_guid_i_j_br, &db_guid_i_j_br_data, sizeof(int), FLAGS, NULL, NO_ALLOC ); /* Satisfy the bottom right event of local tile with the data block allocated above */ int* curr_bottom_right = (int*)db_guid_i_j_br_data; curr_bottom_right[0] = curr_tile[tile_height][tile_width]; ocrEventSatisfy(tile_matrix[i][j].bottom_right_event_guid, db_guid_i_j_br); /* Allocate datablock for right column of the local tile */ ocrGuid_t db_guid_i_j_rc; void* db_guid_i_j_rc_data; ocrDbCreate( &db_guid_i_j_rc, &db_guid_i_j_rc_data, sizeof(int)*tile_height, FLAGS, NULL, NO_ALLOC ); /* Satisfy the right column event of local tile with the data block allocated above */ int* curr_right_column = (int*)db_guid_i_j_rc_data; for ( index = 0; index < tile_height; ++index ) { curr_right_column[index] = curr_tile[index+1][tile_width]; } ocrEventSatisfy(tile_matrix[i][j].right_column_event_guid, db_guid_i_j_rc); /* Allocate datablock for bottom row of the local tile */ ocrGuid_t db_guid_i_j_brow; void* db_guid_i_j_brow_data; ocrDbCreate( &db_guid_i_j_brow, &db_guid_i_j_brow_data, sizeof(int)*tile_width, FLAGS, NULL, NO_ALLOC ); /* Satisfy the bottom row event of local tile with the data block allocated above */ int* curr_bottom_row = (int*)db_guid_i_j_brow_data; for ( index = 0; index < tile_width; ++index ) { curr_bottom_row[index] = curr_tile[tile_height][index+1]; } 
ocrEventSatisfy(tile_matrix[i][j].bottom_row_event_guid, db_guid_i_j_brow); free(curr_tile); free(curr_tile_tmp); /* If this is the last tile (bottom right most tile), finish */ if ( i == n_tiles_height && j == n_tiles_width ) { fprintf(stdout, "score: %d\n", curr_bottom_row[tile_width-1]); ocrFinish(); } }
ocrGuid_t fibEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { void* ptr; ocrGuid_t inDep; ocrGuid_t fib0, fib1, comp; ocrGuid_t fibDone[2]; ocrGuid_t fibArg[2]; inDep = (ocrGuid_t)paramv[0]; int my_ID = paramv[1]; u32 n = *(u32*)(depv[0].ptr); PRINTF("r%d Starting fibEdt(%u)\n", my_ID, n); if (n < 2) { PRINTF("r%d In fibEdt(%d) -- done (sat %lx)\n", my_ID, n, inDep); ocrEventSatisfy(inDep, depv[0].guid); return NULL_GUID; } PRINTF("r%d In fibEdt(%d) -- spawning children\n", my_ID, n); /* create the completion EDT and pass it the in/out argument as a dependency */ /* create the EDT with the done_event as the argument */ { u64 paramv[] = {(u64)inDep, my_ID}; ocrGuid_t templateGuid; ocrEdtTemplateCreate(&templateGuid, complete, 2, 3); ocrEdtCreate(&comp, templateGuid, 2, paramv, 3, NULL, EDT_PROP_NONE, NULL_GUID, NULL); ocrEdtTemplateDestroy(templateGuid); } PRINTF("r%d In fibEdt(%u) -- spawned complete EDT GUID 0x%llx\n", my_ID, n, (u64)comp); ocrAddDependence(depv[0].guid, comp, 2, DB_DEFAULT_MODE); /* create the events that the completion EDT will "wait" on */ ocrEventCreate(&fibDone[0], OCR_EVENT_ONCE_T, EVT_PROP_TAKES_ARG); ocrEventCreate(&fibDone[1], OCR_EVENT_ONCE_T, EVT_PROP_TAKES_ARG); ocrAddDependence(fibDone[0], comp, 0, DB_DEFAULT_MODE); ocrAddDependence(fibDone[1], comp, 1, DB_DEFAULT_MODE); /* allocate the argument to pass to fib(n-1) */ ocrDbCreate(&fibArg[0], (void**)&ptr, sizeof(u32), DB_PROP_NONE, NULL_GUID, NO_ALLOC); PRINTF("r%d In fibEdt(%u) -- created arg DB GUID 0x%llx\n", my_ID, n, fibArg[0]); *((u32*)ptr) = n-1; /* sched the EDT, passing the fibDone event as it's argument */ { u64 paramv[] = {(u64)fibDone[0], my_ID}; ocrGuid_t depv = fibArg[0]; ocrGuid_t templateGuid; ocrEdtTemplateCreate(&templateGuid, fibEdt, 2, 1); ocrEdtCreate(&fib0, templateGuid, 2, paramv, 1, &depv, EDT_PROP_NONE, NULL_GUID, NULL); ocrEdtTemplateDestroy(templateGuid); } PRINTF("r%d In fibEdt(%u) -- spawned first sub-part EDT GUID 0x%llx\n", my_ID, n, fib0); 
/* then do the exact same thing for n-2 */ ocrDbCreate(&fibArg[1], (void**)&ptr, sizeof(u32), DB_PROP_NONE, NULL_GUID, NO_ALLOC); PRINTF("r%d In fibEdt(%u) -- created arg DB GUID 0x%llx\n", my_ID, n, fibArg[1]); *((u32*)ptr) = n-2; { u64 paramv[] = {(u64)fibDone[1], my_ID}; ocrGuid_t depv = fibArg[1]; ocrGuid_t templateGuid; ocrEdtTemplateCreate(&templateGuid, fibEdt, 2, 1); ocrEdtCreate(&fib1, templateGuid, 2, paramv, 1, &depv, EDT_PROP_NONE, NULL_GUID, NULL); ocrEdtTemplateDestroy(templateGuid); } PRINTF("r%d In fibEdt(%u) -- spawned first sub-part EDT GUID 0x%llx\n", my_ID, n, fib1); PRINTF("r%d Returning from fibEdt(%u)\n", my_ID, n); return NULL_GUID; }
/*
 * Logs that it is running, then satisfies (without data) the event whose GUID
 * is stored in the first slot of the datablock received on depv[0].
 */
ocrGuid_t remoteEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    PRINTF("RemoteEdt: executing\n");

    ocrGuid_t *payload = (ocrGuid_t *) depv[0].ptr;
    ocrGuid_t doneEvt = payload[0];

    ocrEventSatisfy(doneEvt, NULL_GUID);
    return NULL_GUID;
}
int main (int argc, char ** argv) { ocrEdt_t fctPtrArray[2] = {summer, autumn}; ocrInit(&argc, argv, 2, fctPtrArray); /* No machine description */ if(argc != 3) { printf("Usage %s <num1> <num2>\n", argv[0]); return -1; } /* Create 2 data-blocks */ ocrGuid_t dbs[2]; int *data[2]; int i; for(i = 0; i < 2; ++i) { ocrDbCreate(&dbs[i], (void**)&data[i], sizeof(int), /*flags=*/0, /*location=*/NULL, NO_ALLOC); *(data[i]) = atoi(argv[i+1]); printf("Created a data-block with value %d (GUID: 0x%lx)\n", i, (u64)dbs[i]); } ocrGuid_t summerEdt, autumnEdt; ocrGuid_t autumnEvt, summerEvt[3]; ocrGuid_t summerEvtDbGuid; ocrGuid_t *summerEvtDb; /* Create final EDT (autumn) */ ocrEdtCreate(&autumnEdt, autumn, /*paramc=*/0, /*params=*/NULL, /*paramv=*/NULL, /*properties=*/0, /*depc=*/1, /*depv=*/NULL); /* Create event */ ocrEventCreate(&autumnEvt, OCR_EVENT_STICKY_T, true); /* Create summer */ ocrEdtCreate(&summerEdt, summer, /*paramc=*/0, /*params=*/NULL, /*paramv=*/NULL, /*properties=*/0, /*depc=*/3, /*depv=*/NULL); /* Create events for summer */ for(i = 0; i < 3; ++i) { ocrEventCreate(&summerEvt[i], OCR_EVENT_STICKY_T, true); } /* Create data-block containing event */ ocrDbCreate(&summerEvtDbGuid, (void**)&summerEvtDb, sizeof(ocrGuid_t), /*flags=*/0, /*location=*/NULL, NO_ALLOC); *summerEvtDb = autumnEvt; /* Link up dependencees */ for(i = 0; i < 3; ++i) { ocrAddDependence(summerEvt[i], summerEdt, i); } ocrAddDependence(autumnEvt, autumnEdt, 0); /* "Schedule" EDTs (order does not matter) */ ocrEdtSchedule(autumnEdt); ocrEdtSchedule(summerEdt); printf("Done all scheduling, now going to satisfy\n"); /* Satisfy dependences passing data */ ocrEventSatisfy(summerEvt[0], dbs[0]); ocrEventSatisfy(summerEvt[1], dbs[1]); ocrEventSatisfy(summerEvt[2], summerEvtDbGuid); /* Finalize */ ocrCleanup(); return 0; }
/*
 * Signals finalization: satisfies the event stored in info.evt_finalize
 * without attaching a datablock.
 */
void SPMD_Finalize(info_t info)
{
    ocrEventSatisfy(info.evt_finalize, NULL_GUID);
}