// Create an event at the current affinity and writes // the GUID into the domainSetup data-structure. void domainSetup(ocrGuid_t userSetupDoneEvt, domainSetup_t * dsetup) { // This is for the domain kernel to callback and stop the timer ocrGuid_t stopTimerEvt; ocrEventCreate(&stopTimerEvt, OCR_EVENT_ONCE_T, true); // Create an EDT at a remote affinity to: // - Create a remote latch event and initialize it // - Hook that event to the local event declared above // - write back the guid of the remote latch event into the setup DB // - userSetupDoneEvt: to be satisfied when setup is done // - stopTimerEvt: to be satisfied when domain kernel is done dsetup->userSetupDoneEvt = userSetupDoneEvt; dsetup->stopTimerEvt = stopTimerEvt; u64 affinityCount; ocrAffinityCount(AFFINITY_PD, &affinityCount); ocrGuid_t remoteAffGuid; ocrAffinityGetAt(AFFINITY_PD, affinityCount-1, &remoteAffGuid); ocrHint_t edtHint; ocrHintInit(&edtHint, OCR_HINT_EDT_T); ocrSetHintValue(&edtHint, OCR_HINT_EDT_AFFINITY, ocrAffinityToHintValue(remoteAffGuid)); ocrGuid_t edtTemplGuid; ocrEdtTemplateCreate(&edtTemplGuid, remoteSetupUserEdt, 0, 1); ocrGuid_t edtGuid; ocrEdtCreate(&edtGuid, edtTemplGuid, 0, NULL, 1, NULL, EDT_PROP_NONE, &edtHint, NULL); // EW addresses the race that the current caller owns the DB and we're // trying to start the remote setup EDT concurrently. Since we do not have // the caller event we can't setup a proper dependence and rely on EW instead. ocrAddDependence(dsetup->self, edtGuid, 0, DB_MODE_EW); ocrEdtTemplateDestroy(edtTemplGuid); }
// paramv[0]: event to satisfy when kernel is done // depv[0]: setupEdt completed (may carry a DB) ocrGuid_t kernelEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { // PRINTF("kernelEdt\n"); ocrGuid_t kernelEdtDoneEvt; kernelEdtDoneEvt.guid = paramv[0]; ocrGuid_t setupDb = depv[0].guid; domainSetup_t * setupDbPtr = depv[0].ptr; // The sub kernel done event ocrGuid_t subKernelDoneEvt; ocrEventCreate(&subKernelDoneEvt, OCR_EVENT_ONCE_T, true); // This EDT done event ocrGuid_t selfDoneEvt; ocrEventCreate(&selfDoneEvt, OCR_EVENT_ONCE_T, true); // Combine those in a combine EDT that satisfies kernelEdtDoneEvt //TODO same issue of allocating tpl every iteration ocrGuid_t curAffGuid; ocrAffinityGetCurrent(&curAffGuid); ocrGuid_t combEdtTplGuid; ocrEdtTemplateCreate(&combEdtTplGuid, combineKernelEdt, 1, 2); ocrGuid_t combineEdtGuid; combine(&combineEdtGuid, combEdtTplGuid, curAffGuid, selfDoneEvt, subKernelDoneEvt, kernelEdtDoneEvt); ocrEdtTemplateDestroy(combEdtTplGuid); timestamp_t timer; domainKernel(subKernelDoneEvt, setupDbPtr, &timer); // Satisfy self event with the timer information ocrEventSatisfy(selfDoneEvt, setupDb); return NULL_GUID; }
/*
 * mainEdt - spawn one otherEdt instance with two dependence slots and feed
 * each slot a freshly created one-word datablock.
 *
 * The EDT is created first with both slots uninitialized; the datablocks
 * (holding the values 1 and 2) are then created, released, and attached to
 * slots 0 and 1 in read-only mode.
 *
 * The EDT arguments are unused. Always returns NULL_GUID.
 */
ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) {
    enum { NB_INPUTS = 2 };
    u32 slot;

    // Consumer EDT: no parameters, NB_INPUTS dependences to be added later.
    ocrGuid_t consumerTpl;
    ocrEdtTemplateCreate(&consumerTpl, otherEdt, 0 /*paramc*/, 2 /*depc*/);

    ocrGuid_t pendingDeps[NB_INPUTS];
    for (slot = 0; slot < NB_INPUTS; slot++) {
        pendingDeps[slot] = UNINITIALIZED_GUID;
    }

    ocrGuid_t consumerEdt;
    ocrEdtCreate(&consumerEdt, consumerTpl, 0, NULL, NB_INPUTS, pendingDeps,
                 EDT_PROP_NONE, NULL_HINT, NULL);
    ocrEdtTemplateDestroy(consumerTpl);

    // Produce the inputs: datablock number k holds the value k + 1.
    ocrGuid_t inputDbs[NB_INPUTS];
    for (slot = 0; slot < NB_INPUTS; slot++) {
        u64 * payload;
        ocrDbCreate(&inputDbs[slot], (void**) &payload, sizeof(u64),
                    DB_PROP_NONE, NULL_HINT, NO_ALLOC);
        payload[0] = slot + 1;
        ocrDbRelease(inputDbs[slot]);
    }

    // Only now wire the datablocks into the consumer, read-only.
    for (slot = 0; slot < NB_INPUTS; slot++) {
        ocrAddDependence(inputDbs[slot], consumerEdt, slot, DB_MODE_RO);
    }

    return NULL_GUID;
}
// Input // - Completion event to be satisfied when setup is done (paramv[0]) ocrGuid_t setupEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { // Setup all done event ocrGuid_t setupEdtDoneEvt; setupEdtDoneEvt.guid = paramv[0]; ocrGuid_t curAffGuid; ocrAffinityGetCurrent(&curAffGuid); ocrGuid_t setupDbGuid; domainSetup_t * setupDbPtr; ocrHint_t dbHint; ocrHintInit(&dbHint, OCR_HINT_DB_T); ocrSetHintValue(&dbHint, OCR_HINT_DB_AFFINITY, ocrAffinityToHintValue(curAffGuid)); ocrDbCreate(&setupDbGuid, (void**) &setupDbPtr, sizeof(domainSetup_t), 0, &dbHint, NO_ALLOC); setupDbPtr->self = setupDbGuid; // This EDT done event ocrGuid_t selfDoneEvt; ocrEventCreate(&selfDoneEvt, OCR_EVENT_ONCE_T, true); // Create a done event for the user code ocrGuid_t subSetupDoneEvt; ocrEventCreate(&subSetupDoneEvt, OCR_EVENT_ONCE_T, true); ocrGuid_t combEdtTplGuid; ocrEdtTemplateCreate(&combEdtTplGuid, combineSetupEdt, 1, 2); ocrGuid_t combineEdtGuid; combine(&combineEdtGuid, combEdtTplGuid, curAffGuid, selfDoneEvt, subSetupDoneEvt, setupEdtDoneEvt); ocrEdtTemplateDestroy(combEdtTplGuid); domainSetup(subSetupDoneEvt, setupDbPtr); ocrDbRelease(setupDbGuid); ocrEventSatisfy(selfDoneEvt, setupDbGuid); return NULL_GUID; }
ocrGuid_t fibEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { void* ptr; ocrGuid_t inDep; ocrGuid_t fib0, fib1, comp; ocrGuid_t fibDone[2]; ocrGuid_t fibArg[2]; inDep = (ocrGuid_t)paramv[0]; int my_ID = paramv[1]; u32 n = *(u32*)(depv[0].ptr); PRINTF("r%d Starting fibEdt(%u)\n", my_ID, n); if (n < 2) { PRINTF("r%d In fibEdt(%d) -- done (sat %lx)\n", my_ID, n, inDep); ocrEventSatisfy(inDep, depv[0].guid); return NULL_GUID; } PRINTF("r%d In fibEdt(%d) -- spawning children\n", my_ID, n); /* create the completion EDT and pass it the in/out argument as a dependency */ /* create the EDT with the done_event as the argument */ { u64 paramv[] = {(u64)inDep, my_ID}; ocrGuid_t templateGuid; ocrEdtTemplateCreate(&templateGuid, complete, 2, 3); ocrEdtCreate(&comp, templateGuid, 2, paramv, 3, NULL, EDT_PROP_NONE, NULL_GUID, NULL); ocrEdtTemplateDestroy(templateGuid); } PRINTF("r%d In fibEdt(%u) -- spawned complete EDT GUID 0x%llx\n", my_ID, n, (u64)comp); ocrAddDependence(depv[0].guid, comp, 2, DB_DEFAULT_MODE); /* create the events that the completion EDT will "wait" on */ ocrEventCreate(&fibDone[0], OCR_EVENT_ONCE_T, EVT_PROP_TAKES_ARG); ocrEventCreate(&fibDone[1], OCR_EVENT_ONCE_T, EVT_PROP_TAKES_ARG); ocrAddDependence(fibDone[0], comp, 0, DB_DEFAULT_MODE); ocrAddDependence(fibDone[1], comp, 1, DB_DEFAULT_MODE); /* allocate the argument to pass to fib(n-1) */ ocrDbCreate(&fibArg[0], (void**)&ptr, sizeof(u32), DB_PROP_NONE, NULL_GUID, NO_ALLOC); PRINTF("r%d In fibEdt(%u) -- created arg DB GUID 0x%llx\n", my_ID, n, fibArg[0]); *((u32*)ptr) = n-1; /* sched the EDT, passing the fibDone event as it's argument */ { u64 paramv[] = {(u64)fibDone[0], my_ID}; ocrGuid_t depv = fibArg[0]; ocrGuid_t templateGuid; ocrEdtTemplateCreate(&templateGuid, fibEdt, 2, 1); ocrEdtCreate(&fib0, templateGuid, 2, paramv, 1, &depv, EDT_PROP_NONE, NULL_GUID, NULL); ocrEdtTemplateDestroy(templateGuid); } PRINTF("r%d In fibEdt(%u) -- spawned first sub-part EDT GUID 0x%llx\n", my_ID, n, fib0); 
/* then do the exact same thing for n-2 */ ocrDbCreate(&fibArg[1], (void**)&ptr, sizeof(u32), DB_PROP_NONE, NULL_GUID, NO_ALLOC); PRINTF("r%d In fibEdt(%u) -- created arg DB GUID 0x%llx\n", my_ID, n, fibArg[1]); *((u32*)ptr) = n-2; { u64 paramv[] = {(u64)fibDone[1], my_ID}; ocrGuid_t depv = fibArg[1]; ocrGuid_t templateGuid; ocrEdtTemplateCreate(&templateGuid, fibEdt, 2, 1); ocrEdtCreate(&fib1, templateGuid, 2, paramv, 1, &depv, EDT_PROP_NONE, NULL_GUID, NULL); ocrEdtTemplateDestroy(templateGuid); } PRINTF("r%d In fibEdt(%u) -- spawned first sub-part EDT GUID 0x%llx\n", my_ID, n, fib1); PRINTF("r%d Returning from fibEdt(%u)\n", my_ID, n); return NULL_GUID; }
//ocrGuid_t mainEdt(u32 paramc, u64* paramv, u32 depc, ocrEdtDep_t depv[]) { int main(int argc, char **argv) { int my_ID; int Num_procs; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_ID); MPI_Comm_size(MPI_COMM_WORLD, &Num_procs); PRINTF("Starting main on rank %d\n", my_ID); u32 input; u32 myN; if (0 == my_ID) { if((argc != 2)) { PRINTF("Usage: fib <num>, defaulting to 10\n"); input = 10; } else { input = atoi(argv[1]); } myN = input-1; u32 yourN = input-2; MPI_Send(&yourN, 1, MPI_INT, 1, 0, MPI_COMM_WORLD); } else if (1 == my_ID) { MPI_Recv(&myN, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else // only need 2 { MPI_Finalize(); return 0; } u64 correctAns = fib(myN); ocrGuid_t fibC, totallyDoneEvent, absFinalEdt, templateGuid; /* create a db for the results */ ocrGuid_t fibArg; u32* res; PRINTF("Before 1st DB create\n"); ocrDbCreate(&fibArg, (void**)&res, sizeof(u32), DB_PROP_NONE, NULL_GUID, NO_ALLOC); PRINTF("Got DB created\n"); /* DB is in/out */ *res = myN; /* and an event for when the results are finished */ ocrEventCreate(&totallyDoneEvent, OCR_EVENT_STICKY_T, EVT_PROP_TAKES_ARG); /* create the EDT with the done_event as the argument */ { u64 paramv[] = {(u64)totallyDoneEvent, my_ID}; ocrGuid_t depv = fibArg; ocrGuid_t templateGuid; ocrEdtTemplateCreate(&templateGuid, fibEdt, 2, 1); ocrEdtCreate(&fibC, templateGuid, 2, paramv, 1, &depv, EDT_PROP_NONE, NULL_GUID, NULL); ocrEdtTemplateDestroy(templateGuid); } ocrGuid_t DB; void *myPtr; u64 dbSize; ocrLegacyBlockProgress(totallyDoneEvent, &DB, &myPtr, &dbSize, LEGACY_PROP_NONE); u32 myAns = *(u32*)myPtr; u32 ourAns; // get the results if (2 == Num_procs) { MPI_Reduce(&myAns, &ourAns, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); } else { // extra ranks, need to ignore them else reduce would hang // because they have all returned if (0 == my_ID) { MPI_Recv(&ourAns, 1, MPI_INT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); ourAns += myAns; } else { MPI_Send(&myAns, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); } 
} if (0 == my_ID) { u64 correctAns = fib(input); if (correctAns == ourAns) { PRINTF("\nFinal Answer Correct fib(%d) = %d\n", input, ourAns); } else { PRINTF("\nFinal Answer **WRONG** fib(%d) = %d, should be %d\n", input, ourAns, correctAns); } } MPI_Finalize(); return 0; }
/*
 * cleanFramework - destroy every EDT template recorded in the framework
 * info structure.
 *
 * info: structure whose edtTemplGuids[0 .. NB_TMPL-1] entries are each
 *       passed to ocrEdtTemplateDestroy, in increasing index order.
 */
void cleanFramework(info_t * info) {
    u32 tplIdx;

    for (tplIdx = 0; tplIdx < NB_TMPL; tplIdx++) {
        ocrEdtTemplateDestroy(info->edtTemplGuids[tplIdx]);
    }
}
int hta_map(int pid, Context* context) { register char * const basepointer __asm("rbp"); register char * const stackpointer __asm("rsp"); //====================================================================== // perform map //====================================================================== // create slave EDTs ocrGuid_t slaveEdt_template_guid; ocrGuid_t slaveEdts[NUM_SLAVES]; ocrGuid_t slaveOutEvent[NUM_SLAVES]; ocrGuid_t slaveInDBs[NUM_SLAVES]; ocrEdtTemplateCreate(&slaveEdt_template_guid, slaveEdt, 0, 2); printf("(%lu) slaveEDT template guid %lx\n", MYRANK, slaveEdt_template_guid); for(int i = 0; i < NUM_SLAVES; i++) { int *data; slaveOutEvent[i] = NULL_GUID; ocrDbCreate(&slaveInDBs[i], (void**) &data, sizeof(int), /*flags=*/DB_PROP_NO_ACQUIRE, /*affinity=*/NULL_GUID, NO_ALLOC); ocrEdtCreate(&slaveEdts[i], slaveEdt_template_guid, /*paramc=*/0, /*paramv=*/(u64 *)NULL, /*depc=*/2, /*depv=*/NULL, /*properties=*/0 , /*affinity*/NULL_GUID, &slaveOutEvent[i]); ocrAddDependence(slaveInDBs[i], slaveEdts[i], 1, DB_DEFAULT_MODE); // Immediately satisfy printf("(%lu) slave %d EDT guid %lx\n", MYRANK, i, slaveEdts[i]); printf("(%lu) slave %d out event guid %lx\n", MYRANK, i, slaveOutEvent[i]); } ocrEdtTemplateDestroy(slaveEdt_template_guid); //====================================================================== // Create continuation to wait for slave EDTs to finish //====================================================================== ocrGuid_t procEdt_template_guid; ocrGuid_t procEdt_guid; ocrEdtTemplateCreate(&procEdt_template_guid, procEdt, 1, 3+NUM_SLAVES); u64 rank = MYRANK; ocrGuid_t depv[3+NUM_SLAVES]; depv[0] = UNINITIALIZED_GUID; depv[1] = context->args; depv[2] = context->self_context_guid; for(int i = 0; i < NUM_SLAVES; i++) depv[3+i] = slaveOutEvent[i]; ocrEdtCreate(&procEdt_guid, procEdt_template_guid, /*paramc=*/1, /*paramv=*/(u64 *)&rank, /*depc=*/EDT_PARAM_DEF, /*depv=*/depv, /*properties=*/0 , /*affinity*/NULL_GUID, /*outputEvent*/NULL ); 
printf("(%lu) continuation procEDT template guid %lx\n", MYRANK, procEdt_template_guid); printf("(%lu) continuation procEDT guid %lx\n", MYRANK, procEdt_guid); // defer firing slave EDTs for(int i = 0; i < NUM_SLAVES; i++) { ocrAddDependence(NULL_GUID, slaveEdts[i], 0, DB_DEFAULT_MODE); //pure control dependence // Immediately satisfy } ocrEdtTemplateDestroy(procEdt_template_guid); // setjmp call creates a continuation point if(!setjmp(context->env)) { // switch back to thread stack // 1. compute the size that need to be copied (the growth of DB stack) size_t size_to_copy = (context->stack + DB_STACK_SIZE) - stackpointer; printf("(%lu) db stack (%p - %p) stack size growth = 0x%x\n", MYRANK, stackpointer, context->stack+DB_STACK_SIZE-1, size_to_copy); // 2. compute the start address of the thread stack char* originalbp = context->originalbp; char* threadsp = originalbp - size_to_copy; char* threadbp = threadsp + (basepointer-stackpointer); // 3. copy DB stack to overwrite thread stack printf("(%lu) Enabling continuation codelet\n", MYRANK); printf("(%lu) switching back to thread stack at (%p - %p) original bp (%p)\n", MYRANK, threadsp, threadbp, originalbp); memcpy(threadsp - RED_ZONE_SIZE, stackpointer - RED_ZONE_SIZE, size_to_copy + RED_ZONE_SIZE); // 4. fix frame link addresses _fix_pointers(basepointer, threadbp, originalbp); // 5. store the next phase EDT guid for deferred activation context->next_phase_edt_guid = procEdt_guid; // 6. set rsp and rbp to point to thread stack. Stop writing to context->stack __asm volatile( "movq %0, %%rbp;" "movq %1, %%rsp;" : :"r"(threadbp), "r"(threadsp) ); printf("(%lu) ==hta_map splited==\n", MYRANK); return HTA_OP_TO_BE_CONTINUED; }