/* ============================================================================= * router_solve * ============================================================================= */ void router_solve (void* argPtr) { TM_THREAD_ENTER(); router_solve_arg_t* routerArgPtr = (router_solve_arg_t*)argPtr; router_t* routerPtr = routerArgPtr->routerPtr; maze_t* mazePtr = routerArgPtr->mazePtr; vector_t* myPathVectorPtr = PVECTOR_ALLOC(1); assert(myPathVectorPtr); queue_t* workQueuePtr = mazePtr->workQueuePtr; grid_t* gridPtr = mazePtr->gridPtr; grid_t* myGridPtr = PGRID_ALLOC(gridPtr->width, gridPtr->height, gridPtr->depth); assert(myGridPtr); long bendCost = routerPtr->bendCost; queue_t* myExpansionQueuePtr = PQUEUE_ALLOC(-1); /* * Iterate over work list to route each path. This involves an * 'expansion' and 'traceback' phase for each source/destination pair. */ while (1) { pair_t* coordinatePairPtr; TM_BEGIN(); if (TMQUEUE_ISEMPTY(workQueuePtr)) { coordinatePairPtr = NULL; } else { coordinatePairPtr = (pair_t*)TMQUEUE_POP(workQueuePtr); } TM_END(); if (coordinatePairPtr == NULL) { break; } coordinate_t* srcPtr = (coordinate_t*)coordinatePairPtr->firstPtr; coordinate_t* dstPtr = (coordinate_t*)coordinatePairPtr->secondPtr; bool success = false; vector_t* pointVectorPtr = NULL; TM_BEGIN(); grid_copy(myGridPtr, gridPtr); /* ok if not most up-to-date */ if (PdoExpansion(routerPtr, myGridPtr, myExpansionQueuePtr, srcPtr, dstPtr)) { pointVectorPtr = PdoTraceback(gridPtr, myGridPtr, dstPtr, bendCost); /* * TODO: fix memory leak * * pointVectorPtr will be a memory leak if we abort this transaction */ if (pointVectorPtr) { TMGRID_ADDPATH(gridPtr, pointVectorPtr); TM_LOCAL_WRITE_L(success, true); } } TM_END(); if (success) { bool status = PVECTOR_PUSHBACK(myPathVectorPtr, (void*)pointVectorPtr); assert(status); } } /* * Add my paths to global list */ list_t* pathVectorListPtr = routerArgPtr->pathVectorListPtr; TM_BEGIN(); TMLIST_INSERT(pathVectorListPtr, (void*)myPathVectorPtr); 
TM_END(); PGRID_FREE(myGridPtr); PQUEUE_FREE(myExpansionQueuePtr); #if DEBUG puts("\nFinal Grid:"); grid_print(gridPtr); #endif /* DEBUG */ TM_THREAD_EXIT(); }
/* =============================================================================
 * sequencer_run
 * -- Reassembles a genome sequence from overlapping segments in three phases:
 *    (1) deduplicate segments, (2) greedily match segment ends to segment
 *    starts by decreasing overlap length, (3) concatenate the resulting
 *    chains into the final sequence.  Threads synchronize at barriers
 *    between phases; shared state is accessed through TM_* primitives.
 * =============================================================================
 */
void sequencer_run (void* argPtr)
{
    TM_THREAD_ENTER();

    long threadId = thread_getId();

    sequencer_t* sequencerPtr = (sequencer_t*)argPtr;

    hashtable_t* uniqueSegmentsPtr;
    endInfoEntry_t* endInfoEntries;
    table_t** startHashToConstructEntryTables;
    constructEntry_t* constructEntries;
    table_t* hashToConstructEntryTable;

    uniqueSegmentsPtr = sequencerPtr->uniqueSegmentsPtr;
    endInfoEntries = sequencerPtr->endInfoEntries;
    startHashToConstructEntryTables = sequencerPtr->startHashToConstructEntryTables;
    constructEntries = sequencerPtr->constructEntries;
    hashToConstructEntryTable = sequencerPtr->hashToConstructEntryTable;

    segments_t* segmentsPtr = sequencerPtr->segmentsPtr;
    assert(segmentsPtr);
    vector_t* segmentsContentsPtr = segmentsPtr->contentsPtr;
    long numSegment = vector_getSize(segmentsContentsPtr);
    long segmentLength = segmentsPtr->length;

    long i;
    long j;
    long i_start;
    long i_stop;
    long numUniqueSegment;
    long substringLength;
    long entryIndex;

    /*
     * Step 1: Remove duplicate segments
     * Each thread inserts a disjoint slice of the segment vector into a
     * shared hashtable; duplicates are collapsed by the table.
     */
#if defined(HTM) || defined(STM)
    long numThread = thread_getNumThread();
    {
        /* Choose disjoint segments [i_start,i_stop) for each thread */
        long partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */
        i_start = threadId * partitionSize;
        if (threadId == (numThread - 1)) {
            i_stop = numSegment; /* last thread absorbs the rounding remainder */
        } else {
            i_stop = i_start + partitionSize;
        }
    }
#else /* !(HTM || STM) */
    i_start = 0;
    i_stop = numSegment;
#endif /* !(HTM || STM) */
    /* Insert in chunks of CHUNK_STEP1 per transaction to limit txn size. */
    for (i = i_start; i < i_stop; i+=CHUNK_STEP1) {
        TM_BEGIN();
        {
            long ii;
            long ii_stop = MIN(i_stop, (i+CHUNK_STEP1));
            for (ii = i; ii < ii_stop; ii++) {
                void* segment = vector_at(segmentsContentsPtr, ii);
                TMHASHTABLE_INSERT(uniqueSegmentsPtr,
                                   segment,
                                   segment);
            } /* ii */
        }
        TM_END();
    }

    thread_barrier_wait();

    /*
     * Step 2a: Iterate over unique segments and compute hashes.
     *
     * For the gene "atcg", the hashes for the end would be:
     *
     *     "t", "tc", and "tcg"
     *
     * And for the gene "tcgg", the hashes for the start would be:
     *
     *     "t", "tc", and "tcg"
     *
     * The names are "end" and "start" because if a matching pair is found,
     * they are the substring of the end part of the pair and the start
     * part of the pair respectively. In the above example, "tcg" is the
     * matching substring so:
     *
     *     (end)    (start)
     *     a[tcg] + [tcg]g  = a[tcg]g    (overlap = "tcg")
     */

    /* uniqueSegmentsPtr is constant now */
    numUniqueSegment = hashtable_getSize(uniqueSegmentsPtr);
    entryIndex = 0;

#if defined(HTM) || defined(STM)
    {
        /* Choose disjoint segments [i_start,i_stop) for each thread */
        long num = uniqueSegmentsPtr->numBucket;
        long partitionSize = (num + numThread/2) / numThread; /* with rounding */
        i_start = threadId * partitionSize;
        if (threadId == (numThread - 1)) {
            i_stop = num;
        } else {
            i_stop = i_start + partitionSize;
        }
    }
    {
        /* Approximate disjoint segments of element allocation in constructEntries */
        long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
        entryIndex = threadId * partitionSize;
    }
#else /* !(HTM || STM) */
    i_start = 0;
    i_stop = uniqueSegmentsPtr->numBucket;
    entryIndex = 0;
#endif /* !(HTM || STM) */

    /* Walk the hashtable buckets assigned to this thread. */
    for (i = i_start; i < i_stop; i++) {
        list_t* chainPtr = uniqueSegmentsPtr->buckets[i];
        list_iter_t it;
        list_iter_reset(&it, chainPtr);
        while (list_iter_hasNext(&it, chainPtr)) {
            char* segment =
                (char*)((pair_t*)list_iter_next(&it, chainPtr))->firstPtr;
            constructEntry_t* constructEntryPtr;
            long j;
            unsigned long startHash;
            bool status;

            /* Find an empty constructEntries entry (linear probe; claiming
             * the slot and writing the segment pointer happen in one txn so
             * two threads cannot claim the same slot). */
            TM_BEGIN();
            while (((void*)TM_SHARED_READ_P(constructEntries[entryIndex].segment)) != NULL) {
                entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */
            }
            constructEntryPtr = &constructEntries[entryIndex];
            TM_SHARED_WRITE_P(constructEntryPtr->segment, segment);
            TM_END();
            entryIndex = (entryIndex + 1) % numUniqueSegment;

            /*
             * Save hashes (sdbm algorithm) of segment substrings
             *
             * endHashes will be computed for shorter substrings after matches
             * have been made (in the next phase of the code). This will reduce
             * the number of substrings for which hashes need to be computed.
             *
             * Since we can compute startHashes incrementally, we go ahead
             * and compute all of them here.
             */
            /* constructEntryPtr is local now */
            constructEntryPtr->endHash = hashString(&segment[1]);

            startHash = 0;
            for (j = 1; j < segmentLength; j++) {
                /* sdbm step: hash = c + (hash<<6) + (hash<<16) - hash */
                startHash = (unsigned long)segment[j-1] +
                            (startHash << 6) + (startHash << 16) - startHash;
                TM_BEGIN();
                /* Index prefix of length j under its hash for step 2b lookups. */
                status = TMTABLE_INSERT(startHashToConstructEntryTables[j],
                                        (unsigned long)startHash,
                                        (void*)constructEntryPtr );
                TM_END();
                assert(status);
            }

            /*
             * For looking up construct entries quickly
             * (hash of the full segment; j == segmentLength here)
             */
            startHash = (unsigned long)segment[j-1] +
                        (startHash << 6) + (startHash << 16) - startHash;
            TM_BEGIN();
            status = TMTABLE_INSERT(hashToConstructEntryTable,
                                    (unsigned long)startHash,
                                    (void*)constructEntryPtr);
            TM_END();
            assert(status);
        }
    }

    thread_barrier_wait();

    /*
     * Step 2b: Match ends to starts by using hash-based string comparison.
     * Overlap lengths are tried from longest (segmentLength-1) down to 1,
     * with a barrier between rounds so all matches at a given length finish
     * before shorter overlaps are attempted.
     */
    for (substringLength = segmentLength-1; substringLength > 0; substringLength--) {

        table_t* startHashToConstructEntryTablePtr =
            startHashToConstructEntryTables[substringLength];
        list_t** buckets = startHashToConstructEntryTablePtr->buckets;
        long numBucket = startHashToConstructEntryTablePtr->numBucket;

        long index_start;
        long index_stop;

#if defined(HTM) || defined(STM)
        {
            /* Choose disjoint segments [index_start,index_stop) for each thread */
            long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */
            index_start = threadId * partitionSize;
            if (threadId == (numThread - 1)) {
                index_stop = numUniqueSegment;
            } else {
                index_stop = index_start + partitionSize;
            }
        }
#else /* !(HTM || STM) */
        index_start = 0;
        index_stop = numUniqueSegment;
#endif /* !(HTM || STM) */

        /* Iterating over disjoint itervals in the range [0, numUniqueSegment);
         * jumpToNext skips runs of entries already known not to be chain ends. */
        for (entryIndex = index_start;
             entryIndex < index_stop;
             entryIndex += endInfoEntries[entryIndex].jumpToNext)
        {
            if (!endInfoEntries[entryIndex].isEnd) {
                continue;
            }

            /* ConstructEntries[entryIndex] is local data */
            constructEntry_t* endConstructEntryPtr =
                &constructEntries[entryIndex];
            char* endSegment = endConstructEntryPtr->segment;
            unsigned long endHash = endConstructEntryPtr->endHash;

            list_t* chainPtr = buckets[endHash % numBucket]; /* buckets: constant data */
            list_iter_t it;
            list_iter_reset(&it, chainPtr);

            /* Linked list at chainPtr is constant */
            while (list_iter_hasNext(&it, chainPtr)) {

                constructEntry_t* startConstructEntryPtr =
                    (constructEntry_t*)list_iter_next(&it, chainPtr);
                char* startSegment = startConstructEntryPtr->segment;
                long newLength = 0;

                /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */
                TM_BEGIN();

                /* Check if matches: candidate must still be a chain start,
                 * must not close a cycle back onto this chain's own start,
                 * and the characters must actually overlap (hash can collide). */
                if (TM_SHARED_READ_L(startConstructEntryPtr->isStart) &&
                    (TM_SHARED_READ_P(endConstructEntryPtr->startPtr) != startConstructEntryPtr) &&
                    (strncmp(startSegment,
                             &endSegment[segmentLength - substringLength],
                             substringLength) == 0))
                {
                    TM_SHARED_WRITE_L(startConstructEntryPtr->isStart, false);

                    constructEntry_t* startConstructEntry_endPtr;
                    constructEntry_t* endConstructEntry_startPtr;

                    /* Update endInfo (appended something so no longer end) */
                    TM_LOCAL_WRITE_L(endInfoEntries[entryIndex].isEnd, false);

                    /* Update segment chain construct info: splice the two
                     * chains together and keep the start/end back-pointers of
                     * the merged chain consistent. */
                    startConstructEntry_endPtr =
                        (constructEntry_t*)TM_SHARED_READ_P(startConstructEntryPtr->endPtr);
                    endConstructEntry_startPtr =
                        (constructEntry_t*)TM_SHARED_READ_P(endConstructEntryPtr->startPtr);
                    assert(startConstructEntry_endPtr);
                    assert(endConstructEntry_startPtr);
                    TM_SHARED_WRITE_P(startConstructEntry_endPtr->startPtr,
                                      endConstructEntry_startPtr);
                    TM_LOCAL_WRITE_P(endConstructEntryPtr->nextPtr,
                                     startConstructEntryPtr);
                    TM_SHARED_WRITE_P(endConstructEntry_startPtr->endPtr,
                                      startConstructEntry_endPtr);
                    TM_SHARED_WRITE_L(endConstructEntryPtr->overlap,
                                      substringLength);
                    newLength =
                        (long)TM_SHARED_READ_L(endConstructEntry_startPtr->length) +
                        (long)TM_SHARED_READ_L(startConstructEntryPtr->length) -
                        substringLength;
                    TM_SHARED_WRITE_L(endConstructEntry_startPtr->length, newLength);
                } /* if (matched) */

                TM_END();

                /* if there was a match (isEnd cleared above), stop scanning
                 * this chain; this entry has been consumed for this round */
                if (!endInfoEntries[entryIndex].isEnd) {
                    break;
                }

            } /* iterate over chain */

        } /* for (endIndex < numUniqueSegment) */

        thread_barrier_wait();

        /*
         * Step 2c: Update jump values and hashes
         *
         * endHash entries of all remaining ends are updated to the next
         * substringLength. Additionally jumpToNext entries are updated such
         * that they allow to skip non-end entries. Currently this is sequential
         * because parallelization did not perform better.
         */
        if (threadId == 0) {
            if (substringLength > 1) {
                long index = segmentLength - substringLength + 1;
                /* initialization if j and i: with i being the next end after j=0 */
                for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) {
                    /* find first non-null */
                }
                /* entry 0 is handled seperately from the loop below */
                endInfoEntries[0].jumpToNext = i;
                if (endInfoEntries[0].isEnd) {
                    constructEntry_t* constructEntryPtr = &constructEntries[0];
                    char* segment = constructEntryPtr->segment;
                    /* rehash suffix one character shorter for the next round */
                    constructEntryPtr->endHash = hashString(&segment[index]);
                }
                /* Continue scanning (do not reset i) */
                for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) {
                    if (endInfoEntries[i].isEnd) {
                        constructEntry_t* constructEntryPtr = &constructEntries[i];
                        char* segment = constructEntryPtr->segment;
                        constructEntryPtr->endHash = hashString(&segment[index]);
                        endInfoEntries[j].jumpToNext = MAX(1, (i - j));
                        j = i;
                    }
                }
                endInfoEntries[j].jumpToNext = i - j;
            }
        }

        thread_barrier_wait();

    } /* for (substringLength > 0) */

    thread_barrier_wait();

    /*
     * Step 3: Build sequence string
     * Single-threaded: thread 0 walks every remaining chain start and copies
     * the non-overlapping part of each segment into one output buffer.
     */
    if (threadId == 0) {

        /* First pass: total output size, summed over all chain starts. */
        long totalLength = 0;

        for (i = 0; i < numUniqueSegment; i++) {
            constructEntry_t* constructEntryPtr = &constructEntries[i];
            if (constructEntryPtr->isStart) {
                totalLength += constructEntryPtr->length;
            }
        }

        sequencerPtr->sequence = (char*)P_MALLOC((totalLength+1) * sizeof(char));
        char* sequence = sequencerPtr->sequence;
        assert(sequence);

        char* copyPtr = sequence;
        long sequenceLength = 0;

        for (i = 0; i < numUniqueSegment; i++) {
            constructEntry_t* constructEntryPtr = &constructEntries[i];
            /* If there are several start segments, we append in arbitrary order  */
            if (constructEntryPtr->isStart) {
                long newSequenceLength = sequenceLength + constructEntryPtr->length;
                assert( newSequenceLength <= totalLength );
                copyPtr = sequence + sequenceLength;
                sequenceLength = newSequenceLength;
                /* Follow nextPtr chain; each link contributes only the part
                 * not already covered by its overlap with the previous link. */
                do {
                    long numChar = segmentLength - constructEntryPtr->overlap;
                    if ((copyPtr + numChar) > (sequence + newSequenceLength)) {
                        TM_PRINT0("ERROR: sequence length != actual length\n");
                        break;
                    }
                    memcpy(copyPtr,
                           constructEntryPtr->segment,
                           (numChar * sizeof(char)));
                    copyPtr += numChar;
                } while ((constructEntryPtr = constructEntryPtr->nextPtr) != NULL);
                assert(copyPtr <= (sequence + sequenceLength));
            }
        }

        assert(sequence != NULL);
        sequence[sequenceLength] = '\0';
    }

    TM_THREAD_EXIT();
}
/* ============================================================================= * router_solve * ============================================================================= */ void router_solve (void* argPtr) { TM_THREAD_ENTER(); long threadId = thread_getId(); router_solve_arg_t* routerArgPtr = (router_solve_arg_t*)argPtr; router_t* routerPtr = routerArgPtr->routerPtr; maze_t* mazePtr = routerArgPtr->mazePtr; long* numPathArray = routerArgPtr->numPathArray; vector_t* myPathVectorPtr = PVECTOR_ALLOC(1); assert(myPathVectorPtr); queue_t* workQueuePtr = mazePtr->workQueuePtr; grid_t* gridPtr = mazePtr->gridPtr; grid_t* myGridPtr = PGRID_ALLOC(gridPtr->width, gridPtr->height, gridPtr->depth); assert(myGridPtr); long bendCost = routerPtr->bendCost; queue_t* myExpansionQueuePtr = PQUEUE_ALLOC(-1); long numPath = 0; /* * Iterate over work list to route each path. This involves an * 'expansion' and 'traceback' phase for each source/destination pair. */ while ((global_timedExecution && !global_isTerminated) || (!global_timedExecution)) { //while (1) { wait_for_turn(threadId); if (global_timedExecution && global_isTerminated) break; ulong_t beginTime; pair_t* coordinatePairPtr; TM_BEGIN(); beginTime = get_thread_time(); if (TMQUEUE_ISEMPTY(workQueuePtr)) { if (TMQUEUE_ISEMPTY(workQueuePtr)) coordinatePairPtr = NULL; } else { coordinatePairPtr = (pair_t*)TMQUEUE_POP(workQueuePtr); } TM_END(); //add_throughput(threadId , get_thread_time() - beginTime); if (coordinatePairPtr == NULL) { break; } coordinate_t* srcPtr = (coordinate_t*)coordinatePairPtr->firstPtr; coordinate_t* dstPtr = (coordinate_t*)coordinatePairPtr->secondPtr; bool_t success = FALSE; vector_t* pointVectorPtr = NULL; TM_BEGIN(); beginTime = get_thread_time(); grid_copy(myGridPtr, gridPtr); /* ok if not most up-to-date */ if (PdoExpansion(routerPtr, myGridPtr, myExpansionQueuePtr, srcPtr, dstPtr)) { pointVectorPtr = PdoTraceback(gridPtr, myGridPtr, dstPtr, bendCost); /* * TODO: fix memory leak * * 
pointVectorPtr will be a memory leak if we abort this transaction */ if (pointVectorPtr) { TMGRID_ADDPATH(gridPtr, pointVectorPtr); TM_LOCAL_WRITE_L(success, TRUE); } } TM_END(); add_throughput(threadId , get_thread_time() - beginTime); numPath++; if (success) { bool_t status = PVECTOR_PUSHBACK(myPathVectorPtr, (void*)pointVectorPtr); assert(status); } } numPathArray[threadId] = numPath; /* * Add my paths to global list */ list_t* pathVectorListPtr = routerArgPtr->pathVectorListPtr; TM_BEGIN(); TMLIST_INSERT(pathVectorListPtr, (void*)myPathVectorPtr); TM_END(); PGRID_FREE(myGridPtr); PQUEUE_FREE(myExpansionQueuePtr); #if DEBUG puts("\nFinal Grid:"); grid_print(gridPtr); #endif /* DEBUG */ TM_THREAD_EXIT(); }