void func_count (void* argPtr) { TM_THREAD_ENTER(); while (1) { int stop_counting = 0; //pair_t* coordinatePairPtr; TM_BEGIN(); long local_counter = (long)TM_SHARED_READ(my_counter); local_counter++; if(local_counter > max_count) stop_counting = 1; TM_SHARED_WRITE(my_counter, local_counter); TM_END(); //pthread_yield(); if (stop_counting == 1) { break; } }//end of while TM_THREAD_EXIT(); }
void *test_maintenance(void *data) { #ifdef TINY10B int i; free_list_item **t_list_items; #endif maintenance_thread_data_t *d = (maintenance_thread_data_t *)data; #ifdef TINY10B t_list_items = (free_list_item **)malloc(d->nb_threads * sizeof(free_list_item *)); for(i = 0; i < d->nb_threads; i++) { t_list_items[i] = d->set->t_free_list[i]; } #endif /* Create transaction */ TM_THREAD_ENTER(); /* Wait on barrier */ barrier_cross(d->barrier); /* Is the first op an update? */ //unext = (rand_range_re(&d->seed, 100) - 1 < d->update); #ifdef ICC while (stop == 0) { #else while (AO_load_full(&stop) == 0) { #endif /* ICC */ #ifdef TINY10B do_maintenance_thread(d->set, d->id, d->nb_maint); #endif #ifdef ICC } #else } #endif /* ICC */ /* Free transaction */ TM_THREAD_EXIT(); return NULL; }
/* ============================================================================= * getStartLists * ============================================================================= */ void getStartLists (void* argPtr) { TM_THREAD_ENTER(); graph* GPtr = ((getStartLists_arg_t*)argPtr)->GPtr; edge** maxIntWtListPtr = ((getStartLists_arg_t*)argPtr)->maxIntWtListPtr; long* maxIntWtListSize = ((getStartLists_arg_t*)argPtr)->maxIntWtListSize; edge** soughtStrWtListPtr = ((getStartLists_arg_t*)argPtr)->soughtStrWtListPtr; long* soughtStrWtListSize = ((getStartLists_arg_t*)argPtr)->soughtStrWtListSize; long myId = thread_getId(); long numThread = thread_getNumThread(); /* * Find Max Wt on each thread */ LONGINT_T maxWeight = 0; long i; long i_start; long i_stop; createPartition(0, GPtr->numEdges, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { if (GPtr->intWeight[i] > maxWeight) { maxWeight = GPtr->intWeight[i]; } } AL_LOCK(0); TM_BEGIN(9); long tmp_maxWeight = (long)TM_SHARED_READ(global_maxWeight); if (maxWeight > tmp_maxWeight) { TM_SHARED_WRITE(global_maxWeight, maxWeight); } TM_END(); thread_barrier_wait(); maxWeight = global_maxWeight; /* * Create partial lists */ /* * Allocate mem. for temp edge list for each thread */ long numTmpEdge = (5+ceil(1.5*(GPtr->numIntEdges)/MAX_INT_WEIGHT)); edge* tmpEdgeList = (edge*)P_MALLOC(numTmpEdge * sizeof(edge)); long i_edgeCounter = 0; for (i = i_start; i < i_stop; i++) { if (GPtr->intWeight[i] == maxWeight) { /* Find the corresponding endVertex */ long j; for (j = 0; j < GPtr->numDirectedEdges; j++) { if (GPtr->paralEdgeIndex[j] > i) { break; } } tmpEdgeList[i_edgeCounter].endVertex = GPtr->outVertexList[j-1]; tmpEdgeList[i_edgeCounter].edgeNum = j-1; long t; for (t = 0; t < GPtr->numVertices; t++) { if (GPtr->outVertexIndex[t] > j-1) { break; } } tmpEdgeList[i_edgeCounter].startVertex = t-1; i_edgeCounter++; } } /* * Merge partial edge lists */ long* i_edgeStartCounter; long* i_edgeEndCounter; if (myId == 0) { i_edgeStartCounter = (long*)P_MALLOC(numThread * sizeof(long)); assert(i_edgeStartCounter); global_i_edgeStartCounter = i_edgeStartCounter; i_edgeEndCounter = (long*)P_MALLOC(numThread * sizeof(long)); assert(i_edgeEndCounter); global_i_edgeEndCounter = i_edgeEndCounter; *maxIntWtListSize = 0; } thread_barrier_wait(); i_edgeStartCounter = global_i_edgeStartCounter; i_edgeEndCounter = global_i_edgeEndCounter; i_edgeEndCounter[myId] = i_edgeCounter; i_edgeStartCounter[myId] = 0; thread_barrier_wait(); if (myId == 0) { for (i = 1; i < numThread; i++) { i_edgeEndCounter[i] = i_edgeEndCounter[i-1] + i_edgeEndCounter[i]; i_edgeStartCounter[i] = i_edgeEndCounter[i-1]; } } *maxIntWtListSize += i_edgeCounter; thread_barrier_wait(); edge* maxIntWtList; if (myId == 0) { P_FREE(*maxIntWtListPtr); maxIntWtList = (edge*)P_MALLOC((*maxIntWtListSize) * sizeof(edge)); assert(maxIntWtList); global_maxIntWtList = maxIntWtList; } thread_barrier_wait(); maxIntWtList = global_maxIntWtList; for (i = i_edgeStartCounter[myId]; i<i_edgeEndCounter[myId]; i++) { (maxIntWtList[i]).startVertex = tmpEdgeList[i-i_edgeStartCounter[myId]].startVertex; (maxIntWtList[i]).endVertex = tmpEdgeList[i-i_edgeStartCounter[myId]].endVertex; (maxIntWtList[i]).edgeNum = tmpEdgeList[i-i_edgeStartCounter[myId]].edgeNum; } if (myId == 0) { *maxIntWtListPtr = maxIntWtList; } i_edgeCounter = 0; createPartition(0, GPtr->numStrEdges, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { if (strncmp(GPtr->strWeight+i*MAX_STRLEN, SOUGHT_STRING, MAX_STRLEN) == 0) { /* * Find the corresponding endVertex */ long t; for (t = 0; t < GPtr->numEdges; t++) { if (GPtr->intWeight[t] == -i) { break; } } long j; for (j = 0; j < GPtr->numDirectedEdges; j++) { if (GPtr->paralEdgeIndex[j] > t) { break; } } tmpEdgeList[i_edgeCounter].endVertex = GPtr->outVertexList[j-1]; tmpEdgeList[i_edgeCounter].edgeNum = j-1; for (t = 0; t < GPtr->numVertices; t++) { if (GPtr->outVertexIndex[t] > j-1) { break; } } tmpEdgeList[i_edgeCounter].startVertex = t-1; i_edgeCounter++; } } thread_barrier_wait(); i_edgeEndCounter[myId] = i_edgeCounter; i_edgeStartCounter[myId] = 0; if (myId == 0) { *soughtStrWtListSize = 0; } thread_barrier_wait(); if (myId == 0) { for (i = 1; i < numThread; i++) { i_edgeEndCounter[i] = i_edgeEndCounter[i-1] + i_edgeEndCounter[i]; i_edgeStartCounter[i] = i_edgeEndCounter[i-1]; } } *soughtStrWtListSize += i_edgeCounter; thread_barrier_wait(); edge* soughtStrWtList; if (myId == 0) { P_FREE(*soughtStrWtListPtr); soughtStrWtList = (edge*)P_MALLOC((*soughtStrWtListSize) * sizeof(edge)); assert(soughtStrWtList); global_soughtStrWtList = soughtStrWtList; } thread_barrier_wait(); soughtStrWtList = global_soughtStrWtList; for (i = i_edgeStartCounter[myId]; i < i_edgeEndCounter[myId]; i++) { (soughtStrWtList[i]).startVertex = tmpEdgeList[i-i_edgeStartCounter[myId]].startVertex; (soughtStrWtList[i]).endVertex = tmpEdgeList[i-i_edgeStartCounter[myId]].endVertex; (soughtStrWtList[i]).edgeNum = tmpEdgeList[i-i_edgeStartCounter[myId]].edgeNum; } thread_barrier_wait(); if (myId == 0) { *soughtStrWtListPtr = soughtStrWtList; P_FREE(i_edgeStartCounter); P_FREE(i_edgeEndCounter); } P_FREE(tmpEdgeList); TM_THREAD_EXIT(); }
void *test(void *data) { int unext, last = -1; val_t val = 0; int result; int id; ulong *tloc; #ifdef BIAS_RANGE val_t increase; #endif thread_data_t *d = (thread_data_t *)data; id = d->id; tloc = d->set->nb_committed; #ifdef BIAS_RANGE increase = d->range; #endif /* Create transaction */ TM_THREAD_ENTER(); /* Wait on barrier */ barrier_cross(d->barrier); /* Is the first op an update? */ unext = (rand_range_re(&d->seed, 100) - 1 < d->update); #ifdef ICC while (stop == 0) { #else while (AO_load_full(&stop) == 0) { #endif /* ICC */ if (unext) { // update if (last < 0) { // add val = rand_range_re(&d->seed, d->range); #ifdef BIAS_RANGE if(rand_range_re(&d->seed, 1000) < 50) { increase += rand_range_re(&d->seed, 10); if(increase > d->range * 20) { increase = d->range; } val = increase; } #endif if ((result = avl_add(d->set, val, TRANSACTIONAL, id)) > 0) { d->nb_added++; if(result > 1) { d->nb_modifications++; } last = val; } d->nb_trans++; tloc[id]++; d->nb_add++; } else { // remove if (d->alternate) { // alternate mode (default) #ifdef TINY10B if ((result = avl_remove(d->set, last, TRANSACTIONAL, id)) > 0) { #else if ((result = avl_remove(d->set, last, TRANSACTIONAL, 0)) > 0) { #endif d->nb_removed++; #ifdef REMOVE_LATER finish_removal(d->set, id); #endif if(result > 1) { d->nb_modifications++; } } last = -1; } else { /* Random computation only in non-alternated cases */ val = rand_range_re(&d->seed, d->range); /* Remove one random value */ #ifdef BIAS_RANGE if(rand_range_re(&d->seed, 1000) < 300) { //val = d->range + rand_range_re(&d->seed, increase - d->range); val = increase - rand_range_re(&d->seed, 10); } #endif #ifdef TINY10B if ((result = avl_remove(d->set, val, TRANSACTIONAL, id)) > 0) { #else if ((result = avl_remove(d->set, val, TRANSACTIONAL, 0)) > 0) { #endif d->nb_removed++; #ifdef REMOVE_LATER finish_removal(d->set, id); #endif if(result > 1) { d->nb_modifications++; } /* Repeat until successful, to avoid size variations */ last = -1; } } d->nb_trans++; tloc[id]++; d->nb_remove++; } } else { // read if (d->alternate) { if (d->update == 0) { if (last < 0) { val = d->first; last = val; } else { // last >= 0 val = rand_range_re(&d->seed, d->range); last = -1; } } else { // update != 0 if (last < 0) { val = rand_range_re(&d->seed, d->range); //last = val; } else { val = last; } } } else val = rand_range_re(&d->seed, d->range); #ifdef BIAS_RANGE if(rand_range_re(&d->seed, 1000) < 100) { val = increase; } #endif if (avl_contains(d->set, val, TRANSACTIONAL, id)) d->nb_found++; d->nb_trans++; tloc[id]++; d->nb_contains++; } /* Is the next op an update? */ if (d->effective) { // a failed remove/add is a read-only tx unext = ((100 * (d->nb_added + d->nb_removed)) < (d->update * (d->nb_add + d->nb_remove + d->nb_contains))); } else { // remove/add (even failed) is considered as an update unext = (rand_range_re(&d->seed, 100) - 1 < d->update); } #ifdef ICC } #else } #endif /* ICC */ /* Free transaction */ TM_THREAD_EXIT(); return NULL; } void *test_maintenance(void *data) { #ifdef TINY10B int i; free_list_item **t_list_items; #endif maintenance_thread_data_t *d = (maintenance_thread_data_t *)data; #ifdef TINY10B t_list_items = (free_list_item **)malloc(d->nb_threads * sizeof(free_list_item *)); for(i = 0; i < d->nb_threads; i++) { t_list_items[i] = d->set->t_free_list[i]; } #endif /* Create transaction */ TM_THREAD_ENTER(); /* Wait on barrier */ barrier_cross(d->barrier); /* Is the first op an update? */ //unext = (rand_range_re(&d->seed, 100) - 1 < d->update); #ifdef ICC while (stop == 0) { #else while (AO_load_full(&stop) == 0) { #endif /* ICC */ #ifdef TINY10B do_maintenance_thread(d->set, d->id, d->nb_maint); #endif #ifdef ICC } #else } #endif /* ICC */ /* Free transaction */ TM_THREAD_EXIT(); return NULL; } void catcher(int sig) { printf("CAUGHT SIGNAL %d\n", sig); }
/* ============================================================================= * router_solve * ============================================================================= */ void router_solve (void* argPtr) { TM_THREAD_ENTER(); router_solve_arg_t* routerArgPtr = (router_solve_arg_t*)argPtr; router_t* routerPtr = routerArgPtr->routerPtr; maze_t* mazePtr = routerArgPtr->mazePtr; vector_t* myPathVectorPtr = PVECTOR_ALLOC(1); assert(myPathVectorPtr); queue_t* workQueuePtr = mazePtr->workQueuePtr; grid_t* gridPtr = mazePtr->gridPtr; grid_t* myGridPtr = PGRID_ALLOC(gridPtr->width, gridPtr->height, gridPtr->depth); assert(myGridPtr); long bendCost = routerPtr->bendCost; queue_t* myExpansionQueuePtr = PQUEUE_ALLOC(-1); /* * Iterate over work list to route each path. This involves an * 'expansion' and 'traceback' phase for each source/destination pair. */ while (1) { pair_t* coordinatePairPtr; int mode = 0; TM_BEGIN(0,mode); if (mode == 0) { if (queue_htm::TMqueue_isEmpty(TM_ARG workQueuePtr)) { coordinatePairPtr = NULL; } else { coordinatePairPtr = (pair_t*)queue_htm::TMqueue_pop(TM_ARG workQueuePtr); } } else { if (queue_stm::TMqueue_isEmpty(TM_ARG workQueuePtr)) { coordinatePairPtr = NULL; } else { coordinatePairPtr = (pair_t*)queue_stm::TMqueue_pop(TM_ARG workQueuePtr); } } TM_END(); if (coordinatePairPtr == NULL) { break; } coordinate_t* srcPtr = coordinatePairPtr->firstPtr; coordinate_t* dstPtr = coordinatePairPtr->secondPtr; bool_t success = FALSE; vector_t* pointVectorPtr = NULL; mode = 0; TM_BEGIN(1,mode); if (mode == 0) { grid_copy(myGridPtr, gridPtr); /* ok if not most up-to-date */ if (PdoExpansion(routerPtr, myGridPtr, myExpansionQueuePtr, srcPtr, dstPtr)) { pointVectorPtr = PdoTraceback(gridPtr, myGridPtr, dstPtr, bendCost); /* * TODO: fix memory leak * * pointVectorPtr will be a memory leak if we abort this transaction */ if (pointVectorPtr) { TMGRID_ADDPATH_HTM(gridPtr, pointVectorPtr); TM_LOCAL_WRITE(success, TRUE); } } } else { grid_copy(myGridPtr, gridPtr); /* ok if not most up-to-date */ if (PdoExpansion(routerPtr, myGridPtr, myExpansionQueuePtr, srcPtr, dstPtr)) { pointVectorPtr = PdoTraceback(gridPtr, myGridPtr, dstPtr, bendCost); /* * TODO: fix memory leak * * pointVectorPtr will be a memory leak if we abort this transaction */ if (pointVectorPtr) { TMGRID_ADDPATH_STM(gridPtr, pointVectorPtr); TM_LOCAL_WRITE(success, TRUE); } } } TM_END(); if (success) { bool_t status = PVECTOR_PUSHBACK(myPathVectorPtr, (void*)pointVectorPtr); assert(status); } } /* * Add my paths to global list */ list_t* pathVectorListPtr = routerArgPtr->pathVectorListPtr; int mode = 0; TM_BEGIN(2,mode); if (mode == 0) { list_htm::TMlist_insert(TM_ARG pathVectorListPtr, (void*)myPathVectorPtr); } else { list_stm::TMlist_insert(TM_ARG pathVectorListPtr, (void*)myPathVectorPtr); } TM_END(); PGRID_FREE(myGridPtr); PQUEUE_FREE(myExpansionQueuePtr); #if DEBUG puts("\nFinal Grid:"); grid_print(gridPtr); #endif /* DEBUG */ TM_THREAD_EXIT(); }
void client_run (void* argPtr) { TM_THREAD_ENTER(); random_t* randomPtr = random_alloc(); random_seed(randomPtr, time(0)); // unsigned long myId = thread_getId(); // long numThread = *((long*)argPtr); long operations = (long)global_params[PARAM_OPERATIONS] / (long)global_params[PARAM_THREADS]; long interval = (long)global_params[PARAM_INTERVAL]; printf("operations: %ld \tinterval: %ld\n", operations, interval); long total = 0; long total2 = 0; long i = 0; unsigned int cont_size = (unsigned int) global_params[PARAM_CONTENTION]; unsigned int* sorted_locks = (unsigned int*) malloc((2 + cont_size) * sizeof(int)); unsigned int* read_idxs = (unsigned int*) malloc(cont_size * sizeof(int)); for (; i < operations; i++) { long random_number = ((long) random_generate(randomPtr)) % ((long)global_params[PARAM_SIZE]); long random_number2 = ((long) random_generate(randomPtr)) % ((long)global_params[PARAM_SIZE]); if (random_number == random_number2) { random_number2 = (random_number2 + 1) % ((long)global_params[PARAM_SIZE]); } int repeat = 0; for (; repeat < cont_size; repeat++) { read_idxs[repeat] = ((unsigned int) random_generate(randomPtr)) % ((unsigned int)global_params[PARAM_SIZE]); LI_HASH(&global_array[read_idxs[repeat]], &sorted_locks[repeat + 2]); } // TM_BEGIN(); LI_HASH(&global_array[random_number], &sorted_locks[0]); LI_HASH(&global_array[random_number2], &sorted_locks[1]); TM_BEGIN_ARGS(sorted_locks, cont_size + 2); long r1 = (long)TM_SHARED_READ(global_array[random_number].value); long r2 = (long)TM_SHARED_READ(global_array[random_number2].value); for (repeat--; repeat >= 0; repeat--) { total2 += (long) TM_SHARED_READ(global_array[read_idxs[repeat]].value); } r1 = r1 + 1; r2 = r2 - 1; int f = 1; int ii; for(ii = 1; ii <= ((unsigned int) global_params[PARAM_WORK]); ii++) { f *= ii; } total += f / 1000000; TM_SHARED_WRITE(global_array[random_number].value, r1); TM_SHARED_WRITE(global_array[random_number2].value, r2); TM_END_ARGS(sorted_locks, cont_size + 2); long k = 0; for (;k < (long)global_params[PARAM_INTERVAL]; k++) { long ru = ((long) random_generate(randomPtr)) % 2; total += ru; } } TM_THREAD_EXIT(); printf("ru ignore %ld - %ld\n", total, total2); }
/* ============================================================================= * sequencer_run * ============================================================================= */ void sequencer_run (void* argPtr) { TM_THREAD_ENTER(); long threadId = thread_getId(); sequencer_t* sequencerPtr = (sequencer_t*)argPtr; hashtable_t* uniqueSegmentsPtr; endInfoEntry_t* endInfoEntries; table_t** startHashToConstructEntryTables; constructEntry_t* constructEntries; table_t* hashToConstructEntryTable; uniqueSegmentsPtr = sequencerPtr->uniqueSegmentsPtr; endInfoEntries = sequencerPtr->endInfoEntries; startHashToConstructEntryTables = sequencerPtr->startHashToConstructEntryTables; constructEntries = sequencerPtr->constructEntries; hashToConstructEntryTable = sequencerPtr->hashToConstructEntryTable; segments_t* segmentsPtr = sequencerPtr->segmentsPtr; assert(segmentsPtr); vector_t* segmentsContentsPtr = segmentsPtr->contentsPtr; long numSegment = vector_getSize(segmentsContentsPtr); long segmentLength = segmentsPtr->length; long i; long j; long i_start; long i_stop; long numUniqueSegment; long substringLength; long entryIndex; /* * Step 1: Remove duplicate segments */ // #if defined(HTM) || defined(STM) long numThread = thread_getNumThread(); { /* Choose disjoint segments [i_start,i_stop) for each thread */ long partitionSize = (numSegment + numThread/2) / numThread; /* with rounding */ i_start = threadId * partitionSize; if (threadId == (numThread - 1)) { i_stop = numSegment; } else { i_stop = i_start + partitionSize; } } // #else /* !(HTM || STM) */ // i_start = 0; // i_stop = numSegment; // #endif /* !(HTM || STM) */ for (i = i_start; i < i_stop; i+=CHUNK_STEP1) { TM_BEGIN(); { long ii; long ii_stop = MIN(i_stop, (i+CHUNK_STEP1)); for (ii = i; ii < ii_stop; ii++) { void* segment = vector_at(segmentsContentsPtr, ii); TMHASHTABLE_INSERT(uniqueSegmentsPtr, segment, segment); } /* ii */ } TM_END(); } thread_barrier_wait(); /* * Step 2a: Iterate over unique segments and compute hashes. * * For the gene "atcg", the hashes for the end would be: * * "t", "tc", and "tcg" * * And for the gene "tcgg", the hashes for the start would be: * * "t", "tc", and "tcg" * * The names are "end" and "start" because if a matching pair is found, * they are the substring of the end part of the pair and the start * part of the pair respectively. In the above example, "tcg" is the * matching substring so: * * (end) (start) * a[tcg] + [tcg]g = a[tcg]g (overlap = "tcg") */ /* uniqueSegmentsPtr is constant now */ numUniqueSegment = hashtable_getSize(uniqueSegmentsPtr); entryIndex = 0; // #if defined(HTM) || defined(STM) { /* Choose disjoint segments [i_start,i_stop) for each thread */ long num = uniqueSegmentsPtr->numBucket; long partitionSize = (num + numThread/2) / numThread; /* with rounding */ i_start = threadId * partitionSize; if (threadId == (numThread - 1)) { i_stop = num; } else { i_stop = i_start + partitionSize; } } { /* Approximate disjoint segments of element allocation in constructEntries */ long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */ entryIndex = threadId * partitionSize; } // #else /* !(HTM || STM) */ // i_start = 0; // i_stop = uniqueSegmentsPtr->numBucket; // entryIndex = 0; //#endif /* !(HTM || STM) */ for (i = i_start; i < i_stop; i++) { list_t* chainPtr = uniqueSegmentsPtr->buckets[i]; list_iter_t it; list_iter_reset(&it, chainPtr); while (list_iter_hasNext(&it, chainPtr)) { char* segment = (char*)((pair_t*)list_iter_next(&it, chainPtr))->firstPtr; constructEntry_t* constructEntryPtr; long j; ulong_t startHash; bool_t status; /* Find an empty constructEntries entry */ TM_BEGIN(); while (((void*)TM_SHARED_READ_P(constructEntries[entryIndex].segment)) != NULL) { entryIndex = (entryIndex + 1) % numUniqueSegment; /* look for empty */ } constructEntryPtr = &constructEntries[entryIndex]; TM_SHARED_WRITE_P(constructEntryPtr->segment, segment); TM_END(); entryIndex = (entryIndex + 1) % numUniqueSegment; /* * Save hashes (sdbm algorithm) of segment substrings * * endHashes will be computed for shorter substrings after matches * have been made (in the next phase of the code). This will reduce * the number of substrings for which hashes need to be computed. * * Since we can compute startHashes incrementally, we go ahead * and compute all of them here. */ /* constructEntryPtr is local now */ constructEntryPtr->endHash = (ulong_t)hashString(&segment[1]); startHash = 0; for (j = 1; j < segmentLength; j++) { startHash = (ulong_t)segment[j-1] + (startHash << 6) + (startHash << 16) - startHash; TM_BEGIN(); status = TMTABLE_INSERT(startHashToConstructEntryTables[j], (ulong_t)startHash, (void*)constructEntryPtr ); TM_END(); assert(status); } /* * For looking up construct entries quickly */ startHash = (ulong_t)segment[j-1] + (startHash << 6) + (startHash << 16) - startHash; TM_BEGIN(); status = TMTABLE_INSERT(hashToConstructEntryTable, (ulong_t)startHash, (void*)constructEntryPtr); TM_END(); assert(status); } } thread_barrier_wait(); /* * Step 2b: Match ends to starts by using hash-based string comparison. */ for (substringLength = segmentLength-1; substringLength > 0; substringLength--) { table_t* startHashToConstructEntryTablePtr = startHashToConstructEntryTables[substringLength]; list_t** buckets = startHashToConstructEntryTablePtr->buckets; long numBucket = startHashToConstructEntryTablePtr->numBucket; long index_start; long index_stop; // #if defined(HTM) || defined(STM) { /* Choose disjoint segments [index_start,index_stop) for each thread */ long partitionSize = (numUniqueSegment + numThread/2) / numThread; /* with rounding */ index_start = threadId * partitionSize; if (threadId == (numThread - 1)) { index_stop = numUniqueSegment; } else { index_stop = index_start + partitionSize; } } // #else /* !(HTM || STM) */ // index_start = 0; // index_stop = numUniqueSegment; //#endif /* !(HTM || STM) */ /* Iterating over disjoint itervals in the range [0, numUniqueSegment) */ for (entryIndex = index_start; entryIndex < index_stop; entryIndex += endInfoEntries[entryIndex].jumpToNext) { if (!endInfoEntries[entryIndex].isEnd) { continue; } /* ConstructEntries[entryIndex] is local data */ constructEntry_t* endConstructEntryPtr = &constructEntries[entryIndex]; char* endSegment = endConstructEntryPtr->segment; ulong_t endHash = endConstructEntryPtr->endHash; list_t* chainPtr = buckets[endHash % numBucket]; /* buckets: constant data */ list_iter_t it; list_iter_reset(&it, chainPtr); /* Linked list at chainPtr is constant */ while (list_iter_hasNext(&it, chainPtr)) { constructEntry_t* startConstructEntryPtr = (constructEntry_t*)list_iter_next(&it, chainPtr); char* startSegment = startConstructEntryPtr->segment; long newLength = 0; /* endConstructEntryPtr is local except for properties startPtr/endPtr/length */ TM_BEGIN(); /* Check if matches */ if (TM_SHARED_READ(startConstructEntryPtr->isStart) && (TM_SHARED_READ_P(endConstructEntryPtr->startPtr) != startConstructEntryPtr) && (strncmp(startSegment, &endSegment[segmentLength - substringLength], substringLength) == 0)) { TM_SHARED_WRITE(startConstructEntryPtr->isStart, FALSE); constructEntry_t* startConstructEntry_endPtr; constructEntry_t* endConstructEntry_startPtr; /* Update endInfo (appended something so no longer end) */ TM_LOCAL_WRITE(endInfoEntries[entryIndex].isEnd, FALSE); /* Update segment chain construct info */ startConstructEntry_endPtr = (constructEntry_t*)TM_SHARED_READ_P(startConstructEntryPtr->endPtr); endConstructEntry_startPtr = (constructEntry_t*)TM_SHARED_READ_P(endConstructEntryPtr->startPtr); assert(startConstructEntry_endPtr); assert(endConstructEntry_startPtr); TM_SHARED_WRITE_P(startConstructEntry_endPtr->startPtr, endConstructEntry_startPtr); TM_LOCAL_WRITE_P(endConstructEntryPtr->nextPtr, startConstructEntryPtr); TM_SHARED_WRITE_P(endConstructEntry_startPtr->endPtr, startConstructEntry_endPtr); TM_SHARED_WRITE(endConstructEntryPtr->overlap, substringLength); newLength = (long)TM_SHARED_READ(endConstructEntry_startPtr->length) + (long)TM_SHARED_READ(startConstructEntryPtr->length) - substringLength; TM_SHARED_WRITE(endConstructEntry_startPtr->length, newLength); } /* if (matched) */ TM_END(); if (!endInfoEntries[entryIndex].isEnd) { /* if there was a match */ break; } } /* iterate over chain */ } /* for (endIndex < numUniqueSegment) */ thread_barrier_wait(); /* * Step 2c: Update jump values and hashes * * endHash entries of all remaining ends are updated to the next * substringLength. Additionally jumpToNext entries are updated such * that they allow to skip non-end entries. Currently this is sequential * because parallelization did not perform better. . */ if (threadId == 0) { if (substringLength > 1) { long index = segmentLength - substringLength + 1; /* initialization if j and i: with i being the next end after j=0 */ for (i = 1; !endInfoEntries[i].isEnd; i+=endInfoEntries[i].jumpToNext) { /* find first non-null */ } /* entry 0 is handled seperately from the loop below */ endInfoEntries[0].jumpToNext = i; if (endInfoEntries[0].isEnd) { constructEntry_t* constructEntryPtr = &constructEntries[0]; char* segment = constructEntryPtr->segment; constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]); } /* Continue scanning (do not reset i) */ for (j = 0; i < numUniqueSegment; i+=endInfoEntries[i].jumpToNext) { if (endInfoEntries[i].isEnd) { constructEntry_t* constructEntryPtr = &constructEntries[i]; char* segment = constructEntryPtr->segment; constructEntryPtr->endHash = (ulong_t)hashString(&segment[index]); endInfoEntries[j].jumpToNext = MAX(1, (i - j)); j = i; } } endInfoEntries[j].jumpToNext = i - j; } } thread_barrier_wait(); } /* for (substringLength > 0) */ thread_barrier_wait(); /* * Step 3: Build sequence string */ if (threadId == 0) { long totalLength = 0; for (i = 0; i < numUniqueSegment; i++) { constructEntry_t* constructEntryPtr = &constructEntries[i]; if (constructEntryPtr->isStart) { totalLength += constructEntryPtr->length; } } sequencerPtr->sequence = (char*)P_MALLOC((totalLength+1) * sizeof(char)); char* sequence = sequencerPtr->sequence; assert(sequence); char* copyPtr = sequence; long sequenceLength = 0; for (i = 0; i < numUniqueSegment; i++) { constructEntry_t* constructEntryPtr = &constructEntries[i]; /* If there are several start segments, we append in arbitrary order */ if (constructEntryPtr->isStart) { long newSequenceLength = sequenceLength + constructEntryPtr->length; assert( newSequenceLength <= totalLength ); copyPtr = sequence + sequenceLength; sequenceLength = newSequenceLength; do { long numChar = segmentLength - constructEntryPtr->overlap; if ((copyPtr + numChar) > (sequence + newSequenceLength)) { TM_PRINT0("ERROR: sequence length != actual length\n"); break; } memcpy(copyPtr, constructEntryPtr->segment, (numChar * sizeof(char))); copyPtr += numChar; } while ((constructEntryPtr = constructEntryPtr->nextPtr) != NULL); assert(copyPtr <= (sequence + sequenceLength)); } } assert(sequence != NULL); sequence[sequenceLength] = '\0'; } TM_THREAD_EXIT(); }
/* ============================================================================= * work * ============================================================================= */ static void work (void* argPtr) { TM_THREAD_ENTER(); args_t* args = (args_t*)argPtr; float** feature = args->feature; int nfeatures = args->nfeatures; int npoints = args->npoints; int nclusters = args->nclusters; int* membership = args->membership; float** clusters = args->clusters; int** new_centers_len = args->new_centers_len; float** new_centers = args->new_centers; float delta = 0.0; int index; long i; int j; int start; int stop; int myId; myId = thread_getId(); start = myId * CHUNK; while (start < npoints) { stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints); for (i = start; i < stop; TMHT_LOCAL_WRITE(i, i+1)) { index = common_findNearestPoint(feature[i], nfeatures, clusters, nclusters); /* * If membership changes, increase delta by 1. * membership[i] cannot be changed by other threads */ if (membership[i] != index) { delta += 1.0; } /* Assign the membership to object i */ /* membership[i] can't be changed by other thread */ membership[i] = index; /* Update new cluster centers : sum of objects located within */ TM_BEGIN(); TM_SHARED_WRITE_I(*new_centers_len[index], TM_SHARED_READ_I(*new_centers_len[index]) + 1); for (j = 0; j < nfeatures; j++) { TM_SHARED_WRITE_F( new_centers[index][j], (TM_SHARED_READ_F(new_centers[index][j]) + feature[i][j]) ); } TM_END(); } /* Update task queue */ if (start + CHUNK < npoints) { TM_BEGIN(); start = (int)TM_SHARED_READ_L(global_i); TM_SHARED_WRITE_L(global_i, (long)(start + CHUNK)); TM_END(); } else { break; } } TM_BEGIN(); TM_SHARED_WRITE_F(global_delta, TM_SHARED_READ_F(global_delta) + delta); TM_END(); TM_THREAD_EXIT(); }
/* ============================================================================= * computeGraph * ============================================================================= */ void computeGraph (void* argPtr) { TM_THREAD_ENTER(); graph* GPtr = ((computeGraph_arg_t*)argPtr)->GPtr; graphSDG* SDGdataPtr = ((computeGraph_arg_t*)argPtr)->SDGdataPtr; long myId = thread_getId(); long numThread = thread_getNumThread(); ULONGINT_T j; ULONGINT_T maxNumVertices = 0; ULONGINT_T numEdgesPlaced = SDGdataPtr->numEdgesPlaced; /* * First determine the number of vertices by scanning the tuple * startVertex list */ long i; long i_start; long i_stop; createPartition(0, numEdgesPlaced, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { if (SDGdataPtr->startVertex[i] > maxNumVertices) { maxNumVertices = SDGdataPtr->startVertex[i]; } } TM_BEGIN(); long tmp_maxNumVertices = (long)TM_SHARED_READ_L(global_maxNumVertices); long new_maxNumVertices = MAX(tmp_maxNumVertices, maxNumVertices) + 1; TM_SHARED_WRITE_L(global_maxNumVertices, new_maxNumVertices); TM_END(); thread_barrier_wait(); maxNumVertices = global_maxNumVertices; if (myId == 0) { GPtr->numVertices = maxNumVertices; GPtr->numEdges = numEdgesPlaced; GPtr->intWeight = SDGdataPtr->intWeight; GPtr->strWeight = SDGdataPtr->strWeight; for (i = 0; i < numEdgesPlaced; i++) { if (GPtr->intWeight[numEdgesPlaced-i-1] < 0) { GPtr->numStrEdges = -(GPtr->intWeight[numEdgesPlaced-i-1]) + 1; GPtr->numIntEdges = numEdgesPlaced - GPtr->numStrEdges; break; } } GPtr->outDegree = (LONGINT_T*)P_MALLOC((GPtr->numVertices) * sizeof(LONGINT_T)); assert(GPtr->outDegree); GPtr->outVertexIndex = (ULONGINT_T*)P_MALLOC((GPtr->numVertices) * sizeof(ULONGINT_T)); assert(GPtr->outVertexIndex); } thread_barrier_wait(); createPartition(0, GPtr->numVertices, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { GPtr->outDegree[i] = 0; GPtr->outVertexIndex[i] = 0; } ULONGINT_T outVertexListSize = 0; thread_barrier_wait(); ULONGINT_T i0 = -1UL; for (i = i_start; i < i_stop; i++) { ULONGINT_T k = i; if ((outVertexListSize == 0) && (k != 0)) { while (i0 == -1UL) { for (j = 0; j < numEdgesPlaced; j++) { if (k == SDGdataPtr->startVertex[j]) { i0 = j; break; } } k--; } } if ((outVertexListSize == 0) && (k == 0)) { i0 = 0; } for (j = i0; j < numEdgesPlaced; j++) { if (i == GPtr->numVertices-1) { break; } if ((i != SDGdataPtr->startVertex[j])) { if ((j > 0) && (i == SDGdataPtr->startVertex[j-1])) { if (j-i0 >= 1) { outVertexListSize++; GPtr->outDegree[i]++; ULONGINT_T t; for (t = i0+1; t < j; t++) { if (SDGdataPtr->endVertex[t] != SDGdataPtr->endVertex[t-1]) { outVertexListSize++; GPtr->outDegree[i] = GPtr->outDegree[i]+1; } } } } i0 = j; break; } } if (i == GPtr->numVertices-1) { if (numEdgesPlaced-i0 >= 0) { outVertexListSize++; GPtr->outDegree[i]++; ULONGINT_T t; for (t = i0+1; t < numEdgesPlaced; t++) { if (SDGdataPtr->endVertex[t] != SDGdataPtr->endVertex[t-1]) { outVertexListSize++; GPtr->outDegree[i]++; } } } } } /* for i */ thread_barrier_wait(); prefix_sums(GPtr->outVertexIndex, GPtr->outDegree, GPtr->numVertices); thread_barrier_wait(); TM_BEGIN(); TM_SHARED_WRITE_L( global_outVertexListSize, ((long)TM_SHARED_READ_L(global_outVertexListSize) + outVertexListSize) ); TM_END(); thread_barrier_wait(); outVertexListSize = global_outVertexListSize; if (myId == 0) { GPtr->numDirectedEdges = outVertexListSize; GPtr->outVertexList = (ULONGINT_T*)P_MALLOC(outVertexListSize * sizeof(ULONGINT_T)); assert(GPtr->outVertexList); GPtr->paralEdgeIndex = (ULONGINT_T*)P_MALLOC(outVertexListSize * sizeof(ULONGINT_T)); assert(GPtr->paralEdgeIndex); GPtr->outVertexList[0] = SDGdataPtr->endVertex[0]; } thread_barrier_wait(); /* * Evaluate outVertexList */ i0 = -1UL; for (i = i_start; i < i_stop; i++) { ULONGINT_T k = i; while ((i0 == -1UL) && (k != 0)) { for (j = 0; j < numEdgesPlaced; j++) { if (k == SDGdataPtr->startVertex[j]) { i0 = j; break; } } k--; } if ((i0 == -1) && (k == 0)) { i0 = 0; } for (j = i0; j < numEdgesPlaced; j++) { if (i == GPtr->numVertices-1) { break; } if (i != SDGdataPtr->startVertex[j]) { if ((j > 0) && (i == SDGdataPtr->startVertex[j-1])) { if (j-i0 >= 1) { long ii = GPtr->outVertexIndex[i]; ULONGINT_T r = 0; GPtr->paralEdgeIndex[ii] = i0; GPtr->outVertexList[ii] = SDGdataPtr->endVertex[i0]; r++; ULONGINT_T t; for (t = i0+1; t < j; t++) { if (SDGdataPtr->endVertex[t] != SDGdataPtr->endVertex[t-1]) { GPtr->paralEdgeIndex[ii+r] = t; GPtr->outVertexList[ii+r] = SDGdataPtr->endVertex[t]; r++; } } } } i0 = j; break; } } /* for j */ if (i == GPtr->numVertices-1) { ULONGINT_T r = 0; if (numEdgesPlaced-i0 >= 0) { long ii = GPtr->outVertexIndex[i]; GPtr->paralEdgeIndex[ii+r] = i0; GPtr->outVertexList[ii+r] = SDGdataPtr->endVertex[i0]; r++; ULONGINT_T t; for (t = i0+1; t < numEdgesPlaced; t++) { if (SDGdataPtr->endVertex[t] != SDGdataPtr->endVertex[t-1]) { GPtr->paralEdgeIndex[ii+r] = t; GPtr->outVertexList[ii+r] = SDGdataPtr->endVertex[t]; r++; } } } } } /* for i */ thread_barrier_wait(); if (myId == 0) { P_FREE(SDGdataPtr->startVertex); P_FREE(SDGdataPtr->endVertex); GPtr->inDegree = (LONGINT_T*)P_MALLOC(GPtr->numVertices * sizeof(LONGINT_T)); assert(GPtr->inDegree); GPtr->inVertexIndex = (ULONGINT_T*)P_MALLOC(GPtr->numVertices * sizeof(ULONGINT_T)); assert(GPtr->inVertexIndex); } thread_barrier_wait(); for (i = i_start; i < i_stop; i++) { GPtr->inDegree[i] = 0; GPtr->inVertexIndex[i] = 0; } /* A temp. array to store the inplied edges */ ULONGINT_T* impliedEdgeList; if (myId == 0) { impliedEdgeList = (ULONGINT_T*)P_MALLOC(GPtr->numVertices * MAX_CLUSTER_SIZE * sizeof(ULONGINT_T)); global_impliedEdgeList = impliedEdgeList; } thread_barrier_wait(); impliedEdgeList = global_impliedEdgeList; createPartition(0, (GPtr->numVertices * MAX_CLUSTER_SIZE), myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { impliedEdgeList[i] = 0; } /* * An auxiliary array to store implied edges, in case we overshoot * MAX_CLUSTER_SIZE */ ULONGINT_T** auxArr; if (myId == 0) { auxArr = (ULONGINT_T**)P_MALLOC(GPtr->numVertices * sizeof(ULONGINT_T*)); assert(auxArr); global_auxArr = auxArr; } thread_barrier_wait(); auxArr = global_auxArr; createPartition(0, GPtr->numVertices, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { /* Inspect adjacency list of vertex i */ for (j = GPtr->outVertexIndex[i]; j < (GPtr->outVertexIndex[i] + GPtr->outDegree[i]); j++) { ULONGINT_T v = GPtr->outVertexList[j]; ULONGINT_T k; for (k = GPtr->outVertexIndex[v]; k < (GPtr->outVertexIndex[v] + GPtr->outDegree[v]); k++) { if (GPtr->outVertexList[k] == i) { break; } } if (k == GPtr->outVertexIndex[v]+GPtr->outDegree[v]) { TM_BEGIN(); /* Add i to the impliedEdgeList of v */ long inDegree = (long)TM_SHARED_READ_L(GPtr->inDegree[v]); TM_SHARED_WRITE_L(GPtr->inDegree[v], (inDegree + 1)); if (inDegree < MAX_CLUSTER_SIZE) { TM_SHARED_WRITE_L(impliedEdgeList[v*MAX_CLUSTER_SIZE+inDegree], i); } else { /* Use auxiliary array to store the implied edge */ /* Create an array if it's not present already */ ULONGINT_T* a = NULL; if ((inDegree % MAX_CLUSTER_SIZE) == 0) { a = (ULONGINT_T*)TM_MALLOC(MAX_CLUSTER_SIZE * sizeof(ULONGINT_T)); assert(a); TM_SHARED_WRITE_P(auxArr[v], a); } else { a = auxArr[v]; } TM_SHARED_WRITE_L(a[inDegree % MAX_CLUSTER_SIZE], i); } TM_END(); } } } /* for i */ thread_barrier_wait(); prefix_sums(GPtr->inVertexIndex, GPtr->inDegree, GPtr->numVertices); if (myId == 0) { GPtr->numUndirectedEdges = GPtr->inVertexIndex[GPtr->numVertices-1] + GPtr->inDegree[GPtr->numVertices-1]; GPtr->inVertexList = (ULONGINT_T *)P_MALLOC(GPtr->numUndirectedEdges * sizeof(ULONGINT_T)); } thread_barrier_wait(); /* * Create the inVertex List */ for (i = i_start; i < i_stop; i++) { for (j = GPtr->inVertexIndex[i]; j < (GPtr->inVertexIndex[i] + GPtr->inDegree[i]); j++) { if ((j - GPtr->inVertexIndex[i]) < MAX_CLUSTER_SIZE) { GPtr->inVertexList[j] = impliedEdgeList[i*MAX_CLUSTER_SIZE+j-GPtr->inVertexIndex[i]]; } else { GPtr->inVertexList[j] = auxArr[i][(j-GPtr->inVertexIndex[i]) % MAX_CLUSTER_SIZE]; } } } thread_barrier_wait(); if (myId == 0) { P_FREE(impliedEdgeList); } for (i = i_start; i < i_stop; i++) { if (GPtr->inDegree[i] > MAX_CLUSTER_SIZE) { P_FREE(auxArr[i]); } } thread_barrier_wait(); if (myId == 0) { P_FREE(auxArr); } TM_THREAD_EXIT(); }
/* ============================================================================= * processPackets * ============================================================================= */ void processPackets (void* argPtr) { TM_THREAD_ENTER(); long threadId = thread_getId(); stream_t* streamPtr = ((arg_t*)argPtr)->streamPtr; decoder_t* decoderPtr = ((arg_t*)argPtr)->decoderPtr; vector_t** errorVectors = ((arg_t*)argPtr)->errorVectors; detector_t* detectorPtr = PDETECTOR_ALLOC(); assert(detectorPtr); PDETECTOR_ADDPREPROCESSOR(detectorPtr, &preprocessor_toLower); vector_t* errorVectorPtr = errorVectors[threadId]; while (1) { char* bytes; unsigned int locks[1]; TM_BEGIN(); SINGLE_LOCK(streamPtr); bytes = TMSTREAM_GETPACKET(streamPtr); SINGLE_UNLOCK(streamPtr); TM_END(); if (!bytes) { break; } packet_t* packetPtr = (packet_t*)bytes; long flowId = packetPtr->flowId; error_t error; TM_BEGIN(); error = TMDECODER_PROCESS(decoderPtr, bytes, (PACKET_HEADER_LENGTH + packetPtr->length)); TM_END(); if (error) { /* * Currently, stream_generate() does not create these errors. */ assert(0); bool_t status = PVECTOR_PUSHBACK(errorVectorPtr, (void*)flowId); assert(status); } char* data; long decodedFlowId; TM_BEGIN(); SINGLE_LOCK(decoderPtr); data = TMDECODER_GETCOMPLETE(decoderPtr, &decodedFlowId); SINGLE_UNLOCK(decoderPtr); TM_END(); if (data) { error_t error = PDETECTOR_PROCESS(detectorPtr, data); P_FREE(data); if (error) { bool_t status = PVECTOR_PUSHBACK(errorVectorPtr, (void*)decodedFlowId); assert(status); } } } PDETECTOR_FREE(detectorPtr); TM_THREAD_EXIT(); }
void *test2(void *data) { int val, newval, last, flag = 1; int id; ulong *tloc; thread_data_t *d = (thread_data_t *)data; id = d->id; tloc = d->set->nb_committed; /* Create transaction */ TM_THREAD_ENTER(); /* Wait on barrier */ barrier_cross(d->barrier); last = 0; // to avoid warning while (stop == 0) { val = rand_range_re(&d->seed, 100) - 1; /* added for HashTables */ if (val < d->update) { if (val >= d->move) { /* update without move */ if (flag) { /* Add random value */ val = (rand_r(&d->seed) % d->range) + 1; if (avl_add(d->set, val, TRANSACTIONAL, id)) { d->nb_added++; last = val; flag = 0; } d->nb_trans++; tloc[id]++; d->nb_add++; } else { if (d->alternate) { /* Remove last value */ if (avl_remove(d->set, last, TRANSACTIONAL, id)) d->nb_removed++; d->nb_trans++; tloc[id]++; d->nb_remove++; flag = 1; } else { /* Random computation only in non-alternated cases */ newval = rand_range_re(&d->seed, d->range); if (avl_remove(d->set, newval, TRANSACTIONAL, id)) { d->nb_removed++; /* Repeat until successful, to avoid size variations */ flag = 1; } d->nb_trans++; tloc[id]++; d->nb_remove++; } } } else { /* move */ val = rand_range_re(&d->seed, d->range); if (avl_move(d->set, last, val, TRANSACTIONAL, id)) { d->nb_moved++; last = val; } d->nb_trans++; tloc[id]++; d->nb_move++; } } else { if (val >= d->update + d->snapshot) { /* read-only without snapshot */ /* Look for random value */ val = rand_range_re(&d->seed, d->range); if (avl_contains(d->set, val, TRANSACTIONAL, id)) d->nb_found++; d->nb_trans++; tloc[id]++; d->nb_contains++; } else { /* snapshot */ if (avl_snapshot(d->set, TRANSACTIONAL, id)) d->nb_snapshoted++; d->nb_trans++; tloc[id]++; d->nb_snapshot++; } } } /* Free transaction */ TM_THREAD_EXIT(); return NULL; }
/* ============================================================================= * router_solve * ============================================================================= */ void router_solve (void* argPtr) { TM_THREAD_ENTER(); router_solve_arg_t* routerArgPtr = (router_solve_arg_t*)argPtr; router_t* routerPtr = routerArgPtr->routerPtr; maze_t* mazePtr = routerArgPtr->mazePtr; vector_t* myPathVectorPtr = PVECTOR_ALLOC(1); assert(myPathVectorPtr); queue_t* workQueuePtr = mazePtr->workQueuePtr; grid_t* gridPtr = mazePtr->gridPtr; grid_t* myGridPtr = PGRID_ALLOC(gridPtr->width, gridPtr->height, gridPtr->depth); assert(myGridPtr); long bendCost = routerPtr->bendCost; queue_t* myExpansionQueuePtr = PQUEUE_ALLOC(-1); /* * Iterate over work list to route each path. This involves an * 'expansion' and 'traceback' phase for each source/destination pair. */ while (1) { pair_t* coordinatePairPtr; TM_BEGIN(); if (TMQUEUE_ISEMPTY(workQueuePtr)) { coordinatePairPtr = NULL; } else { coordinatePairPtr = (pair_t*)TMQUEUE_POP(workQueuePtr); } TM_END(); if (coordinatePairPtr == NULL) { break; } coordinate_t* srcPtr = (coordinate_t*)coordinatePairPtr->firstPtr; coordinate_t* dstPtr = (coordinate_t*)coordinatePairPtr->secondPtr; bool success = false; vector_t* pointVectorPtr = NULL; TM_BEGIN(); grid_copy(myGridPtr, gridPtr); /* ok if not most up-to-date */ if (PdoExpansion(routerPtr, myGridPtr, myExpansionQueuePtr, srcPtr, dstPtr)) { pointVectorPtr = PdoTraceback(gridPtr, myGridPtr, dstPtr, bendCost); /* * TODO: fix memory leak * * pointVectorPtr will be a memory leak if we abort this transaction */ if (pointVectorPtr) { TMGRID_ADDPATH(gridPtr, pointVectorPtr); TM_LOCAL_WRITE_L(success, true); } } TM_END(); if (success) { bool status = PVECTOR_PUSHBACK(myPathVectorPtr, (void*)pointVectorPtr); assert(status); } } /* * Add my paths to global list */ list_t* pathVectorListPtr = routerArgPtr->pathVectorListPtr; TM_BEGIN(); TMLIST_INSERT(pathVectorListPtr, (void*)myPathVectorPtr); TM_END(); PGRID_FREE(myGridPtr); PQUEUE_FREE(myExpansionQueuePtr); #if DEBUG puts("\nFinal Grid:"); grid_print(gridPtr); #endif /* DEBUG */ TM_THREAD_EXIT(); }
/* ============================================================================= * process * ============================================================================= */ void process () { TM_THREAD_ENTER(); heap_t* workHeapPtr = global_workHeapPtr; mesh_t* meshPtr = global_meshPtr; region_t* regionPtr; long totalNumAdded = 0; long numProcess = 0; regionPtr = PREGION_ALLOC(); assert(regionPtr); while (1) { element_t* elementPtr; __transaction_atomic { elementPtr = TMHEAP_REMOVE(workHeapPtr); } if (elementPtr == NULL) { break; } bool_t isGarbage; __transaction_atomic { isGarbage = TMELEMENT_ISGARBAGE(elementPtr); } if (isGarbage) { /* * Handle delayed deallocation */ PELEMENT_FREE(elementPtr); continue; } long numAdded; __transaction_atomic { PREGION_CLEARBAD(regionPtr); numAdded = TMREGION_REFINE(regionPtr, elementPtr, meshPtr); } __transaction_atomic { TMELEMENT_SETISREFERENCED(elementPtr, FALSE); isGarbage = TMELEMENT_ISGARBAGE(elementPtr); } if (isGarbage) { /* * Handle delayed deallocation */ PELEMENT_FREE(elementPtr); } totalNumAdded += numAdded; __transaction_atomic { TMREGION_TRANSFERBAD(regionPtr, workHeapPtr); } numProcess++; } __transaction_atomic { global_totalNumAdded = global_totalNumAdded + totalNumAdded; global_numProcess = global_numProcess + numProcess; } PREGION_FREE(regionPtr); TM_THREAD_EXIT(); }
void *test(void *data) { int val2, numtx, r, last = -1; val_t val = 0; int unext, mnext, cnext; thread_data_t *d = (thread_data_t *)data; /* Create transaction */ TM_THREAD_ENTER(); /* Wait on barrier */ barrier_cross(d->barrier); /* Is the first op an update, a move? */ r = rand_range_re(&d->seed, 100) - 1; unext = (r < d->update); mnext = (r < d->move); cnext = (r >= d->update + d->snapshot); #ifdef ICC while (stop == 0) { #else while (AO_load_full(&stop) == 0) { #endif /* ICC */ if (unext) { // update if (mnext) { // move if (last == -1) val = rand_range_re(&d->seed, d->range); val2 = rand_range_re(&d->seed, d->range); if (ht_move(d->set, val, val2, TRANSACTIONAL)) { d->nb_moved++; last = val2; } d->nb_move++; } else if (last < 0) { // add val = rand_range_re(&d->seed, d->range); if (ht_add(d->set, val, TRANSACTIONAL)) { d->nb_added++; last = val; } d->nb_add++; } else { // remove if (d->alternate) { // alternate mode if (ht_remove(d->set, last, TRANSACTIONAL)) { d->nb_removed++; last = -1; } } else { /* Random computation only in non-alternated cases */ val = rand_range_re(&d->seed, d->range); /* Remove one random value */ if (ht_remove(d->set, val, TRANSACTIONAL)) { d->nb_removed++; /* Repeat until successful, to avoid size variations */ last = -1; } } d->nb_remove++; } } else { // reads if (cnext) { // contains (no snapshot) if (d->alternate) { if (d->update == 0) { if (last < 0) { val = d->first; last = val; } else { // last >= 0 val = rand_range_re(&d->seed, d->range); last = -1; } } else { // update != 0 if (last < 0) { val = rand_range_re(&d->seed, d->range); //last = val; } else { val = last; } } } else val = rand_range_re(&d->seed, d->range); if (ht_contains(d->set, val, TRANSACTIONAL)) d->nb_found++; d->nb_contains++; } else { // snapshot if (ht_snapshot(d->set, TRANSACTIONAL)) d->nb_snapshoted++; d->nb_snapshot++; } } /* Is the next op an update, a move, a contains? */ if (d->effective) { // a failed remove/add is a read-only tx numtx = d->nb_contains + d->nb_add + d->nb_remove + d->nb_move + d->nb_snapshot; unext = ((100.0 * (d->nb_added + d->nb_removed + d->nb_moved)) < (d->update * numtx)); mnext = ((100.0 * d->nb_moved) < (d->move * numtx)); cnext = !((100.0 * d->nb_snapshoted) < (d->snapshot * numtx)); } else { // remove/add (even failed) is considered as an update r = rand_range_re(&d->seed, 100) - 1; unext = (r < d->update); mnext = (r < d->move); cnext = (r >= d->update + d->snapshot); } #ifdef ICC } #else } #endif /* ICC */ /* Free transaction */ TM_THREAD_EXIT(); return NULL; }
/* ============================================================================= * work * ============================================================================= */ static void work (void* argPtr) { TM_THREAD_ENTER(); args_t* args = (args_t*)argPtr; double** feature = args->feature; int nfeatures = args->nfeatures; int npoints = args->npoints; int nclusters = args->nclusters; int* membership = args->membership; double** clusters = args->clusters; int** new_centers_len = args->new_centers_len; double** new_centers = args->new_centers; double delta = 0.0; int index; int i; int j; int start; int stop; int myId; myId = thread_getId(); start = myId * CHUNK; while (start < npoints) { stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints); for (i = start; i < stop; i++) { index = common_findNearestPoint(feature[i], nfeatures, clusters, nclusters); /* * If membership changes, increase delta by 1. * membership[i] cannot be changed by other threads */ if (membership[i] != index) { delta += 1.0; } /* Assign the membership to object i */ /* membership[i] can't be changed by other thread */ membership[i] = index; /* Update new cluster centers : sum of objects located within */ int mode = 0; TM_BEGIN(0,mode); if (mode == 0) { FAST_PATH_SHARED_WRITE(*new_centers_len[index], FAST_PATH_SHARED_READ(*new_centers_len[index]) + 1); for (j = 0; j < nfeatures; j++) { FAST_PATH_SHARED_WRITE_D( new_centers[index][j], (FAST_PATH_SHARED_READ_D(new_centers[index][j]) + feature[i][j]) ); } } else { SLOW_PATH_SHARED_WRITE(*new_centers_len[index], SLOW_PATH_SHARED_READ(*new_centers_len[index]) + 1); for (j = 0; j < nfeatures; j++) { SLOW_PATH_SHARED_WRITE_D( new_centers[index][j], (SLOW_PATH_SHARED_READ_D(new_centers[index][j]) + feature[i][j]) ); } } TM_END(); } /* Update task queue */ if (start + CHUNK < npoints) { int mode = 0; TM_BEGIN(1, mode); if (mode == 0) { start = (int)FAST_PATH_SHARED_READ(global_i); FAST_PATH_SHARED_WRITE(global_i, (start + CHUNK)); } else { start = (int)SLOW_PATH_SHARED_READ(global_i); SLOW_PATH_SHARED_WRITE(global_i, (start + CHUNK)); } TM_END(); } else { break; } } int mode = 0; TM_BEGIN(2,mode); if (mode == 0) { FAST_PATH_SHARED_WRITE_D(global_delta, FAST_PATH_SHARED_READ_D(global_delta) + delta); } else { SLOW_PATH_SHARED_WRITE_D(global_delta, SLOW_PATH_SHARED_READ_D(global_delta) + delta); } TM_END(); TM_THREAD_EXIT(); }
/* ============================================================================= * process * ============================================================================= */ void process () { TM_THREAD_ENTER(); heap_t* workHeapPtr = global_workHeapPtr; mesh_t* meshPtr = global_meshPtr; region_t* regionPtr; long totalNumAdded = 0; long numProcess = 0; regionPtr = PREGION_ALLOC(); assert(regionPtr); while (1) { element_t* elementPtr; AL_LOCK(0); TM_BEGIN(0); elementPtr = TMHEAP_REMOVE(workHeapPtr); TM_END(); if (elementPtr == NULL) { break; } bool_t isGarbage; AL_LOCK(0); TM_BEGIN(1); isGarbage = TMELEMENT_ISGARBAGE(elementPtr); TM_END(); if (isGarbage) { /* * Handle delayed deallocation */ PELEMENT_FREE(elementPtr); continue; } long numAdded; AL_LOCK(0); TM_BEGIN(2); PREGION_CLEARBAD(regionPtr); numAdded = TMREGION_REFINE(regionPtr, elementPtr, meshPtr); TM_END(); AL_LOCK(0); TM_BEGIN(3); TMELEMENT_SETISREFERENCED(elementPtr, FALSE); isGarbage = TMELEMENT_ISGARBAGE(elementPtr); TM_END(); if (isGarbage) { /* * Handle delayed deallocation */ PELEMENT_FREE(elementPtr); } totalNumAdded += numAdded; AL_LOCK(0); TM_BEGIN(4); TMREGION_TRANSFERBAD(regionPtr, workHeapPtr); TM_END(); numProcess++; } AL_LOCK(0); TM_BEGIN(5); TM_SHARED_WRITE(global_totalNumAdded, TM_SHARED_READ(global_totalNumAdded) + totalNumAdded); TM_SHARED_WRITE(global_numProcess, TM_SHARED_READ(global_numProcess) + numProcess); TM_END(); PREGION_FREE(regionPtr); TM_THREAD_EXIT(); }
/* ============================================================================= * work * ============================================================================= */ static void work (void* argPtr) { TM_THREAD_ENTER(); args_t* args = (args_t*)argPtr; float** feature = args->feature; int nfeatures = args->nfeatures; int npoints = args->npoints; int nclusters = args->nclusters; int* membership = args->membership; float** clusters = args->clusters; long long int** new_centers_len = args->new_centers_len; float** new_centers = args->new_centers; float delta = 0.0; int index; int i; int j; int start; int stop; int myId; bool indexx[1000]; myId = thread_getId(); start = myId * CHUNK; int cnt=0; while (start < npoints) { stop = (((start + CHUNK) < npoints) ? (start + CHUNK) : npoints); for (i = start; i < stop; i++) { index = common_findNearestPoint(feature[i], nfeatures, clusters, nclusters); /* * If membership changes, increase delta by 1. * membership[i] cannot be changed by other threads */ if (membership[i] != index) { delta += 1.0; } /* Assign the membership to object i */ /* membership[i] can't be changed by other thread */ membership[i] = index; /* Update new cluster centers : sum of objects located within */ TM_BEGIN(); //printf("shared write to begin: \n"); // int write = *new_centers_len[index]; //int* pt = new_centers_len[i]; //int dat = TM_SHARED_READ_I(*new_centers_len[i]); //printf("in loop write centers lendata: %i %i\n", dat, *new_centers_len[i]); TM_SHARED_WRITE_I(*new_centers_len[index], TM_SHARED_READ_I(*new_centers_len[index]) + 1); //printf("befor loop len P: %p data: %i\n", new_centers_len[index], *new_centers_len[index]); //new *new_centers_len[index] = *new_centers_len[index] + 1; //printf("INDEX %i \n" , index); indexx[index] = true; /*if(*new_centers_len[index]==0)*/ //printf("in lloop len P: %p data: %i\n", new_centers_len[index], *new_centers_len[index]); //*new_centers_len[i] = *new_centers_len[i]+1; // pt = new_centers_len[i]; //dat = TM_SHARED_READ_I(*new_centers_len[i]); //printf("in loop write centers len data: %i\n", dat); for (j = 0; j < nfeatures; j++) { //printf("featurs\n"); //int feat = feature[i][j]; //printf("write\n"); //float read = TM_SHARED_READ_F(new_centers[index][j]); //printf("write %p " ,write); //printf("shared write to:\n"); //printf("feature %f", feature[i][j]); //float feat = feature[i][j]; //float fl = (TM_SHARED_READ_F(new_centers[index][j])+ feat);//feature[i][j]); //int len = *new_centers_len[index]; TM_SHARED_WRITE_F( //write, new_centers[index][j], //(read + feat) //fl (TM_SHARED_READ_F(new_centers[index][j])+ feature[i][j]) //printf("index %p %p\n", (void*)*new_centers[index][j], (void*)(*new_centers[index][j] +1)); //printf("indexnon p %p %p\n", (void*)new_centers[index][j], (void*)(new_centers[index][j] +1)); ); //new new_centers[index][j] = new_centers[index][j] + feature[i][j]; //if(0==*new_centers_len[index]) printf("ISNAN %i\n", len); //if(isnanf(new_centers[index][j])) printf("ISNAN2\n\n"); // if(isinf(*new_centers_len[index])) printf("ISINF\n\n"); //if(isinf(*new_centers_len[index])) printf("ISINF2\n\n"); } TM_END(); } //printf("update \n"); /* Update task queue */ if (start + CHUNK < npoints) { TM_BEGIN(); start = (int)TM_SHARED_READ_L(*global_i); TM_SHARED_WRITE_L(*global_i, (long)(start + CHUNK)); TM_END(); } else { break; } } TM_BEGIN(); //printf("shared write to: %p", *global_delta); TM_SHARED_WRITE_F(*global_delta, TM_SHARED_READ_F(*global_delta) + delta); //new *global_delta = *global_delta + delta; TM_END(); int u1 =0; /* for(int i1=0; i1<1000; i1++){ if(indexx[i1]) printf("INDEX %i %i\n", i1, u1++); }*/ TM_THREAD_EXIT(); }
void* test(void *data) { int unext, last = -1; val_t val = 0; pval_t pval = 0; thread_data_t *d = (thread_data_t *)data; /* Create transaction */ TM_THREAD_ENTER(d->id); set_cpu(the_cores[d->id]); /* Wait on barrier */ ssalloc_init(); PF_CORRECTION; seeds = seed_rand(); #ifdef PIN int id = d->id; int cpu = 40*(id/40) + 4*(id%10) + (id%40)/10; // printf("Pinning %d to %d\n",id,cpu); pin(pthread_self(), cpu); // pin(pthread_self(), id); #endif #ifdef PAPI if (PAPI_OK != PAPI_start_counters(g_events, G_EVENT_COUNT)) { printf("Problem starting counters 1."); } #endif barrier_cross(d->barrier); /* Is the first op an update? */ unext = (rand_range_re(&d->seed, 100) - 1 < d->update); #ifdef DISTRIBUTION_EXPERIMENT while (1) #else while (*running) #endif { if (d->es) { // event simulator experiment if (d->lin) { if (!empty(d->linden_set)) { d->nb_remove++; pval_t pval = deletemin(d->linden_set, d); d->nb_removed++; // printf("%d %d\n", pval, deps[pval][0]); int i = 0; val_t dep; while ((dep = deps[pval][i]) != -1 && i < MAX_DEPS) { d->nb_add++; if (insert(d->linden_set, dep, dep)) { d->nb_added++; } i++; } } } else { if (d->set->head->next[0]->next[0] != NULL) {// set not empty d->nb_remove++; if (d->sl) { // spray list if (spray_delete_min(d->set, &val, d)) { d->nb_removed++; } else { continue; } } else if (d->pq) { // lotan_shavit pq if (lotan_shavit_delete_min(d->set, &val, d)) { d->nb_removed++; // continue; // TODO: maybe try remove this to simulate task handling (dependency checks still occur) } else { continue; } } // struct timespec ten_usec; // ten_usec.tv_sec = 0; // ten_usec.tv_nsec = 10000; // nanosleep(&ten_usec, NULL); // dependency handling int i = 0; val_t dep; while ((dep = deps[val][i]) != -1 && i < MAX_DEPS) { if (!sl_contains(d->set, dep, TRANSACTIONAL)) { // dependent has been removed, need to add it again if (sl_add(d->set, dep, TRANSACTIONAL)) { // check if insert actually succeeded (otherwise someone else did it first) d->nb_added++; } d->nb_add++; } i++; } } } } else { // not event simulator if (unext) { // update if (last < 0) { // add val = rand_range_re(&d->seed, d->range); if (d->lin) { pval = val; insert(d->linden_set, pval, pval); d->nb_added++; last = pval; } else { // not linden if (sl_add(d->set, val, TRANSACTIONAL)) { d->nb_added++; last = val; } } d->nb_add++; } else { // remove if (d->pq) { if (lotan_shavit_delete_min(d->set, &val, d)) { d->nb_removed++; if (d->first_remove == -1) { d->first_remove = val; } } last = -1; } else if (d->sl) { if (spray_delete_min(d->set, &val, d)) { d->nb_removed++; if (d->first_remove == -1) { d->first_remove = val; } last = -1; } } else if (d->lin) { if ((pval = deletemin(d->linden_set, d))) { d->nb_removed++; if (d->first_remove == -1) { d->first_remove = pval; } last = -1; } } else if (d->alternate) { // alternate mode (default) if (sl_remove(d->set, last, TRANSACTIONAL)) { d->nb_removed++; if (d->first_remove == -1) { d->first_remove = val; } } last = -1; } else { /* Random computation only in non-alternated cases */ val = rand_range_re(&d->seed, d->range); /* Remove one random value */ if (sl_remove_succ(d->set, val, TRANSACTIONAL)) { d->nb_removed++; if (d->first_remove == -1) { d->first_remove = val; } /* Repeat until successful, to avoid size variations */ last = -1; } } d->nb_remove++; } } else { // read if (d->alternate) { if (d->update == 0) { if (last < 0) { val = d->first; last = val; } else { // last >= 0 val = rand_range_re(&d->seed, d->range); last = -1; } } else { // update != 0 if (last < 0) { val = rand_range_re(&d->seed, d->range); //last = val; } else { val = last; } } } else val = rand_range_re(&d->seed, d->range); PF_START(2); if (sl_contains(d->set, val, TRANSACTIONAL)) d->nb_found++; PF_STOP(2); d->nb_contains++; } /* Is the next op an update? */ if (d->effective) { // a failed remove/add is a read-only tx unext = ((100 * (d->nb_added + d->nb_removed)) < (d->update * (d->nb_add + d->nb_remove + d->nb_contains))); } else { // remove/add (even failed) is considered as an update unext = (rand_range_re(&d->seed, 100) - 1 < d->update); } } #ifdef DISTRIBUTION_EXPERIMENT if (d->first_remove != -1) { break; //only one run } #endif } #ifdef PAPI if (PAPI_OK != PAPI_read_counters(g_values[d->id], G_EVENT_COUNT)) { printf("Problem reading counters 2."); } #endif /* Free transaction */ TM_THREAD_EXIT(); PF_PRINT; return NULL; }
void client_run (void* argPtr) { TM_THREAD_ENTER(); /*long id = thread_getId(); volatile long* ptr1 = &(global_array[0].value); volatile long* ptr2 = &(global_array[100].value); long tt = 0; if (id == 0) { while (1) { long v1 = 0; long v2 = 0; acquire_write(&(local_th_data[phys_id]), &the_lock); *ptr1 = (*ptr1) + 1; int f = 1; int ii; for(ii = 1; ii <= 100000000; ii++) { f *= ii; } tt += f; *ptr2 = (*ptr2) + 1; v1 = global_array[0].value; v2 = global_array[100].value; release_write(cluster_id, &(local_th_data[phys_id]), &the_lock); \ if (v1 != v2) { printf("different2! %ld %ld\n", v1, v2); exit(1); } } } else { while (1) { int i = 0; long sum = 0; for (; i < 100000; i++) { int status = _xbegin(); if (status == _XBEGIN_STARTED) { sum += *ptr1; sum += *ptr2; _xend(); } } while(1) { long v1 = 0; long v2 = 0; int status = _xbegin(); if (status == _XBEGIN_STARTED) { v1 = *ptr1; v2 = *ptr2; _xend(); if (v1 != v2) { printf("different! %ld %ld\n", v1, v2); exit(1); } } } } } printf("%ld", tt);*/ random_t* randomPtr = random_alloc(); random_seed(randomPtr, time(0)); // unsigned long myId = thread_getId(); // long numThread = *((long*)argPtr); long operations = (long)global_params[PARAM_OPERATIONS] / (long)global_params[PARAM_THREADS]; long interval = (long)global_params[PARAM_INTERVAL]; printf("operations: %ld \tinterval: %ld\n", operations, interval); long total = 0; long total2 = 0; long i = 0; for (; i < operations; i++) { long random_number = ((long) random_generate(randomPtr)) % ((long)global_params[PARAM_SIZE]); long random_number2 = ((long) random_generate(randomPtr)) % ((long)global_params[PARAM_SIZE]); if (random_number == random_number2) { random_number2 = (random_number2 + 1) % ((long)global_params[PARAM_SIZE]); } TM_BEGIN(); long r1 = (long)TM_SHARED_READ_L(global_array[random_number].value); long r2 = (long)TM_SHARED_READ_L(global_array[random_number2].value); int repeat = 0; for (; repeat < (long) global_params[PARAM_CONTENTION]; repeat++) { total2 += (long) TM_SHARED_READ_L(global_array[((long) random_generate(randomPtr)) % ((long)global_params[PARAM_SIZE])].value); } r1 = r1 + 1; r2 = r2 - 1; int f = 1; int ii; for(ii = 1; ii <= ((unsigned int) global_params[PARAM_WORK]); ii++) { f *= ii; } total += f / 1000000; TM_SHARED_WRITE_L(global_array[random_number].value, r1); TM_SHARED_WRITE_L(global_array[random_number2].value, r2); TM_END(); long k = 0; for (;k < (long)global_params[PARAM_INTERVAL]; k++) { long ru = ((long) random_generate(randomPtr)) % 2; total += ru; } } TM_THREAD_EXIT(); printf("ru ignore %ld - %ld\n", total, total2); }
/* ============================================================================= * genScalData * ============================================================================= */ void genScalData (void* argPtr) { TM_THREAD_ENTER(); graphSDG* SDGdataPtr = (graphSDG*)argPtr; long myId = thread_getId(); long numThread = thread_getNumThread(); /* * STEP 0: Create the permutations required to randomize the vertices */ random_t* stream = PRANDOM_ALLOC(); assert(stream); PRANDOM_SEED(stream, myId); ULONGINT_T* permV; /* the vars associated with the graph tuple */ if (myId == 0) { permV = (ULONGINT_T*)P_MALLOC(TOT_VERTICES * sizeof(ULONGINT_T)); assert(permV); global_permV = permV; } thread_barrier_wait(); permV = global_permV; long i; long i_start; long i_stop; createPartition(0, TOT_VERTICES, myId, numThread, &i_start, &i_stop); /* Initialize the array */ for (i = i_start; i < i_stop; i++) { permV[i] = i; } thread_barrier_wait(); for (i = i_start; i < i_stop; i++) { long t1 = PRANDOM_GENERATE(stream); long t = i + t1 % (TOT_VERTICES - i); if (t != i) { AL_LOCK(0); TM_BEGIN(); long t2 = (long)TM_SHARED_READ(permV[t]); TM_SHARED_WRITE(permV[t], TM_SHARED_READ(permV[i])); TM_SHARED_WRITE(permV[i], t2); TM_END(); } } /* * STEP 1: Create Cliques */ long* cliqueSizes; long estTotCliques = ceil(1.5 * TOT_VERTICES / ((1+MAX_CLIQUE_SIZE)/2)); /* * Allocate mem for Clique array * Estimate number of clique required and pad by 50% */ if (myId == 0) { cliqueSizes = (long*)P_MALLOC(estTotCliques * sizeof(long)); assert(cliqueSizes); global_cliqueSizes = cliqueSizes; } thread_barrier_wait(); cliqueSizes = global_cliqueSizes; createPartition(0, estTotCliques, myId, numThread, &i_start, &i_stop); /* Generate random clique sizes. */ for (i = i_start; i < i_stop; i++) { cliqueSizes[i] = 1 + (PRANDOM_GENERATE(stream) % MAX_CLIQUE_SIZE); } thread_barrier_wait(); long totCliques = 0; /* * Allocate memory for cliqueList */ ULONGINT_T* lastVsInCliques; ULONGINT_T* firstVsInCliques; if (myId == 0) { lastVsInCliques = (ULONGINT_T*)P_MALLOC(estTotCliques * sizeof(ULONGINT_T)); assert(lastVsInCliques); global_lastVsInCliques = lastVsInCliques; firstVsInCliques = (ULONGINT_T*)P_MALLOC(estTotCliques * sizeof(ULONGINT_T)); assert(firstVsInCliques); global_firstVsInCliques = firstVsInCliques; /* * Sum up vertices in each clique to determine the lastVsInCliques array */ lastVsInCliques[0] = cliqueSizes[0] - 1; for (i = 1; i < estTotCliques; i++) { lastVsInCliques[i] = cliqueSizes[i] + lastVsInCliques[i-1]; if (lastVsInCliques[i] >= TOT_VERTICES-1) { break; } } totCliques = i + 1; global_totCliques = totCliques; /* * Fix the size of the last clique */ cliqueSizes[totCliques-1] = TOT_VERTICES - lastVsInCliques[totCliques-2] - 1; lastVsInCliques[totCliques-1] = TOT_VERTICES - 1; firstVsInCliques[0] = 0; } thread_barrier_wait(); lastVsInCliques = global_lastVsInCliques; firstVsInCliques = global_firstVsInCliques; totCliques = global_totCliques; /* Compute start Vertices in cliques. */ createPartition(1, totCliques, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { firstVsInCliques[i] = lastVsInCliques[i-1] + 1; } #ifdef WRITE_RESULT_FILES thread_barrier_wait(); /* Write the generated cliques to file for comparison with Kernel 4 */ if (myId == 0) { FILE* outfp = fopen("cliques.txt", "w"); fprintf(outfp, "No. of cliques - %lu\n", totCliques); for (i = 0; i < totCliques; i++) { fprintf(outfp, "Clq %lu - ", i); long j; for (j = firstVsInCliques[i]; j <= lastVsInCliques[i]; j++) { fprintf(outfp, "%lu ", permV[j]); } fprintf(outfp, "\n"); } fclose(outfp); } thread_barrier_wait(); #endif /* * STEP 2: Create the edges within the cliques */ /* * Estimate number of edges - using an empirical measure */ long estTotEdges; if (SCALE >= 12) { estTotEdges = ceil(((MAX_CLIQUE_SIZE-1) * TOT_VERTICES)); } else { estTotEdges = ceil(1.2 * (((MAX_CLIQUE_SIZE-1)*TOT_VERTICES) * ((1 + MAX_PARAL_EDGES)/2) + TOT_VERTICES*2)); } /* * Initialize edge counter */ long i_edgePtr = 0; float p = PROB_UNIDIRECTIONAL; /* * Partial edgeLists */ ULONGINT_T* startV; ULONGINT_T* endV; if (numThread > 3) { long numByte = 1.5 * (estTotEdges/numThread) * sizeof(ULONGINT_T); startV = (ULONGINT_T*)P_MALLOC(numByte); endV = (ULONGINT_T*)P_MALLOC(numByte); } else { long numByte = (estTotEdges/numThread) * sizeof(ULONGINT_T); startV = (ULONGINT_T*)P_MALLOC(numByte); endV = (ULONGINT_T*)P_MALLOC(numByte); } assert(startV); assert(endV); /* * Tmp array to keep track of the no. of parallel edges in each direction */ ULONGINT_T** tmpEdgeCounter = (ULONGINT_T**)P_MALLOC(MAX_CLIQUE_SIZE * sizeof(ULONGINT_T *)); assert(tmpEdgeCounter); for (i = 0; i < MAX_CLIQUE_SIZE; i++) { tmpEdgeCounter[i] = (ULONGINT_T*)P_MALLOC(MAX_CLIQUE_SIZE * sizeof(ULONGINT_T)); assert(tmpEdgeCounter[i]); } /* * Create edges in parallel */ long i_clique; createPartition(0, totCliques, myId, numThread, &i_start, &i_stop); for (i_clique = i_start; i_clique < i_stop; i_clique++) { /* * Get current clique parameters */ long i_cliqueSize = cliqueSizes[i_clique]; long i_firstVsInClique = firstVsInCliques[i_clique]; /* * First create at least one edge between two vetices in a clique */ for (i = 0; i < i_cliqueSize; i++) { long j; for (j = 0; j < i; j++) { float r = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if (r >= p) { startV[i_edgePtr] = i + i_firstVsInClique; endV[i_edgePtr] = j + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[i][j] = 1; startV[i_edgePtr] = j + i_firstVsInClique; endV[i_edgePtr] = i + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[j][i] = 1; } else if (r >= 0.5) { startV[i_edgePtr] = i + i_firstVsInClique; endV[i_edgePtr] = j + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[i][j] = 1; tmpEdgeCounter[j][i] = 0; } else { startV[i_edgePtr] = j + i_firstVsInClique; endV[i_edgePtr] = i + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[j][i] = 1; tmpEdgeCounter[i][j] = 0; } } /* for j */ } /* for i */ if (i_cliqueSize != 1) { long randNumEdges = (long)(PRANDOM_GENERATE(stream) % (2*i_cliqueSize*MAX_PARAL_EDGES)); long i_paralEdge; for (i_paralEdge = 0; i_paralEdge < randNumEdges; i_paralEdge++) { i = (PRANDOM_GENERATE(stream) % i_cliqueSize); long j = (PRANDOM_GENERATE(stream) % i_cliqueSize); if ((i != j) && (tmpEdgeCounter[i][j] < MAX_PARAL_EDGES)) { float r = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if (r >= p) { /* Copy to edge structure. */ startV[i_edgePtr] = i + i_firstVsInClique; endV[i_edgePtr] = j + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[i][j]++; } } } } } /* for i_clique */ for (i = 0; i < MAX_CLIQUE_SIZE; i++) { P_FREE(tmpEdgeCounter[i]); } P_FREE(tmpEdgeCounter); /* * Merge partial edge lists */ ULONGINT_T* i_edgeStartCounter; ULONGINT_T* i_edgeEndCounter; if (myId == 0) { i_edgeStartCounter = (ULONGINT_T*)P_MALLOC(numThread * sizeof(ULONGINT_T)); assert(i_edgeStartCounter); global_i_edgeStartCounter = i_edgeStartCounter; i_edgeEndCounter = (ULONGINT_T*)P_MALLOC(numThread * sizeof(ULONGINT_T)); assert(i_edgeEndCounter); global_i_edgeEndCounter = i_edgeEndCounter; } thread_barrier_wait(); i_edgeStartCounter = global_i_edgeStartCounter; i_edgeEndCounter = global_i_edgeEndCounter; i_edgeEndCounter[myId] = i_edgePtr; i_edgeStartCounter[myId] = 0; thread_barrier_wait(); if (myId == 0) { for (i = 1; i < numThread; i++) { i_edgeEndCounter[i] = i_edgeEndCounter[i-1] + i_edgeEndCounter[i]; i_edgeStartCounter[i] = i_edgeEndCounter[i-1]; } } AL_LOCK(0); TM_BEGIN(); TM_SHARED_WRITE(global_edgeNum, ((long)TM_SHARED_READ(global_edgeNum) + i_edgePtr)); TM_END(); thread_barrier_wait(); long edgeNum = global_edgeNum; /* * Initialize edge list arrays */ ULONGINT_T* startVertex; ULONGINT_T* endVertex; if (myId == 0) { if (SCALE < 10) { long numByte = 2 * edgeNum * sizeof(ULONGINT_T); startVertex = (ULONGINT_T*)P_MALLOC(numByte); endVertex = (ULONGINT_T*)P_MALLOC(numByte); } else { long numByte = (edgeNum + MAX_PARAL_EDGES * TOT_VERTICES) * sizeof(ULONGINT_T); startVertex = (ULONGINT_T*)P_MALLOC(numByte); endVertex = (ULONGINT_T*)P_MALLOC(numByte); } assert(startVertex); assert(endVertex); global_startVertex = startVertex; global_endVertex = endVertex; } thread_barrier_wait(); startVertex = global_startVertex; endVertex = global_endVertex; for (i = i_edgeStartCounter[myId]; i < i_edgeEndCounter[myId]; i++) { startVertex[i] = startV[i-i_edgeStartCounter[myId]]; endVertex[i] = endV[i-i_edgeStartCounter[myId]]; } ULONGINT_T numEdgesPlacedInCliques = edgeNum; thread_barrier_wait(); /* * STEP 3: Connect the cliques */ i_edgePtr = 0; p = PROB_INTERCL_EDGES; /* * Generating inter-clique edges as given in the specs */ createPartition(0, TOT_VERTICES, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { ULONGINT_T tempVertex1 = i; long h = totCliques; long l = 0; long t = -1; while (h - l > 1) { long m = (h + l) / 2; if (tempVertex1 >= firstVsInCliques[m]) { l = m; } else { if ((tempVertex1 < firstVsInCliques[m]) && (m > 0)) { if (tempVertex1 >= firstVsInCliques[m-1]) { t = m - 1; break; } else { h = m; } } } } if (t == -1) { long m; for (m = (l + 1); m < h; m++) { if (tempVertex1<firstVsInCliques[m]) { break; } } t = m-1; } long t1 = firstVsInCliques[t]; ULONGINT_T d; for (d = 1, p = PROB_INTERCL_EDGES; d < TOT_VERTICES; d *= 2, p /= 2) { float r = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if (r <= p) { ULONGINT_T tempVertex2 = (i+d) % TOT_VERTICES; h = totCliques; l = 0; t = -1; while (h - l > 1) { long m = (h + l) / 2; if (tempVertex2 >= firstVsInCliques[m]) { l = m; } else { if ((tempVertex2 < firstVsInCliques[m]) && (m > 0)) { if (firstVsInCliques[m-1] <= tempVertex2) { t = m - 1; break; } else { h = m; } } } } if (t == -1) { long m; for (m = (l + 1); m < h; m++) { if (tempVertex2 < firstVsInCliques[m]) { break; } } t = m - 1; } long t2 = firstVsInCliques[t]; if (t1 != t2) { long randNumEdges = PRANDOM_GENERATE(stream) % MAX_PARAL_EDGES + 1; long j; for (j = 0; j < randNumEdges; j++) { startV[i_edgePtr] = tempVertex1; endV[i_edgePtr] = tempVertex2; i_edgePtr++; } } } /* r <= p */ float r0 = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if ((r0 <= p) && (i-d>=0)) { ULONGINT_T tempVertex2 = (i-d) % TOT_VERTICES; h = totCliques; l = 0; t = -1; while (h - l > 1) { long m = (h + l) / 2; if (tempVertex2 >= firstVsInCliques[m]) { l = m; } else { if ((tempVertex2 < firstVsInCliques[m]) && (m > 0)) { if (firstVsInCliques[m-1] <= tempVertex2) { t = m - 1; break; } else { h = m; } } } } if (t == -1) { long m; for (m = (l + 1); m < h; m++) { if (tempVertex2 < firstVsInCliques[m]) { break; } } t = m - 1; } long t2 = firstVsInCliques[t]; if (t1 != t2) { long randNumEdges = PRANDOM_GENERATE(stream) % MAX_PARAL_EDGES + 1; long j; for (j = 0; j < randNumEdges; j++) { startV[i_edgePtr] = tempVertex1; endV[i_edgePtr] = tempVertex2; i_edgePtr++; } } } /* r0 <= p && (i-d) > 0 */ } /* for d, p */ } /* for i */ i_edgeEndCounter[myId] = i_edgePtr; i_edgeStartCounter[myId] = 0; if (myId == 0) { global_edgeNum = 0; } thread_barrier_wait(); if (myId == 0) { for (i = 1; i < numThread; i++) { i_edgeEndCounter[i] = i_edgeEndCounter[i-1] + i_edgeEndCounter[i]; i_edgeStartCounter[i] = i_edgeEndCounter[i-1]; } } AL_LOCK(0); TM_BEGIN(); TM_SHARED_WRITE(global_edgeNum, ((long)TM_SHARED_READ(global_edgeNum) + i_edgePtr)); TM_END(); thread_barrier_wait(); edgeNum = global_edgeNum; ULONGINT_T numEdgesPlacedOutside = global_edgeNum; for (i = i_edgeStartCounter[myId]; i < i_edgeEndCounter[myId]; i++) { startVertex[i+numEdgesPlacedInCliques] = startV[i-i_edgeStartCounter[myId]]; endVertex[i+numEdgesPlacedInCliques] = endV[i-i_edgeStartCounter[myId]]; } thread_barrier_wait(); ULONGINT_T numEdgesPlaced = numEdgesPlacedInCliques + numEdgesPlacedOutside; if (myId == 0) { SDGdataPtr->numEdgesPlaced = numEdgesPlaced; printf("Finished generating edges\n"); printf("No. of intra-clique edges - %lu\n", numEdgesPlacedInCliques); printf("No. of inter-clique edges - %lu\n", numEdgesPlacedOutside); printf("Total no. of edges - %lu\n", numEdgesPlaced); P_FREE(i_edgeStartCounter); P_FREE(i_edgeEndCounter); P_FREE(cliqueSizes); P_FREE(firstVsInCliques); P_FREE(lastVsInCliques); } thread_barrier_wait(); P_FREE(startV); P_FREE(endV); /* * STEP 4: Generate edge weights */ if (myId == 0) { SDGdataPtr->intWeight = (LONGINT_T*)P_MALLOC(numEdgesPlaced * sizeof(LONGINT_T)); assert(SDGdataPtr->intWeight); } thread_barrier_wait(); p = PERC_INT_WEIGHTS; ULONGINT_T numStrWtEdges = 0; createPartition(0, numEdgesPlaced, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { float r = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if (r <= p) { SDGdataPtr->intWeight[i] = 1 + (PRANDOM_GENERATE(stream) % (MAX_INT_WEIGHT-1)); } else { SDGdataPtr->intWeight[i] = -1; numStrWtEdges++; } } thread_barrier_wait(); if (myId == 0) { long t = 0; for (i = 0; i < numEdgesPlaced; i++) { if (SDGdataPtr->intWeight[i] < 0) { SDGdataPtr->intWeight[i] = -t; t++; } } } AL_LOCK(0); TM_BEGIN(); TM_SHARED_WRITE(global_numStrWtEdges, ((long)TM_SHARED_READ(global_numStrWtEdges) + numStrWtEdges)); TM_END(); thread_barrier_wait(); numStrWtEdges = global_numStrWtEdges; if (myId == 0) { SDGdataPtr->strWeight = (char*)P_MALLOC(numStrWtEdges * MAX_STRLEN * sizeof(char)); assert(SDGdataPtr->strWeight); } thread_barrier_wait(); createPartition(0, numEdgesPlaced, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { if (SDGdataPtr->intWeight[i] <= 0) { long j; for (j = 0; j < MAX_STRLEN; j++) { SDGdataPtr->strWeight[(-SDGdataPtr->intWeight[i])*MAX_STRLEN+j] = (char) (1 + PRANDOM_GENERATE(stream) % 127); } } } /* * Choose SOUGHT STRING randomly if not assigned */ if (myId == 0) { if (strlen(SOUGHT_STRING) != MAX_STRLEN) { SOUGHT_STRING = (char*)P_MALLOC(MAX_STRLEN * sizeof(char)); assert(SOUGHT_STRING); } long t = PRANDOM_GENERATE(stream) % numStrWtEdges; long j; for (j = 0; j < MAX_STRLEN; j++) { SOUGHT_STRING[j] = (char) ((long) SDGdataPtr->strWeight[t*MAX_STRLEN+j]); } } thread_barrier_wait(); /* * STEP 5: Permute Vertices */ for (i = i_start; i < i_stop; i++) { startVertex[i] = permV[(startVertex[i])]; endVertex[i] = permV[(endVertex[i])]; } thread_barrier_wait(); /* * STEP 6: Sort Vertices */ /* * Radix sort with StartVertex as primary key */ if (myId == 0) { long numByte = numEdgesPlaced * sizeof(ULONGINT_T); SDGdataPtr->startVertex = (ULONGINT_T*)P_MALLOC(numByte); assert(SDGdataPtr->startVertex); SDGdataPtr->endVertex = (ULONGINT_T*)P_MALLOC(numByte); assert(SDGdataPtr->endVertex); } thread_barrier_wait(); all_radixsort_node_aux_s3(numEdgesPlaced, startVertex, SDGdataPtr->startVertex, endVertex, SDGdataPtr->endVertex); thread_barrier_wait(); if (myId == 0) { P_FREE(startVertex); P_FREE(endVertex); } thread_barrier_wait(); if (SCALE < 12) { /* * Sort with endVertex as secondary key */ if (myId == 0) { long i0 = 0; long i1 = 0; i = 0; while (i < numEdgesPlaced) { for (i = i0; i < numEdgesPlaced; i++) { if (SDGdataPtr->startVertex[i] != SDGdataPtr->startVertex[i1]) { i1 = i; break; } } long j; for (j = i0; j < i1; j++) { long k; for (k = j+1; k < i1; k++) { if (SDGdataPtr->endVertex[k] < SDGdataPtr->endVertex[j]) { long t = SDGdataPtr->endVertex[j]; SDGdataPtr->endVertex[j] = SDGdataPtr->endVertex[k]; SDGdataPtr->endVertex[k] = t; } } } if (SDGdataPtr->startVertex[i0] != TOT_VERTICES-1) { i0 = i1; } else { long j; for (j=i0; j<numEdgesPlaced; j++) { long k; for (k=j+1; k<numEdgesPlaced; k++) { if (SDGdataPtr->endVertex[k] < SDGdataPtr->endVertex[j]) { long t = SDGdataPtr->endVertex[j]; SDGdataPtr->endVertex[j] = SDGdataPtr->endVertex[k]; SDGdataPtr->endVertex[k] = t; } } } } } /* while i < numEdgesPlaced */ } } else { ULONGINT_T* tempIndex; if (myId == 0) { tempIndex = (ULONGINT_T*)P_MALLOC((TOT_VERTICES + 1) * sizeof(ULONGINT_T)); assert(tempIndex); global_tempIndex = tempIndex; /* * Update degree of each vertex */ tempIndex[0] = 0; tempIndex[TOT_VERTICES] = numEdgesPlaced; long i0 = 0; for (i=0; i < TOT_VERTICES; i++) { tempIndex[i+1] = tempIndex[i]; long j; for (j = i0; j < numEdgesPlaced; j++) { if (SDGdataPtr->startVertex[j] != SDGdataPtr->startVertex[i0]) { if (SDGdataPtr->startVertex[i0] == i) { tempIndex[i+1] = j; i0 = j; break; } } } } } thread_barrier_wait(); tempIndex = global_tempIndex; /* * Insertion sort for now, replace with something better later on */ #if 0 createPartition(0, TOT_VERTICES, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { long j; for (j = tempIndex[i]; j < tempIndex[i+1]; j++) { long k; for (k = (j + 1); k < tempIndex[i+1]; k++) { if (SDGdataPtr->endVertex[k] < SDGdataPtr->endVertex[j]) { long t = SDGdataPtr->endVertex[j]; SDGdataPtr->endVertex[j] = SDGdataPtr->endVertex[k]; SDGdataPtr->endVertex[k] = t; } } } } #else if (myId == 0) { for (i = 0; i < TOT_VERTICES; i++) { long j; for (j = tempIndex[i]; j < tempIndex[i+1]; j++) { long k; for (k = (j + 1); k < tempIndex[i+1]; k++) { if (SDGdataPtr->endVertex[k] < SDGdataPtr->endVertex[j]) { long t = SDGdataPtr->endVertex[j]; SDGdataPtr->endVertex[j] = SDGdataPtr->endVertex[k]; SDGdataPtr->endVertex[k] = t; } } } } } #endif if (myId == 0) { P_FREE(tempIndex); } } /* SCALE >= 12 */ PRANDOM_FREE(stream); if (myId == 0) { P_FREE(permV); } TM_THREAD_EXIT(); }
/* ============================================================================= * cutClusters * ============================================================================= */ void cutClusters (void* argPtr) { TM_THREAD_ENTER(); graph* GPtr = (graph*)argPtr; long myId = thread_getId(); long numThread = thread_getNumThread(); /* * Sort the vertex list by their degree */ ULONGINT_T* Index; ULONGINT_T* neighbourArray; ULONGINT_T* IndexSorted; ULONGINT_T* neighbourArraySorted; if (myId == 0) { long numByte = GPtr->numVertices * sizeof(ULONGINT_T); Index = (ULONGINT_T*)P_MALLOC(numByte); assert(Index); global_Index = Index; neighbourArray = (ULONGINT_T*)P_MALLOC(numByte); assert(neighbourArray); global_neighbourArray = neighbourArray; IndexSorted = (ULONGINT_T*)P_MALLOC(numByte); assert(IndexSorted); global_IndexSorted = IndexSorted; neighbourArraySorted = (ULONGINT_T*)P_MALLOC(numByte); assert(neighbourArraySorted); global_neighbourArraySorted = neighbourArraySorted; } thread_barrier_wait(); Index = global_Index; neighbourArray = global_neighbourArray; IndexSorted = global_IndexSorted; neighbourArraySorted = global_neighbourArraySorted; long i; long i_start; long i_stop; createPartition(0, GPtr->numVertices, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { neighbourArray[i] = GPtr->inDegree[i] + GPtr->outDegree[i]; Index[i] = i; } thread_barrier_wait(); all_radixsort_node_aux_s3(GPtr->numVertices, neighbourArray, neighbourArraySorted, Index, IndexSorted); thread_barrier_wait(); /* * Global array to keep track of vertex status: * -1 if a vertex hasn't been assigned to a cluster yet * t if it belongs to a cluster; t = iteration*numThread + myId */ long* vStatus; edge* pCutSet; ULONGINT_T* startV; ULONGINT_T* clusterSize; if (myId == 0) { P_FREE(Index); P_FREE(neighbourArray); vStatus = (long*)P_MALLOC(GPtr->numVertices * sizeof(long)); assert(vStatus); global_vStatus = vStatus; /* * Allocate mem. for the cut set list * Maintain local arrays initially and merge them in the end */ if (SCALE < 12) { pCutSet =(edge*)P_MALLOC((1*(GPtr->numDirectedEdges)/numThread) * sizeof(edge)); } else { pCutSet = (edge*)P_MALLOC((0.2*(GPtr->numDirectedEdges)/numThread) * sizeof(edge)); } assert(pCutSet); global_pCutSet = pCutSet; /* * Vertex to start from, on each thread */ startV = (ULONGINT_T*)P_MALLOC(numThread * sizeof(ULONGINT_T)); assert(startV); global_startV = startV; clusterSize = (ULONGINT_T*)P_MALLOC(numThread * sizeof(ULONGINT_T)); assert(clusterSize); global_clusterSize = clusterSize; } thread_barrier_wait(); vStatus = global_vStatus; pCutSet = global_pCutSet; startV = global_startV; clusterSize = global_clusterSize; for (i = i_start; i < i_stop; i++) { vStatus[i] = -1; } thread_barrier_wait(); ULONGINT_T verticesVisited = 0; #ifdef WRITE_RESULT_FILES FILE* outfp1 = NULL; if (myId == 0) { outfp1 = fopen("clusters.txt", "w"); fprintf(outfp1, "\nKernel 4 - Extracted Clusters\n"); } #endif long iter = 0; ULONGINT_T currIndex = 0; ULONGINT_T cutSetIndex = 0; while (verticesVisited < GPtr->numVertices) { /* Clear start vertex array */ startV[myId] = -1; clusterSize[myId] = 0; if (currIndex == GPtr->numVertices) { currIndex = 0; } thread_barrier_wait(); /* * Choose vertices to start from * Done sequentially right now, can be parallelized */ if (myId == 0) { long t; for (t = 0; t < numThread; t++) { long r; for (r = currIndex; r < GPtr->numVertices; r++) { if (vStatus[IndexSorted[GPtr->numVertices - r - 1]] == -1) { startV[t] = IndexSorted[GPtr->numVertices - r - 1]; vStatus[startV[t]] = iter * numThread + t; long j; for (j = 0; j < GPtr->outDegree[startV[t]]; j++) { long outVertexListIndex = j+GPtr->outVertexIndex[startV[t]]; long vStatusIndex = GPtr->outVertexList[outVertexListIndex]; if (vStatus[vStatusIndex] == -1) { vStatus[vStatusIndex] = iter * numThread + t; clusterSize[t]++; } } for (j = 0; j < GPtr->inDegree[startV[t]]; j++) { long inVertexIndex = j+GPtr->inVertexIndex[startV[t]]; long vStatusIndex = GPtr->inVertexList[inVertexIndex]; if (vStatus[vStatusIndex] == -1) { vStatus[vStatusIndex] = iter * numThread + t; clusterSize[t]++; } } currIndex = r+1; break; } } } } thread_barrier_wait(); /* * Determine clusters and cut sets in parallel */ i = startV[myId]; ULONGINT_T cliqueSize = 0; /* If the thread has some vertex to start from */ if (i != -1) { cliqueSize = 1; /* clusterSize[myId] gives the no. of 'unassigned' vertices adjacent to the current vertex */ if ((clusterSize[myId] >= 0.6*(GPtr->inDegree[i]+GPtr->outDegree[i])) || ((iter > (GPtr->numVertices)/(numThread*MAX_CLUSTER_SIZE)) && (clusterSize[myId] > 0))) { /* * Most of the adjacent vertices are unassigned, * should be able to extract a cluster easily */ /* Inspect adjacency list */ long j; for (j = 0; j < GPtr->outDegree[i]; j++) { ULONGINT_T clusterCounter = 0; ULONGINT_T cutSetIndexPrev = cutSetIndex; ULONGINT_T cutSetCounter = 0; if (vStatus[GPtr->outVertexList[j+GPtr->outVertexIndex[i]]] == iter * numThread + myId) { long v = GPtr->outVertexList[j+GPtr->outVertexIndex[i]]; /* * Inspect vertices adjacent to v and determine if it belongs * to a cluster or not */ long k; for (k = 0; k < GPtr->outDegree[v]; k++) { long outVertexListIndex = k+GPtr->outVertexIndex[v]; long vStatusIndex = GPtr->outVertexList[outVertexListIndex]; if (vStatus[vStatusIndex] == (iter * numThread + myId)) { clusterCounter++; } else { cutSetCounter++; if (vStatus[vStatusIndex] == -1) { /* Ensure that an edge is not added twice to the list */ pCutSet[cutSetIndex].startVertex = v; pCutSet[cutSetIndex].endVertex = vStatusIndex; cutSetIndex++; } } } if ((cutSetCounter >= clusterCounter) || ((SCALE < 9) && (clusterCounter <= 2) && (GPtr->inDegree[v]+GPtr->outDegree[v] > clusterCounter + cutSetCounter) && (clusterSize[myId] > clusterCounter + 2)) || ((SCALE > 9) && (clusterCounter < 0.5*clusterSize[myId]))) { /* v doesn't belong to this clique, free it */ vStatus[v] = -1; /* Also add this edge to cutset list, removing previously added edges */ cutSetIndex = cutSetIndexPrev; pCutSet[cutSetIndex].startVertex = i; pCutSet[cutSetIndex].endVertex = v; cutSetIndex++; } else { cliqueSize++; /* Add edges in inVertexList also to cut Set */ for (k = 0; k < GPtr->inDegree[v]; k++) { long inVertexListIndex = k+GPtr->inVertexIndex[v]; long vStatusIndex = GPtr->inVertexList[inVertexListIndex]; if (vStatus[vStatusIndex] == -1) { pCutSet[cutSetIndex].startVertex = v; pCutSet[cutSetIndex].endVertex = vStatusIndex; cutSetIndex++; } } } } } /* Do the same for the implied edges too */ for (j = 0; j < GPtr->inDegree[i]; j++) { ULONGINT_T clusterCounter = 0; ULONGINT_T cutSetIndexPrev = cutSetIndex; ULONGINT_T cutSetCounter = 0; if (vStatus[GPtr->inVertexList[j+GPtr->inVertexIndex[i]]] == iter*numThread+myId) { long v = GPtr->inVertexList[j+GPtr->inVertexIndex[i]]; /* Inspect vertices adjacent to v and determine if it belongs to a cluster or not */ long k; for (k = 0; k < GPtr->outDegree[v]; k++) { long outVertexListIndex = k+GPtr->outVertexIndex[v]; long vStatusIndex = GPtr->outVertexList[outVertexListIndex]; if (vStatus[vStatusIndex] == iter*numThread+myId) { clusterCounter++; } else { cutSetCounter++; if (vStatus[vStatusIndex] == -1) { /* To ensure that an edge is not added twice to the list */ pCutSet[cutSetIndex].startVertex = v; pCutSet[cutSetIndex].endVertex = vStatusIndex; cutSetIndex++; } } } if ((cutSetCounter >= clusterCounter) || ((SCALE < 9) && (clusterCounter <= 2) && (GPtr->inDegree[v]+GPtr->outDegree[v] > clusterCounter + cutSetCounter) && (clusterSize[myId] > clusterCounter + 2)) || ((SCALE > 9) && (clusterCounter < 0.5*clusterSize[myId]))) { /* v doesn't belong to this clique, free it */ vStatus[v] = -1; cutSetIndex = cutSetIndexPrev; pCutSet[cutSetIndex].startVertex = i; pCutSet[cutSetIndex].endVertex = v; cutSetIndex++; } else { cliqueSize++; /* Add edges in inVertexList also to cut Set */ for (k = 0; k < GPtr->inDegree[v]; k++) { long inVertexListIndex = k+GPtr->inVertexIndex[v]; long vStatusIndex = GPtr->inVertexList[inVertexListIndex]; if (vStatus[vStatusIndex] == -1) { pCutSet[cutSetIndex].startVertex = v; pCutSet[cutSetIndex].endVertex = vStatusIndex; cutSetIndex++; } } } } } } /* i != -1 */ if (clusterSize[myId] == 0) { /* Only one vertex in cluster */ cliqueSize = 1; } else { if ((clusterSize[myId] < 0.6*(GPtr->inDegree[i]+GPtr->outDegree[i])) && (iter <= GPtr->numVertices/(numThread*MAX_CLUSTER_SIZE))) { /* High perc. of intra-clique edges, do not commit clique */ cliqueSize = 0; vStatus[i] = -1; long j; for (j=0; j<GPtr->outDegree[i]; j++) { long outVertexListIndex = j+GPtr->outVertexIndex[i]; long vStatusIndex = GPtr->outVertexList[outVertexListIndex]; if (vStatus[vStatusIndex] == iter*numThread+myId) { vStatus[vStatusIndex] = -1; } } for (j=0; j<GPtr->inDegree[i]; j++) { long inVertexListIndex = j+GPtr->inVertexIndex[i]; long vStatusIndex = GPtr->inVertexList[inVertexListIndex]; if (vStatus[vStatusIndex] == iter*numThread+myId) { vStatus[vStatusIndex] = -1; } } } } } /* if i != -1 */ if (myId == 0) { global_cliqueSize = 0; } thread_barrier_wait(); #ifdef WRITE_RESULT_FILES /* Print to results.clq file */ if (myId == 0) { long t; for (t = 0; t < numThread; t++) { if (startV[t] != -1) { if (vStatus[startV[t]] == iter*numThread+t) { fprintf(outfp1, "%lu ", startV[t]); long j; for (j = 0; j < GPtr->outDegree[startV[t]]; j++) { long outVertexListIndex = j+GPtr->outVertexIndex[startV[t]]; long vStatusIndex = GPtr->outVertexList[outVertexListIndex]; if (vStatus[vStatusIndex] == iter*numThread+t) { fprintf(outfp1, "%lu ", vStatusIndex); } } for (j = 0; j < GPtr->inDegree[startV[t]]; j++) { long inVertexListIndex = j+GPtr->inVertexIndex[startV[t]]; long vStatusIndex = GPtr->inVertexList[inVertexListIndex]; if (vStatus[vStatusIndex] == iter*numThread+t) { fprintf(outfp1, "%lu ", vStatusIndex); } } fprintf(outfp1, "\n"); } } } } thread_barrier_wait(); #endif /* WRITE_RESULTS_FILE */ if (myId == 0) { iter++; global_iter = iter; } TM_BEGIN(); long tmp_cliqueSize = (long)TM_SHARED_READ(global_cliqueSize); TM_SHARED_WRITE(global_cliqueSize, (tmp_cliqueSize + cliqueSize)); TM_END(); thread_barrier_wait(); iter = global_iter; verticesVisited += global_cliqueSize; if ((verticesVisited >= 0.95*GPtr->numVertices) || (iter > GPtr->numVertices/2)) { break; } } /* while (verticesVisited < GPtr->numVertices) */ thread_barrier_wait(); #ifdef WRITE_RESULT_FILES /* Take care of unmarked vertices */ if (myId == 0) { if (verticesVisited < GPtr->numVertices) { for(i = 0; i < GPtr->numVertices; i++) { if (vStatus[i] == -1) { vStatus[i] = iter*numThread+myId; fprintf(outfp1, "%lu\n", i); iter++; } } } } thread_barrier_wait(); #endif /* * Merge partial Cutset Lists */ /* Temp vars for merging edge lists */ ULONGINT_T* edgeStartCounter; ULONGINT_T* edgeEndCounter; if (myId == 0) { edgeStartCounter = (ULONGINT_T*)P_MALLOC(numThread * sizeof(ULONGINT_T)); assert(edgeStartCounter); global_edgeStartCounter = edgeStartCounter; edgeEndCounter = (ULONGINT_T*)P_MALLOC(numThread * sizeof(ULONGINT_T)); assert(edgeEndCounter); global_edgeEndCounter = edgeEndCounter; } thread_barrier_wait(); edgeStartCounter = global_edgeStartCounter; edgeEndCounter = global_edgeEndCounter; edgeEndCounter[myId] = cutSetIndex; edgeStartCounter[myId] = 0; thread_barrier_wait(); if (myId == 0) { long t; for (t = 1; t < numThread; t++) { edgeEndCounter[t] = edgeEndCounter[t-1] + edgeEndCounter[t]; edgeStartCounter[t] = edgeEndCounter[t-1]; } } TM_BEGIN(); long tmp_cutSetIndex = (long)TM_SHARED_READ(global_cutSetIndex); TM_SHARED_WRITE(global_cutSetIndex, (tmp_cutSetIndex + cutSetIndex)); TM_END(); thread_barrier_wait(); cutSetIndex = global_cutSetIndex; ULONGINT_T cutSetCounter = cutSetIndex; /* Data struct. for storing edgeCut */ edge* cutSet; if (myId == 0) { cutSet = (edge*)P_MALLOC(cutSetCounter * sizeof(edge)); assert(cutSet); global_cutSet = cutSet; } thread_barrier_wait(); cutSet = global_cutSet; long j; for (j = edgeStartCounter[myId]; j < edgeEndCounter[myId]; j++) { cutSet[j].startVertex = pCutSet[j-edgeStartCounter[myId]].startVertex; cutSet[j].endVertex = pCutSet[j-edgeStartCounter[myId]].endVertex; } thread_barrier_wait(); #ifdef WRITE_RESULT_FILES FILE* outfp2 = NULL; if (myId == 0) { outfp2 = fopen("edgeCut.txt", "w"); fprintf(outfp2, "\nEdges in Cut Set - \n"); for (i = 0; i < cutSetCounter; i++) { fprintf(outfp2, "[%lu %lu] ", cutSet[i].startVertex, cutSet[i].endVertex); } fclose(outfp2); fclose(outfp1); } #endif if (myId == 0) { P_FREE(edgeStartCounter); P_FREE(edgeEndCounter); P_FREE(pCutSet); P_FREE(IndexSorted); P_FREE(neighbourArraySorted); P_FREE(startV); P_FREE(clusterSize); P_FREE(cutSet); P_FREE(vStatus); } TM_THREAD_EXIT(); }
/* ============================================================================= * client_run * -- Execute list operations on the database * ============================================================================= */ void client_run (void* argPtr) { TM_THREAD_ENTER(); long myId = thread_getId(); client_t* clientPtr = ((client_t**)argPtr)[myId]; manager_t* managerPtr = clientPtr->managerPtr; random_t* randomPtr = clientPtr->randomPtr; long numOperation = clientPtr->numOperation; long numQueryPerTransaction = clientPtr->numQueryPerTransaction; long queryRange = clientPtr->queryRange; long percentUser = clientPtr->percentUser; long* types = (long*)P_MALLOC(numQueryPerTransaction * sizeof(long)); long* ids = (long*)P_MALLOC(numQueryPerTransaction * sizeof(long)); long* ops = (long*)P_MALLOC(numQueryPerTransaction * sizeof(long)); long* prices = (long*)P_MALLOC(numQueryPerTransaction * sizeof(long)); long i; for (i = 0; i < numOperation; i++) { long r = random_generate(randomPtr) % 100; action_t action = selectAction(r, percentUser); switch (action) { case ACTION_MAKE_RESERVATION: { long maxPrices[NUM_RESERVATION_TYPE] = { -1, -1, -1 }; long maxIds[NUM_RESERVATION_TYPE] = { -1, -1, -1 }; long n; long numQuery = random_generate(randomPtr) % numQueryPerTransaction + 1; long customerId = random_generate(randomPtr) % queryRange + 1; for (n = 0; n < numQuery; n++) { types[n] = random_generate(randomPtr) % NUM_RESERVATION_TYPE; ids[n] = (random_generate(randomPtr) % queryRange) + 1; } bool_t isFound = FALSE; TM_BEGIN(); for (n = 0; n < numQuery; n++) { long t = types[n]; long id = ids[n]; long price = -1; switch (t) { case RESERVATION_CAR: if (MANAGER_QUERY_CAR(managerPtr, id) >= 0) { price = MANAGER_QUERY_CAR_PRICE(managerPtr, id); } break; case RESERVATION_FLIGHT: if (MANAGER_QUERY_FLIGHT(managerPtr, id) >= 0) { price = MANAGER_QUERY_FLIGHT_PRICE(managerPtr, id); } break; case RESERVATION_ROOM: if (MANAGER_QUERY_ROOM(managerPtr, id) >= 0) { price = MANAGER_QUERY_ROOM_PRICE(managerPtr, id); } break; default: assert(0); } if (price > maxPrices[t]) { maxPrices[t] = price; maxIds[t] = id; isFound = TRUE; } } /* for n */ if (isFound) { MANAGER_ADD_CUSTOMER(managerPtr, customerId); } if (maxIds[RESERVATION_CAR] > 0) { MANAGER_RESERVE_CAR(managerPtr, customerId, maxIds[RESERVATION_CAR]); } if (maxIds[RESERVATION_FLIGHT] > 0) { MANAGER_RESERVE_FLIGHT(managerPtr, customerId, maxIds[RESERVATION_FLIGHT]); } if (maxIds[RESERVATION_ROOM] > 0) { MANAGER_RESERVE_ROOM(managerPtr, customerId, maxIds[RESERVATION_ROOM]); } TM_END(); break; } case ACTION_DELETE_CUSTOMER: { long customerId = random_generate(randomPtr) % queryRange + 1; TM_BEGIN(); long bill = MANAGER_QUERY_CUSTOMER_BILL(managerPtr, customerId); if (bill >= 0) { MANAGER_DELETE_CUSTOMER(managerPtr, customerId); } TM_END(); break; } case ACTION_UPDATE_TABLES: { long numUpdate = random_generate(randomPtr) % numQueryPerTransaction + 1; long n; for (n = 0; n < numUpdate; n++) { types[n] = random_generate(randomPtr) % NUM_RESERVATION_TYPE; ids[n] = (random_generate(randomPtr) % queryRange) + 1; ops[n] = random_generate(randomPtr) % 2; if (ops[n]) { prices[n] = ((random_generate(randomPtr) % 5) * 10) + 50; } } TM_BEGIN(); for (n = 0; n < numUpdate; n++) { long t = types[n]; long id = ids[n]; long doAdd = ops[n]; if (doAdd) { long newPrice = prices[n]; switch (t) { case RESERVATION_CAR: MANAGER_ADD_CAR(managerPtr, id, 100, newPrice); break; case RESERVATION_FLIGHT: MANAGER_ADD_FLIGHT(managerPtr, id, 100, newPrice); break; case RESERVATION_ROOM: MANAGER_ADD_ROOM(managerPtr, id, 100, newPrice); break; default: assert(0); } } else { /* do delete */ switch (t) { case RESERVATION_CAR: MANAGER_DELETE_CAR(managerPtr, id, 100); break; case RESERVATION_FLIGHT: MANAGER_DELETE_FLIGHT(managerPtr, id); break; case RESERVATION_ROOM: MANAGER_DELETE_ROOM(managerPtr, id, 100); break; default: assert(0); } } } TM_END(); break; } default: assert(0); } /* switch (action) */ } /* for i */ TM_THREAD_EXIT(); }
/* ============================================================================= * router_solve * ============================================================================= */ void router_solve (void* argPtr) { TM_THREAD_ENTER(); long threadId = thread_getId(); router_solve_arg_t* routerArgPtr = (router_solve_arg_t*)argPtr; router_t* routerPtr = routerArgPtr->routerPtr; maze_t* mazePtr = routerArgPtr->mazePtr; long* numPathArray = routerArgPtr->numPathArray; vector_t* myPathVectorPtr = PVECTOR_ALLOC(1); assert(myPathVectorPtr); queue_t* workQueuePtr = mazePtr->workQueuePtr; grid_t* gridPtr = mazePtr->gridPtr; grid_t* myGridPtr = PGRID_ALLOC(gridPtr->width, gridPtr->height, gridPtr->depth); assert(myGridPtr); long bendCost = routerPtr->bendCost; queue_t* myExpansionQueuePtr = PQUEUE_ALLOC(-1); long numPath = 0; /* * Iterate over work list to route each path. This involves an * 'expansion' and 'traceback' phase for each source/destination pair. */ while ((global_timedExecution && !global_isTerminated) || (!global_timedExecution)) { //while (1) { wait_for_turn(threadId); if (global_timedExecution && global_isTerminated) break; ulong_t beginTime; pair_t* coordinatePairPtr; TM_BEGIN(); beginTime = get_thread_time(); if (TMQUEUE_ISEMPTY(workQueuePtr)) { if (TMQUEUE_ISEMPTY(workQueuePtr)) coordinatePairPtr = NULL; } else { coordinatePairPtr = (pair_t*)TMQUEUE_POP(workQueuePtr); } TM_END(); //add_throughput(threadId , get_thread_time() - beginTime); if (coordinatePairPtr == NULL) { break; } coordinate_t* srcPtr = (coordinate_t*)coordinatePairPtr->firstPtr; coordinate_t* dstPtr = (coordinate_t*)coordinatePairPtr->secondPtr; bool_t success = FALSE; vector_t* pointVectorPtr = NULL; TM_BEGIN(); beginTime = get_thread_time(); grid_copy(myGridPtr, gridPtr); /* ok if not most up-to-date */ if (PdoExpansion(routerPtr, myGridPtr, myExpansionQueuePtr, srcPtr, dstPtr)) { pointVectorPtr = PdoTraceback(gridPtr, myGridPtr, dstPtr, bendCost); /* * TODO: fix memory leak * * pointVectorPtr will be a memory leak if we abort this transaction */ if (pointVectorPtr) { TMGRID_ADDPATH(gridPtr, pointVectorPtr); TM_LOCAL_WRITE_L(success, TRUE); } } TM_END(); add_throughput(threadId , get_thread_time() - beginTime); numPath++; if (success) { bool_t status = PVECTOR_PUSHBACK(myPathVectorPtr, (void*)pointVectorPtr); assert(status); } } numPathArray[threadId] = numPath; /* * Add my paths to global list */ list_t* pathVectorListPtr = routerArgPtr->pathVectorListPtr; TM_BEGIN(); TMLIST_INSERT(pathVectorListPtr, (void*)myPathVectorPtr); TM_END(); PGRID_FREE(myGridPtr); PQUEUE_FREE(myExpansionQueuePtr); #if DEBUG puts("\nFinal Grid:"); grid_print(gridPtr); #endif /* DEBUG */ TM_THREAD_EXIT(); }