/* ============================================================================= * genScalData * ============================================================================= */ void genScalData (void* argPtr) { TM_THREAD_ENTER(); graphSDG* SDGdataPtr = (graphSDG*)argPtr; long myId = thread_getId(); long numThread = thread_getNumThread(); /* * STEP 0: Create the permutations required to randomize the vertices */ random_t* stream = PRANDOM_ALLOC(); assert(stream); PRANDOM_SEED(stream, myId); ULONGINT_T* permV; /* the vars associated with the graph tuple */ if (myId == 0) { permV = (ULONGINT_T*)P_MALLOC(TOT_VERTICES * sizeof(ULONGINT_T)); assert(permV); global_permV = permV; } thread_barrier_wait(); permV = global_permV; long i; long i_start; long i_stop; createPartition(0, TOT_VERTICES, myId, numThread, &i_start, &i_stop); /* Initialize the array */ for (i = i_start; i < i_stop; i++) { permV[i] = i; } thread_barrier_wait(); for (i = i_start; i < i_stop; i++) { long t1 = PRANDOM_GENERATE(stream); long t = i + t1 % (TOT_VERTICES - i); if (t != i) { __transaction_atomic { long t2 = permV[t]; permV[t] = permV[i]; permV[i] = t2; } } }
/* ============================================================================= * genScalData * ============================================================================= */ void genScalData (void* argPtr) { TM_THREAD_ENTER(); graphSDG* SDGdataPtr = (graphSDG*)argPtr; long myId = thread_getId(); long numThread = thread_getNumThread(); /* * STEP 0: Create the permutations required to randomize the vertices */ random_t* stream = PRANDOM_ALLOC(); assert(stream); PRANDOM_SEED(stream, myId); ULONGINT_T* permV; /* the vars associated with the graph tuple */ if (myId == 0) { permV = (ULONGINT_T*)P_MALLOC(TOT_VERTICES * sizeof(ULONGINT_T)); assert(permV); global_permV = permV; } thread_barrier_wait(); permV = global_permV; long i; long i_start; long i_stop; createPartition(0, TOT_VERTICES, myId, numThread, &i_start, &i_stop); /* Initialize the array */ for (i = i_start; i < i_stop; i++) { permV[i] = i; } thread_barrier_wait(); for (i = i_start; i < i_stop; i++) { long t1 = PRANDOM_GENERATE(stream); long t = i + t1 % (TOT_VERTICES - i); if (t != i) { AL_LOCK(0); TM_BEGIN(); long t2 = (long)TM_SHARED_READ(permV[t]); TM_SHARED_WRITE(permV[t], TM_SHARED_READ(permV[i])); TM_SHARED_WRITE(permV[i], t2); TM_END(); } } /* * STEP 1: Create Cliques */ long* cliqueSizes; long estTotCliques = ceil(1.5 * TOT_VERTICES / ((1+MAX_CLIQUE_SIZE)/2)); /* * Allocate mem for Clique array * Estimate number of clique required and pad by 50% */ if (myId == 0) { cliqueSizes = (long*)P_MALLOC(estTotCliques * sizeof(long)); assert(cliqueSizes); global_cliqueSizes = cliqueSizes; } thread_barrier_wait(); cliqueSizes = global_cliqueSizes; createPartition(0, estTotCliques, myId, numThread, &i_start, &i_stop); /* Generate random clique sizes. */ for (i = i_start; i < i_stop; i++) { cliqueSizes[i] = 1 + (PRANDOM_GENERATE(stream) % MAX_CLIQUE_SIZE); } thread_barrier_wait(); long totCliques = 0; /* * Allocate memory for cliqueList */ ULONGINT_T* lastVsInCliques; ULONGINT_T* firstVsInCliques; if (myId == 0) { lastVsInCliques = (ULONGINT_T*)P_MALLOC(estTotCliques * sizeof(ULONGINT_T)); assert(lastVsInCliques); global_lastVsInCliques = lastVsInCliques; firstVsInCliques = (ULONGINT_T*)P_MALLOC(estTotCliques * sizeof(ULONGINT_T)); assert(firstVsInCliques); global_firstVsInCliques = firstVsInCliques; /* * Sum up vertices in each clique to determine the lastVsInCliques array */ lastVsInCliques[0] = cliqueSizes[0] - 1; for (i = 1; i < estTotCliques; i++) { lastVsInCliques[i] = cliqueSizes[i] + lastVsInCliques[i-1]; if (lastVsInCliques[i] >= TOT_VERTICES-1) { break; } } totCliques = i + 1; global_totCliques = totCliques; /* * Fix the size of the last clique */ cliqueSizes[totCliques-1] = TOT_VERTICES - lastVsInCliques[totCliques-2] - 1; lastVsInCliques[totCliques-1] = TOT_VERTICES - 1; firstVsInCliques[0] = 0; } thread_barrier_wait(); lastVsInCliques = global_lastVsInCliques; firstVsInCliques = global_firstVsInCliques; totCliques = global_totCliques; /* Compute start Vertices in cliques. */ createPartition(1, totCliques, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { firstVsInCliques[i] = lastVsInCliques[i-1] + 1; } #ifdef WRITE_RESULT_FILES thread_barrier_wait(); /* Write the generated cliques to file for comparison with Kernel 4 */ if (myId == 0) { FILE* outfp = fopen("cliques.txt", "w"); fprintf(outfp, "No. of cliques - %lu\n", totCliques); for (i = 0; i < totCliques; i++) { fprintf(outfp, "Clq %lu - ", i); long j; for (j = firstVsInCliques[i]; j <= lastVsInCliques[i]; j++) { fprintf(outfp, "%lu ", permV[j]); } fprintf(outfp, "\n"); } fclose(outfp); } thread_barrier_wait(); #endif /* * STEP 2: Create the edges within the cliques */ /* * Estimate number of edges - using an empirical measure */ long estTotEdges; if (SCALE >= 12) { estTotEdges = ceil(((MAX_CLIQUE_SIZE-1) * TOT_VERTICES)); } else { estTotEdges = ceil(1.2 * (((MAX_CLIQUE_SIZE-1)*TOT_VERTICES) * ((1 + MAX_PARAL_EDGES)/2) + TOT_VERTICES*2)); } /* * Initialize edge counter */ long i_edgePtr = 0; float p = PROB_UNIDIRECTIONAL; /* * Partial edgeLists */ ULONGINT_T* startV; ULONGINT_T* endV; if (numThread > 3) { long numByte = 1.5 * (estTotEdges/numThread) * sizeof(ULONGINT_T); startV = (ULONGINT_T*)P_MALLOC(numByte); endV = (ULONGINT_T*)P_MALLOC(numByte); } else { long numByte = (estTotEdges/numThread) * sizeof(ULONGINT_T); startV = (ULONGINT_T*)P_MALLOC(numByte); endV = (ULONGINT_T*)P_MALLOC(numByte); } assert(startV); assert(endV); /* * Tmp array to keep track of the no. of parallel edges in each direction */ ULONGINT_T** tmpEdgeCounter = (ULONGINT_T**)P_MALLOC(MAX_CLIQUE_SIZE * sizeof(ULONGINT_T *)); assert(tmpEdgeCounter); for (i = 0; i < MAX_CLIQUE_SIZE; i++) { tmpEdgeCounter[i] = (ULONGINT_T*)P_MALLOC(MAX_CLIQUE_SIZE * sizeof(ULONGINT_T)); assert(tmpEdgeCounter[i]); } /* * Create edges in parallel */ long i_clique; createPartition(0, totCliques, myId, numThread, &i_start, &i_stop); for (i_clique = i_start; i_clique < i_stop; i_clique++) { /* * Get current clique parameters */ long i_cliqueSize = cliqueSizes[i_clique]; long i_firstVsInClique = firstVsInCliques[i_clique]; /* * First create at least one edge between two vetices in a clique */ for (i = 0; i < i_cliqueSize; i++) { long j; for (j = 0; j < i; j++) { float r = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if (r >= p) { startV[i_edgePtr] = i + i_firstVsInClique; endV[i_edgePtr] = j + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[i][j] = 1; startV[i_edgePtr] = j + i_firstVsInClique; endV[i_edgePtr] = i + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[j][i] = 1; } else if (r >= 0.5) { startV[i_edgePtr] = i + i_firstVsInClique; endV[i_edgePtr] = j + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[i][j] = 1; tmpEdgeCounter[j][i] = 0; } else { startV[i_edgePtr] = j + i_firstVsInClique; endV[i_edgePtr] = i + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[j][i] = 1; tmpEdgeCounter[i][j] = 0; } } /* for j */ } /* for i */ if (i_cliqueSize != 1) { long randNumEdges = (long)(PRANDOM_GENERATE(stream) % (2*i_cliqueSize*MAX_PARAL_EDGES)); long i_paralEdge; for (i_paralEdge = 0; i_paralEdge < randNumEdges; i_paralEdge++) { i = (PRANDOM_GENERATE(stream) % i_cliqueSize); long j = (PRANDOM_GENERATE(stream) % i_cliqueSize); if ((i != j) && (tmpEdgeCounter[i][j] < MAX_PARAL_EDGES)) { float r = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if (r >= p) { /* Copy to edge structure. */ startV[i_edgePtr] = i + i_firstVsInClique; endV[i_edgePtr] = j + i_firstVsInClique; i_edgePtr++; tmpEdgeCounter[i][j]++; } } } } } /* for i_clique */ for (i = 0; i < MAX_CLIQUE_SIZE; i++) { P_FREE(tmpEdgeCounter[i]); } P_FREE(tmpEdgeCounter); /* * Merge partial edge lists */ ULONGINT_T* i_edgeStartCounter; ULONGINT_T* i_edgeEndCounter; if (myId == 0) { i_edgeStartCounter = (ULONGINT_T*)P_MALLOC(numThread * sizeof(ULONGINT_T)); assert(i_edgeStartCounter); global_i_edgeStartCounter = i_edgeStartCounter; i_edgeEndCounter = (ULONGINT_T*)P_MALLOC(numThread * sizeof(ULONGINT_T)); assert(i_edgeEndCounter); global_i_edgeEndCounter = i_edgeEndCounter; } thread_barrier_wait(); i_edgeStartCounter = global_i_edgeStartCounter; i_edgeEndCounter = global_i_edgeEndCounter; i_edgeEndCounter[myId] = i_edgePtr; i_edgeStartCounter[myId] = 0; thread_barrier_wait(); if (myId == 0) { for (i = 1; i < numThread; i++) { i_edgeEndCounter[i] = i_edgeEndCounter[i-1] + i_edgeEndCounter[i]; i_edgeStartCounter[i] = i_edgeEndCounter[i-1]; } } AL_LOCK(0); TM_BEGIN(); TM_SHARED_WRITE(global_edgeNum, ((long)TM_SHARED_READ(global_edgeNum) + i_edgePtr)); TM_END(); thread_barrier_wait(); long edgeNum = global_edgeNum; /* * Initialize edge list arrays */ ULONGINT_T* startVertex; ULONGINT_T* endVertex; if (myId == 0) { if (SCALE < 10) { long numByte = 2 * edgeNum * sizeof(ULONGINT_T); startVertex = (ULONGINT_T*)P_MALLOC(numByte); endVertex = (ULONGINT_T*)P_MALLOC(numByte); } else { long numByte = (edgeNum + MAX_PARAL_EDGES * TOT_VERTICES) * sizeof(ULONGINT_T); startVertex = (ULONGINT_T*)P_MALLOC(numByte); endVertex = (ULONGINT_T*)P_MALLOC(numByte); } assert(startVertex); assert(endVertex); global_startVertex = startVertex; global_endVertex = endVertex; } thread_barrier_wait(); startVertex = global_startVertex; endVertex = global_endVertex; for (i = i_edgeStartCounter[myId]; i < i_edgeEndCounter[myId]; i++) { startVertex[i] = startV[i-i_edgeStartCounter[myId]]; endVertex[i] = endV[i-i_edgeStartCounter[myId]]; } ULONGINT_T numEdgesPlacedInCliques = edgeNum; thread_barrier_wait(); /* * STEP 3: Connect the cliques */ i_edgePtr = 0; p = PROB_INTERCL_EDGES; /* * Generating inter-clique edges as given in the specs */ createPartition(0, TOT_VERTICES, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { ULONGINT_T tempVertex1 = i; long h = totCliques; long l = 0; long t = -1; while (h - l > 1) { long m = (h + l) / 2; if (tempVertex1 >= firstVsInCliques[m]) { l = m; } else { if ((tempVertex1 < firstVsInCliques[m]) && (m > 0)) { if (tempVertex1 >= firstVsInCliques[m-1]) { t = m - 1; break; } else { h = m; } } } } if (t == -1) { long m; for (m = (l + 1); m < h; m++) { if (tempVertex1<firstVsInCliques[m]) { break; } } t = m-1; } long t1 = firstVsInCliques[t]; ULONGINT_T d; for (d = 1, p = PROB_INTERCL_EDGES; d < TOT_VERTICES; d *= 2, p /= 2) { float r = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if (r <= p) { ULONGINT_T tempVertex2 = (i+d) % TOT_VERTICES; h = totCliques; l = 0; t = -1; while (h - l > 1) { long m = (h + l) / 2; if (tempVertex2 >= firstVsInCliques[m]) { l = m; } else { if ((tempVertex2 < firstVsInCliques[m]) && (m > 0)) { if (firstVsInCliques[m-1] <= tempVertex2) { t = m - 1; break; } else { h = m; } } } } if (t == -1) { long m; for (m = (l + 1); m < h; m++) { if (tempVertex2 < firstVsInCliques[m]) { break; } } t = m - 1; } long t2 = firstVsInCliques[t]; if (t1 != t2) { long randNumEdges = PRANDOM_GENERATE(stream) % MAX_PARAL_EDGES + 1; long j; for (j = 0; j < randNumEdges; j++) { startV[i_edgePtr] = tempVertex1; endV[i_edgePtr] = tempVertex2; i_edgePtr++; } } } /* r <= p */ float r0 = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if ((r0 <= p) && (i-d>=0)) { ULONGINT_T tempVertex2 = (i-d) % TOT_VERTICES; h = totCliques; l = 0; t = -1; while (h - l > 1) { long m = (h + l) / 2; if (tempVertex2 >= firstVsInCliques[m]) { l = m; } else { if ((tempVertex2 < firstVsInCliques[m]) && (m > 0)) { if (firstVsInCliques[m-1] <= tempVertex2) { t = m - 1; break; } else { h = m; } } } } if (t == -1) { long m; for (m = (l + 1); m < h; m++) { if (tempVertex2 < firstVsInCliques[m]) { break; } } t = m - 1; } long t2 = firstVsInCliques[t]; if (t1 != t2) { long randNumEdges = PRANDOM_GENERATE(stream) % MAX_PARAL_EDGES + 1; long j; for (j = 0; j < randNumEdges; j++) { startV[i_edgePtr] = tempVertex1; endV[i_edgePtr] = tempVertex2; i_edgePtr++; } } } /* r0 <= p && (i-d) > 0 */ } /* for d, p */ } /* for i */ i_edgeEndCounter[myId] = i_edgePtr; i_edgeStartCounter[myId] = 0; if (myId == 0) { global_edgeNum = 0; } thread_barrier_wait(); if (myId == 0) { for (i = 1; i < numThread; i++) { i_edgeEndCounter[i] = i_edgeEndCounter[i-1] + i_edgeEndCounter[i]; i_edgeStartCounter[i] = i_edgeEndCounter[i-1]; } } AL_LOCK(0); TM_BEGIN(); TM_SHARED_WRITE(global_edgeNum, ((long)TM_SHARED_READ(global_edgeNum) + i_edgePtr)); TM_END(); thread_barrier_wait(); edgeNum = global_edgeNum; ULONGINT_T numEdgesPlacedOutside = global_edgeNum; for (i = i_edgeStartCounter[myId]; i < i_edgeEndCounter[myId]; i++) { startVertex[i+numEdgesPlacedInCliques] = startV[i-i_edgeStartCounter[myId]]; endVertex[i+numEdgesPlacedInCliques] = endV[i-i_edgeStartCounter[myId]]; } thread_barrier_wait(); ULONGINT_T numEdgesPlaced = numEdgesPlacedInCliques + numEdgesPlacedOutside; if (myId == 0) { SDGdataPtr->numEdgesPlaced = numEdgesPlaced; printf("Finished generating edges\n"); printf("No. of intra-clique edges - %lu\n", numEdgesPlacedInCliques); printf("No. of inter-clique edges - %lu\n", numEdgesPlacedOutside); printf("Total no. of edges - %lu\n", numEdgesPlaced); P_FREE(i_edgeStartCounter); P_FREE(i_edgeEndCounter); P_FREE(cliqueSizes); P_FREE(firstVsInCliques); P_FREE(lastVsInCliques); } thread_barrier_wait(); P_FREE(startV); P_FREE(endV); /* * STEP 4: Generate edge weights */ if (myId == 0) { SDGdataPtr->intWeight = (LONGINT_T*)P_MALLOC(numEdgesPlaced * sizeof(LONGINT_T)); assert(SDGdataPtr->intWeight); } thread_barrier_wait(); p = PERC_INT_WEIGHTS; ULONGINT_T numStrWtEdges = 0; createPartition(0, numEdgesPlaced, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { float r = (float)(PRANDOM_GENERATE(stream) % 1000) / (float)1000; if (r <= p) { SDGdataPtr->intWeight[i] = 1 + (PRANDOM_GENERATE(stream) % (MAX_INT_WEIGHT-1)); } else { SDGdataPtr->intWeight[i] = -1; numStrWtEdges++; } } thread_barrier_wait(); if (myId == 0) { long t = 0; for (i = 0; i < numEdgesPlaced; i++) { if (SDGdataPtr->intWeight[i] < 0) { SDGdataPtr->intWeight[i] = -t; t++; } } } AL_LOCK(0); TM_BEGIN(); TM_SHARED_WRITE(global_numStrWtEdges, ((long)TM_SHARED_READ(global_numStrWtEdges) + numStrWtEdges)); TM_END(); thread_barrier_wait(); numStrWtEdges = global_numStrWtEdges; if (myId == 0) { SDGdataPtr->strWeight = (char*)P_MALLOC(numStrWtEdges * MAX_STRLEN * sizeof(char)); assert(SDGdataPtr->strWeight); } thread_barrier_wait(); createPartition(0, numEdgesPlaced, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { if (SDGdataPtr->intWeight[i] <= 0) { long j; for (j = 0; j < MAX_STRLEN; j++) { SDGdataPtr->strWeight[(-SDGdataPtr->intWeight[i])*MAX_STRLEN+j] = (char) (1 + PRANDOM_GENERATE(stream) % 127); } } } /* * Choose SOUGHT STRING randomly if not assigned */ if (myId == 0) { if (strlen(SOUGHT_STRING) != MAX_STRLEN) { SOUGHT_STRING = (char*)P_MALLOC(MAX_STRLEN * sizeof(char)); assert(SOUGHT_STRING); } long t = PRANDOM_GENERATE(stream) % numStrWtEdges; long j; for (j = 0; j < MAX_STRLEN; j++) { SOUGHT_STRING[j] = (char) ((long) SDGdataPtr->strWeight[t*MAX_STRLEN+j]); } } thread_barrier_wait(); /* * STEP 5: Permute Vertices */ for (i = i_start; i < i_stop; i++) { startVertex[i] = permV[(startVertex[i])]; endVertex[i] = permV[(endVertex[i])]; } thread_barrier_wait(); /* * STEP 6: Sort Vertices */ /* * Radix sort with StartVertex as primary key */ if (myId == 0) { long numByte = numEdgesPlaced * sizeof(ULONGINT_T); SDGdataPtr->startVertex = (ULONGINT_T*)P_MALLOC(numByte); assert(SDGdataPtr->startVertex); SDGdataPtr->endVertex = (ULONGINT_T*)P_MALLOC(numByte); assert(SDGdataPtr->endVertex); } thread_barrier_wait(); all_radixsort_node_aux_s3(numEdgesPlaced, startVertex, SDGdataPtr->startVertex, endVertex, SDGdataPtr->endVertex); thread_barrier_wait(); if (myId == 0) { P_FREE(startVertex); P_FREE(endVertex); } thread_barrier_wait(); if (SCALE < 12) { /* * Sort with endVertex as secondary key */ if (myId == 0) { long i0 = 0; long i1 = 0; i = 0; while (i < numEdgesPlaced) { for (i = i0; i < numEdgesPlaced; i++) { if (SDGdataPtr->startVertex[i] != SDGdataPtr->startVertex[i1]) { i1 = i; break; } } long j; for (j = i0; j < i1; j++) { long k; for (k = j+1; k < i1; k++) { if (SDGdataPtr->endVertex[k] < SDGdataPtr->endVertex[j]) { long t = SDGdataPtr->endVertex[j]; SDGdataPtr->endVertex[j] = SDGdataPtr->endVertex[k]; SDGdataPtr->endVertex[k] = t; } } } if (SDGdataPtr->startVertex[i0] != TOT_VERTICES-1) { i0 = i1; } else { long j; for (j=i0; j<numEdgesPlaced; j++) { long k; for (k=j+1; k<numEdgesPlaced; k++) { if (SDGdataPtr->endVertex[k] < SDGdataPtr->endVertex[j]) { long t = SDGdataPtr->endVertex[j]; SDGdataPtr->endVertex[j] = SDGdataPtr->endVertex[k]; SDGdataPtr->endVertex[k] = t; } } } } } /* while i < numEdgesPlaced */ } } else { ULONGINT_T* tempIndex; if (myId == 0) { tempIndex = (ULONGINT_T*)P_MALLOC((TOT_VERTICES + 1) * sizeof(ULONGINT_T)); assert(tempIndex); global_tempIndex = tempIndex; /* * Update degree of each vertex */ tempIndex[0] = 0; tempIndex[TOT_VERTICES] = numEdgesPlaced; long i0 = 0; for (i=0; i < TOT_VERTICES; i++) { tempIndex[i+1] = tempIndex[i]; long j; for (j = i0; j < numEdgesPlaced; j++) { if (SDGdataPtr->startVertex[j] != SDGdataPtr->startVertex[i0]) { if (SDGdataPtr->startVertex[i0] == i) { tempIndex[i+1] = j; i0 = j; break; } } } } } thread_barrier_wait(); tempIndex = global_tempIndex; /* * Insertion sort for now, replace with something better later on */ #if 0 createPartition(0, TOT_VERTICES, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { long j; for (j = tempIndex[i]; j < tempIndex[i+1]; j++) { long k; for (k = (j + 1); k < tempIndex[i+1]; k++) { if (SDGdataPtr->endVertex[k] < SDGdataPtr->endVertex[j]) { long t = SDGdataPtr->endVertex[j]; SDGdataPtr->endVertex[j] = SDGdataPtr->endVertex[k]; SDGdataPtr->endVertex[k] = t; } } } } #else if (myId == 0) { for (i = 0; i < TOT_VERTICES; i++) { long j; for (j = tempIndex[i]; j < tempIndex[i+1]; j++) { long k; for (k = (j + 1); k < tempIndex[i+1]; k++) { if (SDGdataPtr->endVertex[k] < SDGdataPtr->endVertex[j]) { long t = SDGdataPtr->endVertex[j]; SDGdataPtr->endVertex[j] = SDGdataPtr->endVertex[k]; SDGdataPtr->endVertex[k] = t; } } } } } #endif if (myId == 0) { P_FREE(tempIndex); } } /* SCALE >= 12 */ PRANDOM_FREE(stream); if (myId == 0) { P_FREE(permV); } TM_THREAD_EXIT(); }