int main(int argc, const char * argv[]) { // prefix_sums test: int A[] = {1, 2, 3, 4, 5, 6, 7}; struct Results s = prefix_sums(A, 7); printf("Array = {1, 2,... 7} \n"); printf("Prefix sums: "); for (int i = 0; i <= s.L; i++) { printf("%i, ", (s.C[i])); } // Passing cars test int B[] = {0, 1, 0, 1, 1}; printf("\nCars: %i ", PassingCars(B, 5)); // Genomic Range Query test int P[] = {2, 5, 0}; int Q[] = {4, 5, 6}; char *S = "CAGCCTA"; // ACGT printf("\nGenomic Range Query:\n "); s = GenomicRangeQuery(S, P, Q, 3); for (int i = 0; i < s.L; i++) { printf("%i, ", (s.C[i])); } }
int main(int argc, char **argv) { const int maxThreads = omp_get_max_threads(); if (argc < 4) { fprintf(stderr, "Usage: bench <csv file> <input size> <num threads> [<num threads> ...]\n"); return -1; } FILE *const csvFile = csv_open(argv[1]); if (csvFile == NULL) { return -1; } const int len = safe_strtol(argv[2]); if (len < 1) { fprintf(stderr, "Input size must be positive\n"); return -1; } TYPE *nrs = random_array(len, time(NULL)); if (nrs == NULL) { return -1; } for (int i = 3; i < argc; i++) { int threads = safe_strtol(argv[i]); if (threads < 1) { threads = maxThreads; } omp_set_num_threads(threads); printf("%s. omp_get_max_threads() == %d\n", algorithm_name, threads); /* Bench the parallel implementation. */ double start = omp_get_wtime(); if (prefix_sums(nrs, len, NULL) != 0) { return -1; } double par_time = omp_get_wtime() - start; printf("elements: %d; par time: %f\n\n", len, par_time); fprintf(csvFile, "%s,%d,%d,%f\n", algorithm_name, threads, len, par_time); } free(nrs); csv_close(csvFile); return 0; }
double computeGraph(graph* G, graphSDG* SDGdata) { VERT_T* endV; LONG_T *degree, *numEdges, *pos, *pSums; WEIGHT_T* w; double elapsed_time; #ifdef _OPENMP omp_lock_t *vLock; LONG_T chunkSize; #endif elapsed_time = get_seconds(); #ifdef _OPENMP omp_set_num_threads(NUM_THREADS); #endif #ifdef _OPENMP #pragma omp parallel #endif { LONG_T i, j, u, n, m, tid, nthreads; #ifdef DIAGNOSTIC double elapsed_time_part; #endif #ifdef _OPENMP nthreads = omp_get_num_threads(); tid = omp_get_thread_num(); #else tid = 0; nthreads = 1; #endif n = N; m = M; if (tid == 0) { #ifdef _OPENMP vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t)); assert(vLock != NULL); chunkSize = n/nthreads; #endif pos = (LONG_T *) malloc(m*sizeof(LONG_T)); assert(pos != NULL); degree = (LONG_T *) calloc(n, sizeof(LONG_T)); assert(degree != NULL); } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds(); } #endif #ifdef _OPENMP #pragma omp barrier #pragma omp for schedule(static, chunkSize) for (i=0; i<n; i++) { omp_init_lock(&vLock[i]); } #pragma omp barrier #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "Lock initialization time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif #pragma omp for #endif for (i=0; i<m; i++) { u = SDGdata->startVertex[i]; #ifdef _OPENMP omp_set_lock(&vLock[u]); #endif pos[i] = degree[u]++; #ifdef _OPENMP omp_unset_lock(&vLock[u]); #endif } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "Degree computation time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif #ifdef _OPENMP #pragma omp barrier #pragma omp for schedule(static, chunkSize) for (i=0; i<n; i++) { omp_destroy_lock(&vLock[i]); } if (tid == 0) free(vLock); #endif #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "Lock destruction time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif if (tid == 0) { numEdges = (LONG_T *) malloc((n+1)*sizeof(LONG_T)); pSums = (LONG_T *) malloc(nthreads*sizeof(LONG_T)); } #ifdef _OPENMP #pragma omp barrier #endif prefix_sums(degree, numEdges, pSums, n); #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "Prefix sums time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif #ifdef _OPENMP #pragma omp barrier #endif if (tid == 0) { free(degree); free(pSums); w = (WEIGHT_T *) malloc(m*sizeof(WEIGHT_T)); endV = (VERT_T *) malloc(m* sizeof(VERT_T)); } #ifdef _OPENMP #pragma omp barrier #pragma omp for #endif for (i=0; i<m; i++) { u = SDGdata->startVertex[i]; j = numEdges[u] + pos[i]; endV[j] = SDGdata->endVertex[i]; //TODO: //w[j] = SDGdata->weight[i]; fprintf(stderr, "%d\n", SDGdata->weight[i]); w[j] = 1; } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "Edge data structure construction time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif if (tid == 0) { free(pos); G->n = n; G->m = m; G->numEdges = numEdges; G->endV = endV; G->weight = w; } #ifdef _OPENMP #endif } /* Verification */ #if 0 fprintf(stderr, "SDG data:\n"); for (int i=0; i<SDGdata->m; i++) { fprintf(stderr, "[%ld %ld %ld] ", SDGdata->startVertex[i], SDGdata->endVertex[i], SDGdata->weight[i]); } fprintf(stderr, "\n"); for (int i=0; i<G->n + 1; i++) { fprintf(stderr, "[%ld] ", G->numEdges[i]); } fprintf(stderr, "\nGraph:\n"); for (int i=0; i<G->n; i++) { for (int j=G->numEdges[i]; j<G->numEdges[i+1]; j++) { fprintf(stderr, "[%ld %ld %ld] ", i, G->endV[j], G->weight[j]); } } #endif free(SDGdata->startVertex); free(SDGdata->endVertex); free(SDGdata->weight); elapsed_time = get_seconds() - elapsed_time; return elapsed_time; }
void vertex_betweenness_centrality_parBFS(graph_t* G, double* BC, long numSrcs) { attr_id_t *S; /* stack of vertices in the order of non-decreasing distance from s. Also used to implicitly represent the BFS queue */ plist_t* P; /* predecessors of a vertex v on shortest paths from s */ double* sig; /* No. of shortest paths */ attr_id_t* d; /* Length of the shortest path between every pair */ double* del; /* dependency of vertices */ attr_id_t *in_degree, *numEdges, *pSums; attr_id_t* pListMem; #if RANDSRCS attr_id_t* Srcs; #endif attr_id_t *start, *end; long MAX_NUM_PHASES; attr_id_t *psCount; #ifdef _OPENMP omp_lock_t* vLock; long chunkSize; #endif #ifdef DIAGNOSTIC double elapsed_time; #endif int seed = 2387; #ifdef _OPENMP #pragma omp parallel firstprivate(G) { #endif attr_id_t *myS, *myS_t; attr_id_t myS_size; long i, j, k, p, count, myCount; long v, w, vert; long k0, k1; long numV, num_traversals, n, m, phase_num; long start_iter, end_iter; long tid, nthreads; int* stream; #ifdef DIAGNOSTIC double elapsed_time_part; #endif #ifdef _OPENMP int myLock; tid = omp_get_thread_num(); nthreads = omp_get_num_threads(); #else tid = 0; nthreads = 1; #endif #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time = get_seconds(); elapsed_time_part = get_seconds(); } #endif /* numV: no. of vertices to run BFS from = numSrcs */ numV = numSrcs; n = G->n; m = G->m; /* Permute vertices */ if (tid == 0) { #if RANDSRCS Srcs = (attr_id_t *) malloc(n*sizeof(attr_id_t)); #endif #ifdef _OPENMP vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t)); #endif } #ifdef _OPENMP #pragma omp barrier #pragma omp for for (i=0; i<n; i++) { omp_init_lock(&vLock[i]); } #endif /* Initialize RNG stream */ stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT); #if RANDSRCS #ifdef _OPENMP #pragma omp for #endif for (i=0; i<n; i++) { Srcs[i] = i; } #ifdef _OPENMP #pragma omp for #endif for (i=0; i<n; i++) { j = n * sprng(stream); if (i != j) { #ifdef _OPENMP int l1 = omp_test_lock(&vLock[i]); if (l1) { int l2 = omp_test_lock(&vLock[j]); if (l2) { #endif k = Srcs[i]; Srcs[i] = Srcs[j]; Srcs[j] = k; #ifdef _OPENMP omp_unset_lock(&vLock[j]); } omp_unset_lock(&vLock[i]); } #endif } } #endif #ifdef _OPENMP #pragma omp barrier #endif if (tid == 0) { MAX_NUM_PHASES = 500; } #ifdef _OPENMP #pragma omp barrier #endif /* Initialize predecessor lists */ /* The size of the predecessor list of each vertex is bounded by its in-degree. So we first compute the in-degree of every vertex */ if (tid == 0) { P = (plist_t *) calloc(n, sizeof(plist_t)); in_degree = (attr_id_t *) calloc(n+1, sizeof(attr_id_t)); numEdges = (attr_id_t *) malloc((n+1)*sizeof(attr_id_t)); pSums = (attr_id_t *) malloc(nthreads*sizeof(attr_id_t)); } #ifdef _OPENMP #pragma omp barrier #pragma omp for #endif for (i=0; i<m; i++) { v = G->endV[i]; #ifdef _OPENMP omp_set_lock(&vLock[v]); #endif in_degree[v]++; #ifdef _OPENMP omp_unset_lock(&vLock[v]); #endif } prefix_sums(in_degree, numEdges, pSums, n); if (tid == 0) { pListMem = (attr_id_t *) malloc(m*sizeof(attr_id_t)); } #ifdef _OPENMP #pragma omp barrier #pragma omp for #endif for (i=0; i<n; i++) { P[i].list = pListMem + numEdges[i]; P[i].degree = in_degree[i]; P[i].count = 0; } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() -elapsed_time_part; fprintf(stderr, "In-degree computation time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif /* Allocate shared memory */ if (tid == 0) { free(in_degree); free(numEdges); free(pSums); S = (attr_id_t *) malloc(n*sizeof(attr_id_t)); sig = (double *) malloc(n*sizeof(double)); d = (attr_id_t *) malloc(n*sizeof(attr_id_t)); del = (double *) calloc(n, sizeof(double)); start = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t)); end = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t)); psCount = (attr_id_t *) malloc((nthreads+1)*sizeof(attr_id_t)); } /* local memory for each thread */ myS_size = (2*n)/nthreads; myS = (attr_id_t *) malloc(myS_size*sizeof(attr_id_t)); num_traversals = 0; myCount = 0; #ifdef _OPENMP #pragma omp barrier #endif #ifdef _OPENMP #pragma omp for #endif for (i=0; i<n; i++) { d[i] = -1; } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "BC initialization time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif for (p=0; p<n; p++) { #if RANDSRCS i = Srcs[p]; #else i = p; #endif if (G->numEdges[i+1] - G->numEdges[i] == 0) { continue; } else { num_traversals++; } if (num_traversals == numV + 1) { break; } if (tid == 0) { sig[i] = 1; d[i] = 0; S[0] = i; start[0] = 0; end[0] = 1; } count = 1; phase_num = 0; #ifdef _OPENMP #pragma omp barrier #endif while (end[phase_num] - start[phase_num] > 0) { myCount = 0; start_iter = start[phase_num]; end_iter = end[phase_num]; #ifdef _OPENMP #pragma omp barrier #pragma omp for schedule(dynamic) nowait #endif for (vert = start_iter; vert < end_iter; vert++) { v = S[vert]; for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) { w = G->endV[j]; if (v != w) { #ifdef _OPENMP myLock = omp_test_lock(&vLock[w]); if (myLock) { #endif /* w found for the first time? */ if (d[w] == -1) { if (myS_size == myCount) { /* Resize myS */ myS_t = (attr_id_t *) malloc(2*myS_size*sizeof(attr_id_t)); memcpy(myS_t, myS, myS_size*sizeof(attr_id_t)); free(myS); myS = myS_t; myS_size = 2*myS_size; } myS[myCount++] = w; d[w] = d[v] + 1; sig[w] = sig[v]; P[w].list[P[w].count++] = v; } else if (d[w] == d[v] + 1) { sig[w] += sig[v]; P[w].list[P[w].count++] = v; } #ifdef _OPENMP omp_unset_lock(&vLock[w]); } else { if ((d[w] == -1) || (d[w] == d[v]+ 1)) { omp_set_lock(&vLock[w]); sig[w] += sig[v]; P[w].list[P[w].count++] = v; omp_unset_lock(&vLock[w]); } } #endif } } } /* Merge all local stacks for next iteration */ phase_num++; if (tid == 0) { if (phase_num >= MAX_NUM_PHASES) { fprintf(stderr, "Error: Max num phases set to %ld\n", MAX_NUM_PHASES); fprintf(stderr, "Diameter of input network greater than" " this value. Increase MAX_NUM_PHASES" " in vertex_betweenness_centrality_parBFS()\n"); exit(-1); } } psCount[tid+1] = myCount; #ifdef _OPENMP #pragma omp barrier #endif if (tid == 0) { start[phase_num] = end[phase_num-1]; psCount[0] = start[phase_num]; for(k=1; k<=nthreads; k++) { psCount[k] = psCount[k-1] + psCount[k]; } end[phase_num] = psCount[nthreads]; } #ifdef _OPENMP #pragma omp barrier #endif k0 = psCount[tid]; k1 = psCount[tid+1]; for (k = k0; k < k1; k++) { S[k] = myS[k-k0]; } count = end[phase_num]; } phase_num--; while (phase_num > 0) { start_iter = start[phase_num]; end_iter = end[phase_num]; #ifdef _OPENMP #pragma omp for schedule(static) nowait #endif for (j=start_iter; j<end_iter; j++) { w = S[j]; for (k = 0; k<P[w].count; k++) { v = P[w].list[k]; #ifdef _OPENMP omp_set_lock(&vLock[v]); #endif del[v] = del[v] + sig[v]*(1+del[w])/sig[w]; #ifdef _OPENMP omp_unset_lock(&vLock[v]); #endif } BC[w] += del[w]; } phase_num--; #ifdef _OPENMP #pragma omp barrier #endif } #ifdef _OPENMP chunkSize = n/nthreads; #pragma omp for schedule(static, chunkSize) nowait #endif for (j=0; j<count; j++) { w = S[j]; d[w] = -1; del[w] = 0; P[w].count = 0; } #ifdef _OPENMP #pragma omp barrier #endif } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "BC computation time: %lf seconds\n", elapsed_time_part); } #endif #ifdef _OPENMP #pragma omp barrier #endif #ifdef _OPENMP #pragma omp for for (i=0; i<n; i++) { omp_destroy_lock(&vLock[i]); } #endif free(myS); if (tid == 0) { free(S); free(pListMem); free(P); free(sig); free(d); free(del); #ifdef _OPENMP free(vLock); #endif free(start); free(end); free(psCount); #ifdef DIAGNOSTIC elapsed_time = get_seconds() - elapsed_time; fprintf(stderr, "Time taken: %lf\n seconds", elapsed_time); #endif #if RANDSRCS free(Srcs); #endif } free_sprng(stream); #ifdef _OPENMP } #endif }
void vertex_betweenness_centrality_simple(graph_t* G, double* BC, long numSrcs) { attr_id_t *in_degree, *numEdges, *pSums; #if RANDSRCS attr_id_t* Srcs; #endif long num_traversals = 0; #ifdef _OPENMP omp_lock_t* vLock; long chunkSize; #endif #ifdef DIAGNOSTIC double elapsed_time; #endif int seed = 2387; /* The outer loop is parallelized in this case. Each thread does a BFS and the vertex BC values are incremented atomically */ #ifdef _OPENMP #pragma omp parallel firstprivate(G) { #endif attr_id_t *S; /* stack of vertices in the order of non-decreasing distance from s. Also used to implicitly represent the BFS queue */ plist_t* P; /* predecessors of a vertex v on shortest paths from s */ attr_id_t* pListMem; double* sig; /* No. of shortest paths */ attr_id_t* d; /* Length of the shortest path between every pair */ double* del; /* dependency of vertices */ attr_id_t *start, *end; long MAX_NUM_PHASES; long i, j, k, p, count; long v, w, vert; long numV, n, m, phase_num; long tid, nthreads; int* stream; #ifdef DIAGNOSTIC double elapsed_time_part; #endif #ifdef _OPENMP int myLock; tid = omp_get_thread_num(); nthreads = omp_get_num_threads(); #else tid = 0; nthreads = 1; #endif #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time = get_seconds(); elapsed_time_part = get_seconds(); } #endif /* numV: no. of vertices to run BFS from = numSrcs */ numV = numSrcs; n = G->n; m = G->m; /* Permute vertices */ if (tid == 0) { #if RANDSRCS Srcs = (attr_id_t *) malloc(n*sizeof(attr_id_t)); #endif #ifdef _OPENMP vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t)); #endif } #ifdef _OPENMP #pragma omp barrier #pragma omp for for (i=0; i<n; i++) { omp_init_lock(&vLock[i]); } #endif /* Initialize RNG stream */ stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT); #if RANDSRCS #ifdef _OPENMP #pragma omp for #endif for (i=0; i<n; i++) { Srcs[i] = i; } #ifdef _OPENMP #pragma omp for #endif for (i=0; i<n; i++) { j = n * sprng(stream); if (i != j) { #ifdef _OPENMP int l1 = omp_test_lock(&vLock[i]); if (l1) { int l2 = omp_test_lock(&vLock[j]); if (l2) { #endif k = Srcs[i]; Srcs[i] = Srcs[j]; Srcs[j] = k; #ifdef _OPENMP omp_unset_lock(&vLock[j]); } omp_unset_lock(&vLock[i]); } #endif } } #endif #ifdef _OPENMP #pragma omp barrier #endif MAX_NUM_PHASES = 50; /* Initialize predecessor lists */ /* The size of the predecessor list of each vertex is bounded by its in-degree. So we first compute the in-degree of every vertex */ if (tid == 0) { in_degree = (attr_id_t *) calloc(n+1, sizeof(attr_id_t)); numEdges = (attr_id_t *) malloc((n+1)*sizeof(attr_id_t)); pSums = (attr_id_t *) malloc(nthreads*sizeof(attr_id_t)); } #ifdef _OPENMP #pragma omp barrier #pragma omp for #endif for (i=0; i<m; i++) { v = G->endV[i]; #ifdef _OPENMP omp_set_lock(&vLock[v]); #endif in_degree[v]++; #ifdef _OPENMP omp_unset_lock(&vLock[v]); #endif } prefix_sums(in_degree, numEdges, pSums, n); P = (plist_t *) calloc(n, sizeof(plist_t)); pListMem = (attr_id_t *) malloc(m*sizeof(attr_id_t)); for (i=0; i<n; i++) { P[i].list = pListMem + numEdges[i]; P[i].degree = in_degree[i]; P[i].count = 0; } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() -elapsed_time_part; fprintf(stderr, "In-degree computation time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif #ifdef _OPENMP #pragma omp barrier #endif /* Allocate shared memory */ if (tid == 0) { free(in_degree); free(numEdges); free(pSums); } S = (attr_id_t *) malloc(n*sizeof(attr_id_t)); sig = (double *) malloc(n*sizeof(double)); d = (attr_id_t *) malloc(n*sizeof(attr_id_t)); del = (double *) calloc(n, sizeof(double)); start = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t)); end = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t)); #ifdef _OPENMP #pragma omp barrier #endif for (i=0; i<n; i++) { d[i] = -1; } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "BC initialization time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif #ifdef _OPENMP #pragma omp for reduction(+:num_traversals) #endif for (p=0; p<numV; p++) { #if RANDSRCS i = Srcs[p]; #else i = p; #endif if (G->numEdges[i+1] - G->numEdges[i] == 0) { continue; } else { num_traversals++; } sig[i] = 1; d[i] = 0; S[0] = i; start[0] = 0; end[0] = 1; count = 1; phase_num = 0; while (end[phase_num] - start[phase_num] > 0) { for (vert = start[phase_num]; vert < end[phase_num]; vert++) { v = S[vert]; for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) { w = G->endV[j]; if (v != w) { /* w found for the first time? */ if (d[w] == -1) { S[count++] = w; d[w] = d[v] + 1; sig[w] = sig[v]; P[w].list[P[w].count++] = v; } else if (d[w] == d[v] + 1) { sig[w] += sig[v]; P[w].list[P[w].count++] = v; } } } } phase_num++; start[phase_num] = end[phase_num-1]; end[phase_num] = count; } phase_num--; while (phase_num > 0) { for (j=start[phase_num]; j<end[phase_num]; j++) { w = S[j]; for (k = 0; k<P[w].count; k++) { v = P[w].list[k]; del[v] = del[v] + sig[v]*(1+del[w])/sig[w]; } #ifdef _OPENMP omp_set_lock(&vLock[w]); BC[w] += del[w]; omp_unset_lock(&vLock[w]); #else BC[w] += del[w]; #endif } phase_num--; } for (j=0; j<count; j++) { w = S[j]; d[w] = -1; del[w] = 0; P[w].count = 0; } } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "BC computation time: %lf seconds\n", elapsed_time_part); } #endif #ifdef _OPENMP #pragma omp barrier #endif #ifdef _OPENMP #pragma omp for for (i=0; i<n; i++) { omp_destroy_lock(&vLock[i]); } #endif free(S); free(pListMem); free(P); free(sig); free(d); free(del); free(start); free(end); if (tid == 0) { #ifdef _OPENMP free(vLock); #endif #if RANDSRCS free(Srcs); #endif #ifdef DIAGNOSTIC elapsed_time = get_seconds() - elapsed_time; fprintf(stderr, "Total time taken: %lf seconds\n", elapsed_time); #endif } free_sprng(stream); #ifdef _OPENMP #pragma omp barrier } #endif }
/* ============================================================================= * computeGraph * ============================================================================= */ void computeGraph (void* argPtr) { TM_THREAD_ENTER(); graph* GPtr = ((computeGraph_arg_t*)argPtr)->GPtr; graphSDG* SDGdataPtr = ((computeGraph_arg_t*)argPtr)->SDGdataPtr; long myId = thread_getId(); long numThread = thread_getNumThread(); ULONGINT_T j; ULONGINT_T maxNumVertices = 0; ULONGINT_T numEdgesPlaced = SDGdataPtr->numEdgesPlaced; /* * First determine the number of vertices by scanning the tuple * startVertex list */ long i; long i_start; long i_stop; createPartition(0, numEdgesPlaced, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { if (SDGdataPtr->startVertex[i] > maxNumVertices) { maxNumVertices = SDGdataPtr->startVertex[i]; } } TM_BEGIN(); long tmp_maxNumVertices = (long)TM_SHARED_READ_L(global_maxNumVertices); long new_maxNumVertices = MAX(tmp_maxNumVertices, maxNumVertices) + 1; TM_SHARED_WRITE_L(global_maxNumVertices, new_maxNumVertices); TM_END(); thread_barrier_wait(); maxNumVertices = global_maxNumVertices; if (myId == 0) { GPtr->numVertices = maxNumVertices; GPtr->numEdges = numEdgesPlaced; GPtr->intWeight = SDGdataPtr->intWeight; GPtr->strWeight = SDGdataPtr->strWeight; for (i = 0; i < numEdgesPlaced; i++) { if (GPtr->intWeight[numEdgesPlaced-i-1] < 0) { GPtr->numStrEdges = -(GPtr->intWeight[numEdgesPlaced-i-1]) + 1; GPtr->numIntEdges = numEdgesPlaced - GPtr->numStrEdges; break; } } GPtr->outDegree = (LONGINT_T*)P_MALLOC((GPtr->numVertices) * sizeof(LONGINT_T)); assert(GPtr->outDegree); GPtr->outVertexIndex = (ULONGINT_T*)P_MALLOC((GPtr->numVertices) * sizeof(ULONGINT_T)); assert(GPtr->outVertexIndex); } thread_barrier_wait(); createPartition(0, GPtr->numVertices, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { GPtr->outDegree[i] = 0; GPtr->outVertexIndex[i] = 0; } ULONGINT_T outVertexListSize = 0; thread_barrier_wait(); ULONGINT_T i0 = -1UL; for (i = i_start; i < i_stop; i++) { ULONGINT_T k = i; if ((outVertexListSize == 0) && (k != 0)) { while (i0 == -1UL) { for (j = 0; j < numEdgesPlaced; j++) { if (k == SDGdataPtr->startVertex[j]) { i0 = j; break; } } k--; } } if ((outVertexListSize == 0) && (k == 0)) { i0 = 0; } for (j = i0; j < numEdgesPlaced; j++) { if (i == GPtr->numVertices-1) { break; } if ((i != SDGdataPtr->startVertex[j])) { if ((j > 0) && (i == SDGdataPtr->startVertex[j-1])) { if (j-i0 >= 1) { outVertexListSize++; GPtr->outDegree[i]++; ULONGINT_T t; for (t = i0+1; t < j; t++) { if (SDGdataPtr->endVertex[t] != SDGdataPtr->endVertex[t-1]) { outVertexListSize++; GPtr->outDegree[i] = GPtr->outDegree[i]+1; } } } } i0 = j; break; } } if (i == GPtr->numVertices-1) { if (numEdgesPlaced-i0 >= 0) { outVertexListSize++; GPtr->outDegree[i]++; ULONGINT_T t; for (t = i0+1; t < numEdgesPlaced; t++) { if (SDGdataPtr->endVertex[t] != SDGdataPtr->endVertex[t-1]) { outVertexListSize++; GPtr->outDegree[i]++; } } } } } /* for i */ thread_barrier_wait(); prefix_sums(GPtr->outVertexIndex, GPtr->outDegree, GPtr->numVertices); thread_barrier_wait(); TM_BEGIN(); TM_SHARED_WRITE_L( global_outVertexListSize, ((long)TM_SHARED_READ_L(global_outVertexListSize) + outVertexListSize) ); TM_END(); thread_barrier_wait(); outVertexListSize = global_outVertexListSize; if (myId == 0) { GPtr->numDirectedEdges = outVertexListSize; GPtr->outVertexList = (ULONGINT_T*)P_MALLOC(outVertexListSize * sizeof(ULONGINT_T)); assert(GPtr->outVertexList); GPtr->paralEdgeIndex = (ULONGINT_T*)P_MALLOC(outVertexListSize * sizeof(ULONGINT_T)); assert(GPtr->paralEdgeIndex); GPtr->outVertexList[0] = SDGdataPtr->endVertex[0]; } thread_barrier_wait(); /* * Evaluate outVertexList */ i0 = -1UL; for (i = i_start; i < i_stop; i++) { ULONGINT_T k = i; while ((i0 == -1UL) && (k != 0)) { for (j = 0; j < numEdgesPlaced; j++) { if (k == SDGdataPtr->startVertex[j]) { i0 = j; break; } } k--; } if ((i0 == -1) && (k == 0)) { i0 = 0; } for (j = i0; j < numEdgesPlaced; j++) { if (i == GPtr->numVertices-1) { break; } if (i != SDGdataPtr->startVertex[j]) { if ((j > 0) && (i == SDGdataPtr->startVertex[j-1])) { if (j-i0 >= 1) { long ii = GPtr->outVertexIndex[i]; ULONGINT_T r = 0; GPtr->paralEdgeIndex[ii] = i0; GPtr->outVertexList[ii] = SDGdataPtr->endVertex[i0]; r++; ULONGINT_T t; for (t = i0+1; t < j; t++) { if (SDGdataPtr->endVertex[t] != SDGdataPtr->endVertex[t-1]) { GPtr->paralEdgeIndex[ii+r] = t; GPtr->outVertexList[ii+r] = SDGdataPtr->endVertex[t]; r++; } } } } i0 = j; break; } } /* for j */ if (i == GPtr->numVertices-1) { ULONGINT_T r = 0; if (numEdgesPlaced-i0 >= 0) { long ii = GPtr->outVertexIndex[i]; GPtr->paralEdgeIndex[ii+r] = i0; GPtr->outVertexList[ii+r] = SDGdataPtr->endVertex[i0]; r++; ULONGINT_T t; for (t = i0+1; t < numEdgesPlaced; t++) { if (SDGdataPtr->endVertex[t] != SDGdataPtr->endVertex[t-1]) { GPtr->paralEdgeIndex[ii+r] = t; GPtr->outVertexList[ii+r] = SDGdataPtr->endVertex[t]; r++; } } } } } /* for i */ thread_barrier_wait(); if (myId == 0) { P_FREE(SDGdataPtr->startVertex); P_FREE(SDGdataPtr->endVertex); GPtr->inDegree = (LONGINT_T*)P_MALLOC(GPtr->numVertices * sizeof(LONGINT_T)); assert(GPtr->inDegree); GPtr->inVertexIndex = (ULONGINT_T*)P_MALLOC(GPtr->numVertices * sizeof(ULONGINT_T)); assert(GPtr->inVertexIndex); } thread_barrier_wait(); for (i = i_start; i < i_stop; i++) { GPtr->inDegree[i] = 0; GPtr->inVertexIndex[i] = 0; } /* A temp. array to store the inplied edges */ ULONGINT_T* impliedEdgeList; if (myId == 0) { impliedEdgeList = (ULONGINT_T*)P_MALLOC(GPtr->numVertices * MAX_CLUSTER_SIZE * sizeof(ULONGINT_T)); global_impliedEdgeList = impliedEdgeList; } thread_barrier_wait(); impliedEdgeList = global_impliedEdgeList; createPartition(0, (GPtr->numVertices * MAX_CLUSTER_SIZE), myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { impliedEdgeList[i] = 0; } /* * An auxiliary array to store implied edges, in case we overshoot * MAX_CLUSTER_SIZE */ ULONGINT_T** auxArr; if (myId == 0) { auxArr = (ULONGINT_T**)P_MALLOC(GPtr->numVertices * sizeof(ULONGINT_T*)); assert(auxArr); global_auxArr = auxArr; } thread_barrier_wait(); auxArr = global_auxArr; createPartition(0, GPtr->numVertices, myId, numThread, &i_start, &i_stop); for (i = i_start; i < i_stop; i++) { /* Inspect adjacency list of vertex i */ for (j = GPtr->outVertexIndex[i]; j < (GPtr->outVertexIndex[i] + GPtr->outDegree[i]); j++) { ULONGINT_T v = GPtr->outVertexList[j]; ULONGINT_T k; for (k = GPtr->outVertexIndex[v]; k < (GPtr->outVertexIndex[v] + GPtr->outDegree[v]); k++) { if (GPtr->outVertexList[k] == i) { break; } } if (k == GPtr->outVertexIndex[v]+GPtr->outDegree[v]) { TM_BEGIN(); /* Add i to the impliedEdgeList of v */ long inDegree = (long)TM_SHARED_READ_L(GPtr->inDegree[v]); TM_SHARED_WRITE_L(GPtr->inDegree[v], (inDegree + 1)); if (inDegree < MAX_CLUSTER_SIZE) { TM_SHARED_WRITE_L(impliedEdgeList[v*MAX_CLUSTER_SIZE+inDegree], i); } else { /* Use auxiliary array to store the implied edge */ /* Create an array if it's not present already */ ULONGINT_T* a = NULL; if ((inDegree % MAX_CLUSTER_SIZE) == 0) { a = (ULONGINT_T*)TM_MALLOC(MAX_CLUSTER_SIZE * sizeof(ULONGINT_T)); assert(a); TM_SHARED_WRITE_P(auxArr[v], a); } else { a = auxArr[v]; } TM_SHARED_WRITE_L(a[inDegree % MAX_CLUSTER_SIZE], i); } TM_END(); } } } /* for i */ thread_barrier_wait(); prefix_sums(GPtr->inVertexIndex, GPtr->inDegree, GPtr->numVertices); if (myId == 0) { GPtr->numUndirectedEdges = GPtr->inVertexIndex[GPtr->numVertices-1] + GPtr->inDegree[GPtr->numVertices-1]; GPtr->inVertexList = (ULONGINT_T *)P_MALLOC(GPtr->numUndirectedEdges * sizeof(ULONGINT_T)); } thread_barrier_wait(); /* * Create the inVertex List */ for (i = i_start; i < i_stop; i++) { for (j = GPtr->inVertexIndex[i]; j < (GPtr->inVertexIndex[i] + GPtr->inDegree[i]); j++) { if ((j - GPtr->inVertexIndex[i]) < MAX_CLUSTER_SIZE) { GPtr->inVertexList[j] = impliedEdgeList[i*MAX_CLUSTER_SIZE+j-GPtr->inVertexIndex[i]]; } else { GPtr->inVertexList[j] = auxArr[i][(j-GPtr->inVertexIndex[i]) % MAX_CLUSTER_SIZE]; } } } thread_barrier_wait(); if (myId == 0) { P_FREE(impliedEdgeList); } for (i = i_start; i < i_stop; i++) { if (GPtr->inDegree[i] > MAX_CLUSTER_SIZE) { P_FREE(auxArr[i]); } } thread_barrier_wait(); if (myId == 0) { P_FREE(auxArr); } TM_THREAD_EXIT(); }
double betweennessCentrality(graph* G, DOUBLE_T* BC, int filter) { VERT_T *S; /* stack of vertices in the order of non-decreasing distance from s. Also used to implicitly represent the BFS queue */ plist* P; /* predecessors of a vertex v on shortest paths from s */ DOUBLE_T* sig; /* No. of shortest paths */ LONG_T* d; /* Length of the shortest path between every pair */ DOUBLE_T* del; /* dependency of vertices */ LONG_T *in_degree, *numEdges, *pSums; LONG_T *pListMem; LONG_T* Srcs; LONG_T *start, *end; LONG_T MAX_NUM_PHASES; LONG_T *psCount; #ifdef _OPENMP omp_lock_t* vLock; LONG_T chunkSize; #endif int seed = 2387; double elapsed_time; #ifdef _OPENMP #pragma omp parallel { #endif VERT_T *myS, *myS_t; LONG_T myS_size; LONG_T i, j, k, p, count, myCount; LONG_T v, w, vert; LONG_T numV, num_traversals, n, m, phase_num; LONG_T tid, nthreads; int* stream; #ifdef DIAGNOSTIC double elapsed_time_part; #endif #ifdef _OPENMP int myLock; tid = omp_get_thread_num(); nthreads = omp_get_num_threads(); #else tid = 0; nthreads = 1; #endif #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds(); } #endif /* numV: no. of vertices to run BFS from = 2^K4approx */ numV = 1<<K4approx; n = G->n; m = G->m; /* Permute vertices */ if (tid == 0) { Srcs = (LONG_T *) malloc(n*sizeof(LONG_T)); #ifdef _OPENMP vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t)); #endif } #ifdef _OPENMP #pragma omp barrier #pragma omp for for (i=0; i<n; i++) { omp_init_lock(&vLock[i]); } #endif /* Initialize RNG stream */ stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT); #ifdef _OPENMP #pragma omp for #endif for (i=0; i<n; i++) { Srcs[i] = i; } #ifdef _OPENMP #pragma omp for #endif for (i=0; i<n; i++) { j = n*sprng(stream); if (i != j) { #ifdef _OPENMP int l1 = omp_test_lock(&vLock[i]); if (l1) { int l2 = omp_test_lock(&vLock[j]); if (l2) { #endif k = Srcs[i]; Srcs[i] = Srcs[j]; Srcs[j] = k; #ifdef _OPENMP omp_unset_lock(&vLock[j]); } omp_unset_lock(&vLock[i]); } #endif } } #ifdef _OPENMP #pragma omp barrier #endif #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() -elapsed_time_part; fprintf(stderr, "Vertex ID permutation time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif /* Start timing code from here */ if (tid == 0) { elapsed_time = get_seconds(); #ifdef VERIFYK4 MAX_NUM_PHASES = 2*sqrt(n); #else MAX_NUM_PHASES = 50; #endif } #ifdef _OPENMP #pragma omp barrier #endif /* Initialize predecessor lists */ /* The size of the predecessor list of each vertex is bounded by its in-degree. So we first compute the in-degree of every vertex */ if (tid == 0) { P = (plist *) calloc(n, sizeof(plist)); in_degree = (LONG_T *) calloc(n+1, sizeof(LONG_T)); numEdges = (LONG_T *) malloc((n+1)*sizeof(LONG_T)); pSums = (LONG_T *) malloc(nthreads*sizeof(LONG_T)); } #ifdef _OPENMP #pragma omp barrier #pragma omp for #endif for (i=0; i<m; i++) { v = G->endV[i]; #ifdef _OPENMP omp_set_lock(&vLock[v]); #endif in_degree[v]++; #ifdef _OPENMP omp_unset_lock(&vLock[v]); #endif } prefix_sums(in_degree, numEdges, pSums, n); if (tid == 0) { pListMem = (LONG_T *) malloc(m*sizeof(LONG_T)); } #ifdef _OPENMP #pragma omp barrier #pragma omp for #endif for (i=0; i<n; i++) { P[i].list = pListMem + numEdges[i]; P[i].degree = in_degree[i]; P[i].count = 0; } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() - elapsed_time_part; fprintf(stderr, "In-degree computation time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif /* Allocate shared memory */ if (tid == 0) { free(in_degree); free(numEdges); free(pSums); S = (VERT_T *) malloc(n*sizeof(VERT_T)); sig = (DOUBLE_T *) malloc(n*sizeof(DOUBLE_T)); d = (LONG_T *) malloc(n*sizeof(LONG_T)); del = (DOUBLE_T *) calloc(n, sizeof(DOUBLE_T)); start = (LONG_T *) malloc(MAX_NUM_PHASES*sizeof(LONG_T)); end = (LONG_T *) malloc(MAX_NUM_PHASES*sizeof(LONG_T)); psCount = (LONG_T *) malloc((nthreads+1)*sizeof(LONG_T)); } /* local memory for each thread */ myS_size = (2*n)/nthreads; myS = (LONG_T *) malloc(myS_size*sizeof(LONG_T)); num_traversals = 0; myCount = 0; #ifdef _OPENMP #pragma omp barrier #endif #ifdef _OPENMP #pragma omp for #endif for (i=0; i<n; i++) { d[i] = -1; } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() -elapsed_time_part; fprintf(stderr, "BC initialization time: %lf seconds\n", elapsed_time_part); elapsed_time_part = get_seconds(); } #endif for (p=0; p<n; p++) { i = Srcs[p]; //printf ("%d \n", i); // i = p; if (G->numEdges[i+1] - G->numEdges[i] == 0) { continue; } else { num_traversals++; } if (num_traversals == numV + 1) { break; } if (tid == 0) { sig[i] = 1; d[i] = 0; S[0] = i; start[0] = 0; end[0] = 1; } count = 1; phase_num = 0; #ifdef _OPENMP #pragma omp barrier #endif while (end[phase_num] - start[phase_num] > 0) { myCount = 0; #ifdef _OPENMP #pragma omp barrier #pragma omp for schedule(dynamic) #endif for (vert = start[phase_num]; vert < end[phase_num]; vert++) { v = S[vert]; for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) { if ((G->weight[j] & 7) == 0 && filter==1) continue; w = G->endV[j]; if (v != w) { #ifdef _OPENMP myLock = omp_test_lock(&vLock[w]); if (myLock) { #endif /* w found for the first time? */ if (d[w] == -1) { if (myS_size == myCount) { /* Resize myS */ myS_t = (LONG_T *) malloc(2*myS_size*sizeof(VERT_T)); memcpy(myS_t, myS, myS_size*sizeof(VERT_T)); free(myS); myS = myS_t; myS_size = 2*myS_size; } myS[myCount++] = w; d[w] = d[v] + 1; sig[w] = sig[v]; P[w].list[P[w].count++] = v; } else if (d[w] == d[v] + 1) { sig[w] += sig[v]; P[w].list[P[w].count++] = v; } #ifdef _OPENMP omp_unset_lock(&vLock[w]); } else { if ((d[w] == -1) || (d[w] == d[v]+ 1)) { omp_set_lock(&vLock[w]); sig[w] += sig[v]; P[w].list[P[w].count++] = v; omp_unset_lock(&vLock[w]); } } #endif } } } /* Merge all local stacks for next iteration */ phase_num++; psCount[tid+1] = myCount; #ifdef _OPENMP #pragma omp barrier #endif if (tid == 0) { start[phase_num] = end[phase_num-1]; psCount[0] = start[phase_num]; for(k=1; k<=nthreads; k++) { psCount[k] = psCount[k-1] + psCount[k]; } end[phase_num] = psCount[nthreads]; } #ifdef _OPENMP #pragma omp barrier #endif for (k = psCount[tid]; k < psCount[tid+1]; k++) { S[k] = myS[k-psCount[tid]]; } #ifdef _OPENMP #pragma omp barrier #endif count = end[phase_num]; } phase_num--; #ifdef _OPENMP #pragma omp barrier #endif //printf ("%d\n", phase_num); while (phase_num > 0) { #ifdef _OPENMP #pragma omp for #endif for (j=start[phase_num]; j<end[phase_num]; j++) { w = S[j]; for (k = 0; k<P[w].count; k++) { v = P[w].list[k]; #ifdef _OPENMP omp_set_lock(&vLock[v]); #endif del[v] = del[v] + sig[v]*(1+del[w])/sig[w]; #ifdef _OPENMP omp_unset_lock(&vLock[v]); #endif } BC[w] += del[w]; } phase_num--; #ifdef _OPENMP #pragma omp barrier #endif } #ifdef _OPENMP chunkSize = n/nthreads; #pragma omp for schedule(static, chunkSize) #endif for (j=0; j<count; j++) { w = S[j]; //fprintf (stderr, "w: %d\n", w); d[w] = -1; del[w] = 0; P[w].count = 0; } #ifdef _OPENMP #pragma omp barrier #endif } #ifdef DIAGNOSTIC if (tid == 0) { elapsed_time_part = get_seconds() -elapsed_time_part; fprintf(stderr, "BC computation time: %lf seconds\n", elapsed_time_part); } #endif #ifdef _OPENMP #pragma omp for for (i=0; i<n; i++) { omp_destroy_lock(&vLock[i]); } #endif free(myS); if (tid == 0) { free(S); free(pListMem); free(P); free(sig); free(d); free(del); #ifdef _OPENMP free(vLock); #endif free(start); free(end); free(psCount); elapsed_time = get_seconds() - elapsed_time; free(Srcs); } free_sprng(stream); #ifdef _OPENMP } #endif /* Verification */ #ifdef VERIFYK4 double BCval; if (SCALE % 2 == 0) { BCval = 0.5*pow(2, 3*SCALE/2)-pow(2, SCALE)+1.0; } else { BCval = 0.75*pow(2, (3*SCALE-1)/2)-pow(2, SCALE)+1.0; } int failed = 0; for (int i=0; i<G->n; i++) { if (round(BC[i] - BCval) != 0) { failed = 1; break; } } if (failed) { fprintf(stderr, "Kernel 4 failed validation!\n"); } else { fprintf(stderr, "Kernel 4 validation successful!\n"); } #endif for (int i = 0; i < G->n; i++) printf ("BC: %d %f\n",i, BC[i]); return elapsed_time; }