Example #1
File: tar2a.c Project: kozk/kozk.github.io
void productor(){
  while(1){
    omp_set_lock(&CS);

    if(pet==0){
      rsp=rsp%10;
      rsp++;
    }	else if(pet==1){
      rsp=(rsp%10)+10;
      rsp++;
    }	else if(pet==2){
      rsp=(rsp%10)+20;
      rsp++;
    }
    int id=omp_get_thread_num();

    printf("----Productor %d con petición: %d y respuesta: %d\n",id,pet,rsp);
    
//    omp_unset_lock(&CC); // For the case without a distributor
    omp_unset_lock(&CD);
    sleep(1);
  }
}
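Note: the locks CS and CD (and the commented-out CC), as well as pet and rsp, are globals that this excerpt does not show. A minimal sketch of the setup the function presumably relies on (initialization assumed, not part of the original tar2a.c):

omp_lock_t CS, CC, CD;   /* assumption: global locks shared with the consumer/distributor threads */

void init_locks(void){
  omp_init_lock(&CS);
  omp_init_lock(&CC);
  omp_init_lock(&CD);
}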
Example #2
File: sync.c Project: jsjessen/school
/* Increments a shared counter using OpenMP lock functions
 * */
void foo_locks(long long int n) {
	long long int a=0;
	long long int i;

	omp_lock_t my_lock;

	// init lock
	omp_init_lock(&my_lock);

	double time = omp_get_wtime();
	#pragma omp parallel for schedule(static) shared(a)	
	for(i = 0; i < n; i++) 
	{	
		omp_set_lock(&my_lock);
		a+=1;
		omp_unset_lock(&my_lock);
	}
	omp_destroy_lock(&my_lock);
	
	time = omp_get_wtime() - time;
	printf("Final value = %d \n ", a);
	printf("Locks: Total time = %f seconds \n ", time);
} // end foo_locks
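For comparison, a minimal sketch (not part of the original sync.c, assumes the same omp.h/stdio.h setup) of the same shared counter updated with an atomic construct instead of an explicit lock; an atomic update typically maps to a single hardware read-modify-write and avoids the per-iteration lock/unlock calls.

void foo_atomic(long long int n) {
	long long int a=0;
	long long int i;

	double time = omp_get_wtime();
	#pragma omp parallel for schedule(static) shared(a)
	for(i = 0; i < n; i++)
	{
		#pragma omp atomic
		a += 1;        /* single atomic update, no lock needed */
	}

	time = omp_get_wtime() - time;
	printf("Final value = %lld \n ", a);
	printf("Atomic: Total time = %f seconds \n ", time);
} // end foo_atomic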
Example #3
/* Gets the next chunk of iterations to perform for the given thread_id.
 * If USE_LOCKS is TRUE, the shared read-modify-write is protected with a per-thread lock;
 * otherwise it is done inside a named critical region.
*/
void get_chunks(int thread_id, double K, int* start_iter, int* chunk)
{
	int remaining_iters_num, chunk_size;
	if(USE_LOCKS == FALSE) {
		#pragma omp critical (chunk)
		{
			remaining_iters_num = remaining_iters[thread_id];
			chunk_size = (int) ceil((double)remaining_iters_num*K);
			if (chunk_size > remaining_iters_num) chunk_size = remaining_iters_num;
			remaining_iters[thread_id] = (remaining_iters_num - chunk_size);
		}
	} else {
		omp_set_lock(&(remaining_iters_lock[thread_id]));
		remaining_iters_num = remaining_iters[thread_id];
		chunk_size = (int) ceil((double)remaining_iters_num*K);
		if (chunk_size > remaining_iters_num) chunk_size = remaining_iters_num;
		remaining_iters[thread_id] = (remaining_iters_num - chunk_size);
		omp_unset_lock(&(remaining_iters_lock[thread_id]));
	}
	*start_iter = hi[thread_id]-remaining_iters_num;
	*chunk = chunk_size;
		
}
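The per-thread locks used above have to be initialized before get_chunks() is first called. A minimal sketch, assuming remaining_iters_lock is an array holding one omp_lock_t per thread (the original excerpt does not show its definition):

	/* Sketch only, not from the original source; num_threads is a placeholder for the thread count. */
	for (int t = 0; t < num_threads; t++)
		omp_init_lock(&(remaining_iters_lock[t]));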
Example #4
void IntList_Insert(pIntList pList, int x, pArrNode an) 
{
	pIntListNode prev, p , newNode;
	// assert(newNode!=NULL);

	omp_set_lock(&listLock);
	newNode = ArrNode_getNode(an);

	if (pList->head == NULL) { /* list is empty, insert the first element */
		pList->head = newNode;
	}
	else { /* list is not empty, find the right place to insert element */
		p = pList->head;
		prev = NULL;
		while (p != NULL && p->data < newNode->data) {
			prev = p;
			p = p->next;
		}

		if (p == NULL) { /* insert as the last element */
			prev->next = newNode;
			newNode->prev = prev;
		}
		else if (prev == NULL) { /* insert as the first element */
			pList->head = newNode;
			newNode->next = p;
			p->prev = newNode;
		}
		else { /* insert right between prev and p */
			prev->next = newNode;
			newNode->prev = prev;
			newNode->next = p;
			p->prev = newNode;
		}
	}
	omp_unset_lock(&listLock);
}
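The single global lock listLock that serializes the insert is declared and initialized outside this excerpt; a hedged sketch of what that setup presumably looks like (the helper names here are hypothetical):

omp_lock_t listLock;                                             /* one lock guards the whole list */

void IntList_LockInit(void)    { omp_init_lock(&listLock); }     /* call once before any parallel inserts */
void IntList_LockDestroy(void) { omp_destroy_lock(&listLock); }  /* call after the parallel work is done  */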
Example #5
double computeGraph(graph* G, graphSDG* SDGdata) {

    VERT_T* endV;
    LONG_T *degree, *numEdges, *pos, *pSums;
    WEIGHT_T* w;
    double elapsed_time;

#ifdef _OPENMP
    omp_lock_t *vLock;
    LONG_T chunkSize;
#endif

    elapsed_time = get_seconds();

#ifdef _OPENMP
    omp_set_num_threads(NUM_THREADS);
#endif

#ifdef _OPENMP
#pragma omp parallel
#endif    
{
    LONG_T i, j, u, n, m, tid, nthreads;
#ifdef DIAGNOSTIC
    double elapsed_time_part;
#endif
    
#ifdef _OPENMP    
    nthreads = omp_get_num_threads();
    tid = omp_get_thread_num();
#else
    tid = 0;
    nthreads = 1;
#endif

    n = N;
    m = M;
    
    if (tid == 0) {
#ifdef _OPENMP
        vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
        assert(vLock != NULL);
        chunkSize = n/nthreads;
#endif
        pos = (LONG_T *) malloc(m*sizeof(LONG_T));
        assert(pos != NULL);
        degree = (LONG_T *) calloc(n, sizeof(LONG_T));
        assert(degree != NULL);
    }
  
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds();
    }
#endif
    
#ifdef _OPENMP    
#pragma omp barrier
    
    #pragma omp for schedule(static, chunkSize)
    for (i=0; i<n; i++) {
        omp_init_lock(&vLock[i]);
    }

    #pragma omp barrier
  
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Lock initialization time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
 
    #pragma omp for
#endif
    for (i=0; i<m; i++) {
        u = SDGdata->startVertex[i];
#ifdef _OPENMP        
        omp_set_lock(&vLock[u]);
#endif
        pos[i] = degree[u]++;
#ifdef _OPENMP
        omp_unset_lock(&vLock[u]);
#endif
    } 
   
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Degree computation time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
   
#ifdef _OPENMP
#pragma omp barrier

#pragma omp for schedule(static, chunkSize)
    for (i=0; i<n; i++) {
        omp_destroy_lock(&vLock[i]);
    }

    if (tid == 0) 
        free(vLock);
#endif
    
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Lock destruction time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
   
    if (tid == 0) {
        numEdges = (LONG_T *) malloc((n+1)*sizeof(LONG_T));
        pSums = (LONG_T *) malloc(nthreads*sizeof(LONG_T));
   }

#ifdef _OPENMP
#pragma omp barrier
#endif

    prefix_sums(degree, numEdges, pSums, n); 
    
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Prefix sums time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
 
#ifdef _OPENMP
#pragma omp barrier
#endif

    if (tid == 0) {
        free(degree);
        free(pSums);
        w = (WEIGHT_T *) malloc(m*sizeof(WEIGHT_T));
        endV = (VERT_T *) malloc(m* sizeof(VERT_T));
    }

#ifdef _OPENMP
    #pragma omp barrier

    #pragma omp for
#endif
    for (i=0; i<m; i++) {
        u = SDGdata->startVertex[i];
        j = numEdges[u] + pos[i];
        endV[j] = SDGdata->endVertex[i];
        //TODO: 
		//w[j] = SDGdata->weight[i]; 
		fprintf(stderr, "%d\n", SDGdata->weight[i]);
		w[j] = 1; 
    }
    
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Edge data structure construction time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
 
    if (tid == 0) {
        free(pos);
        G->n = n;
        G->m = m;
        G->numEdges = numEdges;
        G->endV = endV;
        G->weight = w;
    }
#ifdef _OPENMP    
#endif
}
    /* Verification */
#if 0 
    fprintf(stderr, "SDG data:\n");
    for (int i=0; i<SDGdata->m; i++) {
        fprintf(stderr, "[%ld %ld %ld] ", SDGdata->startVertex[i], 
                SDGdata->endVertex[i], SDGdata->weight[i]);
    }
 
    fprintf(stderr, "\n");

    for (int i=0; i<G->n + 1; i++) {
        fprintf(stderr, "[%ld] ", G->numEdges[i]);
    }
    
    fprintf(stderr, "\nGraph:\n");
    for (int i=0; i<G->n; i++) {
        for (int j=G->numEdges[i]; j<G->numEdges[i+1]; j++) {
            fprintf(stderr, "[%ld %ld %ld] ", i, G->endV[j], G->weight[j]);
        }
    }
#endif 
    
    free(SDGdata->startVertex);
    free(SDGdata->endVertex);
    free(SDGdata->weight);
    
    elapsed_time = get_seconds() - elapsed_time; 
    
    return elapsed_time;
}
Example #6
File: kd.c Project: N-BodyShop/fof
int kdFoF(KD kd,float fEps)
{
	PARTICLE *p;
	KDN *c;
	int pi,pj,pn,cp;

	int iGroup;

	int *Fifo,iHead,iTail,nFifo;
	float fEps2;
	float dx,dy,dz,x,y,z,lx,ly,lz,sx,sy,sz,fDist2;
#ifdef _OPENMP
    int idSelf;
    omp_lock_t *locks;

	for (pn=0;pn<kd->nActive;++pn) kd->p[pn].iTouched = -1;
    /* We really want to make an independent lock for each particle.  However, each lock
     * seems to use a buttload of memory (something like 312 bytes per lock).  Therefore,
     * to ensure that we don't use too much memory, only use 1 lock per 100 particles.
     * This should still provide very low lock contention while not using oodles of
     * memory at the same time, since it is extremely rare that two threads will be
     * looking at two particles that map to the same lock at the same time. */
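    /* Assumption (the _hashLock() used below is defined elsewhere in this project):
     * the particle index is presumably folded into the lock table with something
     * like  pn % kd->nHash,  so roughly 100 particles share each lock. */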
    kd->nHash = (int)(kd->nActive/100);
    locks = (omp_lock_t *)malloc(kd->nHash*sizeof(omp_lock_t));
    assert(locks != NULL);
    for (pn=0;pn<kd->nHash;++pn) omp_init_lock(&locks[pn]);
#endif

	p = kd->p;
	c = kd->kdNodes;
	lx = kd->fPeriod[0];
	ly = kd->fPeriod[1];
	lz = kd->fPeriod[2];
	fEps2 = fEps*fEps;
	for (pn=0;pn<kd->nActive;++pn) p[pn].iGroup = 0;
#pragma omp parallel default(none) shared(kd,locks,p,c,lx,ly,lz,fEps2) \
    private(pi,pj,pn,cp,iGroup,Fifo,iHead,iTail,dx,dy,dz,x,y,z,sx,sy,sz,fDist2,idSelf,nFifo)
  {
#ifdef _OPENMP
    nFifo = kd->nActive/omp_get_num_threads();
    idSelf = omp_get_thread_num();
#else
    nFifo = kd->nActive;
#endif
	Fifo = (int *)malloc(nFifo*sizeof(int));
	assert(Fifo != NULL);
	iHead = 0;
	iTail = 0;
	iGroup = 0;
#pragma omp for schedule(runtime)
	for (pn=0;pn<kd->nActive;++pn) {
		if (p[pn].iGroup) continue;
		/*
		 ** Mark it and add to the do-fifo.
		 */
#ifdef _OPENMP
        omp_set_lock(&locks[_hashLock(kd,pn)]);
        if (p[pn].iTouched >= 0 && p[pn].iTouched < idSelf ) {
            assert(p[pn].iGroup > 0);
            omp_unset_lock(&locks[_hashLock(kd,pn)]);
            continue;
        }
        p[pn].iTouched = idSelf;
        iGroup = pn+1;
		p[pn].iGroup = iGroup;
        omp_unset_lock(&locks[_hashLock(kd,pn)]);
#else
		++iGroup;
		p[pn].iGroup = iGroup;
#endif
		Fifo[iTail++] = pn;
		if (iTail == nFifo) iTail = 0;
		while (iHead != iTail) {
			pi = Fifo[iHead++];
			if (iHead == nFifo) iHead = 0;
			/*
			 ** Now do an fEps-Ball Gather!
			 */
			x = p[pi].r[0];
			y = p[pi].r[1];
			z = p[pi].r[2];
			cp = ROOT;
			while (1) {
				INTERSECT(c,cp,fEps2,lx,ly,lz,x,y,z,sx,sy,sz);
				/*
				 ** We have an intersection to test.
				 */
				if (c[cp].iDim >= 0) {
					cp = LOWER(cp);
					continue;
					}
				else {
					for (pj=c[cp].pLower;pj<=c[cp].pUpper;++pj) {
#ifdef _OPENMP
                        if (p[pj].iGroup == iGroup) {
                            /* We have already looked at this particle */
                            //assert(p[pj].iTouched == idSelf);  particle is not locked.
                            continue;
                        }
                        if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                            /* Somebody more important than us is already looking at this
                             * particle.  However, we do not yet know if this particle belongs
                             * in our group, so just skip it to save time but don't restart the
                             * entire group. */
                            // assert(p[pj].iGroup > 0); particle is not locked
                            continue;
                        }
#else
						if (p[pj].iGroup) continue;
#endif
						dx = sx - p[pj].r[0];
						dy = sy - p[pj].r[1];
						dz = sz - p[pj].r[2];
						fDist2 = dx*dx + dy*dy + dz*dz;
						if (fDist2 < fEps2) {
							/*
							 ** Mark it and add to the do-fifo.
							 */
#ifdef _OPENMP
                            omp_set_lock(&locks[_hashLock(kd,pj)]);
                            if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                                /* Now we know this particle should be in our group.  If somebody more
                                 * important than us touched it, abort the entire group. */
                                assert(p[pj].iGroup > 0);
                                omp_unset_lock(&locks[_hashLock(kd,pj)]);
                                iHead = iTail;
                                /*printf("Thread %d: Aborting group %d. p[%d].iOrder  p.iGroup=%d  p.iTouched=%d (Per-Particle2)\n",
                                  idSelf, iGroup, pj, p[pj].iOrder, p[pj].iGroup, p[pj].iTouched);*/
                                goto RestartSnake;
                            }
                            p[pj].iTouched = idSelf;
							p[pj].iGroup = iGroup;
                            omp_unset_lock(&locks[_hashLock(kd,pj)]);
#else
							p[pj].iGroup = iGroup;
#endif
							Fifo[iTail++] = pj;
							if (iTail == nFifo) iTail = 0;
							}
						}
					SETNEXT(cp);
					if (cp == ROOT) break;
					continue;
					}
			ContainedCell:
				for (pj=c[cp].pLower;pj<=c[cp].pUpper;++pj) {
#ifdef _OPENMP
                    if (p[pj].iGroup == iGroup) continue;
                    if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                        /* Somebody more important than us is already looking at this
                         * group.  Abort this entire group! */
                        //assert(p[pj].iGroup > 0); particle is not locked
                        iHead = iTail;
                        /*printf("Thread %d: Aborting group %d. p[%d].iOrder=%d  p.iGroup=%d  p.iTouched=%d (Per-Cell1)\n",
                          idSelf, iGroup, pj, p[pj].iOrder, p[pj].iGroup, p[pj].iTouched);*/
                        goto RestartSnake;
                    }
#else
					if (p[pj].iGroup) continue;
#endif                    
					/*
					 ** Mark it and add to the do-fifo.
					 */
#ifdef _OPENMP
                    omp_set_lock(&locks[_hashLock(kd,pj)]);
                    if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                        /* Check again in case somebody touched it before the lock. */
                        assert(p[pj].iGroup > 0);
                        omp_unset_lock(&locks[_hashLock(kd,pj)]);
                        iHead = iTail;
                        /*printf("Thread %d: Aborting group %d.  p[%d].iGroup=%d  p[%d].iTouched=%d (Per-Cell2)\n",
                          idSelf, iGroup, pj, p[pj].iGroup, pj, p[pj].iTouched);*/
                        goto RestartSnake;
                    }
                    p[pj].iTouched = idSelf;
                    p[pj].iGroup = iGroup;
                    omp_unset_lock(&locks[_hashLock(kd,pj)]);
#else
					p[pj].iGroup = iGroup;
#endif
					Fifo[iTail++] = pj;
					if (iTail == nFifo) iTail = 0;
					}
			GetNextCell:
				SETNEXT(cp);
				if (cp == ROOT) break;
            }
        } /* End while(iHead != iTail) */
#ifdef _OPENMP
    RestartSnake:
#endif
        assert(iHead == iTail);
    }
	free(Fifo);
  }  /* End of the OpenMP PARALLEL section */

#ifdef _OPENMP
    /* Now we have to count how many groups there are.  This is straightforward,
     * since the number of groups is the number of particles whose groupID equals
     * their particleID+1. */
    pj = 0;
	for (pn=0;pn<kd->nActive;++pn)
        if (p[pn].iGroup == pn+1) ++pj;
    kd->nGroup = (kd->nActive)+1;
    free(locks);
#else
	kd->nGroup = iGroup+1;
#endif
	return(kd->nGroup-1);
	}
Example #7
void vertex_betweenness_centrality_parBFS(graph_t* G, double* BC, long numSrcs) {

    attr_id_t *S;      /* stack of vertices in the order of non-decreasing 
                          distance from s. Also used to implicitly 
                          represent the BFS queue */
    plist_t* P;        /* predecessors of a vertex v on shortest paths from s */
    double* sig;       /* No. of shortest paths */
    attr_id_t* d;      /* Length of the shortest path between every pair */
    double* del;       /* dependency of vertices */
    attr_id_t *in_degree, *numEdges, *pSums;
    attr_id_t* pListMem;    
#if RANDSRCS
    attr_id_t* Srcs; 
#endif
    attr_id_t *start, *end;
    long MAX_NUM_PHASES;
    attr_id_t *psCount;

#ifdef _OPENMP    
    omp_lock_t* vLock;
    long chunkSize;
#endif
#ifdef DIAGNOSTIC
    double elapsed_time;
#endif
    int seed = 2387;

#ifdef _OPENMP    
#pragma omp parallel firstprivate(G)
    {
#endif

        attr_id_t *myS, *myS_t;
        attr_id_t myS_size;
        long i, j, k, p, count, myCount;
        long v, w, vert;
        long k0, k1;
        long numV, num_traversals, n, m, phase_num;
        long start_iter, end_iter;
        long tid, nthreads;
        int* stream;
#ifdef DIAGNOSTIC
        double elapsed_time_part;
#endif

#ifdef _OPENMP
        int myLock;
        tid = omp_get_thread_num();
        nthreads = omp_get_num_threads();
#else
        tid = 0;
        nthreads = 1;
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time = get_seconds();
            elapsed_time_part = get_seconds();
        }
#endif

        /* numV: no. of vertices to run BFS from = numSrcs */
        numV = numSrcs;
        n = G->n;
        m = G->m;

        /* Permute vertices */
        if (tid == 0) {
#if RANDSRCS
            Srcs = (attr_id_t *) malloc(n*sizeof(attr_id_t));
#endif
#ifdef _OPENMP
            vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
        }

#ifdef _OPENMP   
#pragma omp barrier
#pragma omp for
        for (i=0; i<n; i++) {
            omp_init_lock(&vLock[i]);
        }
#endif

        /* Initialize RNG stream */ 
        stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#if RANDSRCS
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            Srcs[i] = i;
        }

#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            j = n * sprng(stream);
            if (i != j) {
#ifdef _OPENMP
                int l1 = omp_test_lock(&vLock[i]);
                if (l1) {
                    int l2 = omp_test_lock(&vLock[j]);
                    if (l2) {
#endif
                        k = Srcs[i];
                        Srcs[i] = Srcs[j];
                        Srcs[j] = k;
#ifdef _OPENMP  
                        omp_unset_lock(&vLock[j]);
                    }
                    omp_unset_lock(&vLock[i]);
                }
#endif        
            }
        } 
#endif

#ifdef _OPENMP    
#pragma omp barrier
#endif

        if (tid == 0) {
            MAX_NUM_PHASES = 500;
        }

#ifdef _OPENMP
#pragma omp barrier    
#endif

        /* Initialize predecessor lists */

        /* The size of the predecessor list of each vertex is bounded by 
           its in-degree. So we first compute the in-degree of every
           vertex */ 

        if (tid == 0) {
            P   = (plist_t  *) calloc(n, sizeof(plist_t));
            in_degree = (attr_id_t *) calloc(n+1, sizeof(attr_id_t));
            numEdges = (attr_id_t *) malloc((n+1)*sizeof(attr_id_t));
            pSums = (attr_id_t *) malloc(nthreads*sizeof(attr_id_t));
        }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<m; i++) {
            v = G->endV[i];
#ifdef _OPENMP
            omp_set_lock(&vLock[v]);
#endif
            in_degree[v]++;
#ifdef _OPENMP
            omp_unset_lock(&vLock[v]);
#endif
        }

        prefix_sums(in_degree, numEdges, pSums, n);

        if (tid == 0) {
            pListMem = (attr_id_t *) malloc(m*sizeof(attr_id_t));
        }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            P[i].list = pListMem + numEdges[i];
            P[i].degree = in_degree[i];
            P[i].count = 0;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "In-degree computation time: %lf seconds\n", 
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* Allocate shared memory */ 
        if (tid == 0) {
            free(in_degree);
            free(numEdges);
            free(pSums);

            S   = (attr_id_t *) malloc(n*sizeof(attr_id_t));
            sig = (double *) malloc(n*sizeof(double));
            d   = (attr_id_t *) malloc(n*sizeof(attr_id_t));
            del = (double *) calloc(n, sizeof(double));

            start = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
            end = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
            psCount = (attr_id_t *) malloc((nthreads+1)*sizeof(attr_id_t));
        }

        /* local memory for each thread */  
        myS_size = (2*n)/nthreads;
        myS = (attr_id_t *) malloc(myS_size*sizeof(attr_id_t));
        num_traversals = 0;
        myCount = 0;

#ifdef _OPENMP    
#pragma omp barrier
#endif

#ifdef _OPENMP    
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            d[i] = -1;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC initialization time: %lf seconds\n", 
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        for (p=0; p<n; p++) {
#if RANDSRCS
            i = Srcs[p];
#else
            i = p;
#endif
            if (G->numEdges[i+1] - G->numEdges[i] == 0) {
                continue;
            } else {
                num_traversals++;
            }

            if (num_traversals == numV + 1) {
                break;
            }

            if (tid == 0) {
                sig[i] = 1;
                d[i] = 0;
                S[0] = i;
                start[0] = 0;
                end[0] = 1;
            }

            count = 1;
            phase_num = 0;

#ifdef _OPENMP       
#pragma omp barrier
#endif

            while (end[phase_num] - start[phase_num] > 0) {

                myCount = 0;
                start_iter = start[phase_num];
                end_iter = end[phase_num];
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for schedule(dynamic) nowait
#endif
                for (vert = start_iter; vert < end_iter; vert++) {
                    v = S[vert];
                    for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {

                        w = G->endV[j];
                        if (v != w) {

#ifdef _OPENMP                            
                            myLock = omp_test_lock(&vLock[w]);
                            if (myLock) { 
#endif             
                                /* w found for the first time? */ 
                                if (d[w] == -1) {
                                    if (myS_size == myCount) {
                                        /* Resize myS */
                                        myS_t = (attr_id_t *)
                                            malloc(2*myS_size*sizeof(attr_id_t));
                                        memcpy(myS_t, myS, 
                                                myS_size*sizeof(attr_id_t));
                                        free(myS);
                                        myS = myS_t;
                                        myS_size = 2*myS_size;
                                    }
                                    myS[myCount++] = w;
                                    d[w] = d[v] + 1;
                                    sig[w] = sig[v];
                                    P[w].list[P[w].count++] = v;
                                } else if (d[w] == d[v] + 1) {
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                }
#ifdef _OPENMP  

                                omp_unset_lock(&vLock[w]);
                            } else {
                                if ((d[w] == -1) || (d[w] == d[v]+ 1)) {
                                    omp_set_lock(&vLock[w]);
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                    omp_unset_lock(&vLock[w]);
                                }
                            }
#endif

                        }
                    }
                }
                /* Merge all local stacks for next iteration */
                phase_num++; 
                if (tid == 0) {
                    if (phase_num >= MAX_NUM_PHASES) {
                        fprintf(stderr, "Error: Max num phases set to %ld\n",
                                MAX_NUM_PHASES);
                        fprintf(stderr, "Diameter of input network greater than"
                                " this value. Increase MAX_NUM_PHASES"
                                " in vertex_betweenness_centrality_parBFS()\n");
                        exit(-1);
                    }
                }

                psCount[tid+1] = myCount;

#ifdef _OPENMP
#pragma omp barrier
#endif

                if (tid == 0) {
                    start[phase_num] = end[phase_num-1];
                    psCount[0] = start[phase_num];
                    for(k=1; k<=nthreads; k++) {
                        psCount[k] = psCount[k-1] + psCount[k];
                    }
                    end[phase_num] = psCount[nthreads];
                }



#ifdef _OPENMP
#pragma omp barrier
#endif

                k0 = psCount[tid]; 
                k1 = psCount[tid+1];
                for (k = k0; k < k1; k++) {
                    S[k] = myS[k-k0];
                } 

                count = end[phase_num];
            }

            phase_num--;

            while (phase_num > 0) {
                start_iter = start[phase_num];
                end_iter = end[phase_num];
#ifdef _OPENMP        
#pragma omp for schedule(static) nowait
#endif
                for (j=start_iter; j<end_iter; j++) {
                    w = S[j];
                    for (k = 0; k<P[w].count; k++) {
                        v = P[w].list[k];
#ifdef _OPENMP
                        omp_set_lock(&vLock[v]);
#endif
                        del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
#ifdef _OPENMP
                        omp_unset_lock(&vLock[v]);
#endif
                    }
                    BC[w] += del[w];
                }

                phase_num--;

#ifdef _OPENMP
#pragma omp barrier
#endif            
            }


#ifdef _OPENMP
            chunkSize = n/nthreads;
#pragma omp for schedule(static, chunkSize) nowait
#endif
            for (j=0; j<count; j++) {
                w = S[j];
                d[w] = -1;
                del[w] = 0;
                P[w].count = 0;
            }


#ifdef _OPENMP
#pragma omp barrier
#endif

        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC computation time: %lf seconds\n", 
                    elapsed_time_part);
        }
#endif


#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef _OPENMP
#pragma omp for
        for (i=0; i<n; i++) {
            omp_destroy_lock(&vLock[i]);
        }
#endif

        free(myS);

        if (tid == 0) { 
            free(S);
            free(pListMem);
            free(P);
            free(sig);
            free(d);
            free(del);
#ifdef _OPENMP
            free(vLock);
#endif
            free(start);
            free(end);
            free(psCount);

#ifdef DIAGNOSTIC
            elapsed_time = get_seconds() - elapsed_time;
            fprintf(stderr, "Time taken: %lf\n seconds", elapsed_time);
#endif

#if RANDSRCS
            free(Srcs);
#endif
        }

        free_sprng(stream);
#ifdef _OPENMP
    }    
#endif

}
Example #8
File: lib-1.c Project: 0day-ci/gcc
int
main (void)
{
  double d, e;
  int l;
  omp_lock_t lck;
  omp_nest_lock_t nlck;

  d = omp_get_wtime ();

  omp_init_lock (&lck);
  omp_set_lock (&lck);
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  if (! omp_test_lock (&lck))
    abort ();
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  omp_destroy_lock (&lck);

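  /* omp_test_nest_lock returns the new nesting count when it acquires the lock
     (and 0 when it fails), so the values checked below track how many times
     the calling thread currently holds nlck.  */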
  omp_init_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 1)
    abort ();
  omp_set_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 3)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 2)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  omp_destroy_nest_lock (&nlck);

  omp_set_dynamic (1);
  if (! omp_get_dynamic ())
    abort ();
  omp_set_dynamic (0);
  if (omp_get_dynamic ())
    abort ();

  omp_set_nested (1);
  if (! omp_get_nested ())
    abort ();
  omp_set_nested (0);
  if (omp_get_nested ())
    abort ();

  omp_set_num_threads (5);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 5)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();
  omp_set_num_threads (3);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 3)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();
  l = 0;
#pragma omp parallel reduction (|:l)
  {
    l = omp_get_num_threads () != 3;
    l |= omp_get_thread_num () < 0;
    l |= omp_get_thread_num () >= 3;
#pragma omp master
    l |= omp_get_thread_num () != 0;
  }
  if (l)
    abort ();

  if (omp_get_num_procs () <= 0)
    abort ();
  if (omp_in_parallel ())
    abort ();
#pragma omp parallel reduction (|:l)
  l = ! omp_in_parallel ();
#pragma omp parallel reduction (|:l) if (1)
  l = ! omp_in_parallel ();
  if (l)
    abort ();

  e = omp_get_wtime ();
  if (d > e)
    abort ();
  d = omp_get_wtick ();
  /* Negative precision is definitely wrong,
     bigger than 1s clock resolution is also strange.  */
  if (d <= 0 || d > 1)
    abort ();

  return 0;
}
Example #9
File: sort-1.c Project: abumaryam/gcc
static void
sort1 (int *array, int count)
{
  omp_lock_t lock;
  struct int_pair_stack global_stack;
  int busy = 1;
  int num_threads;

  omp_init_lock (&lock);
  init_int_pair_stack (&global_stack);
  #pragma omp parallel firstprivate (array, count)
  {
    int lo = 0, hi = 0, mid, next_lo, next_hi;
    bool idle = true;
    struct int_pair_stack local_stack;

    init_int_pair_stack (&local_stack);
    if (omp_get_thread_num () == 0)
      {
	num_threads = omp_get_num_threads ();
	hi = count - 1;
	idle = false;
      }

    for (;;)
      {
	if (hi - lo < THRESHOLD)
	  {
	    insertsort (array, lo, hi);
	    lo = hi;
	  }
	if (lo >= hi)
	  {
	    if (size_int_pair_stack (&local_stack) == 0)
	      {
	      again:
		omp_set_lock (&lock);
		if (size_int_pair_stack (&global_stack) == 0)
		  {
		    if (!idle)
		      busy--;
		    if (busy == 0)
		      {
			omp_unset_lock (&lock);
			break;
		      }
		    omp_unset_lock (&lock);
		    idle = true;
		    while (size_int_pair_stack (&global_stack) == 0
			   && busy)
		      busy_wait ();
		    goto again;
		  }
		if (idle)
		  busy++;
		pop_int_pair_stack (&global_stack, &lo, &hi);
		omp_unset_lock (&lock);
		idle = false;
	      }
	    else
	      pop_int_pair_stack (&local_stack, &lo, &hi);
	  }

	mid = partition (array, lo, hi);
	if (mid - lo < hi - mid)
	  {
	    next_lo = mid;
	    next_hi = hi;
	    hi = mid - 1;
	  }
	else
	  {
	    next_lo = lo;
	    next_hi = mid - 1;
	    lo = mid;
	  }

	if (next_hi - next_lo < THRESHOLD)
	  insertsort (array, next_lo, next_hi);
	else
	  {
	    if (size_int_pair_stack (&global_stack) < num_threads - 1)
	      {
		int size;

		omp_set_lock (&lock);
		size = size_int_pair_stack (&global_stack);
		if (size < num_threads - 1 && size < STACK_SIZE)
		  push_int_pair_stack (&global_stack, next_lo, next_hi);
		else
		  push_int_pair_stack (&local_stack, next_lo, next_hi);
		omp_unset_lock (&lock);
	      }
	    else
	      push_int_pair_stack (&local_stack, next_lo, next_hi);
	  }
      }
    }
  omp_destroy_lock (&lock);
}
Example #10
File: shared.c Project: kempj/Kernels
int main(int argc, char ** argv)
{
  long       iterations;      /* total number of reference pair counter updates */
  long       stream_size;     /* length of stream triad creating private work   */ 
  int        page_fit;        /* indicates that counters fit on different pages */
  size_t     store_size;      /* amount of space reserved for counters          */
  double     *pcounter1,     
             *pcounter2;      /* pointers to counters                           */
  double     cosa, sina;      /* cosine and sine of rotation angle              */
  double     *counter_space;  /* pointer to space reserved for counters         */
  double     refcounter1,
             refcounter2;     /* reference values for counters                  */
  double     epsilon=1.e-7;   /* required accuracy                              */
  omp_lock_t counter_lock;    /* lock that guards access to counters            */
  double     refcount_time;   /* timing parameter                               */
  int        nthread_input;   /* thread parameters                              */
  int        nthread; 
 
/*********************************************************************
** process and test input parameters    
*********************************************************************/

  printf("Parallel Research Kernels version %s\n", PRKVERSION);
  printf("OpenMP exclusive access test RefCount, shared counters\n");
 
  if (argc != 4){
    printf("Usage: %s <# threads> <# counter pair updates> <# private stream size>\n", *argv);
    return(1);
  }
 
  nthread_input = atoi(*++argv);
  if ((nthread_input < 1) || (nthread_input > MAX_THREADS)) {
    printf("ERROR: Invalid number of threads: %d\n", nthread_input);
    exit(EXIT_FAILURE);
  }
 
  iterations  = atol(*++argv);
  if (iterations < 1){
    printf("ERROR: iterations must be >= 1 : %d \n",iterations);
    exit(EXIT_FAILURE);
  }

  stream_size = atol(*++argv);
  if (stream_size < 0) {
    printf("ERROR: private stream size %ld must be non-negative\n", stream_size);
    exit(EXIT_FAILURE);
  }
 
  omp_set_num_threads(nthread_input);
   
  /* initialize shared counters; we put them on different pages, if possible.
     If the page size equals the whole memory, this will fail, and we reduce
     the space required */
  page_fit = 1;
  store_size = (size_t) getpagesize();
#ifdef VERBOSE
  printf("Page size = %d\n", getpagesize());
#endif
  counter_space = (double *) malloc(store_size+sizeof(double));
  while (!counter_space && store_size>2*sizeof(double)) {
    page_fit=0;

    store_size/=2;
    counter_space = (double *) malloc(store_size+sizeof(double));
  }
  if (!counter_space) {
    printf("ERROR: could not allocate space for counters\n");
    exit(EXIT_FAILURE);
  }
 
#ifdef VERBOSE
  if (!page_fit) printf("Counters do not fit on different pages\n");      
  else           printf("Counters fit on different pages\n");      
#endif
   
  pcounter1 = counter_space;
  pcounter2 = counter_space + store_size/sizeof(double);

  COUNTER1 = 1.0;
  COUNTER2 = 0.0;

  cosa = cos(1.0);
  sina = sin(1.0);
 
  /* initialize the lock on which we will be pounding */
  omp_init_lock(&counter_lock);
 
  #pragma omp parallel 
  {
  long   iter, j;   /* dummies                                        */
  double tmp1;      /* local copy of previous value of COUNTER1       */
  double *a, *b, *c;/* private vectors                                */
  int    num_error=0;/* errors in private stream execution            */
  double aj, bj, cj;
  long space;
  space = 3*sizeof(double)*stream_size;
  a = (double *) malloc(space);
  if (!a) {
    printf("ERROR: Could not allocate %ld words for private streams\n", 
           space);
    exit(EXIT_FAILURE);
  }
  b = a + stream_size;
  c = b + stream_size;
  for (j=0; j<stream_size; j++) {
    a[j] = A0;
    b[j] = B0;
    c[j] = C0;
  }
 
  #pragma omp master
  {
  nthread = omp_get_num_threads();
  if (nthread != nthread_input) {
    num_error = 1;
    printf("ERROR: number of requested threads %d does not equal ",
           nthread_input);
    printf("number of spawned threads %d\n", nthread);
  } 
  else {
    printf("Number of threads              = %d\n",nthread_input);
    printf("Number of counter pair updates = %ld\n", iterations);
    printf("Length of private stream       = %ld\n", stream_size);
#ifdef DEPENDENT
    printf("Dependent counter pair update\n");
#else
    printf("Independent counter pair updates using");
  #ifdef ATOMIC
    printf(" atomic operations\n");
  #else
    printf(" using locks\n");
  #endif
#endif
  }
  }
  bail_out(num_error);
 
  /* do one warmup iteration outside main loop to avoid overhead      */
#ifdef DEPENDENT
  omp_set_lock(&counter_lock);
  tmp1 = COUNTER1;
  COUNTER1 = cosa*tmp1 - sina*COUNTER2;
  COUNTER2 = sina*tmp1 + cosa*COUNTER2;
  omp_unset_lock(&counter_lock);
#else
  #ifndef ATOMIC
    omp_set_lock(&counter_lock);
  #else
    #pragma omp atomic
  #endif
    COUNTER1++;
  #ifdef ATOMIC
    #pragma omp atomic
  #endif
    COUNTER2++;
  #ifndef ATOMIC
    omp_unset_lock(&counter_lock);
  #endif
#endif
  /* give each thread some (overlappable) work to do                */
  private_stream(a, b, c, stream_size);

  #pragma omp master
  {
  refcount_time = wtime();
  }
 
  #pragma omp for
  /* start with iteration nthread to take into account pre-loop iter  */
  for (iter=nthread; iter<=iterations; iter++) { 
#ifdef DEPENDENT
    omp_set_lock(&counter_lock);
    tmp1 = COUNTER1;
    COUNTER1 = cosa*tmp1 - sina*COUNTER2;
    COUNTER2 = sina*tmp1 + cosa*COUNTER2;
    omp_unset_lock(&counter_lock);
#else
  #ifndef ATOMIC
    omp_set_lock(&counter_lock);
  #else
    #pragma omp atomic
  #endif
    COUNTER1++;
  #ifdef ATOMIC
    #pragma omp atomic
  #endif
    COUNTER2++;
  #ifndef ATOMIC
    omp_unset_lock(&counter_lock);
  #endif
#endif
    /* give each thread some (overlappable) work to do                */
    private_stream(a, b, c, stream_size);
  }
 
  #pragma omp master 
  { 
  refcount_time = wtime() - refcount_time;
  }

  /* check whether the private work has been done correctly           */
  aj = A0; bj = B0; cj = C0;
  #pragma omp for
  for (iter=0; iter<=iterations; iter++) {
    aj += bj + SCALAR*cj;
  }
  for (j=0; j<stream_size; j++) {
    num_error += MAX(ABS(a[j]-aj)>epsilon,num_error);
  }
  if (num_error>0) {
    printf("ERROR: Thread %d encountered errors in private work\n",
           omp_get_thread_num());           
  }
  bail_out(num_error);

  } /* end of OpenMP parallel region */
 
#ifdef DEPENDENT
  refcounter1 = cos(iterations+1);
  refcounter2 = sin(iterations+1);
#else
  refcounter1 = (double)(iterations+2);
  refcounter2 = (double)(iterations+1);
#endif
  if ((ABS(COUNTER1-refcounter1)>epsilon) || 
      (ABS(COUNTER2-refcounter2)>epsilon)) {
     printf("ERROR: Incorrect or inconsistent counter values %13.10lf %13.10lf; ",
            COUNTER1, COUNTER2);
     printf("should be %13.10lf, %13.10lf\n", refcounter1, refcounter2);
  }
  else {
#ifdef VERBOSE
    printf("Solution validates; Correct counter values %13.10lf %13.10lf\n", 
           COUNTER1, COUNTER2);
#else
    printf("Solution validates\n");
#endif
    printf("Rate (MCPUPs/s): %lf time (s): %lf\n", 
           iterations/refcount_time*1.e-6, refcount_time);
  }
 
  exit(EXIT_SUCCESS);
}
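COUNTER1 and COUNTER2 are not defined in this excerpt; they are presumably macros over the page-separated counter pointers set up near the top of main, along the lines of:

#define COUNTER1 (*pcounter1)   /* assumption: consistent with how pcounter1/pcounter2 are laid out above */
#define COUNTER2 (*pcounter2)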
Example #11
 void unlock()   { omp_unset_lock( &m_lock ); }
Example #12
FLA_Error FLA_Gemm_nn_omp_var15( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj C, fla_gemm_t* cntl )
{
  FLA_Obj AT,              A0,
          AB,              A1,
                           A2;

  FLA_Obj CT,              C0,
          CB,              C1,
                           C2;

  FLA_Obj AL,    AR,       A10,  A11,  A12;

  FLA_Obj BT,              B0,
          BB,              B1,
                           B2;
  FLA_Obj C1_local;

  int i, j, lock_ldim, lock_i;
  int b_m, b_k;

  FLA_Part_2x1( A,    &AT, 
                      &AB,            0, FLA_TOP );

  FLA_Part_2x1( C,    &CT, 
                      &CB,            0, FLA_TOP );

  #pragma intel omp parallel taskq
  {
  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )
  {
    b_m = FLA_Determine_blocksize( A, AT, FLA_TOP, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( AT,                &A0, 
                        /* ** */            /* ** */
                                              &A1, 
                           AB,                &A2,        b_m, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( CT,                &C0, 
                        /* ** */            /* ** */
                                              &C1, 
                           CB,                &C2,        b_m, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /* C1 = alpha * A1 * B + C1; */
  
    FLA_Part_1x2( A1,   &AL,  &AR,      0, FLA_LEFT );

    FLA_Part_2x1( B,    &BT, 
                        &BB,            0, FLA_TOP );
  
    while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
    {
      b_k = FLA_Determine_blocksize( A, AL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );

      // Get the index of the current partition.
      // FIX THIS: need + b_m - 1 or something like this
      //j = FLA_Obj_length( CT ) / b_m;
      //i = FLA_Obj_width( AL ) / b_k;
      //lock_ldim = FLA_get_num_threads_in_m_dim(omp_get_num_threads());
      lock_i = FLA_Obj_length( CT ) / b_m;
  
      FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A10, /**/ &A11, &A12,
                             b_k, FLA_RIGHT );

      FLA_Repart_2x1_to_3x1( BT,                &B0, 
                          /* ** */            /* ** */
                                                &B1, 
                             BB,                &B2,        b_k, FLA_BOTTOM );
  
      /*------------------------------------------------------------*/
  
      /*    C1 = alpha * A11 * B1 + C1; */
      //// FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
      ////           alpha, A11, B1, FLA_ONE, C1 );

      #pragma intel omp task captureprivate( lock_i, A11, B1, C1 ), private( C1_local )
      {
      FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, C1, &C1_local );
      FLA_Obj_set_to_zero( C1_local );

      /*    C1_local = alpha * A1 * B11 + C1_local; */
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                         alpha, A11, B1, FLA_ONE, C1_local );

      // Acquire lock[i] (the lock for C1).
      omp_set_lock( &fla_omp_lock[lock_i] );

      /* C1 += C1_local */
      FLA_Axpy_external( FLA_ONE, C1_local, C1 );
      //FLA_Axpy_sync_pipeline2( j*lock_ldim, FLA_ONE, C1_local, C1 );
      //FLA_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 );
      //REF_Axpy_sync_circular2( j*lock_ldim, i, FLA_ONE, C1_local, C1 );

      // Release lock[i] (the lock for C1).
      omp_unset_lock( &fla_omp_lock[lock_i] );

      FLA_Obj_free( &C1_local );
      }
  
      /*------------------------------------------------------------*/
  
      FLA_Cont_with_1x3_to_1x2( &AL,  /**/ &AR,        A10, A11, /**/ A12,
                                FLA_LEFT );

      FLA_Cont_with_3x1_to_2x1( &BT,                B0, 
                                                    B1, 
                              /* ** */           /* ** */
                                &BB,                B2,     FLA_TOP );
    }
  
    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0, 
                                                  A1, 
                            /* ** */           /* ** */
                              &AB,                A2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &CT,                C0, 
                                                  C1, 
                            /* ** */           /* ** */
                              &CB,                C2,     FLA_TOP );
  }
  }

  return FLA_SUCCESS;
}
Example #13
void Roundworld::process()
{
	if ( !pause )
	{
		killHalf();
		expireFood();
		autoinsertFood();
		expireCritters();
		autoexchangeCritters();
		autosaveCritters();
		autoinsertCritters();
		// adjust gravity vectors of all entities' rigid bodies
		unsigned int j, b;
		Food* f;
		CritterB* bod;
		btRigidBody* bo;
		
		for( j=0; j < entities.size(); j++)
		{	
			if ( entities[j]->type == FOOD )
			{
// 				f = food[j];
				Food* f = static_cast<Food*>( entities[j] );
				for( b=0; b < f->body.bodyparts.size(); b++)
				{
					bo = f->body.bodyparts[b]->body;
					bo->setGravity( -(bo->getCenterOfMassPosition().normalized()*10) );
				}
			}
		}
		for( j=0; j < critters.size(); j++)
		{
			bod = critters[j];
			for( b=0; b < bod->body.bodyparts.size(); b++)
			{
				bo = bod->body.bodyparts[b]->body;
				bo->setGravity( -(bo->getCenterOfMassPosition().normalized()*10) );
			}
		}
		
		if ( *critter_raycastvision == 0 )
		{
			renderVision();
			grabVision();
		}

		// do a bullet step
			m_dynamicsWorld->stepSimulation(0.016667f, 0, 0.016667f);
	// 		m_dynamicsWorld->stepSimulation(Timer::Instance()->bullet_ms / 1000.f);

		int lmax = (int)critters.size();
		CritterB *c;
		float freeEnergyc = 0.0f;

		// FIXME USE FROM WORLDB
		omp_set_num_threads( *threads );
		#pragma omp parallel for ordered shared(freeEnergyc, lmax) private(c) // ordered 
		for( int i=0; i < lmax; i++)
		{
			c = critters[i];
			
			omp_set_lock(&my_lock1);
				checkCollisions(  c );
			omp_unset_lock(&my_lock1);

			// process
				c->process();

			// record critter used energy
				freeEnergyc += c->energyUsed;

			// process Output Neurons
				eat(c);

			// procreation if procreation energy trigger is hit
			omp_set_lock(&my_lock1);
				procreate(c);
			omp_unset_lock(&my_lock1);
		}

		freeEnergy += freeEnergyc;

		getGeneralStats();
	}
}
Example #14
File: slic.cpp Project: chenguanzhou/SLIC
void Slic::_GenerateSuperpixels()
{
    const int MAX_ITER = 10;
    for(int I=0;I<MAX_ITER;++I)
    {
        clock_t t1 = clock();
        std::cout<<"This is the "<<I<<"th circulation:"<<std::endl;

        omp_init_lock(&lock);

        #pragma omp parallel for
        for(int n=0;n<_N;++n)
        {
            for(int m=0;m<_M;++m)
            {
                // Init
                int nXOff  = m*_regionSize;
                int nYOff  = n*_regionSize;
                int nXSize = m==(_M-1)? _width  - m*_regionSize : _regionSize;
                int nYSize = n==(_N-1)? _height - n*_regionSize : _regionSize;

                uchar *bufferSrc = new uchar[nXSize*nYSize*_dataSize*_bandCount];
                int *bufferDst = new int[nXSize*nYSize];

                omp_set_lock(&lock);
                _poSrcDS->RasterIO(GF_Read,nXOff,nYOff,nXSize,nYSize,bufferSrc,nXSize,nYSize,_dataType,_bandCount,0,0,0,0);
                _poDstDS->GetRasterBand(1)->RasterIO(GF_Read,nXOff,nYOff,nXSize,nYSize,bufferDst,nXSize,nYSize,GDT_Int32,0,0);
                omp_unset_lock(&lock);

                std::vector< int > candidateCenterID;
                for(int i=-1;i<2;++i)
                {
                    for(int j=-1;j<2;++j)
                    {
                        if ((n+i)>=0 && (n+i)<_N && (m+j)>=0 && (m+j)<_M)
                            candidateCenterID.push_back( (n+i) * _M + (m+j) ) ;
                    }
                }

                // GetFeatureInfo
                FeatureVector featureVec(_bandCount + 2);
                for(int i=0,index=0;i<nYSize;++i)
                {
                    for(int j=0;j<nXSize;++j,++index)
                    {
                        uchar* p = bufferSrc;
                        for(int k=0;k<_bandCount;++k,p += nXSize*nYSize*_dataSize)
                        {
                            featureVec[k]= SRCVAL(p,_dataType,index)/_regularizer;
                        }
                        featureVec[_bandCount]   = static_cast<double>(nXOff+j)/_regionSize;  //x
                        featureVec[_bandCount+1] = static_cast<double>(nYOff+i)/_regionSize; //y
                        bufferDst[i*nXSize+j] = _GetNearestCenter(candidateCenterID,featureVec);
                    }
                }

                omp_set_lock(&lock);
                _poDstDS->GetRasterBand(1)->RasterIO(GF_Write,nXOff,nYOff,nXSize,nYSize,bufferDst,nXSize,nYSize,GDT_Int32,0,0);
                omp_unset_lock(&lock);

                delete []bufferSrc;
                delete []bufferDst;
            }

        }

        omp_destroy_lock(&lock);
        _ComputeNewCenterVector();
        std::cout<<"This circle cost: "<<static_cast<double>(clock()-t1)/CLOCKS_PER_SEC<<"s"<<std::endl;
    }


}
Example #15
File: lock.cpp Project: fmof/sagepp
 void Mutex::unlock() { 
    omp_unset_lock(&lock_); 
  }
Example #16
void PhotonMap::throughputByDensityEstimation(vec3f &color, Path &eyeMergePath, 
		std::vector<LightPoint> &surfaceVertices, std::vector<LightPoint> &volumeVertices)
{
	class Query{
		PhotonMap  *photonMap;
		vec3f	    contrib;
		vec3f		position;
		vec3f		hitNormal;
		float	    radius;
		int			photonsNum;
		Ray         outRay;
		float GaussianKernel(float mahalanobisDist) const{
			double exponent = exp((double)-mahalanobisDist/2);
			//photonMap->fout << " Gaussian exp = " << exponent << std::endl;
			return exponent / (2*M_PI);
		}
		float Kernel(float distSqr, float radiusSqr) const{
			float s = MAX(0, 1 - distSqr / radiusSqr);
			return 3 * s * s / M_PI;
		}
	public:
		Query(PhotonMap *map, float r, int n) : photonMap(map), radius(r), photonsNum(n)
		{}
		bool volumeMedia;
		void SetContrib(const vec3f &color) { contrib = color; }
		void SetPosition(const vec3f &pos)  { position = pos; }
		void SetOutRay(const Ray &ray) { outRay = ray; }
		void SetNormal(const vec3f &n) { hitNormal = n; }
		vec3f GetContrib() const  { return contrib; }
		vec3f GetPosition() const { return position; }
		void Process(const LightPoint &lightPoint){
			if(volumeMedia && lightPoint.photonType != Ray::INVOL)		return ;
			if(!volumeMedia && lightPoint.photonType != Ray::OUTVOL)	return ;	

			if(!lightPoint.pathThePointIn || lightPoint.indexInThePath < 0)
				return;

			Path &lightPath = *lightPoint.pathThePointIn;
			int index = lightPoint.indexInThePath;
			/*
			if (volumeMedia && lightPath[index].insideObject != outRay.insideObject)
			{
				printf("aye\n");
				return;
			}
			if (!volumeMedia && lightPath[index].contactObject != outRay.contactObject)
			{
				printf("aye\n");
				return;
			}
			*/
			vec3f photonThroughput(1,1,1);
			for(int i = 0; i < index; i++){
				photonThroughput *= lightPath[i].color / lightPath[i].directionProb / lightPath[i].originProb;
				photonThroughput *= lightPath[i].getCosineTerm();
				float dist = (lightPath[i].origin-lightPath[i+1].origin).length();
				photonThroughput *= lightPath[i].getRadianceDecay(dist);
			}
			photonThroughput /= lightPath[index].originProb;
			// runs here, photon's f/p is done.

			Ray photonRay = lightPath[index];
			photonRay.direction = lightPath[index-1].direction;
			vec3f color = photonThroughput * photonRay.getBSDF(outRay);
			float distSqr = powf((outRay.origin-lightPath[index].origin).length(), 2);
			if(intensity(color) < 1e-6f)	return ;
			float kernel = Kernel(distSqr, radius*radius);
			float normalization = volumeMedia ? kernel/(photonsNum*radius*radius*radius) : kernel/(photonsNum*radius*radius);
			//float normalization = volumeMedia==false ? 1.0 / (photonsNum*PI*radius*radius) : 1.0 / (photonsNum*PI*4.0/3*radius*radius*radius);
			contrib += color * normalization;
		}

		double sumWeight; 
		int    photonsCount;
		
		void weightScale(){
			contrib /= ( sumWeight / photonsCount );
		}

	};

	Query query(this, mRadius, mPhotonsNum);
	vec3f Tr(1,1,1), SurfaceColor(0,0,0), VolumeColor(0,0,0);
	int mergeIndex = 1;
	for(int i = 1; i < eyeMergePath.size(); i++){
		float dist = MAX((eyeMergePath[i-1].origin-eyeMergePath[i].origin).length(), EPSILON);

		if(eyeMergePath[i-1].insideObject && eyeMergePath[i-1].insideObject->isVolumetric()){
			if(eyeMergePath[i-1].insideObject->isHomogeneous())
			{
				// ray marching volume radiance
				Ray volThroughRay = eyeMergePath[i-1];
				SceneVPMObject *volume = static_cast<SceneVPMObject*>(volThroughRay.insideObject);
				float stepSize = volume->stepSize;
				int N = dist / stepSize;
				if(N == 0)		N++;
				float step = dist / N;
				float offset = step * RandGenerator::genFloat();
				float t = offset;
				Tr *= volume->getRadianceDecay(volThroughRay, offset);
				for(int j = 0; j < N; j++, t+=step){
					query.SetContrib(vec3f(0,0,0));
					query.SetPosition(volThroughRay.origin + volThroughRay.direction*t);
					Ray outRay = volThroughRay;
					outRay.direction = -volThroughRay.direction;
					outRay.origin = volThroughRay.origin + volThroughRay.direction*t;
					outRay.contactObject = NULL;
					query.SetOutRay(outRay);
					query.volumeMedia = true;

					volumeHashGrid.Process(volumeVertices, query);

					Tr *= volume->getRadianceDecay(outRay, step);
					vec3f volColor = query.GetContrib();
					VolumeColor += volColor * Tr * step;
				}
			}
			else{
				// ray marching volume radiance
				Ray volThroughRay = eyeMergePath[i-1];
				HeterogeneousVolume *volume = static_cast<HeterogeneousVolume*>(volThroughRay.insideObject);
				float stepSize = volume->getStepSize();
				int N = dist / stepSize;
				if(N == 0)		N++;
				float step = dist / N;
				float offset = step * RandGenerator::genFloat();
				float t = offset;
				Tr *= volume->getRadianceDecay(volThroughRay, offset);
				for(int j = 0; j < N; j++, t+=step){
					query.SetContrib(vec3f(0,0,0));
					query.SetPosition(volThroughRay.origin + volThroughRay.direction*t);
					Ray outRay = volThroughRay;
					outRay.direction = -volThroughRay.direction;
					outRay.origin = volThroughRay.origin + volThroughRay.direction*t;
					outRay.contactObject = NULL;
					query.SetOutRay(outRay);
					query.volumeMedia = true;

					volumeHashGrid.Process(volumeVertices, query);

					Tr *= volume->getRadianceDecay(outRay, step);
					vec3f volColor = query.GetContrib();
					VolumeColor += volColor * Tr * step;
				}
			}
			
		}
		else
		{
			if (eyeMergePath[i - 1].insideObject)
				Tr *= eyeMergePath[i - 1].getRadianceDecay(dist);
		}

		if(eyeMergePath[i].contactObject && eyeMergePath[i].contactObject->emissive()){
			// eye path hit light, surface color equals to light radiance
			SurfaceColor = eyeMergePath[i].color;
			mergeIndex = i;
			break;
		}

		if(eyeMergePath[i].contactObject && eyeMergePath[i].directionSampleType == Ray::RANDOM){
			// non-specular photon density estimation
			if(eyeMergePath[i].contactObject->isVolumetric())
				continue;
			query.SetContrib(vec3f(0,0,0));
			query.SetPosition(eyeMergePath[i].origin);
			Ray outRay = eyeMergePath[i];
			outRay.direction = -eyeMergePath[i-1].direction;
			query.SetOutRay(outRay);
			query.volumeMedia = false;
			Ray fromRay = eyeMergePath[i-1];
			omp_set_lock(&surfaceHashGridLock);
			surfaceHashGrid.Process(surfaceVertices, query);
			omp_unset_lock(&surfaceHashGridLock);
			SurfaceColor = query.GetContrib();
			mergeIndex = i;
			break;
		}
	}
	color = Tr * SurfaceColor + VolumeColor;

	if (rayMarching)
	{
		for(int i = 0; i < 1/*eyeMergePath.size()-1*/; i++){
			color *= eyeMergePath[i].getCosineTerm() * eyeMergePath[i].color
				/ eyeMergePath[i].directionProb / eyeMergePath[i].originProb;
		}
	}
	else
	{
		for(int i = 0; i < mergeIndex; i++){
			color *= eyeMergePath[i].getCosineTerm() * eyeMergePath[i].color
				/ eyeMergePath[i].directionProb / eyeMergePath[i].originProb;
			if (i + 1 < mergeIndex)
			{
				float dist = (eyeMergePath[i].origin - eyeMergePath[i+1].origin).length();
				color *= eyeMergePath[i].getRadianceDecay(dist);
			}
		}
	}
}
Example #17
triangulation triangulate_cube_random(data_list * data) {
  int dim = data_list_dim(data);
  cube_points cube = gen_cube_points(dim); 
  triangulation result;
  omp_lock_t result_lock;
  omp_init_lock(&result_lock);  //If we found a triangulation, use this lock!

  facet_acute_data parameters; //Parameters for conform_
  triangulation tmp_triang; //Triangulation we are expanding in current thread
  ptetra tet_list; //List of tetrahedrons, used in the parallel section
  unsigned short tet_list_len; //Holds the length of this list
  int triangulation_found = 0; //Stop if one of the threads has found a triangulation!
  int rand_bound, i;
  unsigned short tet_max, tet_min, tet_rand, tet_add;

  size_t max_volume;
  //Start the parallel loop!
#pragma omp parallel default(none) \
  private(parameters, tmp_triang, tet_list, tet_list_len, rand_bound, i,max_volume,tet_max, tet_min, tet_rand, tet_add) \
  shared(result, result_lock, cube,data,dim, triangulation_found)
  {
    //Initalization for each thread
    parameters.cube = &cube;
    parameters.boundary_func = &triangle_boundary_cube;
    parameters.data = data;
    parameters.store_acute_ind = 1;
    parameters.acute_ind  = malloc(sizeof(vert_index) * cube.len);

    tet_list = malloc(sizeof(tetra) * cube.len);
    max_volume = 0;

    while (!triangulation_found) { //Not found a triangulation
      //Initialize the triangulation variables
      tmp_triang = triangulation_init(dim);
      tet_list_len = 0;
      //Start triangle (0,0,0), (rand,0,0), (rand,rand,0)
      tmp_triang.bound_len = 1;
      tmp_triang.bound_tri = triangulation_start_facet(data);
      //printf("Thread %d with iteration %zu starts with:\n", omp_get_thread_num(), ++iterations);
      //print_triangle(tmp_triang.bound_tri);

      //While we have triangles on the boundary
      while (tmp_triang.bound_len > 0) {
        /*
         * We are going to add a tetrahedron on the boundary triangle.
         * To do so, we select a random triangle on the boundary. Then we generate all the
         * acute tetrahedra (above and below) with facets in our possible list.
         * From this list we remove all the tetrahedrons that intersect with our current triangulation.
         * Then we add a random tetrahedron to our triangulation and repeat.
         */
        rand_bound = rand() % tmp_triang.bound_len;
        //
        //Calculate the conform tetrahedrons above and below
        if (!facet_conform(tmp_triang.bound_tri + rand_bound, &parameters))
          break; //Triangle on the boundary that does not have a conform facet

        tet_list_len = parameters.acute_ind_len;
        //Form explicit list of the tetrahedrons
        for (i = 0; i < tet_list_len; i++) 
        {
          copyArr3(tet_list[i].vertices[0], tmp_triang.bound_tri[rand_bound].vertices[0]);
          copyArr3(tet_list[i].vertices[1], tmp_triang.bound_tri[rand_bound].vertices[1]);
          copyArr3(tet_list[i].vertices[2], tmp_triang.bound_tri[rand_bound].vertices[2]);
          copyArr3(tet_list[i].vertices[3], cube.points[parameters.acute_ind[i]]);
        }

        //Remove all the tetrahedrons that intersect with current triangulation.
        filter_tet_list_disjoint_triangulation(tet_list, &tet_list_len, &tmp_triang);

        if (tet_list_len == 0) 
          break; //We cannot find a conform tetrahedron for this boundary; restart

        //Select a tetrahedron from the tet_list to add to the triangulation. Different approaches.
        //Combinations between: random tetra, smallest volume, maximum volume. Indices stored in tet_max, tet_min and tet_rand

        tet_list_min_max_volume(tet_list, tet_list_len, &tet_max, &tet_min);
        tet_rand = rand() % tet_list_len; 

        switch (omp_get_thread_num() % 6) {
          case 0: //Choose tet with max volume
            tet_add = tet_max; break;
          case 1: //Choose tet with min volume
            tet_add = tet_min; break;
          case 2: //Choose random tet
            tet_add = tet_rand; break;
          case 3: //Choose either max or min (random)
            tet_add = (rand() % 2)? tet_min : tet_max; break;
          case 4: //Choose either max or rand
            tet_add = (rand() % 5)? tet_max : tet_rand; break;
          case 5: //Either min or rand
            tet_add = (rand() % 5)? tet_min : tet_rand; break;
          default:
            tet_add = 0;
        }

        /*
         * Add the above tetra to the triangulation.
         * This removes all the boundary triangles that are covered by this tetrahedron
         */
        add_tet_triangulation(tet_list + tet_add,&tmp_triang);
      }
      if (triangulation_volume(&tmp_triang) > max_volume) {
        max_volume = triangulation_volume(&tmp_triang);
        printf("Record for thread %d using method %d amount: %zu\n", omp_get_thread_num(), omp_get_thread_num() % 6, max_volume);
        triangulation_print(&tmp_triang);
      }
      if (tmp_triang.bound_len == 0)
      {
        printf("FOUND A TRIANGULATION!!!\n");
        triangulation_print(&tmp_triang);
        omp_set_lock(&result_lock);
        result = tmp_triang;
        triangulation_found = 1;
        omp_unset_lock(&result_lock);
      } else 
        triangulation_free(&tmp_triang);
    }
    free(parameters.acute_ind);
    free(tet_list);
  }
  free(cube.points);
  omp_destroy_lock(&result_lock);
  return result;
}
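
A note on the synchronization used above: every thread retries the randomized search independently, and the first thread to finish publishes its triangulation under result_lock and raises the shared triangulation_found flag so the others stop. Below is a minimal standalone sketch of that publish-and-stop pattern, not code from this project; the names (found, result) and the success test are hypothetical, and the unlocked read of the flag in the while condition mirrors the original's approach (as does calling rand() from several threads).

#include <omp.h>
#include <cstdio>
#include <cstdlib>

int main() {
    int found = 0;    // shared stop flag; read without a lock (as in the original), written under it
    int result = -1;  // shared result, protected by result_lock
    omp_lock_t result_lock;
    omp_init_lock(&result_lock);

    #pragma omp parallel shared(found, result, result_lock)
    {
        while (!found) {
            int candidate = rand() % 1000;  // hypothetical randomized attempt
            if (candidate == 42) {          // hypothetical success condition
                omp_set_lock(&result_lock);
                if (!found) {               // re-check under the lock: only the first winner publishes
                    result = candidate;
                    found = 1;
                }
                omp_unset_lock(&result_lock);
            }
        }
    }

    omp_destroy_lock(&result_lock);
    printf("result = %d\n", result);
    return 0;
}
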
Example #18
0
int main(int argc, char **argv){
	FILE *file = fopen("file1","r");
	FILE *out = NULL;
	char str_buf[1024][50];
	unsigned str_buf_in = 0;
	unsigned str_buf_out = 0;
	char str[50];
	int read_finish = 0;
	int num_read = 0, num_write = 0;
	char **input_filenames = NULL;
	int input_len; //num of input files
	FILE **input_files = NULL;
	int i,j; double elapsed_time;
	int mapping_done = 0; //incremented as each mapper thread finishes
	struct timeval tvalBefore, tvalAfter;
	////locks///
	int rank, size, len;
    char name[MPI_MAX_PROCESSOR_NAME];
    omp_set_num_threads(4);
    MPI_Init(&argc, &argv);                 
    MPI_Comm_size(MPI_COMM_WORLD, &size);   
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);  
    MPI_Get_processor_name(name, &len);
    MPI_Status status;
	omp_init_lock(&worklock);
	omp_init_lock(&inclock);
	omp_init_lock(&readlock);
	omp_init_lock(&readerlock);
	omp_init_lock(&mapperlock);
	if(argc < 5){
		printf("Usage ./mapreduce -in [input files].... -out [output file]\n");
		return 0;
	}else{
		if(strcmp("-in",argv[1])){
			printf("Usage ./mapreduce -in [input files].... -out [output file]\n");
			return 0;
		}
		for(i=2;i<argc;i++){ //start from first input file
			if(!strcmp("-out",argv[i])){
				break;
			}
		}
		input_len = i - 2;
		input_filenames = (char**)malloc(sizeof(char*)*input_len);
		for(j=0;j<input_len;j++)
			input_filenames[j] = (char*)malloc(sizeof(char)*50);
		for(i=2,j=0;j<input_len;i++,j++){
			strcpy(input_filenames[j],argv[i]);
		}
		input_files = read_in(input_filenames,input_len,0);
		if(strcmp("-out",argv[2+input_len])){
			printf("output file missing, using default name 'out'\n");
			out = fopen("out","w");
		}else{
			out = fopen(argv[3+input_len],"w");
		}
	}
	omp_set_num_threads(8);
	
	fifoQ *queue_to_map = initQ(1000000, "queue_to_map");
	fifoQ *queue_to_reduce = initQ(1000000, "queue_to_reduce");
	fifoQ **queues_to_map = (fifoQ**)malloc(sizeof(fifoQ*)*5);
	queues_to_map[0] = initQ(1000000, "queue_to_map0");
	queues_to_map[1] = initQ(1000000, "queue_to_map1");
	queues_to_map[2] = initQ(1000000, "queue_to_map2");
	queues_to_map[3] = initQ(1000000, "queue_to_map3");
	queues_to_map[4] = initQ(1000000, "queue_to_map4");
	fifoQ **queues_to_reduce = (fifoQ**)malloc(sizeof(fifoQ*)*5);
	queues_to_reduce[0] = initQ(1000000, "queue_to_reduce0");
	queues_to_reduce[1] = initQ(1000000, "queue_to_reduce1");
	queues_to_reduce[2] = initQ(1000000, "queue_to_reduce2");
	queues_to_reduce[3] = initQ(1000000, "queue_to_reduce3");
	queues_to_reduce[4] = initQ(1000000, "queue_to_reduce4");
	fifoQ **queues_reduced = (fifoQ**)malloc(sizeof(fifoQ*)*5);
	fifoQ *final_queue = initQ(1000000, "final Q");
	
	int sendsize = input_len/size + (input_len % size - rank > 0 ? 1 : 0); //num of files sent to this node
	if(rank==0){ //distribute files
		int i,j;
		char ***files_tosend = (char***)malloc(sizeof(char**)*input_len);
		int lsendsize;
		FILE **node_files;
		
		for(i=0;i<size;i++){
			lsendsize = input_len/size + (input_len % size - i > 0 ? 1 : 0); //num of files sent to node i
			printf("send size of core %d is %d\n",i,lsendsize);
			files_tosend[i] = (char**)malloc(sizeof(char*)*lsendsize);
			for(j=0;j<lsendsize;j++){
				files_tosend[i][j] = (char*)malloc(sizeof(char)*50);
			}
		}
		for(i=0;i<input_len;i++){
			int belongs_to = i % size;
			int pos = i/size;
			strcpy(files_tosend[belongs_to][pos],input_filenames[i]);
			printf("distributing file %s to files_tosend %d,%d, value %s\n",input_filenames[i],belongs_to,pos,files_tosend[belongs_to][pos]);
		}
		if(size>1){
			for(i=1;i<size;i++){
				lsendsize = input_len/size + (input_len % size - i > 0 ? 1 : 0);
				for(j=0;j<lsendsize;j++){
					printf("sending %s to cpu %d\n",files_tosend[i][j],i);
					MPI_Send(files_tosend[i][j],50,MPI_BYTE,i,1,MPI_COMM_WORLD);
					printf("send done\n");
				}
			}
		}
		node_files = (FILE**)malloc(sizeof(FILE*)*sendsize);
		for(i=0;i<sendsize;i++){
			node_files[i] = fopen(files_tosend[rank][i],"r");
		}
		gettimeofday (&tvalBefore, NULL);
		#pragma omp parallel sections
		{
		
		#pragma omp section //reader thread0
		{
			int i; int odd_even = 0;
			//printf("reader 0 is core #%d\n",rank);
			for(i=0;i<sendsize;i++){
				while(!feof(node_files[i])){
	                  /////////check if full///////////
					omp_set_lock(&readerlock);
					if(!feof(node_files[i])){
						strcpy(str,"");
						fscanf(node_files[i],"%s",str);
					}
					else{
						omp_unset_lock(&readerlock);
						break;
					}
					omp_unset_lock(&readerlock);
					if(strcmp(str,""))
						putWork(queues_to_map[0], constr_work(str));
				}
			}
			omp_set_lock(&inclock);
			read_finish++;
			omp_unset_lock(&inclock);
			//printf("reader thread0 done\n");
		}
		#pragma omp section //reader thread1
		{
			int i; int odd_even = 0;
			//printf("reader 1 is core #%d\n",rank);
			for(i=0;i<sendsize;i++){
				while(!feof(node_files[i])){
	                  /////////check if full///////////
					omp_set_lock(&readerlock);
					if(!feof(node_files[i])){
						strcpy(str,"");
						fscanf(node_files[i],"%s",str);
					}
					else{
						omp_unset_lock(&readerlock);
						break;
					}
					omp_unset_lock(&readerlock);
					if(strcmp(str,""))
						putWork(queues_to_map[1], constr_work(str));
				}
			}
			omp_set_lock(&inclock);
			read_finish++;
			omp_unset_lock(&inclock);
			//printf("reader thread1 done\n");
		}
		#pragma omp section //reader thread2
		{
			int i; int odd_even = 0;
			//printf("reader 2 is core #%d\n",rank);
			for(i=0;i<sendsize;i++){
				while(!feof(node_files[i])){
	                  /////////check if full///////////
					omp_set_lock(&readerlock);
					if(!feof(node_files[i])){
						strcpy(str,"");
						fscanf(node_files[i],"%s",str);
					}
					else{
						omp_unset_lock(&readerlock);
						break;
					}
					omp_unset_lock(&readerlock);
					if(strcmp(str,""))
						putWork(queues_to_map[2], constr_work(str));
				}
			}
			omp_set_lock(&inclock);
			read_finish++;
			omp_unset_lock(&inclock);
			//printf("reader thread2 done\n");
		}
		#pragma omp section //reader thread3
		{
			//	printf("reader 3 is core #%d\n",rank);
			int i; int odd_even = 0;
			for(i=0;i<sendsize;i++){
				while(!feof(node_files[i])){
	                  /////////check if full///////////
					omp_set_lock(&readerlock);
					if(!feof(node_files[i])){
						strcpy(str,"");
						fscanf(node_files[i],"%s",str);
					}
					else{
						omp_unset_lock(&readerlock);
						break;
					}
					omp_unset_lock(&readerlock);
					if(strcmp(str,""))
						putWork(queues_to_map[3], constr_work(str));
				}
			}
			omp_set_lock(&inclock);
			read_finish++;
			omp_unset_lock(&inclock);
			//printf("reader thread3 done %d\n",rank);
		}
		#pragma omp section //mapper thread 0
		{
			int i;
			fifoQ *innerQ = initQ(50000,"innerQ 0");
			while(read_finish<NUM_READ_THREADS || !is_empty(queues_to_map[0])){
				printf("");
				if(!is_empty(queues_to_map[0])){
					work work = getWork(queues_to_map[0]);
					//mapper(queues_to_reduce[hash(work.str)], work);
					mapper(innerQ, work);
				}
			}
			for(i=0;i<=innerQ->in;i++){
				work work = getWork(innerQ);
				putWork(queues_to_reduce[hash(work.str)],work);
			}
			omp_set_lock(&inclock);
			mapping_done++;
			omp_unset_lock(&inclock);
			//printf("mapper thread0 done %d\n",rank);
			gettimeofday (&tvalAfter, NULL);
			elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
			if(rank==0)
				printf("elapsed time = %.2f sec,rank %d in map 0\n",elapsed_time,rank);
		}
		#pragma omp section //mapper thread 1
		{
			int i;
			fifoQ *innerQ = initQ(50000,"innerQ 1");
			while(read_finish<NUM_READ_THREADS || !is_empty(queues_to_map[1])){
				printf("");
				if(!is_empty(queues_to_map[1])){		
					work work = getWork(queues_to_map[1]);				
					//mapper(queues_to_reduce[hash(work.str)], work);
					mapper(innerQ, work);
				}
			}
			for(i=0;i<=innerQ->in;i++){
				work work = getWork(innerQ);
				putWork(queues_to_reduce[hash(work.str)],work);
			}
			omp_set_lock(&inclock);
			mapping_done++;
			omp_unset_lock(&inclock);
			//printf("mapper thread1 done %d\n",rank);
			gettimeofday (&tvalAfter, NULL);
			elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
			if(rank==0)
				printf("elapsed time = %.2f sec,rank %d in map 1\n",elapsed_time,rank);
		}
		#pragma omp section //mapper thread 2
		{
			int i;
			fifoQ *innerQ = initQ(50000,"innerQ 2");
			while(read_finish<NUM_READ_THREADS || !is_empty(queues_to_map[2])){
				printf("");
				if(!is_empty(queues_to_map[2])){		
					work work = getWork(queues_to_map[2]);				
					//mapper(queues_to_reduce[hash(work.str)], work);
					mapper(innerQ, work);
				}
			}
			for(i=0;i<=innerQ->in;i++){
				work work = getWork(innerQ);
				putWork(queues_to_reduce[hash(work.str)],work);
			}
			omp_set_lock(&inclock);
			mapping_done++;
			omp_unset_lock(&inclock);
			//printf("mapper thread2 done %d\n",rank);
			gettimeofday (&tvalAfter, NULL);
			elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
			if(rank==0)
				printf("elapsed time = %.2f sec,rank %d in map 2\n",elapsed_time,rank);
		}
		#pragma omp section //mapper thread 3
		{
			int i;
			fifoQ *innerQ = initQ(50000,"innerQ 3");
			while(read_finish<NUM_READ_THREADS || !is_empty(queues_to_map[3])){
				printf("");
				if(!is_empty(queues_to_map[3])){		
					work work = getWork(queues_to_map[3]);				
					//mapper(queues_to_reduce[hash(work.str)], work);
					mapper(innerQ, work);
				}
			}
			for(i=0;i<=innerQ->in;i++){
				work work = getWork(innerQ);
				putWork(queues_to_reduce[hash(work.str)],work);
			}
			omp_set_lock(&inclock);
			mapping_done++;
			omp_unset_lock(&inclock);
			//printf("mapper thread3 done %d\n",rank);
			gettimeofday (&tvalAfter, NULL);
			elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
			if(rank==0)
				printf("elapsed time = %.2f sec,rank %d in map 3\n",elapsed_time,rank);
		}
		#pragma omp section //reducer thread 0 
		{
			int i;
			gettimeofday (&tvalAfter, NULL);
			elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
			if(rank==0)
				printf("elapsed time = %.2f sec,rank %d\n",elapsed_time,rank);
			while(mapping_done<NUM_READ_THREADS){
				printf("");
			}
			gettimeofday (&tvalAfter, NULL);
			elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
			if(rank==0)
				printf("elapsed time = %.2f sec,rank %d\n",elapsed_time,rank);
			queues_reduced[0] = reducer(queues_to_reduce[0]);
			//printf("reducer thread 0 done\n");
			gettimeofday (&tvalAfter, NULL);
			elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
			if(rank==0)
				printf("elapsed time = %.2f sec,rank %d\n",elapsed_time,rank);
		}
		#pragma omp section //reducer thread 1
		{
			int i;
			while(mapping_done<NUM_READ_THREADS){printf("");}
			queues_reduced[1] = reducer(queues_to_reduce[1]);
			//printf("reducer thread 1 done\n");
		}
		#pragma omp section //reducer thread 2 
		{
			int i;
			while(mapping_done<NUM_READ_THREADS){printf("");}
			queues_reduced[2] = reducer(queues_to_reduce[2]);
			//printf("reducer thread 2 done\n");
		}
		#pragma omp section //reducer thread 3
		{
			int i;
			while(mapping_done<NUM_READ_THREADS){printf("");}
			queues_reduced[3] = reducer(queues_to_reduce[3]);
			//printf("reducer thread 3 done\n");
		}
		}
		gettimeofday (&tvalAfter, NULL);
		elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
		if(rank==0)
			printf("elapsed time = %.2f sec,rank %d\n",elapsed_time,rank);
	}
	else{
		int i;
		FILE** node_files = (FILE**)malloc(sizeof(FILE*)*sendsize);
		for(i=0;i<sendsize;i++){
			char *bufstr = (char*)malloc(sizeof(char)*50);
			MPI_Recv(bufstr,50,MPI_BYTE, 0,1, MPI_COMM_WORLD, &status);
			//printf("%s received\n",bufstr);
			node_files[i] = fopen(bufstr,"r");
		}
		#pragma omp parallel sections shared(input_files) private(str)
		{	
		//printf("using %d threads in core %d\n",omp_get_num_threads(),rank);
		#pragma omp section //reader thread0
		{
			int i; int odd_even = 0;
		//	printf("reader 0 is core #%d\n",rank);
			for(i=0;i<sendsize;i++){
				while(!feof(node_files[i])){
	                  /////////check if full///////////
					omp_set_lock(&readerlock);
					if(!feof(node_files[i])){
						strcpy(str,"");
						fscanf(node_files[i],"%s",str);
					}
					else{
						omp_unset_lock(&readerlock);
						break;
					}
					omp_unset_lock(&readerlock);
					if(strcmp(str,""))
						putWork(queues_to_map[0], constr_work(str));
				}
			}
			omp_set_lock(&inclock);
			read_finish++;
			omp_unset_lock(&inclock);
			//printf("reader thread0 done\n");
		}
		#pragma omp section //reader thread1
		{
			int i; int odd_even = 0;
		//	printf("reader 1 is core #%d\n",rank);
			for(i=0;i<sendsize;i++){
				while(!feof(node_files[i])){
	                  /////////check if full///////////
					omp_set_lock(&readerlock);
					if(!feof(node_files[i])){
						strcpy(str,"");
						fscanf(node_files[i],"%s",str);
					}
					else{
						omp_unset_lock(&readerlock);
						break;
					}
					omp_unset_lock(&readerlock);
					if(strcmp(str,""))
						putWork(queues_to_map[1], constr_work(str));
				}
			}
			omp_set_lock(&inclock);
			read_finish++;
			omp_unset_lock(&inclock);
			//printf("reader thread1 done\n");
		}
		#pragma omp section //reader thread2
		{
			int i; int odd_even = 0;
			//printf("reader 2 is core #%d\n",rank);
			for(i=0;i<sendsize;i++){
				while(!feof(node_files[i])){
	                  /////////check if full///////////
					omp_set_lock(&readerlock);
					if(!feof(node_files[i])){
						strcpy(str,"");
						fscanf(node_files[i],"%s",str);
					}
					else{
						omp_unset_lock(&readerlock);
						break;
					}
					omp_unset_lock(&readerlock);
					if(strcmp(str,""))
						putWork(queues_to_map[2], constr_work(str));
				}
			}
			omp_set_lock(&inclock);
			read_finish++;
			omp_unset_lock(&inclock);
			//printf("reader thread2 done\n");
		}
		#pragma omp section //reader thread3
		{
				//printf("reader 3 is core #%d\n",rank);
			int i; int odd_even = 0;
			for(i=0;i<sendsize;i++){
				while(!feof(node_files[i])){
	                  /////////check if full///////////
					omp_set_lock(&readerlock);
					if(!feof(node_files[i])){
						strcpy(str,"");
						fscanf(node_files[i],"%s",str);
					}
					else{
						omp_unset_lock(&readerlock);
						break;
					}
					omp_unset_lock(&readerlock);
					if(strcmp(str,""))
						putWork(queues_to_map[3], constr_work(str));
				}
			}
			omp_set_lock(&inclock);
			read_finish++;
			omp_unset_lock(&inclock);
			//printf("reader thread3 done %d\n",rank);
		}
		#pragma omp section //mapper thread 0
		{
		int i;
		fifoQ *innerQ = initQ(50000,"innerQ 0");
		//printf("map1\n");
		while(read_finish<NUM_READ_THREADS || !is_empty(queues_to_map[0])){
			printf("");
			if(!is_empty(queues_to_map[0])){
				work work = getWork(queues_to_map[0]);
				//mapper(queues_to_reduce[hash(work.str)], work);
				mapper(innerQ, work);
			}
		}
		for(i=0;i<=innerQ->in;i++){
			work work = getWork(innerQ);
			putWork(queues_to_reduce[hash(work.str)],work);
		}
		omp_set_lock(&inclock);
		mapping_done++;
		omp_unset_lock(&inclock);
		//printf("mapper thread0 done %d\n",rank);
		}
		#pragma omp section //mapper thread 1
		{
			int i;
			fifoQ *innerQ = initQ(50000,"innerQ 1");
			while(read_finish<NUM_READ_THREADS || !is_empty(queues_to_map[1])){
				printf("");
				if(!is_empty(queues_to_map[1])){		
					work work = getWork(queues_to_map[1]);				
					//mapper(queues_to_reduce[hash(work.str)], work);
					mapper(innerQ, work);
				}
			}
			for(i=0;i<=innerQ->in;i++){
				work work = getWork(innerQ);
				putWork(queues_to_reduce[hash(work.str)],work);
			}
			omp_set_lock(&inclock);
			mapping_done++;
			omp_unset_lock(&inclock);
			//printf("mapper thread1 done %d\n",rank);
		}
		#pragma omp section //mapper thread 2
		{
			int i;
			fifoQ *innerQ = initQ(50000,"innerQ 2");
			while(read_finish<NUM_READ_THREADS || !is_empty(queues_to_map[2])){
				printf("");
				if(!is_empty(queues_to_map[2])){		
					work work = getWork(queues_to_map[2]);				
					//mapper(queues_to_reduce[hash(work.str)], work);
					mapper(innerQ, work);
				}
			}
			for(i=0;i<=innerQ->in;i++){
				work work = getWork(innerQ);
				putWork(queues_to_reduce[hash(work.str)],work);
			}
			omp_set_lock(&inclock);
			mapping_done++;
			omp_unset_lock(&inclock);
			//printf("mapper thread2 done %d\n",rank);
		}
		#pragma omp section //mapper thread 3
		{
			int i;
			fifoQ *innerQ = initQ(50000,"innerQ 3");
			while(read_finish<NUM_READ_THREADS || !is_empty(queues_to_map[3])){
				printf("");
				if(!is_empty(queues_to_map[3])){			
					work work = getWork(queues_to_map[3]);				
					//mapper(queues_to_reduce[hash(work.str)], work);
					mapper(innerQ, work);
				}
			}
			for(i=0;i<=innerQ->in;i++){
				work work = getWork(innerQ);
				putWork(queues_to_reduce[hash(work.str)],work);
			}
			omp_set_lock(&inclock);
			mapping_done++;
			omp_unset_lock(&inclock);
			//printf("mapper thread3 done %d\n",rank);
		}
		#pragma omp section //reducer thread 0 
		{
			int i;
			while(mapping_done<NUM_READ_THREADS){
				printf("");
			}
			queues_reduced[0] = reducer(queues_to_reduce[0]);
			//printf("reducer thread 0 done\n");
		}
		#pragma omp section //reducer thread 1
		{
			int i;
			while(mapping_done<NUM_READ_THREADS){printf("");}
			queues_reduced[1] = reducer(queues_to_reduce[1]);
			//printf("reducer thread 1 done\n");
		}
		#pragma omp section //reducer thread 2 
		{
			int i;
			while(mapping_done<NUM_READ_THREADS){printf("");}
			queues_reduced[2] = reducer(queues_to_reduce[2]);
			//printf("reducer thread 2 done\n");
		}
		#pragma omp section //reducer thread 3
		{
			int i;
			while(mapping_done<NUM_READ_THREADS){printf("");}
			queues_reduced[3] = reducer(queues_to_reduce[3]);
			//printf("reducer thread 3 done\n");
		}
		
		}
	}
	MPI_Barrier(MPI_COMM_WORLD);
	gettimeofday (&tvalAfter, NULL);
	elapsed_time = (float)(tvalAfter.tv_sec - tvalBefore.tv_sec)+((float)(tvalAfter.tv_usec - tvalBefore.tv_usec)/1000000);
	if(rank==0)
		printf("elapsed time = %.2f sec,rank %d\n",elapsed_time,rank);
	if(rank==0){ //final reduction
		int i,j,revbuf;int mainct;
		for(i=0;i<NUM_READ_THREADS;i++){
			combine_queue(final_queue,queues_reduced[i]);
		}
		//printf("main node has %d to final reduce\n",calcnum(queues_reduced,NUM_READ_THREADS));
		for(i=1;i<size;i++){
			MPI_Recv(&revbuf,1,MPI_INT,i,1,MPI_COMM_WORLD,&status);
			//printf("need to receive %d strings from node %d\n",revbuf,i);
			char *strbuf = (char*)malloc(sizeof(char)*50);
			int ctbuf = 0;
			for(j=0;j<revbuf;j++){
				MPI_Recv(strbuf,50,MPI_BYTE,i,1,MPI_COMM_WORLD,&status);
				MPI_Recv(&ctbuf,1,MPI_INT,i,1,MPI_COMM_WORLD,&status);
				work work;
				strcpy(work.str,strbuf);
				work.count = ctbuf;
				//printf("received <%s,%d> from node %d\n",work.str,work.count,i);
				putWork(final_queue,work);
			}
		}
		fifoQ *output = reducer(final_queue);
		printQ_to_file(&output,1,out);
	}else{
		int i,total_num;
		total_num = calcnum(queues_reduced,NUM_READ_THREADS);
		MPI_Send(&total_num,1,MPI_INT,0,1,MPI_COMM_WORLD);
		for(i=0;i<NUM_READ_THREADS;i++){
			combine_queue(final_queue,queues_reduced[i]);
		}
		for(i=0;i<total_num;i++){
			MPI_Send(&final_queue->works[i].str,50,MPI_BYTE,0,1,MPI_COMM_WORLD);
			MPI_Send(&final_queue->works[i].count,1,MPI_INT,0,1,MPI_COMM_WORLD);
		}
	}
	
	for(i=0;i<input_len;i++){
		fclose(input_files[i]);
	}
	fclose(out);
	/*printQ(queues_to_map[0]);
	printQ(queues_to_map[1]);
	printQ(queues_to_map[2]);
	printQ(queues_to_map[3]);*/
	/*printQ(queues_reduced[0]);
	printQ(queues_reduced[1]);
	printQ(queues_reduced[2]);
	printQ(queues_reduced[3]);*/
	omp_destroy_lock(&inclock);
	omp_destroy_lock(&worklock);
	omp_destroy_lock(&readlock);
	omp_destroy_lock(&readerlock);
	omp_destroy_lock(&mapperlock);
	MPI_Finalize();

	return 0;
}
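
The hand-off between the reader, mapper and reducer sections above is driven by counters (read_finish, mapping_done) that each finishing thread increments under inclock while the waiting threads poll them. Below is a reduced sketch of that lock-protected completion counter; it is not taken from this program, the names (producers_done, done_lock) are hypothetical, and the consumer reads the counter under the lock instead of using the empty printf("") busy-wait of the original.

#include <omp.h>
#include <cstdio>

int main() {
    int producers_done = 0;          // how many producer threads have finished; guarded by done_lock
    omp_lock_t done_lock;
    omp_init_lock(&done_lock);

    #pragma omp parallel num_threads(5)
    {
        int nproducers = omp_get_num_threads() - 1;   // every thread except thread 0 produces

        if (omp_get_thread_num() == 0) {
            // consumer: poll the counter (under the lock) until every producer has reported
            int done = 0;
            while (done < nproducers) {
                omp_set_lock(&done_lock);
                done = producers_done;
                omp_unset_lock(&done_lock);
            }
            printf("all %d producers finished\n", nproducers);
        } else {
            // producer: do its share of the work, then report completion under the lock
            omp_set_lock(&done_lock);
            producers_done++;
            omp_unset_lock(&done_lock);
        }
    }

    omp_destroy_lock(&done_lock);
    return 0;
}
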
double betweennessCentrality(graph* G, DOUBLE_T* BC, int filter) {

    VERT_T *S;         /* stack of vertices in the order of non-decreasing 
                          distance from s. Also used to implicitly 
                          represent the BFS queue */
    plist* P;          /* predecessors of a vertex v on shortest paths from s */
    DOUBLE_T* sig;     /* No. of shortest paths */
    LONG_T* d;         /* Length of the shortest path between every pair */
    DOUBLE_T* del;     /* dependency of vertices */
    LONG_T *in_degree, *numEdges, *pSums;
    LONG_T *pListMem;    
    LONG_T* Srcs; 
    LONG_T *start, *end;
    LONG_T MAX_NUM_PHASES;
    LONG_T *psCount;
#ifdef _OPENMP    
    omp_lock_t* vLock;
    LONG_T chunkSize;
#endif
    int seed = 2387;
    double elapsed_time;

#ifdef _OPENMP    
#pragma omp parallel
{
#endif

    VERT_T *myS, *myS_t;
    LONG_T myS_size;
    LONG_T i, j, k, p, count, myCount;
    LONG_T v, w, vert;
    LONG_T numV, num_traversals, n, m, phase_num;
    LONG_T tid, nthreads;
    int* stream;
#ifdef DIAGNOSTIC
    double elapsed_time_part;
#endif

#ifdef _OPENMP
    int myLock;
    tid = omp_get_thread_num();
    nthreads = omp_get_num_threads();
#else
    tid = 0;
    nthreads = 1;
#endif

#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds();
    }
#endif

    /* numV: no. of vertices to run BFS from = 2^K4approx */
    numV = 1<<K4approx;
    n = G->n;
    m = G->m;

    /* Permute vertices */
    if (tid == 0) {
        Srcs = (LONG_T *) malloc(n*sizeof(LONG_T));
#ifdef _OPENMP
        vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
    }

#ifdef _OPENMP   
#pragma omp barrier
#pragma omp for
    for (i=0; i<n; i++) {
        omp_init_lock(&vLock[i]);
    }
#endif

    /* Initialize RNG stream */ 
	stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#ifdef _OPENMP
#pragma omp for
#endif
    for (i=0; i<n; i++) {
        Srcs[i] = i;
    }

#ifdef _OPENMP
#pragma omp for
#endif    
    for (i=0; i<n; i++) {
        j = n*sprng(stream);
        if (i != j) {
#ifdef _OPENMP
            int l1 = omp_test_lock(&vLock[i]);
            if (l1) {
                int l2 = omp_test_lock(&vLock[j]);
                if (l2) {
#endif
                    k = Srcs[i];
                    Srcs[i] = Srcs[j];
                    Srcs[j] = k;
#ifdef _OPENMP
                    omp_unset_lock(&vLock[j]);
                }
                omp_unset_lock(&vLock[i]);
            }
#endif
        }
    }

#ifdef _OPENMP    
#pragma omp barrier
#endif

#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() -elapsed_time_part;
        fprintf(stderr, "Vertex ID permutation time: %lf seconds\n", elapsed_time_part);
        elapsed_time_part = get_seconds();
    }
#endif

    /* Start timing code from here */
    if (tid == 0) {
        elapsed_time = get_seconds();
#ifdef VERIFYK4
        MAX_NUM_PHASES = 2*sqrt(n);
#else
        MAX_NUM_PHASES = 50;
#endif
    }

#ifdef _OPENMP
#pragma omp barrier    
#endif

    /* Initialize predecessor lists */
    
    /* The size of the predecessor list of each vertex is bounded by 
       its in-degree. So we first compute the in-degree of every
       vertex */ 

    if (tid == 0) {
        P   = (plist  *) calloc(n, sizeof(plist));
        in_degree = (LONG_T *) calloc(n+1, sizeof(LONG_T));
        numEdges = (LONG_T *) malloc((n+1)*sizeof(LONG_T));
        pSums = (LONG_T *) malloc(nthreads*sizeof(LONG_T));
    }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
    for (i=0; i<m; i++) {
        v = G->endV[i];
#ifdef _OPENMP
        omp_set_lock(&vLock[v]);
#endif
        in_degree[v]++;
#ifdef _OPENMP
        omp_unset_lock(&vLock[v]);
#endif
    }

    prefix_sums(in_degree, numEdges, pSums, n);
    
    if (tid == 0) {
        pListMem = (LONG_T *) malloc(m*sizeof(LONG_T));
    }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
    for (i=0; i<n; i++) {
        P[i].list = pListMem + numEdges[i];
        P[i].degree = in_degree[i];
        P[i].count = 0;
    }

#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "In-degree computation time: %lf seconds\n", elapsed_time_part);
        elapsed_time_part = get_seconds();
    }
#endif

    /* Allocate shared memory */ 
    if (tid == 0) {
        free(in_degree);
        free(numEdges);
        free(pSums);
        
        S   = (VERT_T *) malloc(n*sizeof(VERT_T));
        sig = (DOUBLE_T *) malloc(n*sizeof(DOUBLE_T));
        d   = (LONG_T *) malloc(n*sizeof(LONG_T));
        del = (DOUBLE_T *) calloc(n, sizeof(DOUBLE_T));
        
        start = (LONG_T *) malloc(MAX_NUM_PHASES*sizeof(LONG_T));
        end = (LONG_T *) malloc(MAX_NUM_PHASES*sizeof(LONG_T));
        psCount = (LONG_T *) malloc((nthreads+1)*sizeof(LONG_T));
    }

    /* local memory for each thread */  
    myS_size = (2*n)/nthreads;
    myS = (LONG_T *) malloc(myS_size*sizeof(LONG_T));
    num_traversals = 0;
    myCount = 0;

#ifdef _OPENMP    
#pragma omp barrier
#endif

#ifdef _OPENMP    
#pragma omp for
#endif
    for (i=0; i<n; i++) {
        d[i] = -1;
    }
 
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() -elapsed_time_part;
        fprintf(stderr, "BC initialization time: %lf seconds\n", elapsed_time_part);
        elapsed_time_part = get_seconds();
    }
#endif
   
    for (p=0; p<n; p++) {

        i = Srcs[p];
        //printf ("%d \n", i);
//         i = p;
        if (G->numEdges[i+1] - G->numEdges[i] == 0) {
            continue;
        } else {
            num_traversals++;
        }

        if (num_traversals == numV + 1) {
            break;
        }
        
        if (tid == 0) {
            sig[i] = 1;
            d[i] = 0;
            S[0] = i;
            start[0] = 0;
            end[0] = 1;
        }
        
        count = 1;
        phase_num = 0;

#ifdef _OPENMP       
#pragma omp barrier
#endif
        
        while (end[phase_num] - start[phase_num] > 0) {
            
            myCount = 0;
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for schedule(dynamic)
#endif
            for (vert = start[phase_num]; vert < end[phase_num]; vert++) {
                v = S[vert];
                for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {

                     if ((G->weight[j] & 7) == 0 && filter==1) continue; 

                        w = G->endV[j];
                        if (v != w) {

#ifdef _OPENMP                            
                            myLock = omp_test_lock(&vLock[w]);
                            if (myLock) { 
#endif             
                                /* w found for the first time? */ 
                                if (d[w] == -1) {
                                    if (myS_size == myCount) {
                                        /* Resize myS */
                                        myS_t = (LONG_T *)
                                            malloc(2*myS_size*sizeof(VERT_T));
                                        memcpy(myS_t, myS, myS_size*sizeof(VERT_T));
                                        free(myS);
                                        myS = myS_t;
                                        myS_size = 2*myS_size;
                                    }
                                    myS[myCount++] = w;
                                    d[w] = d[v] + 1;
                                    sig[w] = sig[v];
                                    P[w].list[P[w].count++] = v;
                                } else if (d[w] == d[v] + 1) {
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                }
#ifdef _OPENMP
                                omp_unset_lock(&vLock[w]);
                            } else {
                                if ((d[w] == -1) || (d[w] == d[v]+ 1)) {
                                    omp_set_lock(&vLock[w]);
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                    omp_unset_lock(&vLock[w]);
                                }
                            }
#endif
                            
                        }
                }
             }
            /* Merge all local stacks for next iteration */
            phase_num++; 

            psCount[tid+1] = myCount;

#ifdef _OPENMP
#pragma omp barrier
#endif


            if (tid == 0) {
                start[phase_num] = end[phase_num-1];
                psCount[0] = start[phase_num];
                for(k=1; k<=nthreads; k++) {
                    psCount[k] = psCount[k-1] + psCount[k];
                }
                end[phase_num] = psCount[nthreads];
            }
            
#ifdef _OPENMP           
#pragma omp barrier
#endif

            for (k = psCount[tid]; k < psCount[tid+1]; k++) {
                S[k] = myS[k-psCount[tid]];
            } 
            
#ifdef _OPENMP            
#pragma omp barrier
#endif
            count = end[phase_num];
        }
     
        phase_num--;

#ifdef _OPENMP        
#pragma omp barrier
#endif
            //printf ("%d\n", phase_num);

        while (phase_num > 0) {
#ifdef _OPENMP        
#pragma omp for
#endif
            for (j=start[phase_num]; j<end[phase_num]; j++) {
                w = S[j];
                for (k = 0; k<P[w].count; k++) {
                    v = P[w].list[k];
#ifdef _OPENMP
                    omp_set_lock(&vLock[v]);
#endif
                    del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
#ifdef _OPENMP
                    omp_unset_lock(&vLock[v]);
#endif
                }
                BC[w] += del[w];
            }

            phase_num--;
            
#ifdef _OPENMP
#pragma omp barrier
#endif            
        }

        
#ifdef _OPENMP
        chunkSize = n/nthreads;
#pragma omp for schedule(static, chunkSize)
#endif
        for (j=0; j<count; j++) {
            w = S[j];
            //fprintf (stderr, "w: %d\n", w);
            d[w] = -1;
            del[w] = 0;
            P[w].count = 0;
        }


#ifdef _OPENMP
#pragma omp barrier
#endif

    }
 
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() -elapsed_time_part;
        fprintf(stderr, "BC computation time: %lf seconds\n", elapsed_time_part);
    }
#endif

#ifdef _OPENMP
#pragma omp for
    for (i=0; i<n; i++) {
        omp_destroy_lock(&vLock[i]);
    }
#endif

    free(myS);
    
    if (tid == 0) { 
        free(S);
        free(pListMem);
        free(P);
        free(sig);
        free(d);
        free(del);
#ifdef _OPENMP
        free(vLock);
#endif
        free(start);
        free(end);
        free(psCount);
        elapsed_time = get_seconds() - elapsed_time;
        free(Srcs);
    }

    free_sprng(stream);
#ifdef _OPENMP
}    
#endif
    /* Verification */
#ifdef VERIFYK4
    double BCval;
    if (SCALE % 2 == 0) {
        BCval = 0.5*pow(2, 3*SCALE/2)-pow(2, SCALE)+1.0;
    } else {
        BCval = 0.75*pow(2, (3*SCALE-1)/2)-pow(2, SCALE)+1.0;
    }
    int failed = 0;
    for (int i=0; i<G->n; i++) {
        if (round(BC[i] - BCval) != 0) {
            failed = 1;
            break;
        }
    }
    if (failed) {
        fprintf(stderr, "Kernel 4 failed validation!\n");
    } else {
        fprintf(stderr, "Kernel 4 validation successful!\n");
    }
#endif

    for (int i = 0; i < G->n; i++) printf ("BC: %d %f\n",i, BC[i]);
    return elapsed_time;
}
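
betweennessCentrality allocates one omp_lock_t per vertex (vLock) so that concurrent updates to different vertices never contend on the same lock; the array is initialized, used around each shared update, and destroyed inside the parallel region. Below is a minimal sketch of that per-element lock array, assuming nothing from the code above: the shared array count[] simply stands in for sig/del/P, and a tiny LCG replaces the SPRNG stream.

#include <omp.h>
#include <cstdio>
#include <cstdlib>

int main() {
    const int n = 1000;                 // number of shared slots (think: vertices)
    const int updates = 100000;         // total number of random increments
    long *count = (long *) calloc(n, sizeof(long));
    omp_lock_t *vLock = (omp_lock_t *) malloc(n * sizeof(omp_lock_t));

    #pragma omp parallel
    {
        // give every slot its own lock (implicit barrier at the end of the loop)
        #pragma omp for
        for (int i = 0; i < n; i++)
            omp_init_lock(&vLock[i]);

        unsigned seed = 1u + (unsigned) omp_get_thread_num();

        // concurrent updates: lock only the slot actually being modified
        #pragma omp for
        for (int i = 0; i < updates; i++) {
            seed = seed * 1103515245u + 12345u;       // tiny LCG, just for the sketch
            int v = (int) (seed % (unsigned) n);
            omp_set_lock(&vLock[v]);
            count[v]++;
            omp_unset_lock(&vLock[v]);
        }

        #pragma omp for
        for (int i = 0; i < n; i++)
            omp_destroy_lock(&vLock[i]);
    }

    long total = 0;
    for (int i = 0; i < n; i++) total += count[i];
    printf("total = %ld (expected %d)\n", total, updates);

    free(vLock);
    free(count);
    return 0;
}
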
Example #20
0
static void unLockNode(IDnum preNodeID)
{
	omp_unset_lock(nodeLocks + preNodeID);
}
void vertex_betweenness_centrality_simple(graph_t* G, double* BC, long numSrcs) {

    attr_id_t *in_degree, *numEdges, *pSums;
#if RANDSRCS
    attr_id_t* Srcs; 
#endif
    long num_traversals = 0;
#ifdef _OPENMP    
    omp_lock_t* vLock;
    long chunkSize;
#endif
#ifdef DIAGNOSTIC
    double elapsed_time;
#endif
    int seed = 2387;

    /* The outer loop is parallelized in this case. Each thread does a BFS 
       and the vertex BC values are incremented atomically */   
#ifdef _OPENMP
#pragma omp parallel firstprivate(G)
    {
#endif
        attr_id_t *S;      /* stack of vertices in the order of non-decreasing 
                              distance from s. Also used to implicitly 
                              represent the BFS queue */
        plist_t* P;          /* predecessors of a vertex v on shortest paths 
                                from s */
        attr_id_t* pListMem;    
        double* sig;       /* No. of shortest paths */
        attr_id_t* d;      /* Length of the shortest path between every pair */
        double* del;       /* dependency of vertices */
        attr_id_t *start, *end;
        long MAX_NUM_PHASES;

        long i, j, k, p, count;
        long v, w, vert;
        long numV, n, m, phase_num;
        long tid, nthreads;
        int* stream;
#ifdef DIAGNOSTIC
        double elapsed_time_part;
#endif

#ifdef _OPENMP
        int myLock;
        tid = omp_get_thread_num();
        nthreads = omp_get_num_threads();
#else
        tid = 0;
        nthreads = 1;
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time = get_seconds();
            elapsed_time_part = get_seconds();
        }
#endif

        /* numV: no. of vertices to run BFS from = numSrcs */
        numV = numSrcs;
        n = G->n;
        m = G->m;

        /* Permute vertices */
        if (tid == 0) {
#if RANDSRCS
            Srcs = (attr_id_t *) malloc(n*sizeof(attr_id_t));
#endif
#ifdef _OPENMP
            vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
        }

#ifdef _OPENMP   
#pragma omp barrier
#pragma omp for
        for (i=0; i<n; i++) {
            omp_init_lock(&vLock[i]);
        }
#endif

        /* Initialize RNG stream */ 
        stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#if RANDSRCS
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            Srcs[i] = i;
        }

#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            j = n * sprng(stream);
            if (i != j) {
#ifdef _OPENMP
                int l1 = omp_test_lock(&vLock[i]);
                if (l1) {
                    int l2 = omp_test_lock(&vLock[j]);
                    if (l2) {
#endif
                        k = Srcs[i];
                        Srcs[i] = Srcs[j];
                        Srcs[j] = k;
#ifdef _OPENMP  
                        omp_unset_lock(&vLock[j]);
                    }
                    omp_unset_lock(&vLock[i]);
                }
#endif        
            }
        } 
#endif

#ifdef _OPENMP    
#pragma omp barrier
#endif

        MAX_NUM_PHASES = 50;

        /* Initialize predecessor lists */

        /* The size of the predecessor list of each vertex is bounded by 
           its in-degree. So we first compute the in-degree of every
           vertex */ 

        if (tid == 0) {
            in_degree = (attr_id_t *) calloc(n+1, sizeof(attr_id_t));
            numEdges = (attr_id_t *) malloc((n+1)*sizeof(attr_id_t));
            pSums = (attr_id_t *) malloc(nthreads*sizeof(attr_id_t));
        }


#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<m; i++) {
            v = G->endV[i];
#ifdef _OPENMP
            omp_set_lock(&vLock[v]);
#endif
            in_degree[v]++;
#ifdef _OPENMP
            omp_unset_lock(&vLock[v]);
#endif
        }

        prefix_sums(in_degree, numEdges, pSums, n);

        P  = (plist_t  *) calloc(n, sizeof(plist_t));
        pListMem = (attr_id_t *) malloc(m*sizeof(attr_id_t));

        for (i=0; i<n; i++) {
            P[i].list = pListMem + numEdges[i];
            P[i].degree = in_degree[i];
            P[i].count = 0;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "In-degree computation time: %lf seconds\n", 
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#endif

        /* Allocate shared memory */ 
        if (tid == 0) {
            free(in_degree);
            free(numEdges);
            free(pSums);
        }

        S   = (attr_id_t *) malloc(n*sizeof(attr_id_t));
        sig = (double *) malloc(n*sizeof(double));
        d   = (attr_id_t *) malloc(n*sizeof(attr_id_t));
        del = (double *) calloc(n, sizeof(double));

        start = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
        end = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));

#ifdef _OPENMP   
#pragma omp barrier
#endif

        for (i=0; i<n; i++) {
            d[i] = -1;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC initialization time: %lf seconds\n", 
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

#ifdef _OPENMP
#pragma omp for reduction(+:num_traversals)
#endif
        for (p=0; p<numV; p++) {
#if RANDSRCS
            i = Srcs[p];
#else
            i = p;
#endif
            if (G->numEdges[i+1] - G->numEdges[i] == 0) {
                continue;
            } else {
                num_traversals++;
            }

            sig[i] = 1;
            d[i] = 0;
            S[0] = i;
            start[0] = 0;
            end[0] = 1;

            count = 1;
            phase_num = 0;

            while (end[phase_num] - start[phase_num] > 0) {

                for (vert = start[phase_num]; vert < end[phase_num]; vert++) {
                    v = S[vert];
                    for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {
                        w = G->endV[j];
                        if (v != w) {
                            /* w found for the first time? */ 
                            if (d[w] == -1) {
                                S[count++] = w;
                                d[w] = d[v] + 1;
                                sig[w] = sig[v];
                                P[w].list[P[w].count++] = v;
                            } else if (d[w] == d[v] + 1) {
                                sig[w] += sig[v];
                                P[w].list[P[w].count++] = v;
                            }
                        }
                    }
                }

                phase_num++; 

                start[phase_num] = end[phase_num-1];
                end[phase_num] = count;
            }

            phase_num--;

            while (phase_num > 0) {
                for (j=start[phase_num]; j<end[phase_num]; j++) {
                    w = S[j];
                    for (k = 0; k<P[w].count; k++) {
                        v = P[w].list[k];
                        del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
                    }
#ifdef _OPENMP
                    omp_set_lock(&vLock[w]);
                    BC[w] += del[w];
                    omp_unset_lock(&vLock[w]);
#else
                    BC[w] += del[w];
#endif
                }

                phase_num--;
            }

            for (j=0; j<count; j++) {
                w = S[j];
                d[w] = -1;
                del[w] = 0;
                P[w].count = 0;
            }

        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC computation time: %lf seconds\n", 
                    elapsed_time_part);
        }
#endif


#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef _OPENMP
#pragma omp for
        for (i=0; i<n; i++) {
            omp_destroy_lock(&vLock[i]);
        }
#endif

        free(S);
        free(pListMem);
        free(P);
        free(sig);
        free(d);
        free(del);
        free(start);
        free(end);

        if (tid == 0) {

#ifdef _OPENMP
            free(vLock);
#endif

#if RANDSRCS
            free(Srcs);
#endif

#ifdef DIAGNOSTIC
            elapsed_time = get_seconds() - elapsed_time;
            fprintf(stderr, "Total time taken: %lf seconds\n", elapsed_time);
#endif

        }

        free_sprng(stream);

#ifdef _OPENMP
#pragma omp barrier
    }
#endif

}    
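
Both betweenness routines permute the source vertices by trying to grab the locks of positions i and j with omp_test_lock and swapping only when both are acquired, so threads that collide simply skip that swap instead of blocking. Below is a standalone sketch of this non-blocking pairwise swap; the permutation array, its size, and the little LCG used in place of sprng() are all invented for the sketch.

#include <omp.h>
#include <cstdio>

int main() {
    const int n = 16;
    int perm[16];
    omp_lock_t lock[16];

    for (int i = 0; i < n; i++) {
        perm[i] = i;
        omp_init_lock(&lock[i]);
    }

    #pragma omp parallel for
    for (int i = 0; i < n; i++) {
        unsigned seed = 7u * (unsigned) (i + 1);
        seed = seed * 1103515245u + 12345u;           // stand-in for the RNG stream
        int j = (int) (seed % (unsigned) n);
        if (i == j) continue;

        if (omp_test_lock(&lock[i])) {                // try to lock slot i, never block
            if (omp_test_lock(&lock[j])) {            // try slot j too; if it fails, skip the swap
                int tmp = perm[i];
                perm[i] = perm[j];
                perm[j] = tmp;
                omp_unset_lock(&lock[j]);
            }
            omp_unset_lock(&lock[i]);
        }
    }

    for (int i = 0; i < n; i++) {
        printf("%d ", perm[i]);
        omp_destroy_lock(&lock[i]);
    }
    printf("\n");
    return 0;
}
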
Example #22
0
/*
 * Computes clusters' centroids.
 */
static void compute_centroids(void)
{
	int i, j;       /* Loop indexes.        */
	int population; /* Centroid population. */

	start = timer_get();
	
	memcpy(lcentroids, CENTROID(rank*(ncentroids/nprocs)), lncentroids[rank]*dimension*sizeof(float));
	memset(&has_changed[rank*NUM_THREADS], 0, NUM_THREADS*sizeof(int));
	memset(centroids, 0, (ncentroids + DELTA*nprocs)*dimension*sizeof(float));
	memset(ppopulation, 0, (ncentroids + nprocs*DELTA)*sizeof(int));

	/* Compute partial centroids. */
	#pragma omp parallel for schedule(static) default(shared) private(i, j)
	for (i = 0; i < lnpoints; i++)
	{
		j = map[i]%NUM_THREADS;
		
		omp_set_lock(&lock[j]);
		
		vector_add(CENTROID(map[i]), POINT(i));
			
		ppopulation[map[i]]++;
		
		omp_unset_lock(&lock[j]);
	}
	
	end = timer_get();
	total += timer_diff(start, end);
	
	sync_pcentroids();

	sync_ppopulation();
	
	start = timer_get();

	/* Compute centroids. */
	#pragma omp parallel for schedule(static) default(shared) private(i, j, population)
	for (j = 0; j < lncentroids[rank]; j++)
	{
		population = 0;
		
		for (i = 0; i < nprocs; i++)
		{
			if (*POPULATION(i, j) == 0)
				continue;
			
			population += *POPULATION(i, j);
			
			if (i == rank)
				continue;
			
			vector_add(PCENTROID(rank, j), PCENTROID(i, j));
		}
		
		if (population > 1)
			vector_mult(PCENTROID(rank, j), 1.0/population);
		
		/* Cluster mean has changed. */
		if (!vector_equal(PCENTROID(rank, j), LCENTROID(j)))
		{
			has_changed[rank*NUM_THREADS + omp_get_thread_num()] = 1;
			vector_assign(LCENTROID(j), PCENTROID(rank, j));
		}
	}
	
	end = timer_get();
	total += timer_diff(start, end);
		
	sync_centroids();
		
	sync_status();
}
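
compute_centroids does not keep one lock per centroid; it maps each centroid index to one of NUM_THREADS locks (map[i]%NUM_THREADS), so only accumulations that land in the same stripe serialize. Below is a minimal sketch of that lock striping; NUM_STRIPES, the bucket assignment, and the plain double accumulator are assumptions standing in for the lock array, map[] and vector_add().

#include <omp.h>
#include <cstdio>

#define NUM_STRIPES 8

int main() {
    const int nbuckets = 100;
    const int npoints = 100000;
    double sum[100] = {0.0};                 // shared per-bucket accumulators
    omp_lock_t stripe[NUM_STRIPES];

    for (int s = 0; s < NUM_STRIPES; s++)
        omp_init_lock(&stripe[s]);

    #pragma omp parallel for schedule(static)
    for (int i = 0; i < npoints; i++) {
        int bucket = i % nbuckets;           // hypothetical point -> bucket assignment (plays the role of map[i])
        int s = bucket % NUM_STRIPES;        // several buckets share one lock
        omp_set_lock(&stripe[s]);
        sum[bucket] += 1.0;                  // stands in for vector_add() and ppopulation[]++
        omp_unset_lock(&stripe[s]);
    }

    for (int s = 0; s < NUM_STRIPES; s++)
        omp_destroy_lock(&stripe[s]);

    printf("sum[0] = %.0f (expected %d)\n", sum[0], npoints / nbuckets);
    return 0;
}
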
Example #23
0
 void OpenMPCounter::reset()
 {
     omp_set_lock(&_lock);
     _counter = 0;
     omp_unset_lock(&_lock);
 }
void YsOpenMPMutex::Unlock(void)
{
	omp_unset_lock(&lock);
}
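
The reset() and Unlock() methods above belong to small classes that wrap an omp_lock_t behind a plain mutex interface. Below is a self-contained sketch of such a wrapper, including the omp_init_lock/omp_destroy_lock calls the excerpts do not show; the class name and the usage in main are hypothetical.

#include <omp.h>
#include <cstdio>

// Minimal wrapper: the constructor and destructor own the lock's lifetime,
// Lock()/Unlock() bracket a critical section.
class OmpMutex {
public:
    OmpMutex()  { omp_init_lock(&lock_); }
    ~OmpMutex() { omp_destroy_lock(&lock_); }
    OmpMutex(const OmpMutex&) = delete;            // copying would double-destroy the lock
    OmpMutex& operator=(const OmpMutex&) = delete;
    void Lock()   { omp_set_lock(&lock_); }
    void Unlock() { omp_unset_lock(&lock_); }
private:
    omp_lock_t lock_;
};

int main() {
    OmpMutex m;
    long counter = 0;

    #pragma omp parallel for
    for (int i = 0; i < 100000; i++) {
        m.Lock();
        counter++;
        m.Unlock();
    }

    printf("counter = %ld\n", counter);   // always 100000, since every increment is guarded
    return 0;
}
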
Example #25
0
File: basefind.cpp Project: bcebere/ws30
	void Unlock() { omp_unset_lock(&lock); }
Example #26
0
void IptTracer::genLightPaths(omp_lock_t& cmdLock , vector<Path*>& lightPathList , bool isFirstIter)
{
#pragma omp parallel for
	for(int p=0; p<lightPathNum; p++)
	{
		if (!renderer->scene.usingGPU())
		{
			Ray lightRay = genEmissiveSurfaceSample(true , false);
			lightPathList[p] = new Path;
			samplePath(*lightPathList[p] , lightRay);
		}
		
		Path& lightPath = *lightPathList[p];

		if (lightPath.size() <= 1)
			continue;

		IptPathState lightState;
		lightState.originRay = &lightPath[0];

		Real cosAtLight = lightPath[0].getCosineTerm();

		lightState.throughput = lightPath[0].color * cosAtLight / 
			(lightPath[0].originProb * lightPath[0].directionProb *
			lightPath[1].originProb);
		lightState.indirContrib = vec3f(0.0);
		lightState.mergedPath = 1;
		/*
		fprintf(fp , "====================\n");
		vec3f decay = lightPath[0].getRadianceDecay((lightPath[0].origin - lightPath[1].origin).length());
		fprintf(fp , "l = 0 , thr = (%.8f,%.8f,%.8f) , color = (%.8f,%.8f,%.8f)\ncosine = %.8f , dirPdf = %.8f , oPdf = %.8f\ndecay=(%.8f,%.8f,%.8f)\n" , 
			lightState.throughput.x , lightState.throughput.y , lightState.throughput.z ,
			lightPath[0].color[0] , lightPath[0].color[1] , lightPath[0].color[2] ,
			lightPath[0].getCosineTerm() , lightPath[0].directionProb , lightPath[1].originProb ,
			decay.x , decay.y , decay.z);
		*/
		int nonSpecPathLength = 0;

		for(unsigned i = 1; i < lightPath.size(); i++)
		//for (unsigned i = 1; i < 2; i++)
		{
			Real dist = std::max((lightPath[i].origin - lightPath[i - 1].origin).length() , 1e-5f);
			vec3f decayFactor = lightPath[i - 1].getRadianceDecay(dist);
			lightState.throughput *= decayFactor;

			if(lightPath[i].contactObject && lightPath[i].contactObject->emissive())
				break;

			lightState.pos = lightPath[i].origin;
			lightState.lastRay = &lightPath[i - 1];
			lightState.ray = &lightPath[i];
			lightState.pathLen = i;

			if(lightPath[i].directionSampleType == Ray::RANDOM &&
				(lightPath[i].insideObject != NULL || lightPath[i].contactObject != NULL) && 
				(lightPath[i].origin != lightPath[i - 1].origin))
			{
				//if (lightPath[i].insideObject && !lightPath[i].contactObject)
				//	fprintf(fp , "path length = %d, dirContrib = (%.8f,%.8f,%.8f)\n" , 
				//		i , lightState.dirContrib[0] , lightState.dirContrib[1] , lightState.dirContrib[2]);

				omp_set_lock(&cmdLock);
				partialSubPathList.push_back(lightState);
				omp_unset_lock(&cmdLock);
			}

			if (i == lightPath.size() - 1)
				break;
			if (lightPath[i].direction.length() < 0.5f)
				break;

			vec3f scatterFactor = (lightPath[i].color * lightPath[i].getCosineTerm() / 
				(lightPath[i + 1].originProb * lightPath[i].directionProb));

			lightState.throughput *= scatterFactor;
			/*
			vec3f decay = lightPath[i].getRadianceDecay((lightPath[i].origin - lightPath[i + 1].origin).length());
			fprintf(fp , "l = %d , thr = (%.8f,%.8f,%.8f) , color = (%.8f,%.8f,%.8f)\ncosine = %.8f , dirPdf = %.8f , oPdf = %.8f\ndecay=(%.8f,%.8f,%.8f)\n" , 
				i , lightState.throughput.x , lightState.throughput.y , lightState.throughput.z ,
				lightPath[i].color[0] , lightPath[i].color[1] , lightPath[i].color[2] ,
				lightPath[i].getCosineTerm() , lightPath[i].directionProb , lightPath[i + 1].originProb ,
				decay.x , decay.y , decay.z);
			*/
			if (lightPath[i].directionSampleType == Ray::RANDOM && useWeight)
			{
				Real pdf = lightPath[i].directionProb;
				if (pdf < 1e-7f)
					break;

				Real weightFactor;

				Real volMergeScale = 1;
				Real originProb;
				Real dirProb;
				if (lightPath[i].contactObject)
				{
					if (isFirstIter || useUniformSur)
						originProb = 1.f / totArea;
					else
						originProb = lightPath[i].contactObject->getOriginProb(lightPath[i].contactObjectTriangleID);
					if (useUniformDir)
						dirProb = INV_2_PI;
					else
						dirProb = lightPath[i].getCosineTerm() / M_PI;
				}

				//if (lightPath[i].insideObject && lightPath[i].contactObject)
				//	printf("!!!\n");
				if (lightPath[i].insideObject && !lightPath[i].contactObject && lightPath[i].insideObject->isVolumetric())
				{
					volMergeScale = 4.f / 3.f * mergeRadius;

					if (isFirstIter || useUniformVol)
						originProb = 1.f / totVol;
					else
						originProb = lightPath[i].insideObject->getOriginProb(lightPath[i].origin);
					
					dirProb = 0.25f / M_PI;
				}

				weightFactor = connectFactor(pdf) /
					(connectFactor(pdf) + mergeFactor(&volMergeScale , &originProb , &dirProb , &lightPathNum));

				if (_isnan(weightFactor) || abs(pdf) < 1e-6f)
				{
					fprintf(err , "sample light path error, %.8f , %.8f\n" , connectFactor(pdf) , 
						mergeFactor(&volMergeScale , &originProb , &dirProb , &lightPathNum));
				}
				/*
				if (abs(volMergeScale - 1.f) < 1e-6)
					printf("surface %.8f\n" , weightFactor);
				else
					printf("volume %.8f %.8f\n" , weightFactor);
				*/

				//if (lightPath[i].contactObject && lightPath[i].contactObject->objectIndex == 7)
				lightState.throughput *= weightFactor;
			}
		}
	}

	lightPhotonNum = partialPhotonNum = partialSubPathList.size();
}
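
genLightPaths appends light sub-path states to the shared partialSubPathList from inside a parallel for by taking cmdLock around the push_back (the same pattern appears in genIntermediatePaths below). Below is a stripped-down sketch of that guarded append; the container, the filter condition and the stored values are placeholders, not the tracer's actual data.

#include <omp.h>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> shared_list;          // shared container, protected by list_lock
    omp_lock_t list_lock;
    omp_init_lock(&list_lock);

    #pragma omp parallel for
    for (int p = 0; p < 10000; p++) {
        int value = p * p;                 // placeholder for the per-iteration state
        if (value % 3 == 0) {              // only some iterations publish a result
            omp_set_lock(&list_lock);
            shared_list.push_back(value);  // std::vector::push_back is not thread-safe, hence the lock
            omp_unset_lock(&list_lock);
        }
    }

    omp_destroy_lock(&list_lock);
    printf("collected %zu entries\n", shared_list.size());
    return 0;
}
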
Example #27
0
File: macros.hpp Project: aslanyan/cosmopp
 inline void unsetLock() { omp_unset_lock(&lock_); }
Example #28
0
void IptTracer::genIntermediatePaths(omp_lock_t& cmdLock , vector<Path*>& interPathList)
{
#pragma omp parallel for
	for(int p=0; p<interPathNum; p++)
	{
		if (!renderer->scene.usingGPU())
		{
			Ray interRay = genIntermediateSamples(renderer->scene);
			interPathList[p] = new Path;
			samplePath(*interPathList[p] , interRay);
		}

		Path& interPath = *interPathList[p];

		//fprintf(fp , "=================\n");
		partPathMergeIndex[p].clear();

		if (interPath.size() <= 1)
			continue;

		IptPathState interState;
		interState.originRay = &interPath[0];

		interState.throughput = interPath[0].color * interPath[0].getCosineTerm() / 
			(interPath[0].originProb * interPath[0].directionProb * interPath[1].originProb);
		interState.indirContrib = vec3f(0.f);
		interState.mergedPath = 0;

		//if (intensity(interState.throughput) > 30.f)
		//	continue;

		/*
		fprintf(fp , "====================\n");
		vec3f decay = interPath[0].getRadianceDecay((interPath[0].origin - interPath[1].origin).length());
		fprintf(fp , "l = 0 , thr = (%.8f,%.8f,%.8f) , color = (%.8f,%.8f,%.8f)\ncosine = %.8f , dirPdf = %.8f , oPdf = %.8f\ndecay=(%.8f,%.8f,%.8f)\n" , 
			interState.throughput.x , interState.throughput.y , interState.throughput.z ,
			interPath[0].color[0] , interPath[0].color[1] , interPath[0].color[2] ,
			interPath[0].getCosineTerm() , interPath[0].directionProb , interPath[1].originProb ,
			decay.x , decay.y , decay.z);
		*/

		for(unsigned i = 1; i < interPath.size(); i++)
		//for (unsigned i = 1; i < 2; i++)
		{
			Real dist = std::max((interPath[i].origin - interPath[i - 1].origin).length() , 1e-5f);
			interState.throughput *= interPath[i - 1].getRadianceDecay(dist);

			if(interPath[i].contactObject && interPath[i].contactObject->emissive())
				break;

			interState.pos = interPath[i].origin;
			interState.lastRay = &interPath[i - 1];
			interState.ray = &interPath[i];
			interState.pathLen = i;
			
			if(interPath[i].directionSampleType != Ray::DEFINITE &&
				(interPath[i].insideObject != NULL || interPath[i].contactObject != NULL) &&
				(interPath[i].origin != interPath[i - 1].origin))
				//(interPath[i].insideObject && !interPath[i].contactObject)) // only volume
			{
				//fprintf(fp , "path length = %d, dirContrib = (%.8f,%.8f,%.8f)\n" , 
				//	i , interState.dirContrib[0] , interState.dirContrib[1] , interState.dirContrib[2]);

				omp_set_lock(&cmdLock);
				partialSubPathList.push_back(interState);
				partPathMergeIndex[p].push_back(partialSubPathList.size() - 1);
				omp_unset_lock(&cmdLock);
			}

			if (i == interPath.size() - 1)
				break;
			if (interPath[i].direction.length() < 0.5f)
				break;

			vec3f scatterFactor = (interPath[i].color * interPath[i].getCosineTerm() / 
				(interPath[i + 1].originProb * interPath[i].directionProb));

			interState.throughput *= scatterFactor;
			
			/*
			vec3f decay = interPath[i].getRadianceDecay((interPath[i].origin - interPath[i + 1].origin).length());
			fprintf(fp , "l = %d , thr = (%.8f,%.8f,%.8f) , color = (%.8f,%.8f,%.8f)\ncosine = %.8f , dirPdf = %.8f , oPdf = %.8f\ndecay=(%.8f,%.8f,%.8f)\n" , 
				i , interState.throughput.x , interState.throughput.y , interState.throughput.z ,
				interPath[i].color[0] , interPath[i].color[1] , interPath[i].color[2] ,
				interPath[i].getCosineTerm() , interPath[i].directionProb , interPath[i + 1].originProb ,
				decay.x , decay.y , decay.z);
			*/

			if (interPath[i].directionSampleType != Ray::DEFINITE && useWeight)
			{
				Real pdf = interPath[i].directionProb;
				if (pdf < 1e-7f)
					break;

				Real weightFactor;

				Real volMergeScale = 1.f;
				Real originProb;
				Real dirProb;
				if (interPath[i].contactObject)
				{
					if (useUniformSur)
						originProb = 1.f / totArea;
					else
						originProb = interPath[i].contactObject->getOriginProb(interPath[i].contactObjectTriangleID);
					if (useUniformDir)
						dirProb = INV_2_PI;
					else
						dirProb = interPath[i].getCosineTerm() / M_PI;
				}

				//if (interPath[i].insideObject && interPath[i].contactObject)
				//	printf("!!!\n");
				if (interPath[i].insideObject && !interPath[i].contactObject && interPath[i].insideObject->isVolumetric())
				{
					volMergeScale = 4.f / 3.f * mergeRadius;
					if (useUniformVol)
						originProb = 1.f / totVol;
					else
						originProb = interPath[i].insideObject->getOriginProb(interPath[i].origin);
					dirProb = 0.25f / M_PI;
				}
				
				weightFactor = connectFactor(pdf) /
					(connectFactor(pdf) + mergeFactor(&volMergeScale , &originProb , &dirProb , &partialPathNum));

				if (_isnan(weightFactor) || abs(pdf) < 1e-6f)
				{
					fprintf(err , "sample inter path error, %.8f , %.8f\n" , connectFactor(pdf) , 
						mergeFactor(&volMergeScale , &originProb , &dirProb , &partialPathNum));
				}

				//if (interPath[i].contactObject && interPath[i].contactObject->objectIndex == 7)
				interState.throughput *= weightFactor;
			}
		}
	}

	partialPhotonNum = partialSubPathList.size();
}
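The example above serializes access to two shared containers with a single lock: each worker thread appends its partial sub-path state to partialSubPathList and records the new element's index in partPathMergeIndex under the same cmdLock, so the push_back and the index bookkeeping stay consistent. Below is a minimal, self-contained sketch of that pattern; the State struct and the container names are hypothetical stand-ins for the renderer's types, not its actual code.

#include <omp.h>
#include <cstdio>
#include <vector>

struct State { int pathLen; };   // hypothetical stand-in for the sub-path state

int main() {
    std::vector<State> sharedStates;                   // shared result list
    std::vector<std::vector<size_t>> perPathIndex(8);  // indices recorded per producer

    omp_lock_t listLock;
    omp_init_lock(&listLock);

#pragma omp parallel for schedule(dynamic)
    for (int p = 0; p < 8; p++) {
        for (int i = 1; i < 4; i++) {
            State s{ i };
            // The push_back and the index bookkeeping must appear atomic together,
            // so both happen under the same lock, as in the example above.
            omp_set_lock(&listLock);
            sharedStates.push_back(s);
            perPathIndex[p].push_back(sharedStates.size() - 1);
            omp_unset_lock(&listLock);
        }
    }

    omp_destroy_lock(&listLock);
    std::printf("stored %zu states\n", sharedStates.size());
    return 0;
}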
Example #29
0
void Shape::splitshapes(vector<Shape*> &shapes, ViewProgress *progress)
{
  int n_tr = (int)triangles.size();
  if (progress) progress->start(_("Split Shapes"), n_tr);
  int progress_steps = max(1,(int)(n_tr/100));
  vector<bool> done(n_tr);
  bool cont = true;
  // make list of adjacent triangles for each triangle
  vector< vector<uint> > adj(n_tr);
  if (progress) progress->set_label(_("Split: Sorting Triangles ..."));
#ifdef _OPENMP
  omp_lock_t progress_lock;
  omp_init_lock(&progress_lock);
#pragma omp parallel for schedule(dynamic)
#endif
  for (int i = 0; i < n_tr; i++) {
    if (progress && i%progress_steps==0) {
#ifdef _OPENMP
      omp_set_lock(&progress_lock);
#endif
      cont = progress->update(i);
#ifdef _OPENMP
      omp_unset_lock(&progress_lock);
#endif
    }
    vector<uint> trv;
    for (int j = 0; j < n_tr; j++) {
      if (i!=j) {
	bool add = false;
	if (j<i) // maybe(!) we have it already
	  for (uint k = 0; k<adj[j].size(); k++) {
	    if ((int)adj[j][k] == i) {
	      add = true; break;
	    }
	  }
	add |= (triangles[i].isConnectedTo(triangles[j], 0.01));
	if (add) trv.push_back(j);
      }
    }
    adj[i] = trv;
    if (!cont) i=n_tr;
  }

  if (progress) progress->set_label(_("Split: Building shapes ..."));


  // triangle indices of shapes
  vector< vector<uint> > shape_tri;

  for (int i = 0; i < n_tr; i++) done[i] = false;
  for (int i = 0; i < n_tr; i++) {
    if (progress && i%progress_steps==0)
      cont = progress->update(i);
    if (!done[i]){
      cerr << _("Shape ") << shapes.size()+1 << endl;
      vector<uint> current;
      addtoshape(i, adj, current, done);
      Shape *shape = new Shape();
      shapes.push_back(shape);
      shapes.back()->triangles.resize(current.size());
      for (uint t = 0; t < current.size(); t++)
	shapes.back()->triangles[t] = triangles[current[t]];
      shapes.back()->CalcBBox();
    }
    if (!cont) i=n_tr;
  }

  if (progress) progress->stop(_("Done"));
}
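The example above takes the lock only around the GUI progress callback (progress->update), which is not thread-safe, while the adjacency search itself runs fully in parallel; everything lock-related is guarded by #ifdef _OPENMP so the file still builds without OpenMP. A minimal sketch of that guarded-progress pattern follows; report() is a hypothetical stand-in for the progress callback.

#include <cstdio>
#ifdef _OPENMP
#include <omp.h>
#endif

// Hypothetical stand-in for a non-thread-safe progress callback.
static void report(int i) { std::printf("progress: %d\n", i); }

int main() {
    const int n = 1000;
#ifdef _OPENMP
    omp_lock_t progressLock;
    omp_init_lock(&progressLock);
#pragma omp parallel for schedule(dynamic)
#endif
    for (int i = 0; i < n; i++) {
        if (i % 100 == 0) {
            // Only the progress report is serialized; the per-iteration
            // work outside this block stays fully parallel.
#ifdef _OPENMP
            omp_set_lock(&progressLock);
#endif
            report(i);
#ifdef _OPENMP
            omp_unset_lock(&progressLock);
#endif
        }
        // ... per-iteration work would go here ...
    }
#ifdef _OPENMP
    omp_destroy_lock(&progressLock);
#endif
    return 0;
}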
Example #30
0
vector<vec3f> PhotonMap::renderPixels(const Camera& camera){
	uint width = camera.width, height = camera.height;
	std::vector<vec3f> pixelColors(width * height, vec3f(0,0,0));
		
	omp_init_lock(&surfaceHashGridLock);
	omp_init_lock(&volumeHashGridLock);
	omp_init_lock(&debugPrintLock);

	//std::vector<int> pixelMaps(pixelColors.size(), 0);

	preprocessEmissionSampler();
		
	mRadius = mBaseRadius;

	clock_t startTime = clock();

	for(uint s = 0; s < spp; s++){
		std::cout << "iteration : " << s << std::endl;
		
		std::vector<vec3f> oneIterColors(pixelColors.size(), vec3f(0,0,0));
#ifdef PPM
		//if (renderer->scene.getTotalVolume() > 1e-6f)
		if (true)
		{
			rayMarching = true;
			mRadius = MAX(mBaseRadius * powf(powf(s+1 , mAlpha-1) , 1.f / 3.f) , EPSILON);
		}
		else
		{
			rayMarching = false;
			mRadius = MAX(mBaseRadius * sqrt(powf(s+1, mAlpha-1)), EPSILON);
		}
#endif
		std::vector<Path*> pixelLightPaths(mPhotonsNum, NULL);
		std::vector<LightPoint> surfaceLightVertices(0);
		std::vector<LightPoint> volumeLightVertices(0);

		surfaceHashGrid.Reserve(pixelColors.size());
		volumeHashGrid.Reserve(pixelColors.size());

		// step1: sample light paths and build range search struct independently for surface and volume
#pragma omp parallel for
		for(int p = 0; p < mPhotonsNum; p++){
			Ray lightRay = genEmissiveSurfaceSample(true , false);
			pixelLightPaths[p] = new Path;
			Path &lightPath = *pixelLightPaths[p];
			samplePath(lightPath, lightRay);
			for(int i = 1; i < lightPath.size(); i++){
				// light is not reflective
				if(lightPath[i].contactObject && lightPath[i].contactObject->emissive())
					break;
				// only store non-specular vertices (skip definite/specular bounces)
				if(lightPath[i].directionSampleType == Ray::DEFINITE)
					continue;
				LightPoint lightPoint;
				lightPoint.position = lightPath[i].origin;
				lightPoint.indexInThePath = i;
				lightPoint.pathThePointIn = &lightPath;
				lightPoint.photonType = lightPath[i].photonType;
				if(lightPoint.photonType == Ray::OUTVOL){
					omp_set_lock(&surfaceHashGridLock);
					surfaceLightVertices.push_back(lightPoint);
					omp_unset_lock(&surfaceHashGridLock);
				}
				if(lightPoint.photonType == Ray::INVOL){
					omp_set_lock(&volumeHashGridLock);
					volumeLightVertices.push_back(lightPoint);
					omp_unset_lock(&volumeHashGridLock);
				}
			}
		}
		std::cout<< "vol vertices= " << volumeLightVertices.size() << " sur vertices= " << surfaceLightVertices.size() << std::endl;
			
		surfaceHashGrid.Build(surfaceLightVertices, mRadius);
		volumeHashGrid.Build(volumeLightVertices, mRadius);

		std::cout<< "finish building hashgrid" << std::endl;

		// step2: calculate pixel colors by progressive photon mapping
#pragma omp parallel for
		for(int p = 0; p < pixelColors.size(); p++){
			Path eyePath;
			if (rayMarching)
				sampleMergePath(eyePath, camera.generateRay(p), 0);
			else
				samplePath(eyePath, camera.generateRay(p));

			//fprintf(fp , "===================\n");
			//for (int i = 0; i < eyePath.size(); i++)
			//{
			//	fprintf(fp , "l=%d, bsdf=(%.8f,%.8f,%.8f), originPdf=%.8f, dirPdf=%.8f\n" , i , eyePath[i].color.x ,
			//		eyePath[i].color.y , eyePath[i].color.z , eyePath[i].originProb , eyePath[i].directionProb);
			//}

			/*if(eyePath[1].contactObj && eyePath[1].contactObj->anisotropic()){
				pixelMaps[p] = 1;
			}*/
			throughputByDensityEstimation(oneIterColors[p], eyePath, surfaceLightVertices, volumeLightVertices);
		}
		/*std::ofstream fout(engine->renderer->name + engine->scene.name+"pixelMap.txt");
		for(int p = 0; p < pixelMaps.size(); p++)
			fout << pixelMaps[p] << ' ' ;
		fout << std::endl;
		fout.close();*/

		std::cout << "calculation done" << std::endl;

		for(uint i = 0; i < pixelColors.size(); i++){
			pixelColors[i] *= s / float(s+1);
			pixelColors[i] += camera.eliminateVignetting(oneIterColors[i], i) / (s + 1);
			delete pixelLightPaths[i];
		}

		unsigned nowTime = (float)(clock() - startTime) / 1000;
		//if (nowTime > recordTime)
		if (s % outputIter == 0)
		{
			showCurrentResult(pixelColors , &nowTime , &s);
			//showCurrentResult(pixelColors , &lastTime , &s);
			//recordTime += timeInterval;
		}
		else
			showCurrentResult(pixelColors);
	}
	return pixelColors;
}
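The example above creates one lock per shared photon list (surfaceHashGridLock and volumeHashGridLock), so threads inserting surface vertices never wait on threads inserting volume vertices. A minimal sketch of that per-container locking is given below; isSurfaceSample() is a hypothetical stand-in for the photonType test in the real code.

#include <omp.h>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for deciding which list a sample belongs to.
static bool isSurfaceSample(int i) { return (i % 3) != 0; }

int main() {
    std::vector<int> surfaceSamples, volumeSamples;

    omp_lock_t surfaceLock, volumeLock;
    omp_init_lock(&surfaceLock);
    omp_init_lock(&volumeLock);

#pragma omp parallel for
    for (int i = 0; i < 10000; i++) {
        if (isSurfaceSample(i)) {
            // One lock per container: surface insertions never block on volume insertions.
            omp_set_lock(&surfaceLock);
            surfaceSamples.push_back(i);
            omp_unset_lock(&surfaceLock);
        } else {
            omp_set_lock(&volumeLock);
            volumeSamples.push_back(i);
            omp_unset_lock(&volumeLock);
        }
    }

    omp_destroy_lock(&surfaceLock);
    omp_destroy_lock(&volumeLock);
    std::printf("surface=%zu volume=%zu\n", surfaceSamples.size(), volumeSamples.size());
    return 0;
}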