/**
 * Set up the per-thread libevent state for a task worker.
 *
 * Creates the thread's event base, registers a persistent read event on the
 * notification pipe (me->notify_receive_fd) dispatching to
 * me->ttm->thread_libevent_process, and allocates + initializes the thread's
 * task queue.
 *
 * @param me  per-thread state; must be non-NULL with notify_receive_fd and
 *            ttm already populated by the caller
 *
 * Exits the process on any allocation or event-registration failure — the
 * worker cannot operate without these resources.
 */
static void setup_task_thread(task_thread_t *me)
{
    if (me == NULL) {
        LM_ERR("setup_task_thread: task_thread me is null\n");
        return;
    }

    me->base = event_base_new();
    if (!me->base) {
        /* unified on LM_ERR: original mixed fprintf(stderr,...) and LM_ERR */
        LM_ERR("Can't allocate event base\n");
        exit(EXIT_FAILURE);
    }

    /* Listen for notifications from other threads */
    event_assign(&me->notify_event, me->base, me->notify_receive_fd,
                 EV_READ | EV_PERSIST, me->ttm->thread_libevent_process, me);

    if (event_add(&me->notify_event, 0) == -1) {
        LM_ERR("Can't monitor libevent notify pipe\n");
        exit(EXIT_FAILURE);
    }

    /* no cast on malloc in C; sizeof *ptr ties the size to the variable */
    me->new_task_queue = malloc(sizeof *me->new_task_queue);
    if (me->new_task_queue == NULL) {
        LM_ERR("Failed to allocate memory for connection queue\n");
        exit(EXIT_FAILURE);
    }
    task_queue_init(me->new_task_queue);
}
int64_t ceParallelRadixJoin::join_init_run (vector<SAP_UINT> *relR, vector<SAP_UINT> *relC, int nthreads) { //#define CPU_ZERO(PTR) (*(PTR) = 0) //#define CPU_SET(N, PTR) (*(PTR) = (N)) //#define pthread_attr_setaffinity_np(ATTR, SZ, PTR) setaffinity(ATTR, SZ, PTR) //#define sched_setaffinity(A, SZ, PTR) setaffinity(A, SZ, PTR) int i, rv; pthread_t tid[nthreads]; pthread_attr_t attr; pthread_barrier_t barrier; int set; //cpu_set_t set; arg_t args[nthreads]; int32_t ** histR, ** histS; //tuple_t * tmpRelR, * tmpRelS; int32_t numperthr[2]; int64_t result = 0; task_queue_t * part_queue, * join_queue; task_queue_t * skew_queue; task_t * skewtask = NULL; skew_queue = task_queue_init(FANOUT_PASS1); part_queue = task_queue_init(FANOUT_PASS1); join_queue = task_queue_init((1<<NUM_RADIX_BITS)); /* allocate temporary space for partitioning */ //tmpRelR = (tuple_t*) alloc_aligned(relR->size() * sizeof(SAP_UINT) + // RELATION_PADDING); //tmpRelS = (tuple_t*) alloc_aligned(relS->size() * sizeof(SAP_UINT) + // RELATION_PADDING); //MALLOC_CHECK((tmpRelR && tmpRelS)); vector<SAP_UINT> tmpRelR, tmpRelS; /** Not an elegant way of passing whether we will numa-localize, but this feature is experimental anyway. 
*/ if(numalocalize) { numa_localize(&tmpRelR, relR->size(), nthreads); numa_localize(&tmpRelS, relS->size(), nthreads); } /* allocate histograms arrays, actual allocation is local to threads */ histR = (SAP_UINT**) alloc_aligned(nthreads * sizeof(SAP_UINT*)); histS = (SAP_UINT**) alloc_aligned(nthreads * sizeof(SAP_UINT*)); MALLOC_CHECK((histR && histS)); rv = pthread_barrier_init(&barrier, NULL, nthreads); if(rv != 0){ printf("[ERROR] Couldn't create the barrier\n"); exit(EXIT_FAILURE); } pthread_attr_init(&attr); /* first assign chunks of relR & relS for each thread */ numperthr[0] = relR->size() / nthreads; numperthr[1] = relS->size() / nthreads; for(i = 0; i < nthreads; i++){ int cpu_idx = get_cpu_id(i); //DEBUGMSG(1, "Assigning thread-%d to CPU-%d\n", i, cpu_idx); *(&set) = 0; //CPU_ZERO(&set); *(&set) = cpu_idx; //CPU_SET(cpu_idx, &set); pthread_attr_setaffinity_np(&attr, sizeof(int), &set); int32_t numR = (i == (nthreads-1)) ? (relR->size() - i * numperthr[0]) : numperthr[0]; int32_t numS = (i == (nthreads-1)) ? 
(relS->size() - i * numperthr[1]) : numperthr[1]; vector<SAP_UINT> cpRelR(relR.begin() + (i * numperthr[0]), relR.begin + numR); args[i].relR = cpRelR; //args[i].relR = relR->tuples + i * numperthr[0]; args[i].tmpR = tmpRelR; args[i].histR = histR; vector<SAP_UINT> cpRelS(relS.begin() + (i * numperthr[0]), relS.begin + numS); args[i].relS = cpRelS; //args[i].relS = relS->tuples + i * numperthr[1]; args[i].tmpS = tmpRelS; args[i].histS = histS; args[i].totalR = relR->size(); args[i].totalS = relS->size(); args[i].my_tid = i; args[i].part_queue = part_queue; args[i].join_queue = join_queue; args[i].skew_queue = skew_queue; args[i].skewtask = &skewtask; args[i].barrier = &barrier; args[i].nthreads = nthreads; rv = pthread_create(&tid[i], &attr, prj_thread, (void*)&args[i]); //if (rv){ // printf("[ERROR] return code from pthread_create() is %d\n", rv); // exit(-1); //} } /* wait for threads to finish */ for(i = 0; i < nthreads; i++){ pthread_join(tid[i], NULL); result += args[i].result; } /* #define ABSDIFF(X,Y) (((X) > (Y)) ? ((X)-(Y)) : ((Y)-(X))) */ //fprintf(stdout, "TID JTASKS T1.1 T1.1-IDLE T1.2 T1.2-IDLE "\ // "T3 T3-IDLE T4 T4-IDLE T5 T5-IDLE\n"); //for(i = 0; i < nthreads; i++){ // synctimer_t * glob = args[0].globaltimer; // synctimer_t * local = & args[i].localtimer; // fprintf(stdout, // "%d %d %llu %llu %llu %llu %llu %llu %llu %llu "\ // "%llu %llu\n", // (i+1), args[i].parts_processed, local->sync1[0], // glob->sync1[0] - local->sync1[0], // local->sync1[1] - glob->sync1[0], // glob->sync1[1] - local->sync1[1], // local->sync3 - glob->sync1[1], // glob->sync3 - local->sync3, // local->sync4 - glob->sync3, // glob->sync4 - local->sync4, // local->finish_time - glob->sync4, // glob->finish_time - local->finish_time); //} /* clean up */ for(i = 0; i < nthreads; i++) { free(histR[i]); free(histS[i]); } free(histR); free(histS); task_queue_free(part_queue); task_queue_free(join_queue); task_queue_free(skew_queue); free(tmpRelR); free(tmpRelS); return result; }