Example #1
0
/**
 * Barrier implemented as a reduction followed by a broadcast.
 *
 * All callers first take part in mp_reduce(). Afterwards the thread whose
 * core id equals get_sequentializer() calls mp_send_ab(), while every other
 * thread waits in mp_receive_forward(0).
 *
 * When QRM_DBG_ENABLED is defined, the barrier counter _num_barrier is
 * incremented on entry and both the reduced value and the received value
 * are checked against it.
 *
 * @param measurement  Optional; if non-NULL it is set to bench_tsc() right
 *                     after the reduction. NOTE(review): this store only
 *                     exists in QRM_DBG_ENABLED builds - confirm that
 *                     callers do not expect a value otherwise.
 */
void mp_barrier(cycles_t *measurement)
{
    coreid_t tid = get_core_id();

#ifdef QRM_DBG_ENABLED
    ++_num_barrier;
    // Pre-set to the expected value so that the check after the broadcast
    // also passes on the sequentializer, which never assigns it from a
    // receive.
    uint32_t _num_barrier_recv = _num_barrier;
#endif

    debug_printfff(DBG__REDUCE, "barrier enter #%d\n", _num_barrier);

    // Reduction
    // --------------------------------------------------
    // In debug builds the value returned by mp_reduce() is captured in _tmp;
    // otherwise the call is made for its effect only.
#ifdef QRM_DBG_ENABLED
    uint32_t _tmp =
#endif
    mp_reduce(_num_barrier);

#ifdef QRM_DBG_ENABLED
    // Sanity check: on the sequentializer the reduced value must equal the
    // barrier counter multiplied by the number of threads.
    if (tid==get_sequentializer()) {
        assert (_tmp == get_num_threads()*_num_barrier);
    }
    if (measurement)
        *measurement = bench_tsc();

#endif

    // Broadcast
    // --------------------------------------------------
    if (tid == get_sequentializer()) {
        mp_send_ab(_num_barrier);

    } else {
        // In debug builds the received value is kept for the check below.
#ifdef QRM_DBG_ENABLED
        _num_barrier_recv =
#endif
            mp_receive_forward(0);
    }

#ifdef QRM_DBG_ENABLED
    // Print both values before asserting so a mismatch is visible in the log.
    if (_num_barrier_recv != _num_barrier) {
    debug_printf("ASSERTION fail %d != %d\n", _num_barrier_recv, _num_barrier);
    }
    assert (_num_barrier_recv == _num_barrier);

    // Add a shared memory barrier to absolutely make sure that
    // everybody finished the barrier before leaving - this simplifies
    // debugging, as the program will get stuck if barriers are
    // broken, rather than some threads (wrongly) continuing and
    // causing problems somewhere else
#if 0 // Enable separately
    debug_printfff(DBG_REDUCE, "finished barrier .. waiting for others\n");
    shl_barrier_shm(get_num_threads());
#endif
#endif

    debug_printfff(DBG__REDUCE, "barrier complete #%d\n", _num_barrier);
}
/// Adds the convolution of vec1 and vec2 into conv, spreading the output
/// indices over jobs executed by `pool`.
///
/// Output entry ii accumulates vec1[k]*vec2[j] over index pairs with
/// j + k == ii + init, limited by the bounds min1/max1 and min2/max2 that
/// _trim_vec reports for each input (per the original author's note it trims
/// leading/trailing entries that are zero, i.e. about 1e-30).
///
/// @param vec1  First input sequence.
/// @param vec2  Second input sequence.
/// @param conv  Output; contributions are added to its current contents.
/// @param init  Shift applied to the output index (see the relation above).
/// @param pool  Thread pool on which the jobs are enqueued.
static void _convolution(const my_Dvector& vec1, const my_Dvector& vec2, my_Dvector& conv, int init, ThreadPool& pool)
{
    // To do the convolution I trim the beginning and ending points if they are zero (1e-30);
    int min1 (0), max1 (vec1.size());
    _trim_vec(vec1, min1, max1);

    int min2 (0), max2 (vec2.size());
    _trim_vec(vec2, min2, max2);

    // Output index range [ini, fin) that is split among the jobs.
    int ini = max(min1+min2-init,0);
    int fin = min(int(conv.size()), max1 + max2 - 1);

    // Empty or inverted output range: nothing to add. Returning here also
    // keeps a negative span from being converted into a huge unsigned job
    // count below.
    if (fin <= ini) return;

    const int span = fin - ini;
    unsigned int nr_th = get_num_threads();
    // One job per ~100 output entries, but at least one per thread (capped by
    // the number of output entries).
    unsigned int nr_job = max(span/100, min(int(nr_th), span));
    my_Ivector lims (get_bounds(ini, fin, nr_job));
    std::vector< std::future<int> > results;

    // Job body: fills output entries [com, ter). Jobs write disjoint entries
    // of v as long as the ranges taken from lims do not overlap.
    auto func = [&](my_Dvector& v, int com, int ter)
    {
        for (int ii=com;ii<ter;++ii){
            // Skip the leading part of vec2 whose partner index in vec1 would
            // lie above max1.
            // NOTE(review): when delta > 0 the first k equals max1; confirm
            // that max1 is a valid index of vec1 after _trim_vec.
            const int delta = max(ii+init-min2-max1,0);
            for (int j=min2+delta,k=(ii+init-min2-delta);j<max2 && k>=min1;++j,--k){
                v[ii] += vec1[k]*vec2[j];
            }
        }
        return 1;
    };

    for (unsigned int i=0;i<nr_job;++i) results.emplace_back(pool.enqueue(
        func, ref(conv), lims[i], lims[i+1]));
    // Block until every job has finished before conv is used by the caller.
    for(auto && result: results) result.get();
}
/// "Same"-size convolution of vec1 with vec2, evaluated in parallel on `pool`.
///
/// The returned vector has vec1.size() entries. Entry ii is the sum of
/// vec1[k]*vec2[j] over all in-range index pairs with
/// j + k == ii + vec2.size()/2.
my_Dvector convolution_same_orig(const my_Dvector& vec1, const my_Dvector& vec2, ThreadPool& pool)
{
    my_Dvector output (vec1.size(),0.0);
    int centre = vec2.size()/2;

    unsigned int worker_count = get_num_threads();
    my_Ivector bounds (get_bounds(0,output.size()));
    std::vector< std::future<int> > pending;

    // Job body: computes output entries [first, last).
    auto job = [&](my_Dvector& dst, int first, int last)
    {
        for (int out_idx=first; out_idx<last; ++out_idx){
            int k = out_idx+centre;
            for (int j=0; j<vec2.size() && k>=0; ++j,--k){
                // Pairs whose vec1 index lies past the end contribute nothing.
                if (k>=vec1.size()) continue;
                dst[out_idx] += vec1[k]*vec2[j];
            }
        }
        return 1;
    };

    // One job per thread; consecutive entries of bounds delimit each range.
    for (unsigned int w=0; w<worker_count; ++w){
        pending.emplace_back(pool.enqueue(
                        job, ref(output), bounds[w], bounds[w+1]));
    }
    // Wait for every job before handing the result back.
    for(auto && done: pending) done.get();

    return output;
}
uint64_t parallel_nth_prime(int64_t n, uint64_t start)
{
  ParallelPrimeSieve pps;
  pps.setSieveSize(get_sieve_size());
  pps.setNumThreads(get_num_threads());
  return pps.nthPrime(n, start);
}
uint64_t parallel_count_sextuplets(uint64_t start, uint64_t stop)
{
  ParallelPrimeSieve pps;
  pps.setSieveSize(get_sieve_size());
  pps.setNumThreads(get_num_threads());
  return pps.countSextuplets(start, stop);
}
Example #6
0
/* Returns the thread count reported by whichever threading backend was
 * selected at compile time; get_num_threads() is the fallback when none of
 * the *_SUPPORT macros is defined. */
unsigned Extrae_get_num_threads (void)
{
	unsigned nthreads;

#if defined(OMP_SUPPORT) && !defined(OMPT_INSTRUMENTATION)
	nthreads = omp_get_num_threads();
#elif defined(SMPSS_SUPPORT)
	nthreads = css_get_max_threads();
#elif defined(NANOS_SUPPORT)
	nthreads = get_num_threads();
#elif defined(PTHREAD_SUPPORT)
	nthreads = Backend_getNumberOfThreads();
#elif defined(UPC_SUPPORT)
	nthreads = GetNumUPCthreads();
#else
	nthreads = get_num_threads();
#endif

	return nthreads;
}
Example #7
0
/* JNI entry point for es.bsc.cepbatools.extrae.Wrapper.GetNumThreads().
 * Neither JNI argument is used; the result is whatever get_num_threads()
 * returns, converted to jint. */
JNIEXPORT jint JNICALL Java_es_bsc_cepbatools_extrae_Wrapper_GetNumThreads(
	JNIEnv *env, jclass jc)
{
	jint nthreads;

	UNREFERENCED(env);
	UNREFERENCED(jc);

	nthreads = get_num_threads();
	return nthreads;
}
Example #8
0
/* Undoes one memcache_lock(): either drops one level of recursion or, when
 * no recursion is recorded, releases the write lock on all_memory_areas.
 */
void
memcache_unlock(void)
{
    /* Locks may be requested (or areas marked stale) during heap init, before
     * all_memory_areas has been allocated. At that point there must be only
     * one thread, so no lock is needed.
     */
    ASSERT(all_memory_areas != NULL ||
           get_num_threads() <= 1 /*must be only DR thread*/);
    if (all_memory_areas == NULL)
        return;

    if (all_memory_areas_recursion <= 0) {
        /* outermost level: actually release the lock */
        write_unlock(&all_memory_areas->lock);
        return;
    }

    /* nested level: we must still be the owner; just unwind the counter */
    ASSERT_OWN_WRITE_LOCK(true, &all_memory_areas->lock);
    all_memory_areas_recursion--;
}
/**
 * Returns the index in `threads` of the entry whose pthread id equals the
 * calling thread's id. Returns 0 when get_num_threads() is not greater than
 * 1 or when no entry matches.
 */
unsigned int Trick::Executive::get_process_id() {

    if ( get_num_threads() <= 1 ) {
        return(0) ;
    }

    const pthread_t self_id = pthread_self() ;
    unsigned int idx = 0 ;
    while ( idx < threads.size() ) {
        if ( pthread_equal(self_id, threads[idx]->get_pthread_id()) ) {
            return(idx) ;
        }
        idx++ ;
    }

    return(0) ;

}
Example #10
0
/* HACK to get recursive write lock for internal and external use
 * FIXME: code blatantly copied from dynamo_vm_areas_{un}lock(); eliminate duplication!
 *
 * Acquires the write lock on all_memory_areas, or bumps the recursion counter
 * when the calling thread already owns it.
 */
void
memcache_lock(void)
{
    /* Locks may be requested (or areas marked stale) during heap init, before
     * all_memory_areas has been allocated. At that point there must be only
     * one thread, so no lock is needed.
     */
    ASSERT(all_memory_areas != NULL ||
           get_num_threads() <= 1 /* must be only DR thread */);
    if (all_memory_areas == NULL)
        return;

    if (!self_owns_write_lock(&all_memory_areas->lock)) {
        /* first acquisition by this thread */
        write_lock(&all_memory_areas->lock);
        return;
    }

    /* re-entrant acquisition: only count it */
    all_memory_areas_recursion++;
    /* we have a 5-deep path:
     *   global_heap_alloc | heap_create_unit | get_guarded_real_memory |
     *   heap_low_on_memory | release_guarded_real_memory
     */
    ASSERT_CURIOSITY(all_memory_areas_recursion <= 4);
}
/*
 * Fills the Java-side lists of this_obj from the process handle:
 *  - for every thread index below get_num_threads(ph), the object returned by
 *    the getThreadForThreadId method is appended to the threadList field;
 *  - for every library index below get_num_libs(ph), the object returned by
 *    the createLoadObject method (name, 0, base) is appended to the
 *    loadObjectList field.
 *
 * CHECK_EXCEPTION (defined elsewhere) follows every JNI call so that no
 * further JNI function is invoked while an exception is pending.
 */
static void fillThreadsAndLoadObjects(JNIEnv* env, jobject this_obj, struct ps_prochandle* ph) {
  int n = 0, i = 0;

  // add threads
  n = get_num_threads(ph);
  for (i = 0; i < n; i++) {
    jobject thread;
    jobject threadList;
    lwpid_t lwpid;

    lwpid = get_lwp_id(ph, i);
    thread = (*env)->CallObjectMethod(env, this_obj, getThreadForThreadId_ID,
                                      (jlong)lwpid);
    CHECK_EXCEPTION;
    threadList = (*env)->GetObjectField(env, this_obj, threadList_ID);
    CHECK_EXCEPTION;
    (*env)->CallBooleanMethod(env, threadList, listAdd_ID, thread);
    CHECK_EXCEPTION;
  }

  // add load objects
  n = get_num_libs(ph);
  for (i = 0; i < n; i++) {
     uintptr_t base;
     const char* name;
     jobject loadObject;
     jobject loadObjectList;
     jstring nameStr;

     base = get_lib_base(ph, i);
     name = get_lib_name(ph, i);

     // Create the Java string on its own so that a failure (e.g. out of
     // memory) is detected before the string is passed to another JNI call.
     nameStr = (*env)->NewStringUTF(env, name);
     CHECK_EXCEPTION;
     loadObject = (*env)->CallObjectMethod(env, this_obj, createLoadObject_ID,
                                   nameStr, (jlong)0, (jlong)base);
     CHECK_EXCEPTION;
     loadObjectList = (*env)->GetObjectField(env, this_obj, loadObjectList_ID);
     CHECK_EXCEPTION;
     (*env)->CallBooleanMethod(env, loadObjectList, listAdd_ID, loadObject);
     CHECK_EXCEPTION;
  }
}
Example #12
0
/// Runtime entry point: configures logging, creates the service, kernel and
/// scheduler objects, starts the root task on main_fn with the command-line
/// arguments, runs the worker threads and returns the value produced by
/// rust_kernel::start_task_threads().
extern "C" CDECL int
rust_start(uintptr_t main_fn, int argc, char **argv, void* crate_map) {

    update_log_settings(crate_map, getenv("RUST_LOG"));
    enable_claims(getenv("CHECK_CLAIMS"));

    rust_srv *srv = new rust_srv();
    rust_kernel *kernel = new rust_kernel(srv);
    kernel->start();
    rust_scheduler *sched = kernel->get_scheduler();
    command_line_args *args
        = new (kernel) command_line_args(sched->root_task, argc, argv);

    DLOG(sched, dom, "startup: %d args in 0x%" PRIxPTR,
             args->argc, (uintptr_t)args->args);
    for (int arg_idx = 0; arg_idx < args->argc; arg_idx++) {
        DLOG(sched, dom, "startup: arg[%d] = '%s'", arg_idx, args->argv[arg_idx]);
    }

    sched->root_task->start(main_fn, (uintptr_t)args->args);

    int worker_count = get_num_threads();

    DLOG(sched, dom, "Using %d worker threads.", worker_count);

    // Returns once the task threads are done; only then is it safe to tear
    // down the argument block, the kernel and the service object.
    int exit_status = kernel->start_task_threads(worker_count);
    delete args;
    delete kernel;
    delete srv;

#if !defined(__WIN32__)
    // Don't take down the process if the main thread exits without an
    // error.
    if (exit_status == 0)
        pthread_exit(NULL);
#endif
    return exit_status;
}
Example #13
0
/*
 * Benchmark driver: parses the command line, loads the data set, then calls
 * log_likelihood() nsample times inside an OpenMP parallel region, each
 * thread working on its own slice of the observations. Prints the sum
 * obtained in the last sample and the average wall-clock time per sample.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when parsing, loading or a memory
 * allocation fails.
 */
int main(int argc, char *argv[]) {

	int nobs, sizex, nsample = 0;
	char *location = NULL;
	int ret = 0;

	//////////////////////////////////////
	/////////////// PARSERS //////////////
	//////////////////////////////////////
	// Parse the command line
	ret = parse_command_line(argc,argv,&nobs,&sizex,&nsample,&location);
	if( ret != PARSER_SUCCESS ) {
		printf("Parsing failed ! Exiting...\n");
		return EXIT_FAILURE;
	}

	// Parse the data on master
	// Sizes are computed in size_t so that nobs*sizex cannot overflow int
	// before being multiplied by sizeof(double).
	double *buffer_X = (double*)malloc((size_t)nobs*(size_t)sizex*sizeof(double));
	double *isigma = (double*)malloc((size_t)sizex*(size_t)sizex*sizeof(double));
	double *mu = (double*)malloc((size_t)sizex*sizeof(double));
	double det_sigma = 0.0;

	if( buffer_X == NULL || isigma == NULL || mu == NULL ) {
		printf("Memory allocation failed ! Exiting...\n");
		free(buffer_X);
		free(isigma);
		free(mu);
		free(location);
		return EXIT_FAILURE;
	}

	ret = read_data(buffer_X, isigma, &det_sigma, mu, &nobs, &sizex, location);
	if( ret != PARSER_SUCCESS ) {
		printf("Parsing failed ! Exiting...\n");
		// Release what was allocated above before bailing out
		free(buffer_X);
		free(isigma);
		free(mu);
		free(location);
		return EXIT_FAILURE;
	}

	////////////////////////////////////////
	/////////////// Variables //////////////
	////////////////////////////////////////
	// Thread variables
	int nthreads = 1;
	int th_num = 0;
	int th_nobs = nobs;

	nthreads = get_num_threads();
	// nthreads is used as a divisor and as an array size below
	if( nthreads < 1 ) {
		nthreads = 1;
	}

	// Timing variables
	double tic, toc, tot_time = 0.0;

	//// Arrays for all threads
	// The pool is allocated inside the shared memory
	double *pool_LV = (double*)malloc((size_t)nobs*(size_t)sizex*sizeof(double)); // Left hand side vector (X-mu)
	double *pool_tmp = (double*)malloc((size_t)nobs*(size_t)sizex*sizeof(double)); // Temporary holder for (X-mu)*SIG
	double *pool_ones = (double*)malloc((size_t)nobs*sizeof(double)); // Temporary holder to create LV
	double *pool_res = (double*)malloc((size_t)nthreads*sizeof(double)); // Each thread puts its result in pool_res

	if( pool_LV == NULL || pool_tmp == NULL || pool_ones == NULL || pool_res == NULL ) {
		printf("Memory allocation failed ! Exiting...\n");
		free(pool_res);
		free(pool_ones);
		free(pool_tmp);
		free(pool_LV);
		free(buffer_X);
		free(isigma);
		free(mu);
		free(location);
		return EXIT_FAILURE;
	}

	// Use pointers to get the correct location in the array
	double *LV = NULL;
	double *tmp = NULL;
	double *ones = NULL;
	double *X = NULL;

	// Holder for final sum
	double final_sum = 0.0;

	////////////////////////////////////////
	/////////////// Algorithm //////////////
	////////////////////////////////////////
	//// Start time sampling
	for(int k = 0; k < nsample; k++) {
		tic = omp_get_wtime();
		final_sum = 0.0;

		// Main driver
		// NOTE(review): the slicing below assumes the team has at most
		// nthreads threads (pool_res has nthreads entries) - confirm that
		// get_num_threads() matches the OpenMP team size.
		#pragma omp parallel private(th_num,th_nobs,LV,tmp,ones,X) default(shared)
		{
			// Get thread number
			th_num = omp_get_thread_num();
			// Total number of observations for that thread
			// NOTE(review): when nobs is not a multiple of nthreads the
			// trailing nobs % nthreads observations are not processed.
			th_nobs = nobs/nthreads;

			// First observation (row) handled by this thread. Every slice
			// uses the same row offset so that X always starts on a row
			// boundary, also when nobs is not a multiple of nthreads.
			size_t first_row = (size_t)th_num*(size_t)th_nobs;

			// Use the address to point to the correct location in the vector
			X = &buffer_X[first_row*(size_t)sizex];
			LV = &pool_LV[first_row*(size_t)sizex];
			tmp = &pool_tmp[first_row*(size_t)sizex];
			ones = &pool_ones[first_row];

			// Each process can now calculate the term in the
			// exponent for a subset of random vectors

			// Naive approach: for loop on each vector X
			// pool_res[th_num] += exp_term();

			// Guru approach: BLAS
			log_likelihood(X,isigma,mu,det_sigma,th_nobs,sizex,&pool_res[th_num],LV,tmp,ones);

			// All partial results must be written before they are summed
			#pragma omp barrier

			// Reduction: sum all the intermediary results
			#pragma omp for reduction(+:final_sum)
			for(int i = 0; i < nthreads; i++)
				final_sum = final_sum + pool_res[i];
		}
		toc = omp_get_wtime();
		tot_time += toc-tic;
	}

	printf("Result: %f\n",final_sum);
	// Avoid dividing by zero when no sample was requested
	printf("Total time: %f\n",(nsample > 0) ? tot_time/(double)nsample : 0.0);

	////////////////////////////////////////
	/////////////// Clean up ///////////////
	////////////////////////////////////////
	free(pool_res);
	free(pool_ones);
	free(pool_tmp);
	free(pool_LV);

	free(buffer_X);
	free(isigma);
	free(mu);
	free(location);

	return EXIT_SUCCESS;
}
Example #14
0
 /// Default options: x is -1, the selected option is OPTION_PI, timing is
 /// off and the thread count is taken from get_num_threads().
 PrimeCountOptions()
   : x(-1),
     option(OPTION_PI),
     time(false),
     threads(get_num_threads())
 {
 }
// Convenience overload that builds its own ThreadPool, sized by
// get_num_threads(), and delegates to the pool-taking overload.
// This function follows matlab's convention of same, not numpy's.
my_Dvector convolution_same_orig(const my_Dvector& vec1, const my_Dvector& vec2)
{
    ThreadPool workers (get_num_threads());

    return convolution_same_orig(vec1, vec2, workers);
}
Example #16
0
/* Returns 1 when get_multithread_capable() is true and get_num_threads()
 * reports more than one thread, 0 otherwise. */
int get_multithreaded() {
    if (!get_multithread_capable()) {
        return 0;
    }
    return get_num_threads() > 1;
}
Example #17
0
//---------------------------------------------------------------
/*
 * Adds a "long long" field named cnt to table t2 and fills it by summing,
 * for every row of t2, the per-thread partial counts produced by
 * core_num_in_range(). Presumably each count is the number of values of
 * field f1 (table t1) that lie between fields lb and ub of that row; the
 * counting itself is done in core_num_in_range(), which is not shown here.
 *
 * t1, f1  : source table and field (only "int" without a null field)
 * t2      : table holding the ranges; must differ from t1
 * lb, ub  : range bound fields of t2 (only "int" without a null field)
 * cnt     : name of the field that is created in t2
 *
 * Returns 0 on success and a negative status on failure.
 */
// START FUNC DECL
int 
num_in_range(
	       char *t1,
	       char *f1,
	       char *t2,
	       char *lb,
	       char *ub,
	       char *cnt
	       )
// STOP FUNC DECL
{
  int status = 0;
  char *f1_X = NULL; size_t f1_nX = 0;
  char *lb_X = NULL; size_t lb_nX = 0;
  char *ub_X = NULL; size_t ub_nX = 0;
  char *cnt_X = NULL; size_t cnt_nX = 0;
  int t1_id = INT_MIN, t2_id = INT_MIN;
  int f1_id = INT_MIN, lb_id = INT_MIN, ub_id = INT_MIN, cnt_id = INT_MIN;
  FLD_TYPE *f1_meta = NULL, *lb_meta = NULL, *ub_meta = NULL;
  long long nR1 = INT_MIN, nR2 = INT_MIN, chk_nR1 = INT_MIN;
  long long **cntptrs = NULL;
  // For multi-threading 
  int nT = 0; // read by the cleanup code at BYE, so it must start defined
  int rc; // result code for thread create 
  pthread_t threads[MAX_NUM_THREADS];
  pthread_attr_t attr;
  void *thread_status;

  char str_meta_data[1024];
  char *opfile = NULL; FILE *ofp = NULL;
  //----------------------------------------------------------------
  if ( ( t1 == NULL ) || ( *t1 == '\0' ) ) { go_BYE(-1); }
  if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); }
  if ( ( t2 == NULL ) || ( *t2 == '\0' ) ) { go_BYE(-1); }
  if ( ( lb == NULL ) || ( *lb == '\0' ) ) { go_BYE(-1); }
  if ( ( ub == NULL ) || ( *ub == '\0' ) ) { go_BYE(-1); }
  if ( ( cnt  == NULL ) || ( *cnt == '\0' ) ) { go_BYE(-1); }
  if ( strcmp(t1, t2) == 0 ) { go_BYE(-1); }
  zero_string(str_meta_data, 1024);
  //--------------------------------------------------------
  status = is_tbl(t1, &t1_id); cBYE(status);
  chk_range(t1_id, 0, g_n_tbl);
  nR1 = g_tbl[t1_id].nR;
  //--------------------------------------------------------
  status = is_fld(NULL, t1_id, f1, &f1_id); cBYE(status);
  chk_range(f1_id, 0, g_n_fld);
  f1_meta = &(g_fld[f1_id]);
  status = rs_mmap(f1_meta->filename, &f1_X, &f1_nX, 0); cBYE(status);
  // Have not implemented case where f1 has null field 
  if ( f1_meta->nn_fld_id >= 0 ) { go_BYE(-1); }
  // Have implemented only for int 
  // go_BYE (as for every other validation failure in this function) so that
  // an unsupported type is reported the same way as the other failures
  if ( strcmp(f1_meta->fldtype, "int") != 0 ) { go_BYE(-1); } 
  //--------------------------------------------------------
  status = is_tbl(t2, &t2_id); cBYE(status);
  chk_range(t2_id, 0, g_n_tbl);
  nR2 = g_tbl[t2_id].nR;
  //--------------------------------------------------------
  status = is_fld(NULL, t2_id, lb, &lb_id); cBYE(status);
  chk_range(lb_id, 0, g_n_fld);
  lb_meta = &(g_fld[lb_id]);
  status = rs_mmap(lb_meta->filename, &lb_X, &lb_nX, 0); cBYE(status);
  // Have not implemented case where lb has null field 
  if ( lb_meta->nn_fld_id >= 0 ) { go_BYE(-1); }
  // Have implemented only for int 
  if ( strcmp(lb_meta->fldtype, "int") != 0 ) { go_BYE(-1); } 
  //--------------------------------------------------------
  status = is_fld(NULL, t2_id, ub, &ub_id); cBYE(status);
  chk_range(ub_id, 0, g_n_fld);
  ub_meta = &(g_fld[ub_id]);
  status = rs_mmap(ub_meta->filename, &ub_X, &ub_nX, 0); cBYE(status);
  // Have not implemented case where ub has null field 
  if ( ub_meta->nn_fld_id >= 0 ) { go_BYE(-1); }
  // Have implemented only for int 
  if ( strcmp(ub_meta->fldtype, "int") != 0 ) { go_BYE(-1); } 
  //--------------------------------------------------------
  // Set up access to input
  int *inptr = (int *)f1_X;
  int *lbptr = (int *)lb_X;
  int *ubptr = (int *)ub_X;
  //--------------------------------------------------------
  //--- Decide on how much parallelism to use
  for ( int i = 0; i < MAX_NUM_THREADS; i++ ) { 
    g_thread_id[i] = i;
    g_num_rows[i] = 0;
  }
  status = get_num_threads(&nT);
  cBYE(status);
  // threads[], g_thread_id[] and g_num_rows[] hold MAX_NUM_THREADS entries
  if ( nT > MAX_NUM_THREADS ) { nT = MAX_NUM_THREADS; }
  if ( nT < 1 ) { nT = 1; }
  //--------------------------------------------
#define MIN_ROWS_FOR_SUBSAMPLE 10000 // 1048576
  if ( nR1 <= MIN_ROWS_FOR_SUBSAMPLE ) {
    nT = 1;
  }
  /* Don't create more threads than you can use */
  if ( nT > nR1 ) { nT = nR1; }
  //--------------------------------------------
  /* Make space for output */
  long long filesz = nR2 * sizeof(long long);
  status = open_temp_file(&ofp, &opfile, filesz); cBYE(status);
  fclose_if_non_null(ofp);
  status = mk_file(opfile, filesz); cBYE(status);
  status = rs_mmap(opfile, &cnt_X, &cnt_nX, 1); cBYE(status);
  long long *cntptr = (long long *)cnt_X;
  /* Make a holding tank for partial results */
  cntptrs = malloc(nT * sizeof(long long *));
  return_if_malloc_failed(cntptrs);
  /* Start from NULL entries so that the cleanup at BYE never frees an
   * uninitialized pointer when a later allocation fails */
  for ( int i = 0; i < nT; i++ ) {
    cntptrs[i] = NULL;
  }
  for ( int i = 0; i < nT; i++ ) {
    cntptrs[i] = malloc(nR2 * sizeof(long long));
    return_if_malloc_failed(cntptrs[i]);
    for ( long long j = 0; j <nR2; j++ ) { 
      cntptrs[i][j] = 0;
    }
  }

  // Add count field to meta data 
  sprintf(str_meta_data, "fldtype=long long:n_sizeof=8:filename=%s", opfile);
  status = add_fld(t2, cnt, str_meta_data, &cnt_id); cBYE(status);
  chk_range(cnt_id, 0, g_n_fld);
  //-----------------------------------------------------------
  // Now we count how much there is in each range 
  // Set up global variables
  g_nT = nT;
  g_inptr = inptr;
  g_lbptr = lbptr;
  g_ubptr = ubptr;
  g_cntptrs = cntptrs;
  g_nR1 = nR1;
  g_nR2 = nR2;
  if ( g_nT == 1 ) { 
    // Single thread: run the worker directly in this thread
    core_num_in_range(&(g_thread_id[0]));
    chk_nR1 = g_num_rows[0];
  }
  else {
    chk_nR1 = 0;
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    for ( int t = 0; t < g_nT; t++ ) { 
      rc = pthread_create(&threads[t], NULL, core_num_in_range,
	  &(g_thread_id[t]));
      if ( rc ) { go_BYE(-1); }
    }
    /* Free attribute and wait for the other threads */
    pthread_attr_destroy(&attr);
    for ( int t = 0; t < g_nT; t++ ) { 
      rc = pthread_join(threads[t], &thread_status);
      if ( rc ) { go_BYE(-1); }
      chk_nR1 += g_num_rows[t];
    }
  }
  // Every row of t1 must have been handled by exactly one worker
  if ( chk_nR1 != nR1 ) { go_BYE(-1); }
  // Accumulate partial results
  for ( long long i = 0; i < nR2; i++ ) { 
    cntptr[i] = 0;
    for ( int j= 0;  j < nT; j++ ) { 
      cntptr[i] += cntptrs[j][i];
    }
  }

 BYE:
  g_write_to_temp_dir = false;
  /* Release the per-thread partial counts; they have been folded into the
   * output field (or are of no use after a failure) */
  if ( cntptrs != NULL ) {
    if ( g_cntptrs == cntptrs ) { g_cntptrs = NULL; }
    for ( int i = 0; i < nT; i++ ) {
      free(cntptrs[i]);
    }
    free(cntptrs);
    cntptrs = NULL;
  }
  rs_munmap(f1_X, f1_nX);
  rs_munmap(lb_X, lb_nX);
  rs_munmap(ub_X, ub_nX);
  rs_munmap(cnt_X, cnt_nX);
  free_if_non_null(opfile);
  return(status);
}
Example #18
0
/// Calculate the number of primes below x using Legendre's formula.
/// Run time: O(x) operations, O(x^(1/2)) space.
///
/// Convenience overload: uses the thread count from get_num_threads().
///
int64_t pi_legendre(int64_t x)
{
  const auto threads = get_num_threads();
  return pi_legendre(x, threads);
}
Example #19
0
/// Alias for the fastest prime summing function in primesum.
/// Convenience overload: uses the thread count from get_num_threads().
/// @param x  integer arithmetic expression e.g. "10^12".
/// @pre   x  <= get_max_x().
///
string pi(const string& x)
{
  const auto threads = get_num_threads();
  return pi(x, threads);
}
Example #20
0
/// Convenience overload: forwards x to pi(x, threads) with the thread count
/// taken from get_num_threads().
int256_t pi(int128_t x)
{
  const auto threads = get_num_threads();
  return pi(x, threads);
}
Example #21
0
/// Partial sieve function (a.k.a. Legendre-sum).
/// phi(x, a) counts the numbers <= x that are not divisible
/// by any of the first a primes.
///
/// Convenience overload: uses the thread count from get_num_threads().
///
int64_t phi(int64_t x, int64_t a)
{
  const auto threads = get_num_threads();
  return phi(x, a, threads);
}
Example #22
0
//---------------------------------------------------------------
/*
 * Parallel sort of int field f1 of table tbl into field f2 of the same table.
 *
 * Outline, as far as it can be read from the calls made below:
 *   1. subsample f1 into a temporary table t2 (field "f2"),
 *   2. sort the sample and pick one split value per thread from it,
 *   3. store the resulting [lb, ub] value ranges in a temporary table t3,
 *   4. num_in_range() adds a "cnt" field to t3, mv_range() fills f2
 *      (presumably grouping the values of f1 by range - confirm against
 *      mv_range),
 *   5. core_parsort1() is run once per range, in threads when nT > 1,
 *   6. f2 is flagged with "sort=1" and t2/t3 are deleted.
 *
 * Restrictions enforced below: f1 must be of type "int" without a null
 * field and tbl must have more than 1048576 rows.
 *
 * up_or_down is not used right now.
 *
 * Returns the status of the last checked operation (negative on failure).
 */
// START FUNC DECL
int 
parsort1(
       char *tbl,
       char *f1,
       char *f2,
       char *up_or_down /* not used right now */
       )
// STOP FUNC DECL
{
  int status = 0;
  char *f1_X = NULL; size_t f1_nX = 0;
  char *op_X = NULL; size_t op_nX = 0;
  char *cnt_X = NULL; size_t cnt_nX = 0;
  char *t2f2_X = NULL; size_t t2f2_nX = 0;
  FLD_TYPE *f1_meta = NULL; 
  FLD_TYPE *f2_meta = NULL; 
  FLD_TYPE *t2f2_meta = NULL; 
  FLD_TYPE *cnt_meta = NULL; 
  long long nR, nR2;
  int tbl_id = INT_MIN, f1_id = INT_MIN, f2_id = INT_MIN, cnt_id = INT_MIN;
  int t2f2_id = INT_MIN;
  char str_meta_data[1024];
  FILE *ofp = NULL; char *opfile = NULL;
  FILE *tfp = NULL; char *tempfile = NULL;
  char str_rslt[32]; zero_string(str_rslt, 32);
  char t2[MAX_LEN_TBL_NAME]; 
  int itemp;
  // xxx[i] holds the split value chosen for range i (see below)
  int *xxx = NULL, *f1lb = NULL, *f1ub = NULL; 
  long long *count = NULL, *chk_count = NULL;
  int **offsets = NULL, **bak_offsets = NULL;
  int *inptr = NULL;
  // For multi-threading 
  int rc; // result code for thread create 
  pthread_t threads[MAX_NUM_THREADS];
  pthread_attr_t attr;
  void *thread_status;
  // START: For timing
  struct timeval Tps;
  struct timezone Tpf;
  void *Tzp = NULL;
  long long t_before_sec = 0, t_before_usec = 0, t_before = 0;
  long long t_after_sec, t_after_usec, t_after;
  long long t_delta_usec;
  // STOP : For timing
  //----------------------------------------------------------------
  if ( ( tbl == NULL ) || ( *tbl == '\0' ) ) { go_BYE(-1); }
  if ( ( f1 == NULL ) || ( *f1 == '\0' ) ) { go_BYE(-1); }
  if ( ( f2 == NULL ) || ( *f2 == '\0' ) ) { go_BYE(-1); }
  zero_string(str_meta_data, 1024);
  /* t2 isa temporary table */
  zero_string(t2, MAX_LEN_TBL_NAME);
  // NOTE(review): the status of qd_uq_str is not checked and the name it
  // produced is overwritten by the fixed name "t2" on the next line.
  status = qd_uq_str(t2, MAX_LEN_TBL_NAME);
  strcpy(t2, "t2"); // TODO DELETE THIS 
  g_offsets = NULL;
  g_count = NULL;
  //--------------------------------------------------------
  status = is_tbl(tbl, &tbl_id); cBYE(status);
  chk_range(tbl_id, 0, g_n_tbl);
  nR = g_tbl[tbl_id].nR;
  status = is_fld(NULL, tbl_id, f1, &f1_id); cBYE(status);
  chk_range(f1_id, 0, g_n_fld);
  f1_meta = &(g_fld[f1_id]);
  status = rs_mmap(f1_meta->filename, &f1_X, &f1_nX, 0); 
  cBYE(status);
  // Not implemented for following cases 
  if ( g_fld[f1_id].nn_fld_id >= 0 ) { go_BYE(-1); }
  if ( strcmp(f1_meta->fldtype, "int") != 0 ) { go_BYE(-1); }
  if ( nR <= 1048576 ) { go_BYE(-1); }
  //---------------------------------------------
  status = gettimeofday(&Tps, &Tpf); cBYE(status);
  t_before_sec  = (long long)Tps.tv_sec;
  t_before_usec = (long long)Tps.tv_usec;
  t_before = t_before_sec * 1000000 + t_before_usec;

  // The reduction factor passed to subsample is sqrt(nR)
  int reduction_factor = (int)(sqrt((double)nR));
  sprintf(str_rslt, "%d", reduction_factor);
  status = subsample(tbl, f1, str_rslt, t2, "f2"); cBYE(status);

  status = gettimeofday(&Tps, &Tpf); cBYE(status);
  t_after_sec  = (long long)Tps.tv_sec;
  t_after_usec = (long long)Tps.tv_usec;
  t_after = t_after_sec * 1000000 + t_after_usec;
  fprintf(stderr, "TIME0 = %lld \n", t_after - t_before); 
  t_before = t_after;


  // Must have sufficient diversity of values
  // The sum of (f2 == nextf2) over the sample must be 0, otherwise we give up
  status = f1opf2(t2, "f2", "op=shift:val=-1", "nextf2"); cBYE(status);
  status = drop_nn_fld(t2, "nextf2"); cBYE(status);
  status = f1f2opf3(t2, "f2", "nextf2", "==", "x"); cBYE(status);
  // NOTE(review): status of f_to_s is not checked here or in the two
  // min/max calls below.
  status = f_to_s(t2, "x", "sum", str_rslt);
  char *endptr;
  long long lltemp = strtoll(str_rslt, &endptr, 10);
  if ( lltemp != 0 ) { go_BYE(-1); }
  //-------------------------------------------------
  // Get range of values of f1 
  status = f_to_s(tbl, f1, "max", str_rslt);
  int f1max = strtoll(str_rslt, &endptr, 10);
  status = f_to_s(tbl, f1, "min", str_rslt);
  int f1min = strtoll(str_rslt, &endptr, 10);
  //-------------------------------------------------
  // Now we sort the values that we sampled
  status = fop(t2, "f2", "sortA"); cBYE(status);
  // status = pr_fld(t2, "f2", "", stdout);
  // NOTE(review): status of get_nR is not checked.
  status = get_nR(t2, &nR2);
  // Now each thread selects a range to work on
  int nT;
  for ( int i = 0; i < MAX_NUM_THREADS; i++ ) { 
    g_thread_id[i] = i;
  }
  status = get_num_threads(&nT);
  cBYE(status);
  // NOTE(review): nT is not compared with MAX_NUM_THREADS, the size of
  // threads[] - confirm that get_num_threads() guarantees the bound.
  //--------------------------------------------
#define MIN_ROWS_FOR_PARSORT1 1048576
  // This test cannot be true here: nR <= 1048576 already bailed out above
  if ( nR <= MIN_ROWS_FOR_PARSORT1 ) {
    nT = 1;
  }
  /* Don't create more threads than you can use */
  if ( nT > nR ) { nT = nR; }
  //--------------------------------------------

  // Number of sample values per range (fractional)
  double block_size = (double)nR2 / (double)nT;
  status = is_fld(t2, -1, "f2", &t2f2_id); cBYE(status);
  chk_range(t2f2_id, 0, g_n_fld);
  t2f2_meta = &(g_fld[t2f2_id]);
  // NOTE(review): no rs_munmap of t2f2_X is visible in this function.
  status = rs_mmap(t2f2_meta->filename, &t2f2_X, &t2f2_nX, 0); 
  cBYE(status);
  int *iptr = (int *)t2f2_X;
  xxx = malloc(nT * sizeof(int)); return_if_malloc_failed(xxx);
  f1lb = malloc(nT * sizeof(int)); return_if_malloc_failed(f1lb);
  f1ub = malloc(nT * sizeof(int)); return_if_malloc_failed(f1ub);
  /* FOR OLD_WAY 
  count = malloc(nT * sizeof(long long)); return_if_malloc_failed(count);
  */
  chk_count = malloc(nT * sizeof(long long));
  return_if_malloc_failed(chk_count);
  g_count = malloc(nT * sizeof(long long)); return_if_malloc_failed(g_count);

  // Split value for range i: the sample value at index (i+1)*block_size,
  // clamped to the last sample
  for ( int i = 0; i < nT; i++ ) { 
    // FOR OLD_WAY count[i]= 0;
    chk_count[i]= 0;
    int j = i+1;
    long long idx = j * block_size;
    if ( idx >= nR2 ) { idx = nR2 -1 ; }
    int y = iptr[idx];
    xxx[i] = y;
    // fprintf(stdout,"idx = %lld: j = %d: y = %d \n", idx, j, y);
  }
  // Turn the split values into inclusive [f1lb, f1ub] ranges: the first range
  // starts at f1min, the last ends at f1max and each inner range starts one
  // above the previous split value
  for ( int i = 0; i < nT; i++ ) { 
    if ( ( i == 0 ) && ( i == (nT - 1 ) ) ) {
      f1lb[i] = f1min;
      f1ub[i] = f1max;
    }
    else if ( i == 0 ) { 
      f1lb[i] = f1min;
      f1ub[i] = xxx[i];
    }
    else if ( i == (nT -1 ) ) {
      f1lb[i] = xxx[i-1] + 1;
      f1ub[i] = f1max;
    }
    else {
      f1lb[i] = xxx[i-1] + 1;
      f1ub[i] = xxx[i];
    }
  }
  // STOP: Each thread has now a range to work on
  // Create a temporary table t3 to store ranges
  char t3[MAX_LEN_TBL_NAME]; int t3_id;
  zero_string(t3, MAX_LEN_TBL_NAME);
  // NOTE(review): as for t2, the generated name is overwritten by "t3" and
  // neither this status nor the one of add_tbl below is checked.
  status = qd_uq_str(t3, MAX_LEN_TBL_NAME);
  strcpy(t3, "t3"); // TODO DELETE THIS 
  sprintf(str_rslt, "%d", nT);
  status = add_tbl(t3, str_rslt, &t3_id);

  // Add lower bound to t3
  status = open_temp_file(&tfp, &tempfile, -1); cBYE(status);
  fclose_if_non_null(tfp);
  tfp = fopen(tempfile, "wb"); return_if_fopen_failed(tfp, tempfile, "wb");
  fwrite(f1lb, sizeof(int),  nT, tfp); 
  fclose_if_non_null(tfp);
  sprintf(str_meta_data, "fldtype=%s:n_sizeof=%d:filename=%s",
      f1_meta->fldtype, f1_meta->n_sizeof, tempfile);
  status = add_fld(t3, "lb", str_meta_data, &itemp); cBYE(status);
  free_if_non_null(tempfile);

  // Add upper bound to t3
  status = open_temp_file(&tfp, &tempfile, -1); cBYE(status);
  fclose_if_non_null(tfp);
  tfp = fopen(tempfile, "wb"); return_if_fopen_failed(tfp, tempfile, "wb");
  fwrite(f1ub, sizeof(int),  nT, tfp); 
  fclose_if_non_null(tfp);
  sprintf(str_meta_data, "fldtype=%s:n_sizeof=%d:filename=%s",
      f1_meta->fldtype, f1_meta->n_sizeof, tempfile);
  status = add_fld(t3, "ub", str_meta_data, &itemp); cBYE(status);
  free_if_non_null(tempfile);

  // OLD_WAY is undefined right here, so only the #else branches of the two
  // conditional sections below are compiled
#undef OLD_WAY
#ifdef OLD_WAY
  // Now we count how much there is in each range 
  inptr = (int *)f1_X;
  for ( long long i = 0; i < nR; i++ ) { 
    int ival = *inptr++;
    int range_idx = INT_MIN;
    // TODO: Improve sequential search
    for ( int j = 0; j < nT; j++ ) { 
      if ( ival >= f1lb[j] && ( ival <= f1ub[j] ) ) {
	range_idx = j;
	break;
      }
    }
    count[range_idx]++;
  }
  /*
  for ( int i = 0; i < nT; i++ ) { 
    fprintf(stdout,"%d: (%d, %d) = %lld \n", i, f1lb[i], f1ub[i], count[i]);
  }
  */
#else
  status = num_in_range(tbl, f1, t3, "lb", "ub", "cnt"); cBYE(status);
  // Get a pointer to the count field 
  // NOTE(review): the statuses of is_tbl and is_fld are not checked here.
  status = is_tbl(t3, &t3_id);
  chk_range(t3_id, 0, g_n_tbl);
  status = is_fld(NULL, t3_id, "cnt", &cnt_id);
  chk_range(cnt_id, 0, g_n_fld);
  cnt_meta = &(g_fld[cnt_id]); 
  status = rs_mmap(cnt_meta->filename, &cnt_X, &cnt_nX, 0); cBYE(status);
  // count points into the mapped file and therefore must not be freed
  count = (long long *)cnt_X;
#endif
  status = gettimeofday(&Tps, &Tpf); cBYE(status);
  t_after_sec  = (long long)Tps.tv_sec;
  t_after_usec = (long long)Tps.tv_usec;
  t_after = t_after_sec * 1000000 + t_after_usec;
  fprintf(stderr, "TIME1 = %lld \n", t_after - t_before); 
  t_before = t_after;


  bak_offsets = malloc(nT * sizeof(int *)); return_if_malloc_failed(bak_offsets);
  g_offsets = malloc(nT * sizeof(int *)); return_if_malloc_failed(g_offsets);
#ifdef OLD_WAY
  // Make space for output 
  long long filesz = nR * f1_meta->n_sizeof;
  status = open_temp_file(&ofp, &opfile, filesz); cBYE(status);
  status = mk_file(opfile, filesz); cBYE(status);
  status = rs_mmap(opfile, &op_X, &op_nX, 1); cBYE(status);
  offsets = malloc(nT * sizeof(int *)); return_if_malloc_failed(offsets);
  long long cum_count = 0;
  for ( int i = 0; i < nT; i++ ) {
    bak_offsets[i] = offsets[i] = (int *)op_X;
    if ( i > 0 ) {
      cum_count += count[i-1];
      offsets[i] += cum_count;
      bak_offsets[i] = offsets[i];
    }
  }

  inptr = (int *)f1_X;
  // Now we place each item into its thread bucket
  for ( long long i = 0; i < nR; i++ ) { 
    int ival = *inptr++;
    int range_idx = INT_MIN;
    // TODO: Improve sequential search
    for ( int j = 0; j < nT; j++ ) { 
      if ( ival >= f1lb[j] && ( ival <= f1ub[j] ) ) {
	range_idx = j;
	break;
      }
    }
    int *xptr = offsets[range_idx];
    *xptr = ival;
    offsets[range_idx]++;
    chk_count[range_idx]++;
    if ( chk_count[range_idx] > count[range_idx] ) {
      go_BYE(-1);
    }
  }
  cum_count = 0;
  for ( int i = 0; i < nT-1; i++ ) { 
    if ( offsets[i] != bak_offsets[i+1] ) { 
      go_BYE(-1);
    }
  }
#else
  status = mv_range(tbl, f1, f2, t3, "lb", "ub", "cnt"); 
  cBYE(status);
  // NOTE(review): status of is_fld is not checked.
  status = is_fld(NULL, tbl_id, f2, &f2_id);
  chk_range(f2_id, 0, g_n_fld);
  f2_meta = &(g_fld[f2_id]); 
  status = rs_mmap(f2_meta->filename, &op_X, &op_nX, 1); cBYE(status);
#endif

  // Start of each range inside the output: the running sum of the counts of
  // all preceding ranges, in units of int
  long long cum_count = 0;
  for ( int i = 0; i < nT; i++ ) {
    bak_offsets[i] = (int *)op_X;
    if ( i > 0 ) {
      cum_count += count[i-1];
      bak_offsets[i] += cum_count;
    }
  }

  status = gettimeofday(&Tps, &Tpf); cBYE(status);
  t_after_sec  = (long long)Tps.tv_sec;
  t_after_usec = (long long)Tps.tv_usec;
  t_after = t_after_sec * 1000000 + t_after_usec;
  fprintf(stderr, "TIME2 = %lld \n", t_after - t_before); 
  t_before = t_after;

  // Set up global variables
  g_nT = nT;
  for ( int i = 0; i < nT; i++ ) { 
    g_offsets[i] = bak_offsets[i];
    g_count[i] = count[i];
  }
  if ( g_nT == 1 ) { 
    // Single range: run the worker in this thread
    core_parsort1(&(g_thread_id[0]));
  }
  else {
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    // NOTE(review): attr is initialized but pthread_create is given NULL.
    for ( int t = 0; t < g_nT; t++ ) { 
      rc = pthread_create(&threads[t], NULL, core_parsort1,
	  &(g_thread_id[t]));
      if ( rc ) { go_BYE(-1); }
    }
    /* Free attribute and wait for the other threads */
    pthread_attr_destroy(&attr);
    for ( int t = 0; t < g_nT; t++ ) { 
      rc = pthread_join(threads[t], &thread_status);
      if ( rc ) { go_BYE(-1); }
    }
  }
  /* SEQUENTIAL CODE 
  for ( int i = 0; i < nT; i++ ) { 
    qsort_asc_int(bak_offsets[i], count[i], sizeof(int), NULL);
  }
  */
  status = gettimeofday(&Tps, &Tpf); cBYE(status);
  t_after_sec  = (long long)Tps.tv_sec;
  t_after_usec = (long long)Tps.tv_usec;
  t_after = t_after_sec * 1000000 + t_after_usec;
  fprintf(stderr, "TIME3 = %lld \n", t_after - t_before); 

  // Indicate the dst_fld is sorted ascending
  // NOTE(review): status of set_fld_info is overwritten by del_tbl below
  // without being checked.
  status = set_fld_info(tbl, f2, "sort=1");


  rs_munmap(op_X, op_nX);
  status = del_tbl(t2, -1); cBYE(status);
  status = del_tbl(t3, -1); cBYE(status);
BYE:
  // NOTE(review): op_X is passed to rs_munmap here, again further down and
  // (on the success path) just above - presumably rs_munmap resets its
  // arguments so that repeated calls are harmless; confirm.
  rs_munmap(op_X, op_nX);
  rs_munmap(cnt_X, cnt_nX);
  free_if_non_null(xxx);
  free_if_non_null(f1lb);
  free_if_non_null(f1ub);
  // Do not delete unless using OLD_WAY free_if_non_null(count);
  free_if_non_null(g_count);
  free_if_non_null(g_offsets);
  free_if_non_null(offsets);
  free_if_non_null(bak_offsets);
  free_if_non_null(chk_count);

  fclose_if_non_null(ofp);
  g_write_to_temp_dir = false;
  rs_munmap(f1_X, f1_nX);
  rs_munmap(op_X, op_nX);
  free_if_non_null(opfile);
  return(status);
}
Example #23
0
//---------------------------------------------------------------
/*
 * crossprod: builds table t3 with nR1 * nR2 rows and two fields, named
 * f1 and f2, from field f1 of table t1 and field f2 of table t2.
 *
 * This function allocates and memory-maps one output file per field,
 * publishes the inputs/outputs through the g_* globals, and runs
 * core_crossprod() either inline (one thread) or on g_nT pthreads with
 * field 1 partitioned among them. The actual filling of the output is
 * done by core_crossprod(), which is defined elsewhere.
 *
 * Parameters:
 *   t1, f1 : source table / field for the first output field
 *   t2, f2 : source table / field for the second output field
 *            (f1 and f2 must currently have different names)
 *   t3     : name of the output table; an existing table with this name
 *            is deleted first
 *
 * Returns 0 on success (including the "no data" case where nR1 * nR2 is
 * zero and t3 is not created), a non-zero status otherwise.
 *
 * NOTE(review): both output fields are registered as "fldtype=int"
 * regardless of f1type/f2type -- confirm that this is intended.
 */
// START FUNC DECL
int 
crossprod(
	  char *t1,
	  char *f1,
	  char *t2,
	  char *f2,
	  char *t3
	  )
// STOP FUNC DECL
{
  int status = 0;

  /* Memory-mapped output storage for the two fields of t3 */
  char *Y1 = NULL; size_t nY1 = 0;
  char *Y2 = NULL; size_t nY2 = 0;

  /* Memory-mapped input fields (and temp files get_data may have made) */
  char *f1_X = NULL; size_t f1_nX = 0; char *f1_opfile = NULL; 
  char *f2_X = NULL; size_t f2_nX = 0; char *f2_opfile = NULL; 
  int f1type, f2type;
  long long f1size, f2size;

  int t3f1_fld_id = INT_MIN, t3f2_fld_id = INT_MIN;
  int t3_id = INT_MIN, itemp; 
  long long chk_nR1 = 0, nR1, nR2, nR3;
  long long filesz = 0;
  char str_meta_data[1024];
  char *t3f1_opfile = NULL, *t3f2_opfile = NULL;
  FILE *ofp = NULL;
  char buffer[32];
  int nw; /* return value of snprintf */
  // For multi-threading 
  int rc; // result code for thread create 
  int num_created = 0;    /* number of threads successfully started */
  int thread_failure = 0; /* set if any create/join failed */
  pthread_t threads[MAX_NUM_THREADS];
  pthread_attr_t attr;
  void *thread_status;
  //----------------------------------------------------------------
  zero_string(str_meta_data, 1024);
  zero_string(buffer, 32);
  if ( strcmp(f1, f2) == 0 ) { go_BYE(-1); } 
  /* Remove f1 != f2 restriction later. To do so, we need to specify
   * fields of t3 explicitly */
  //----------------------------------------------------------------
  status = get_data(t1, f1, &nR1, &f1_X, &f1_nX, &f1_opfile, &f1type, &f1size);
  cBYE(status);
  status = get_data(t2, f2, &nR2, &f2_X, &f2_nX, &f2_opfile,  &f2type, &f2size);
  cBYE(status);
  /* Reject nonsensical row counts and guard nR1 * nR2 against overflow */
  if ( ( nR1 < 0 ) || ( nR2 < 0 ) ) { go_BYE(-1); }
  if ( ( nR2 > 0 ) && ( nR1 > ( LLONG_MAX / nR2 ) ) ) { go_BYE(-1); }
  nR3 = nR1 * nR2;
  if ( nR3 == 0 ) {
    fprintf(stderr, "No data to create t3 \n");
    goto BYE;
  }
  /* Guard the file-size computations against overflow */
  if ( ( f1size <= 0 ) || ( nR3 > ( LLONG_MAX / f1size ) ) ) { go_BYE(-1); }
  if ( ( f2size <= 0 ) || ( nR3 > ( LLONG_MAX / f2size ) ) ) { go_BYE(-1); }
  // Create storage for field 1 in Table t3 
  filesz = nR3 * f1size;
  status = open_temp_file(&ofp, &t3f1_opfile, filesz); cBYE(status);
  fclose_if_non_null(ofp);
  status = mk_file(t3f1_opfile, filesz); cBYE(status);
  status = rs_mmap(t3f1_opfile, &Y1, &nY1, 1); cBYE(status);
  // Create storage for field 2 in Table t3 
  filesz = nR3 * f2size;
  status = open_temp_file(&ofp, &t3f2_opfile, filesz); cBYE(status);
  fclose_if_non_null(ofp);
  status = mk_file(t3f2_opfile, filesz); cBYE(status);
  status = rs_mmap(t3f2_opfile, &Y2, &nY2, 1); cBYE(status);
  //----------------------------------------------------------------
  /* Set up parallelism computations. Parallelization strategy is
   * simple. Partition field 1 (nR1 rows) among the threads */
  g_nR1 = nR1;
  g_nR2 = nR2;
  g_nR3 = nR3;
  g_f1type = f1type;
  g_f2type = f2type;
  g_f1size = f1size;
  g_f2size = f2size;
  g_f1_X = f1_X;
  g_f2_X = f2_X;
  g_Y1 = Y1;
  g_Y2 = Y2;
 
  for ( int i = 0; i < MAX_NUM_THREADS; i++ ) { 
    g_thread_id[i] = i;
    g_num_rows_processed[i] = 0;
  }
  status = get_num_threads(&g_nT);
  cBYE(status);
  //--------------------------------------------
#define MIN_ROWS_FOR_CROSSPROD 4 // 1024
  if ( nR1 <= MIN_ROWS_FOR_CROSSPROD ) {
    g_nT = 1;
  }
  /* Never index past the per-thread arrays */
  if ( g_nT > MAX_NUM_THREADS ) { g_nT = MAX_NUM_THREADS; }
  /* Don't create more threads than you can use */
  if ( g_nT > nR1 ) { g_nT = (int)nR1; }
  if ( g_nT < 1 ) { go_BYE(-1); }

  if ( g_nT == 1 ) { 
    core_crossprod(&(g_thread_id[0]));
    chk_nR1 = g_num_rows_processed[0];
  }
  else { /* Create threads */
    rc = pthread_attr_init(&attr);
    if ( rc ) { go_BYE(-1); }
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    for ( int t = 0; t < g_nT; t++ ) { 
      rc = pthread_create(&threads[t], &attr, core_crossprod,
          &(g_thread_id[t]));
      /* Do not jump to BYE yet: threads already started still use the
       * mapped buffers and must be joined before those are unmapped */
      if ( rc ) { thread_failure = 1; break; }
      num_created++;
    }
    /* Free attribute and wait for the other threads */
    pthread_attr_destroy(&attr);
    for ( int t = 0; t < num_created; t++ ) { 
      rc = pthread_join(threads[t], &thread_status);
      if ( rc ) { thread_failure = 1; continue; }
      chk_nR1 += g_num_rows_processed[t];
    }
    if ( thread_failure ) { go_BYE(-1); }
  }
  /* Every row of field 1 must have been handled by exactly one worker */
  if ( chk_nR1 != nR1 ) { go_BYE(-1); }
  //----------------------------------------------------------------
  // Add output fields to t3 meta data 
  status = is_tbl(t3, &t3_id); cBYE(status);
  if ( t3_id >= 0 ) { 
    status = del_tbl(NULL, t3_id);
    cBYE(status);
  }
  nw = snprintf(buffer, sizeof(buffer), "%lld", nR3);
  if ( ( nw < 0 ) || ( (size_t)nw >= sizeof(buffer) ) ) { go_BYE(-1); }
  status = add_tbl(t3, buffer, &itemp); cBYE(status);

  /* f1size/f2size are long long, hence %lld */
  nw = snprintf(str_meta_data, sizeof(str_meta_data),
      "fldtype=int:n_sizeof=%lld:filename=%s", f1size, t3f1_opfile);
  if ( ( nw < 0 ) || ( (size_t)nw >= sizeof(str_meta_data) ) ) { go_BYE(-1); }
  status = add_fld(t3, f1, str_meta_data, &t3f1_fld_id); cBYE(status);
  zero_string(str_meta_data, 1024);
  nw = snprintf(str_meta_data, sizeof(str_meta_data),
      "fldtype=int:n_sizeof=%lld:filename=%s", f2size, t3f2_opfile);
  if ( ( nw < 0 ) || ( (size_t)nw >= sizeof(str_meta_data) ) ) { go_BYE(-1); }
  status = add_fld(t3, f2, str_meta_data, &t3f2_fld_id); cBYE(status);
 BYE:
  fclose_if_non_null(ofp);
  /* Release the output mappings as well as the input ones */
  rs_munmap(Y1, nY1);
  rs_munmap(Y2, nY2);
  rs_munmap(f1_X, f1_nX);
  rs_munmap(f2_X, f2_nX);
  /* Temp files handed back by get_data are no longer needed */
  if ( f1_opfile != NULL ) { 
    unlink(f1_opfile); free_if_non_null(f1_opfile);
  }
  if ( f2_opfile != NULL ) { 
    unlink(f2_opfile); free_if_non_null(f2_opfile);
  }
  free_if_non_null(t3f1_opfile);
  free_if_non_null(t3f2_opfile);
  return(status);
}