void QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size ) { const int exec_all_reduce_alloc = align_alloc( reduce_size ); const int shepherd_scan_alloc = align_alloc( 8 ); const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size ); if ( s_worker_reduce_end < exec_all_reduce_alloc || s_worker_shared_end < shepherd_shared_end ) { // Clear current worker memory before allocating new worker memory clear_workers(); // Increase the buffers to an aligned allocation s_worker_reduce_end = exec_all_reduce_alloc ; s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ; s_worker_shared_end = shepherd_shared_end ; // Need to query which shepherd this main 'process' is running... // Have each worker resize its memory for proper first-touch for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { for ( int i = jshep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) { // Unit tests hang with this call: // // qthread_fork_to_local_priority( driver_resize_workers , NULL , NULL , jshep ); // qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep ); }} driver_resize_worker_scratch( NULL ); // Verify all workers allocated bool ok = true ; for ( int iwork = 0 ; ok && iwork < s_number_workers ; ++iwork ) { ok = 0 != s_exec[iwork] ; } if ( ! ok ) { std::ostringstream msg ; msg << "Kokkos::Impl::QthreadExec::resize : FAILED for workers {" ; for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) { if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); } } msg << " }" ; Kokkos::Impl::throw_runtime_exception( msg.str() ); } } }
/*
 * Allocate one object from the memory cache 'mc'.
 *
 * The request is forwarded to align_alloc() with the size and alignment
 * stored in the cache descriptor. On success the cache's allocation
 * counter (mc_allocs) is incremented atomically.
 *
 * Returns the pointer produced by align_alloc(), or NULL when it fails
 * (in which case the counter is left untouched).
 */
void *mcache_alloc(mcache_t mc)
{
	struct mem_cache *cache = mc;
	void *obj;

	ASSERT(cache != NULL);

	obj = align_alloc(cache->mc_size, cache->mc_align);
	if (obj == NULL)
		return NULL;

	/* Only successful allocations are counted. */
	atomic_inc(&cache->mc_allocs);
	return obj;
}
int main(int argc, char *argv[]) { int nchann, nsample, niter; int k, opt, ns; hfilter filt = NULL; void *buffin, *buffout; int filein = -1, fileout = -1; size_t samsize; uint32_t nchan32; uint32_t type; uint32_t r = 4; int retval = 1; int keepfiles = 0; // Process command-line options nchann = NCHANN; nsample = NSAMPLE; niter = NITER; type = TYPE_DEF; while ((opt = getopt(argc, argv, "hc:s:i:t:r:k:")) != -1) { switch (opt) { case 'c': nchann = atoi(optarg); break; case 's': nsample = atoi(optarg); break; case 'i': niter = atoi(optarg); break; case 'r': r = atoi(optarg); break; case 't': type = atoi(optarg); break; case 'k': keepfiles = atoi(optarg); break; case 'h': default: /* '?' */ fprintf(stderr, "Usage: %s [-c numchannel] [-s numsample] [-i numiteration] " "[-t (0 for float/1 for double)]\n", argv[0]); exit(EXIT_FAILURE); } } printf("\tnumber of channels: %i \t\tlength of batch: %i\n", nchann, nsample); samsize = sizeof_data(type) * nchann; nchan32 = nchann; // Allocate buffers buffin = align_alloc(16, nsample*samsize); buffout = align_alloc(16, nsample*samsize); if (!buffin || !buffout) { fprintf(stderr, "buffer allocation failed\n"); goto out; } // Open files for writing filein = open(infilename, O_WRONLY|O_CREAT, S_IRWXU); fileout = open(outfilename, O_WRONLY|O_CREAT, S_IRWXU); if ((filein < 0) || (fileout < 0)) { fprintf(stderr, "File opening failed\n"); goto out; } // write filter params on fileout if (write(filein, &type, sizeof(type)) == -1 || write(filein, &nchan32, sizeof(nchan32)) == -1 || write(fileout, &type, sizeof(type)) == -1 || write(fileout, &nchan32, sizeof(nchan32)) == -1 || write(fileout, &r, sizeof(r)) == -1) goto out; // create filters filt = rtf_create_downsampler(nchann, type, r); if (!filt) { fprintf(stderr,"Creation of filter failed\n"); goto out; } // Filter chunks of data and write input and output on files for (k=0; k<niter; k++) { set_signals(nchann, nsample, type, buffin); ns = rtf_filter(filt, buffin, buffout, nsample); if ( 
write(filein, buffin, nsample*samsize) == -1 || write(fileout, buffout, ns*samsize) == -1 ) { fprintf(stderr,"Error while writing file\n"); break; } } retval = 0; out: rtf_destroy_filter(filt); align_free(buffin); align_free(buffout); if (filein != -1) close(filein); if (fileout != -1) close(fileout); #if HAVE_MATLAB if (retval == 0) retval = compare_results(0.02); #endif if (!keepfiles) { unlink(infilename); unlink(outfilename); } return retval; }
void QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size ) { const int exec_all_reduce_alloc = align_alloc( reduce_size ); const int shepherd_scan_alloc = align_alloc( 8 ); const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size ); if ( s_worker_reduce_end < exec_all_reduce_alloc || s_worker_shared_end < shepherd_shared_end ) { /* fprintf( stdout , "QthreadExec::resize\n"); fflush(stdout); */ // Clear current worker memory before allocating new worker memory clear_workers(); // Increase the buffers to an aligned allocation s_worker_reduce_end = exec_all_reduce_alloc ; s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ; s_worker_shared_end = shepherd_shared_end ; // Need to query which shepherd this main 'process' is running... const int main_shep = qthread_shep(); // Have each worker resize its memory for proper first-touch #if 1 for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) { qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep ); }} #else // If this function is used before the 'qthread.task_policy' unit test // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so. for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ; if ( num_clone ) { const int ret = qthread_fork_clones_to_local_priority ( driver_resize_worker_scratch /* function */ , NULL /* function data block */ , NULL /* pointer to return value feb */ , jshep /* shepherd number */ , num_clone - 1 /* number of instances - 1 */ ); assert(ret == QTHREAD_SUCCESS); } } #endif driver_resize_worker_scratch( NULL ); // Verify all workers allocated bool ok = true ; for ( int iwork = 0 ; ok && iwork < s_number_workers ; ++iwork ) { ok = 0 != s_exec[iwork] ; } if ( ! 
ok ) { std::ostringstream msg ; msg << "Kokkos::Impl::QthreadExec::resize : FAILED for workers {" ; for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) { if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); } } msg << " }" ; Kokkos::Impl::throw_runtime_exception( msg.str() ); } } }
int main(int argc, char *argv[]) { int nchann, nsample, niter, filtorder; int k, opt; hfilter filt = NULL; void *buffin, *buffout; int filein = -1, fileout = -1; float fc = FC_DEF; size_t buffinsize, buffoutsize; uint32_t nchan32, ntotsample32; int32_t datintype, datouttype; int retval = 1; int keepfiles = 0; // Process command-line options nchann = NCHANN; nsample = NSAMPLE; niter = NITER; filtorder = FILTORDER; datouttype = datintype = TYPE_DEF; while ((opt = getopt(argc, argv, "hc:s:i:o:f:d:p:k:")) != -1) { switch (opt) { case 'c': nchann = atoi(optarg); break; case 's': nsample = atoi(optarg); break; case 'i': niter = atoi(optarg); break; case 'o': filtorder = atoi(optarg); break; case 'f': fc = atof(optarg); break; case 'd': datintype = atoi(optarg); break; case 'p': ptype = atoi(optarg); break; case 'k': keepfiles = atoi(optarg); break; case 'h': default: /* '?' */ fprintf(stderr, "Usage: %s [-c numchannel] [-s numsample] [-i numiteration] " "[-o filterorder] [-f cutoff_freq] [-d (0 for float/1 for double)]\n", argv[0]); exit(EXIT_FAILURE); } } printf("\tfilter order: %i \tnumber of channels: %i \t\tlength of batch: %i\n",filtorder, nchann, nsample); set_param(ptype); datouttype = datintype; if (ptype & RTF_COMPLEX_MASK) datouttype |= RTF_COMPLEX_MASK; buffinsize = sizeof_data(datintype) * nchann * nsample; buffoutsize = sizeof_data(datouttype) * nchann * nsample; nchan32 = nchann; ntotsample32 = nsample*niter; // Allocate buffers buffin = align_alloc(16, buffinsize); buffout = align_alloc(16, buffoutsize); if (!buffin || !buffout) { fprintf(stderr, "buffer allocation failed\n"); goto out; } // Open files for writing filein = open(infilename, O_WRONLY|O_CREAT, S_IRWXU); fileout = open(outfilename, O_WRONLY|O_CREAT, S_IRWXU); if ((filein < 0) || (fileout < 0)) { fprintf(stderr, "File opening failed\n"); goto out; } fprintf(stdout, "datin=%i datout=%i ptype=%i\n", datintype, datouttype, ptype ); // write filter params on fileout if (write(fileout, &ptype, 
sizeof(ptype)) == -1 || write(fileout, &numlen, sizeof(numlen)) == -1 || write(fileout, num, numlen*sizeof_data(ptype)) == -1 || write(fileout, &denumlen, sizeof(denumlen)) == -1 || write(fileout, denum, denumlen*sizeof_data(ptype)) == -1 || write(filein, &datintype, sizeof(datintype)) == -1 || write(filein, &nchan32, sizeof(nchan32)) == -1 || write(filein, &ntotsample32, sizeof(ntotsample32)) == -1 || write(fileout, &datouttype, sizeof(datouttype)) == -1 || write(fileout, &nchan32, sizeof(nchan32)) == -1 || write(fileout, &ntotsample32, sizeof(ntotsample32)) == -1) goto out; // set signals (ramps) set_signals(nchann, nsample, datintype, buffin); // create filters filt = rtf_create_filter(nchann, datintype, numlen, num, denumlen, denum, ptype); if (!filt) { fprintf(stderr,"Creation of filter failed\n"); goto out; } // Test the type if (datintype != rtf_get_type(filt, 1) || datouttype != rtf_get_type(filt, 0)) { fprintf(stderr, "Unexpected data type\n" "expected: in=%i out=%i\n" "returned: in=%i out=%i\n", datintype, datouttype, rtf_get_type(filt, 1), rtf_get_type(filt, 0)); goto out; } // Filter chunks of data and write input and output on files for (k=0; k<niter; k++) { rtf_filter(filt, buffin, buffout, nsample); if ( write(filein, buffin, buffinsize) == -1 || write(fileout, buffout, buffoutsize) == -1 ) { fprintf(stderr,"Error while writing file\n"); break; } } retval = 0; out: rtf_destroy_filter(filt); align_free(buffin); align_free(buffout); if (filein != -1) close(filein); if (fileout != -1) close(fileout); #if HAVE_MATLAB if (retval == 0) retval = compare_results( (datintype & RTF_PRECISION_MASK) ? 1e-12 : 1e-4); #endif if (!keepfiles) { unlink(infilename); unlink(outfilename); } return retval; }