int main(){ clock_t start = clock(); cilk_spawn hello(); cilk_spawn world(); //cilk_spawn hello(); //cilk_spawn world(); //cilk_sync; clock_t end = clock(); double duration = (double)(end - start) / CLOCKS_PER_SEC; printf("Done in %.9f!\n", duration); printf("Currently using %d workers\n", __cilkrts_get_nworkers()); }
/*
 * Driver for mergesort(): builds an array of N 64-bit integers in descending
 * order (N, N-1, ..., 1), sorts it, and prints the first and last three
 * elements of the result.
 *
 * argv[1] is the element count N and must be a positive integer that fits
 * in an int.
 *
 * Exits with status 1 on a missing/invalid argument or allocation failure,
 * 0 otherwise.
 */
int main(int argc, char *argv[])
{
    if (argc <= 1) {
        printf("Pass an integer as a command line argument...");
        exit(1);
    }

    /* strtol reports "no digits" and lets us range-check; atoi does neither. */
    char *parse_end = NULL;
    long requested = strtol(argv[1], &parse_end, 10);
    if (parse_end == argv[1] || requested < 1 ||
        (long)(int)requested != requested) {
        fprintf(stderr, "Array size must be a positive integer, got \"%s\"\n",
                argv[1]);
        exit(1);
    }
    int max = (int)requested;

    // Get the number of workers
    int numWorkers = __cilkrts_get_nworkers();
    printf("Set the number of workers to be %d.\n", numWorkers);

    // Create the input array
    int64_t *arr = malloc((size_t)max * sizeof *arr);
    if (arr == NULL) {
        fprintf(stderr, "Failed to allocate %d elements\n", max);
        exit(1);
    }

    // Fill it in descending order so the sort has real work to do
    for (int i = 0; i < max; i++) {
        arr[i] = max - i;
    }

    /*
     * NOTE(review): arr is not freed here. Whether mergesort() returns arr
     * itself or a separate buffer is not visible from this function --
     * confirm ownership before adding a free(arr).
     */
    int64_t *output = mergesort(arr, max, 0, max);

    // Print the first three elements (fewer if the array is shorter)
    for (int i = 0; i < 3 && i < max; i++) {
        printf("output[%d]=%lld\n", i, (long long)output[i]);
    }

    // Print the last three elements, never indexing below 0
    for (int i = (max >= 3) ? max - 3 : 0; i < max; i++) {
        printf("output[%d]=%lld\n", i, (long long)output[i]);
    }

    free(output);
    exit(0);
}
/****************************************************** * Unbalanced Tree Search v2.1 * * Based on the implementation available at * * http://sourceforge.net/projects/uts-benchmark * ******************************************************/ #ifdef HAVE_CONFIG_H # include "config.h" /* for _GNU_SOURCE */ #endif #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <limits.h> /* for INT_MAX */ #include <math.h> /* for floor, log, sin */ #include <cilk/cilk.h> #include <cilk/cilk_api.h> #include <cilk/reducer_opadd.h> #include <pthread.h> #include <qthread/qthread.h> #include <qthread/qtimer.h> #define SILENT_ARGPARSING #include "argparsing.h" #include "log.h" #define BRG_RNG // Select RNG #include "../../utils/rng/rng.h" #define PRINT_STATS 1 #define MAXNUMCHILDREN 100 static size_t nodecount; typedef enum { BIN = 0, GEO, HYBRID, BALANCED } tree_t; static char *type_names[] = { "Binomial", "Geometric", "Hybrid", "Balanced" }; typedef enum { LINEAR = 0, EXPDEC, CYCLIC, FIXED } shape_t; static char *shape_names[] = { "Linear decrease", "Exponential decrease", "Cyclic", "Fixed branching factor" }; typedef struct { int height; // Depth of node in the tree struct state_t state; // Local RNG state int num_children; } node_t; // Default values static tree_t tree_type = GEO; static double bf_0 = 4.0; static int root_seed = 0; static int num_samples = 1; static int tree_depth = 6; static shape_t shape_fn = LINEAR; static int non_leaf_bf = 4; static double non_leaf_prob = 15.0 / 64.0; static double shift_depth = 0.5; // Tree metrics static uint64_t tree_height = 0; static uint64_t num_leaves = 0; static double normalize(int n) { if (n < 0) { printf("*** toProb: rand n = %d out of range\n", n); } return ((n < 0) ? 0.0 : ((double)n) / (double)INT_MAX); } static int calc_num_children_bin(node_t *parent) { int v = rng_rand(parent->state.state); double d = normalize(v); return (d < non_leaf_prob) ? 
non_leaf_bf : 0; } static int calc_num_children(node_t *parent) { int num_children = 0; if (parent->height == 0) { num_children = (int)floor(bf_0); } else { num_children = calc_num_children_bin(parent); } if (parent->height == 0) { int root_bf = (int)ceil(bf_0); if (num_children > root_bf) { printf("*** Number of children truncated from %d to %d\n", num_children, root_bf); num_children = root_bf; } } else { if (num_children > MAXNUMCHILDREN) { printf("*** Number of children truncated from %d to %d\n", num_children, MAXNUMCHILDREN); num_children = MAXNUMCHILDREN; } } return num_children; } // Notes: // - Each task receives distinct copy of parent // - Copy of child is shallow, be careful with `state` member static long visit(node_t parent) { node_t child; uint64_t *child_descendants = calloc(sizeof(long), parent.num_children); CILK_C_REDUCER_OPADD(num_descendants, ulong, 0); uint64_t tmp; // Spawn children, if any for (int i = 0; i < parent.num_children; i++) { child.height = parent.height + 1; for (int j = 0; j < num_samples; j++) { rng_spawn(parent.state.state, child.state.state, i); } child.num_children = calc_num_children(&child); child_descendants[i] = _Cilk_spawn visit(child); } _Cilk_sync; CILK_C_REGISTER_REDUCER(num_descendants); _Cilk_for(int i = 0; i < parent.num_children; i++) { REDUCER_VIEW(num_descendants) += child_descendants[i]; } tmp = 1 + REDUCER_VIEW(num_descendants); CILK_C_UNREGISTER_REDUCER(num_descendants); return tmp; } #ifdef PRINT_STATS static void print_stats(void) { LOG_UTS_PARAMS_YAML() fflush(stdout); } #else /* ifdef PRINT_STATS */ static void print_banner(void) { printf("UTS - Unbalanced Tree Search 2.1 (C/Qthreads)\n"); printf("Tree type:%3d (%s)\n", tree_type, type_names[tree_type]); printf("Tree shape parameters:\n"); printf(" root branching factor b_0 = %.1f, root seed = %d\n", bf_0, root_seed); if ((tree_type == GEO) || (tree_type == HYBRID)) { printf(" GEO parameters: gen_mx = %d, shape function = %d (%s)\n", tree_depth, 
shape_fn, shape_names[shape_fn]); } if ((tree_type == BIN) || (tree_type == HYBRID)) { double q = non_leaf_prob; int m = non_leaf_bf; double es = (1.0 / (1.0 - q * m)); printf(" BIN parameters: q = %f, m = %d, E(n) = %f, E(s) = %.2f\n", q, m, q * m, es); } if (tree_type == HYBRID) { printf(" HYBRID: GEO from root to depth %d, then BIN\n", (int)ceil(shift_depth * tree_depth)); } if (tree_type == BALANCED) { printf(" BALANCED parameters: gen_mx = %d\n", tree_depth); printf(" Expected size: %llu nodes, %llu leaves\n", (unsigned long long)((pow(bf_0, tree_depth + 1) - 1.0) / (bf_0 - 1.0)), (unsigned long long)pow(bf_0, tree_depth)); } printf("Random number generator: "); printf("SHA-1 (state size = %ldB)\n", sizeof(struct state_t)); printf("Compute granularity: %d\n", num_samples); printf("Execution strategy:\n"); printf(" Workers: %d\n", __cilkrts_get_nworkers()); printf("\n"); fflush(stdout); }
int main(int argc, char **argv) { struct timespec begin, end; struct timespec veryStart; srand( time(NULL) ); get_time( begin ); get_time( veryStart ); // read args parse_args(argc,argv); std::cerr << "Available threads: " << __cilkrts_get_nworkers() << "\n"; get_time (end); print_time("init", begin, end); // Directory listing get_time( begin ); typedef asap::word_list<std::deque<const char*>, asap::word_bank_managed> directory_listing_type; directory_listing_type dir_list; asap::get_directory_listing( indir, dir_list ); get_time (end); print_time("directory listing", begin, end); typedef size_t index_type; typedef asap::word_bank_pre_alloc word_bank_type; typedef asap::sparse_vector<index_type, float, false, asap::mm_no_ownership_policy> vector_type; /* typedef asap::word_map< std::unordered_map<const char *, size_t, asap::text::charp_hash, asap::text::charp_eql>, word_bank_type> internal_map_type; typedef asap::kv_list<std::vector<std::pair<const char *, size_t>>, word_bank_type> intermediate_map_type; typedef asap::word_map< std::unordered_map<const char *, asap::appear_count<size_t, index_type>, asap::text::charp_hash, asap::text::charp_eql>, word_bank_type> aggregate_map_type; */ typedef asap::hash_table<const char *, size_t, asap::text::charp_hash, asap::text::charp_eql> wc_unordered_map; typedef asap::hash_table<const char *, asap::appear_count<size_t, index_type>, asap::text::charp_hash, asap::text::charp_eql> dc_unordered_map; typedef asap::word_map<wc_unordered_map, word_bank_type> internal_map_type; typedef asap::kv_list<std::vector<std::pair<const char*, size_t>>, word_bank_type> intermediate_map_type; typedef asap::word_map<dc_unordered_map, word_bank_type> aggregate_map_type; typedef asap::data_set<vector_type, aggregate_map_type, directory_listing_type> data_set_type; data_set_type tfidf( intm_map ? 
tfidf_driver<directory_listing_type, internal_map_type, internal_map_type, aggregate_map_type, data_set_type, false>( dir_list ) : tfidf_driver<directory_listing_type, internal_map_type, intermediate_map_type, aggregate_map_type, data_set_type, true>( dir_list ) ); get_time( begin ); if( outfile ) asap::arff_write( outfile, tfidf ); get_time (end); print_time("output", begin, end); print_time("complete time", veryStart, end); return 0; }
int main(int argc, char **argv) { struct timespec begin, end; struct timespec veryStart; srand( time(NULL) ); get_time( begin ); get_time( veryStart ); // read args parse_args(argc,argv); std::cerr << "Available threads: " << __cilkrts_get_nworkers() << "\n"; get_time (end); print_time("init", begin, end); // Directory listing get_time( begin ); typedef asap::word_list<std::deque<const char*>, asap::word_bank_managed> directory_listing_type; directory_listing_type dir_list; asap::get_directory_listing( indir, dir_list ); get_time (end); print_time("directory listing", begin, end); // word count get_time( begin ); typedef asap::word_map<std::map<const char *, size_t, asap::text::charp_cmp>, asap::word_bank_pre_alloc> word_map_type; typedef asap::kv_list<std::vector<std::pair<const char *, size_t>>, asap::word_bank_pre_alloc> word_list_type; typedef asap::sparse_vector<size_t, float, false, asap::mm_no_ownership_policy> vector_type; typedef asap::word_map<std::map<const char *, asap::appear_count<size_t, typename vector_type::index_type>, asap::text::charp_cmp>, asap::word_bank_pre_alloc> word_map_type2; size_t num_files = dir_list.size(); std::vector<word_list_type> catalog; catalog.resize( num_files ); asap::word_container_reducer<word_map_type2> allwords; cilk_for( size_t i=0; i < num_files; ++i ) { std::string filename = *std::next(dir_list.cbegin(),i); // std::cerr << "Read file " << filename; { // Build up catalog for each file using a map word_map_type wmap; asap::word_catalog<word_map_type>( std::string(*std::next(dir_list.cbegin(),i)), wmap ); // catalog[i] ); // Convert file's catalog to a (sorted) list of pairs catalog[i].reserve( wmap.size() ); // avoid re-allocations catalog[i].insert( std::move(wmap) ); // move out wmap contents } // delete wmap // std::cerr << ": " << catalog[i].size() << " words\n"; // Reading from std::vector rather than std::map should be faster... 
// Validated: about 10% on word count, 20% on TF/IDF, 16 threads allwords.count_presence( catalog[i] ); } get_time (end); print_time("word count", begin, end); get_time( begin ); typedef asap::data_set<vector_type, word_map_type2> data_set_type; // TODO: consider linearising the word_map to a word_list with exchanged // word_bank in order to avoid storing the ID? Problem: lookup // during TF/IDF computation // TODO: infer word_map_type2 from word_map_type* in template definition? // TODO: construct aggregate word_map_type2 during wc loop above // std::shared_ptr<word_map_type2> allwords_ptr // = std::make_shared<word_map_type2>(); // allwords_ptr->swap( allwords.get_value() ); asap::internal::assign_ids( allwords.get_value().begin(), allwords.get_value().end() ); asap::tfidf_inplace<float>( catalog.begin(), catalog.end(), allwords.get_value() ); get_time (end); print_time("TF/IDF", begin, end); get_time( begin ); std::ofstream of( outfile, std::ios_base::out ); size_t i=0; for( auto I=catalog.cbegin(), E=catalog.cend(); I != E; ++I, ++i ) { of << dir_list[i] << ": " << "TBC\n"; // *I << std::endl; } of.close(); get_time (end); print_time("output", begin, end); print_time("complete time", veryStart, end); return 0; }
// Default constructor: allocates an array of PaddedT with one element per
// worker reported by the Cilk runtime at construction time.
Storage_thread_local()
    : store(new PaddedT[__cilkrts_get_nworkers()])
{
}
/* Returns the worker count reported by the Cilk runtime. */
static int get_nworkers() {
    const int worker_count = __cilkrts_get_nworkers();
    return worker_count;
}
/*
 * Initializes the per-worker tables.
 *
 * Spawns f() and syncs before querying the worker count (presumably to make
 * sure the Cilk runtime is started first -- confirm), then allocates one
 * zero-initialized worker_table_t per worker into worker_tables.
 *
 * Aborts the process if the allocation fails.
 */
void initialize_rt() {
    cilk_spawn f();
    cilk_sync;

    int workers = __cilkrts_get_nworkers();

    /* Pass count and element size separately so calloc can detect overflow
     * of the product instead of receiving an already-wrapped byte count. */
    worker_tables = calloc((size_t)workers, sizeof(worker_table_t));
    if (worker_tables == NULL && workers > 0) {
        abort();
    }
}
int main(int argc, char* argv[]) { if(argc != 3 && argc != 4){ fprintf(stderr, "Execute: %s <input file> <alphabet size> [<validation_file>]\n", argv[0]); exit(-1); } unsigned long n; // Size of the input sequence symbol* text = read_text_from_file(argv[1], &n); // Input sequence unsigned int alphabet = (unsigned int)atoi(argv[2]); // Size of the alphabet //printf("n: %lu, alphabet: %u, threads: %d\n", n, alphabet, num_threads); // printf("%s,%lu,%u,", argv[1], n, alphabet); // Memory usage #ifdef MALLOC_COUNT size_t s_total_memory = malloc_count_total(); size_t s_current_memory = malloc_count_current(); malloc_reset_peak(); // Running time. CLOCK_THREAD_CPUTIME_ID: Running time of the thread that call it (main thread in this case) #else struct timespec stime, etime; double t; if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &stime)) { fprintf(stderr, "clock_gettime failed"); exit(-1); } #endif // Wavelet tree construction #ifdef NO_RANK_SELECT BIT_ARRAY** wtree = wt_create(text, n, alphabet); #else bitRankW32Int** wtree = wt_create(text, n, alphabet); #endif #ifdef MALLOC_COUNT size_t e_total_memory = malloc_count_total(); size_t e_current_memory = malloc_count_current(); printf("%s, %u, %zu, %zu, %zu, %zu, %zu\n", argv[1], alphabet, s_total_memory, e_total_memory, malloc_count_peak(), s_current_memory, e_current_memory); #else if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &etime)) { fprintf(stderr, "clock_gettime failed"); exit(-1); } t = (etime.tv_sec - stime.tv_sec) + (etime.tv_nsec - stime.tv_nsec) / 1000000000.0; printf("%d,%s,%lu,%lf\n", __cilkrts_get_nworkers(), argv[1], n, t); // printf("%d,%s,%lu\n", __cilkrts_get_nworkers(), argv[1], n); // Merge #endif free(text); // Validation mode // Generate the original text using the wavelet tree if(argc > 3) { #ifdef NO_RANK_SELECT printf("Compile without -DNO_RANK_SELECT to use the validation mode\n"); #else FILE *test_file; test_file = fopen(argv[3],"wb"); if (!test_file) { printf("Unable to open file!"); return 
EXIT_FAILURE; } symbol access_return = 0; unsigned long i = 0; if(n*sizeof(symbol) > 10485760) printf("Please, use a file smaller than 10MB to validate the algorithm. Otherwise, the validation could take a while.\n"); else { for(i = 0; i < n; i++) { access_return = wt_access(wtree, i, alphabet); fwrite(&access_return, sizeof(symbol), 1, test_file); } } fclose(test_file); #endif } return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { uint64_t total_num_nodes = 0; qtimer_t timer; double total_time = 0.0; CHECK_VERBOSE(); { unsigned long tmp = 0; NUMARG(tmp, "UTS_TREE_TYPE"); tree_type = (tree_t)tmp; } DBLARG(bf_0, "UTS_BF_0"); NUMARG(root_seed, "UTS_ROOT_SEED"); { unsigned long tmp = 0; NUMARG(tmp, "UTS_SHAPE_FN"); shape_fn = (shape_t)tmp; } NUMARG(tree_depth, "UTS_TREE_DEPTH"); DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB"); NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM"); NUMARG(shift_depth, "UTS_SHIFT_DEPTH"); NUMARG(num_samples, "UTS_NUM_SAMPLES"); #ifdef PRINT_STATS print_stats(); #else print_banner(); #endif timer = qtimer_create(); qtimer_start(timer); node_t root; root.height = 0; rng_init(root.state.state, root_seed); root.num_children = calc_num_children(&root); nodecount = 1; long retval; { retval = _Cilk_spawn visit(root); _Cilk_sync; } total_num_nodes = retval; qtimer_stop(timer); total_time = qtimer_secs(timer); qtimer_destroy(timer); #ifdef PRINT_STATS LOG_UTS_RESULTS_YAML(total_num_nodes, total_time) LOG_ENV_CILK_YAML() #else printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n", (unsigned long)total_num_nodes, (int)tree_height, (unsigned long long)num_leaves, num_leaves / (float)total_num_nodes * 100.0); printf("Wallclock time = %.3f sec, performance = %.0f " "nodes/sec (%.0f nodes/sec per PE)\n\n", total_time, total_num_nodes / total_time, total_num_nodes / total_time / __cilkrts_get_nworkers()); #endif /* ifdef PRINT_STATS */ return 0; }