/*
 * Builds the flow graph: derives the FFT/vector sizes from the requested
 * resolutions, creates the sources, the stream-to-vector block and the
 * autofam sink, configures the OsmoSDR source and wires the blocks together.
 *
 * centerFreq  - forwarded to source->set_center_freq() and make_autofam_sink()
 * sampleRate  - forwarded to source->set_sample_rate() and make_autofam_sink();
 *               also used to derive Np and P
 * freqRes     - frequency resolution used to derive Np
 * cycFreqRes  - cyclic frequency resolution used to derive P
 * fileName    - forwarded unchanged to make_autofam_sink()
 *
 * NOTE(review): the initializers of L, P, N and vector_length read members
 * initialized earlier in this list. C++ initializes members in declaration
 * order, and the class declaration is not visible here -- confirm that the
 * declaration order matches this list.
 */
TopBlock(double centerFreq, double sampleRate, double freqRes, double cycFreqRes, std::string fileName) :
    gr_top_block("Top Block"),
    /* The (int) cast binds to sampleRate only; the division by freqRes is
       then carried out on the truncated value. */
    Np(pow2roundup((int)sampleRate/freqRes)),
    L(Np/4),
    P(pow2roundup(sampleRate/cycFreqRes/L)),
    N(P*L),
    vector_length(N),
    source(osmosdr_make_source_c()), /* OsmoSDR Source */
    /* Items are sizeof(float)*2 bytes each, read from a hard-coded path.
       NOTE(review): the last argument is presumably the "repeat" flag -- confirm. */
    fileSource(gr_make_file_source(sizeof(float)*2, "/home/ylb/QAM16_44_1k.dat", false)),
    stv(gr_make_stream_to_vector(sizeof(float)*2, vector_length)), /* Stream to vector */
    /* autoFam - this does most of the interesting work */
    sink(make_autofam_sink(source, vector_length, centerFreq, sampleRate, freqRes, cycFreqRes, fileName))
{
    /* Set up the OsmoSDR Source */
    source->set_sample_rate(sampleRate);
    source->set_center_freq(centerFreq);
    source->set_freq_corr(0.0);
    source->set_gain_mode(false);
    source->set_gain(30);
    source->set_if_gain(25.0);

    /* Set up the connections */
    /* Only fileSource -> stv -> sink is connected here; the OsmoSDR source is
       configured above and handed to make_autofam_sink(), but it is not
       connected to any block in this constructor. */
    connect(fileSource, 0, stv, 0);
    connect(stv, 0, sink, 0);
}
void test_int_helpers () { std::cout << "\ntest_int_helpers\n"; // ispow2 for (int i = 1; i < (1<<30); i *= 2) { OIIO_CHECK_ASSERT (ispow2(i)); if (i > 1) OIIO_CHECK_ASSERT (! ispow2(i+1)); } OIIO_CHECK_ASSERT (ispow2(int(0))); OIIO_CHECK_ASSERT (! ispow2(-1)); OIIO_CHECK_ASSERT (! ispow2(-2)); // ispow2, try size_t, which is unsigned for (size_t i = 1; i < (1<<30); i *= 2) { OIIO_CHECK_ASSERT (ispow2(i)); if (i > 1) OIIO_CHECK_ASSERT (! ispow2(i+1)); } OIIO_CHECK_ASSERT (ispow2((unsigned int)0)); // pow2roundup OIIO_CHECK_EQUAL (pow2roundup(4), 4); OIIO_CHECK_EQUAL (pow2roundup(5), 8); OIIO_CHECK_EQUAL (pow2roundup(6), 8); OIIO_CHECK_EQUAL (pow2roundup(7), 8); OIIO_CHECK_EQUAL (pow2roundup(8), 8); // pow2rounddown OIIO_CHECK_EQUAL (pow2rounddown(4), 4); OIIO_CHECK_EQUAL (pow2rounddown(5), 4); OIIO_CHECK_EQUAL (pow2rounddown(6), 4); OIIO_CHECK_EQUAL (pow2rounddown(7), 4); OIIO_CHECK_EQUAL (pow2rounddown(8), 8); // round_to_multiple OIIO_CHECK_EQUAL (round_to_multiple(1, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(2, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(3, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(4, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(5, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(6, 5), 10); // round_to_multiple_of_pow2 OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(1), 4), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(2), 4), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(3), 4), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(4), 4), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(5), 4), 8); // round_to_multiple_of_pow2 OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(1), size_t(4)), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(2), size_t(4)), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(3), size_t(4)), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(4), size_t(4)), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(5), size_t(4)), 8); }
int main(int argc, char* const argv[]) { //place thread on the first cpu set_cpu(0); //initialize the custom memory allocator ssalloc_init(); pthread_t *threads; pthread_attr_t attr; barrier_t barrier; pthread_mutex_t init_lock; struct timeval start, end; struct timespec timeout; thread_data_t *data; sigset_t block_set; //initially, set parameters to their default values num_threads = DEFAULT_NUM_THREADS; seed=DEFAULT_SEED; max_key=DEFAULT_RANGE; updates=DEFAULT_UPDATES; finds=DEFAULT_READS; //inserts=DEFAULT_INSERTS; //removes=DEFAULT_REMOVES; duration=DEFAULT_DURATION; //now read the parameters in case the user provided values for them //we use getopt, the same skeleton may be used for other bechmarks, //though the particular parameters may be different struct option long_options[] = { // These options don't set a flag {"help", no_argument, NULL, 'h'}, {"duration", required_argument, NULL, 'd'}, {"range", required_argument, NULL, 'r'}, {"initial", required_argument, NULL, 'i'}, {"num-threads", required_argument, NULL, 'n'}, {"updates", required_argument, NULL, 'u'}, {"seed", required_argument, NULL, 's'}, {NULL, 0, NULL, 0} }; int i,c; //actually get the parameters form the command-line while(1) { i = 0; c = getopt_long(argc, argv, "hd:n:l:u:i:r:s", long_options, &i); if(c == -1) break; if(c == 0 && long_options[i].flag == 0) c = long_options[i].val; switch(c) { case 0: /* Flag is automatically set */ break; case 'h': printf("lock stress test\n" "\n" "Usage:\n" " stress_test [options...]\n" "\n" "Options:\n" " -h, --help\n" " Print this message\n" " -d, --duration <int>\n" " Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" " -u, --updates <int>\n" " Percentage of update operations (default=" XSTR(DEFAULT_UPDATES) ")\n" " -r, --range <int>\n" " Key range (default=" XSTR(DEFAULT_RANGE) ")\n" " -n, --num-threads <int>\n" " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" " -s, --seed <int>\n" " RNG seed (0=time-based, 
default=" XSTR(DEFAULT_SEED) ")\n" ); exit(0); case 'd': duration = atoi(optarg); break; case 'u': updates = atoi(optarg); finds = 100 - updates; break; case 'r': max_key = atoi(optarg); break; case 'i': break; case 'l': break; case 'n': num_threads = atoi(optarg); break; case 's': seed = atoi(optarg); break; case '?': printf("Use -h or --help for help\n"); exit(0); default: exit(1); } } max_key--; //we round the max key up to the nearest power of 2, which makes our random key generation more efficient max_key = pow2roundup(max_key)-1; //initialization of the tree root = bst_initialize(num_threads); //initialize the data which will be passed to the threads if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) { perror("malloc"); exit(1); } if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) { perror("malloc"); exit(1); } if (seed == 0) srand((int)time(NULL)); else srand(seed); //flag signaling the threads until when to run *running = 1; //global barrier initialization (used to start the threads at the same time) barrier_init(&barrier, num_threads + 1); pthread_mutex_init(&init_lock, NULL); pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); timeout.tv_sec = duration / 1000; timeout.tv_nsec = (duration % 1000) * 1000000; //set the data for each thread and create the threads for (i = 0; i < num_threads; i++) { data[i].id = i; data[i].num_operations = 0; data[i].total_time=0; data[i].num_insert=0; data[i].num_remove=0; data[i].num_search=0; data[i].num_add = max_key/(2 * num_threads); if (i< ((max_key/2)%num_threads)) data[i].num_add++; data[i].seed = rand(); data[i].barrier = &barrier; data[i].init_lock = &init_lock; if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) { fprintf(stderr, "Error creating thread\n"); exit(1); } } pthread_attr_destroy(&attr); /* Catch some signals */ if (signal(SIGHUP, catcher) == SIG_ERR || signal(SIGINT, catcher) == SIG_ERR || 
signal(SIGTERM, catcher) == SIG_ERR) { perror("signal"); exit(1); } // seeds = seed_rand(); // skey_t key; // for (i=0;i<max_key/2;++i) { // key = my_random(&seeds[0],&seeds[1],&seeds[2]) & max_key; // //we make sure the insert was effective (as opposed to just updating an existing entry) // if (bst_add(key, root, 0)!=TRUE) { // i--; // } // } // bst_print(root); /* Start threads */ barrier_cross(&barrier); gettimeofday(&start, NULL); if (duration > 0) { //sleep for the duration of the experiment nanosleep(&timeout, NULL); } else { sigemptyset(&block_set); sigsuspend(&block_set); } //signal the threads to stop *running = 0; gettimeofday(&end, NULL); /* Wait for thread completion */ for (i = 0; i < num_threads; i++) { if (pthread_join(threads[i], NULL) != 0) { fprintf(stderr, "Error waiting for thread completion\n"); exit(1); } } DDPRINT("threads finshed\n",NULL); //compute the exact duration of the experiment duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); //bst_print(root); unsigned long operations = 0; ticks total_ticks = 0; long reported_total = 1; //the tree contains two initial dummy nodes, INF1 and INF2 //report some experiment statistics for (i = 0; i < num_threads; i++) { printf("Thread %d\n", i); printf(" #operations : %lu\n", data[i].num_operations); printf(" #inserts : %lu\n", data[i].num_insert); printf(" #removes : %lu\n", data[i].num_remove); operations += data[i].num_operations; total_ticks += data[i].total_time; reported_total = reported_total + data[i].num_add + data[i].num_insert - data[i].num_remove; } printf("Duration : %d (ms)\n", duration); printf("#txs : %lu (%f / s)\n", operations, operations * 1000.0 / duration); //printf("Operation latency %lu\n", total_ticks / operations); //make sure the tree is correct printf("Expected size: %ld Actual size: %lu\n",reported_total,bst_size(root)); free(threads); free(data); return 0; }
/*
 * Walks every bucket chain of the current hashtable, counts the occupied
 * entries and the chained (expanded) buckets, and then either prints the
 * statistics (just_print != 0) or decides whether to resize the table.
 *
 * h               - the hash table wrapper
 * resize_increase - when non-zero, continue even if the status lock could
 *                   not be taken, and force a size increase
 * just_print      - when non-zero, only print the statistics; no resize and
 *                   no garbage collection
 *
 * Returns the number of entries with key > 0, or 0 when the status lock is
 * busy and resize_increase is 0.
 */
size_t
ht_status(clht_t* h, int resize_increase, int just_print)
{
  /* The trylock is always attempted first; bail out only when it failed and
     the caller did not insist on a resize. */
  if (TRYLOCK_ACQ(&h->status_lock) && !resize_increase)
    {
      return 0;
    }

  clht_hashtable_t* ht = h->ht;
  uint64_t bucket_count = ht->num_buckets;
  size_t occupied = 0;
  int total_expands = 0;    /* sum over all bins of (chain length - 1) */
  int longest_expand = 0;   /* max over all bins of (chain length - 1) */

  uint64_t idx;
  for (idx = 0; idx < bucket_count; idx++)
    {
      volatile bucket_t* cur = ht->table + idx;
      int chain_len = 0;

      do
        {
          uint32_t slot;
          chain_len++;
          for (slot = 0; slot < ENTRIES_PER_BUCKET; slot++)
            {
              if (cur->key[slot] > 0)
                {
                  occupied++;
                }
            }
          cur = cur->next;
        }
      while (cur != NULL);

      /* The first bucket of a bin is not an expansion. */
      int chained = chain_len - 1;
      total_expands += chained;
      if (chained > longest_expand)
        {
          longest_expand = chained;
        }
    }

  double full_ratio = 100.0 * occupied / ((ht->num_buckets) * ENTRIES_PER_BUCKET);

  if (just_print)
    {
      printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / full%%: %8.4f%% / expands: %4d / max expands: %2d\n",
             99, ht->num_buckets, occupied, full_ratio, total_expands, longest_expand);
    }
  else
    {
      if (full_ratio > 0 && full_ratio < CLHT_PERC_FULL_HALVE)
        {
          /* Table is sparsely filled: shrink. */
          printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / full%%: %8.4f%% / expands: %4d / max expands: %2d\n",
                 clht_gc_get_id(), ht->num_buckets, occupied, full_ratio, total_expands, longest_expand);
          ht_resize_pes(h, 0, 33);
        }
      else if ((full_ratio > 0 && full_ratio > CLHT_PERC_FULL_DOUBLE)
               || longest_expand > CLHT_MAX_EXPANSIONS
               || resize_increase)
        {
          /* Table is too full, a chain is too long, or the caller asked for
             it: grow by a power-of-two factor of at least 2. */
          int grow_by = (full_ratio / CLHT_OCCUP_AFTER_RES);
          int grow_by_pow2 = pow2roundup(grow_by);

          printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / full%%: %8.4f%% / expands: %4d / max expands: %2d\n",
                 clht_gc_get_id(), ht->num_buckets, occupied, full_ratio, total_expands, longest_expand);

          if (grow_by_pow2 == 1)
            {
              grow_by_pow2 = 2;
            }
          ht_resize_pes(h, 1, grow_by_pow2);
        }

      /* Garbage collection runs on every non-print call. */
      clht_gc_collect(h);
    }

  TRYLOCK_RLS(h->status_lock);
  return occupied;
}
UINT buildTree(icl_buffer *nodelist, icl_buffer *particlesD, icl_buffer *treeD, UINT nParticles, icl_device* dev) { UINT level = 1; UINT nNodes = nParticles * 2 - 1; icl_timer* timer = icl_init_timer(ICL_MILLI); // void icl_start_timer(icl_timer* timer); double time = 0; // overapproximate size of temporal lists /* struct Node** activelist = (struct Node**)malloc(nParticles * sizeof(struct Node*)); UINT activeN = 0; struct Node** smalllist = (struct Node**)malloc(nParticles * sizeof(struct Node*)); UINT smallN = 0; struct Node** nextlist = (struct Node**)malloc(nParticles * sizeof(struct Node*)); UINT nextN = 0;*/ icl_buffer* activelist = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(NodeId) * nParticles); icl_buffer* smalllist = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(NodeId) * nParticles); icl_buffer* nextlist = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(NodeId) * nParticles); icl_buffer* sizes = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(UINT) * 5); // holds the current size of each of 3 buffers: // 0 activelist // 1 nodelist // 2 smalllist // 3 old max level // 4 new max level UINT maxNchunks = ((nParticles / fmin(T, chunk_size)) * 2) -1; // assert(maxNchunks <= 256 && "adapt implementation"); // TODO allow more than 256 chunks per node icl_buffer* chunks = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(struct Chunk) * maxNchunks); icl_buffer* bboxes = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(struct BBox) * maxNchunks); size_t localSize = 1; size_t globalSize = 1; /* struct Particle* particles = (struct Particle*)malloc(3000 * sizeof(struct Particle)); icl_read_buffer(particlesD, CL_TRUE, sizeof(struct Particle) * 3000, &particles[0], NULL, NULL); printf("%f %f %f\n", particles[0].pos.x, particles[0].pos.y, particles[0].pos.z); */ // compile OpenCL kernels icl_kernel* init = icl_create_kernel(dev, "kernel/init.cl", "init", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* resetChunks = icl_create_kernel(dev, "kernel/init.cl", 
"memset_chunks", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* gp2c = icl_create_kernel(dev, "kernel/groupToChunks.cl", "groupParticlesIntoChunks", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* cBBox = icl_create_kernel(dev, "kernel/chunkedBBox.cl", "chunkedBBox", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* bBox = icl_create_kernel(dev, "kernel/bBox.cl", "bBox", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* sln = icl_create_kernel(dev, "kernel/splitLargeNodes.cl", "splitLargeNodes", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* sortP = icl_create_kernel(dev, "kernel/sortParticlesToChilds.cl", "sortParticlesToChilds", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* snf = icl_create_kernel(dev, "kernel/smallNodeFiltering.cl", "smallNodeFiltering", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* pnl = icl_create_kernel(dev, "kernel/packNextlist.cl", "packNextlist", KERNEL_BUILD_MACRO, ICL_SOURCE); ////////////////////////////////////////////////////////////////////////// icl_kernel* preScan = icl_create_kernel(dev, "kernel/sortP_prescan.cl", "sortP_prescan_chunked", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* postScan = icl_create_kernel(dev, "kernel/sortP_postscan.cl", "sortP_postscan_chunked", KERNEL_BUILD_MACRO, ICL_SOURCE); segmented_scan_init(nParticles, dev, KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* memset_int_s = icl_create_kernel(dev, "kernel/init.cl", "memset_int_s", KERNEL_BUILD_MACRO, ICL_SOURCE); // approach with segmented scan icl_buffer *scan_data = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(int) * nParticles); icl_buffer *scan_flag = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(int) * nParticles); icl_buffer* buffered_particles = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(struct Particle) * nParticles); #if timing == 1 icl_timer* timer_gp2c = icl_init_timer(ICL_MILLI); icl_timer* timer_cBBox = icl_init_timer(ICL_MILLI); icl_timer* timer_bBox = icl_init_timer(ICL_MILLI); icl_timer* timer_sln = icl_init_timer(ICL_MILLI); icl_timer* timer_sortP 
= icl_init_timer(ICL_MILLI); icl_timer* timer_snf = icl_init_timer(ICL_MILLI); icl_timer* timer_pnl = icl_init_timer(ICL_MILLI); icl_timer* timer_ran = icl_init_timer(ICL_MILLI); icl_timer* timer_prescan = icl_init_timer(ICL_MILLI); icl_timer* timer_scan = icl_init_timer(ICL_MILLI); icl_timer* timer_postscan = icl_init_timer(ICL_MILLI); #endif //add root node to the activelist and initialize size lists icl_run_kernel(init, 1, &globalSize, &localSize, NULL, NULL, 3, (size_t)0, (void *)activelist, (size_t)0, (void *)sizes, (size_t)0, (void *)particlesD); UINT activeN = 1; icl_finish(dev); // smallest power of 2 bigger or equal to maxxNchnunks UINT pow2maxNchunks = pow2roundup(maxNchunks); // processLargeNode while(activeN != 0) { icl_start_timer(timer); // group triangles into chunks size_t localSize1 = min(pow2maxNchunks, 256); #if timing == 1 icl_start_timer(timer_gp2c); #endif size_t globalSize1 = ((maxNchunks + localSize1 -1) / localSize1) * localSize1; // reset chunks icl_run_kernel(resetChunks, 1, &globalSize1, &localSize1, NULL, NULL, 2, (size_t)0, (void *)chunks, sizeof(UINT), &maxNchunks); globalSize1 = localSize1 * activeN; // split every node in chunk of chunk_size icl_run_kernel(gp2c, 1, &globalSize1, &localSize1, NULL, NULL, 4, (size_t)0, (void *)nodelist, (size_t)0, (void *)activelist, (size_t)0, (void *)chunks, sizeof(UINT), &activeN); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_gp2c); #endif // compute per chunk bounding box size_t localSize2 = chunk_size; size_t globalSize2 = maxNchunks * chunk_size; #if timing == 1 icl_start_timer(timer_cBBox); #endif icl_run_kernel(cBBox, 1, &globalSize2, &localSize2, NULL, NULL, 5, (size_t)0, (void *)nodelist, (size_t)0, (void *)activelist, (size_t)0, (void *)particlesD, (size_t)0, (void *)chunks, (size_t)0, (void *)bboxes); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_cBBox); #endif // compute per node bounding box size_t localSize3 = min(pow2maxNchunks, 256); size_t globalSize3 = localSize3 * 
activeN; #if timing == 1 icl_start_timer(timer_bBox); #endif icl_run_kernel(bBox, 1, &globalSize3, &localSize3, NULL, NULL, 4, (size_t)0, (void *)nodelist, (size_t)0, (void *)activelist, (size_t)0, (void *)bboxes, sizeof(UINT), &activeN); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_bBox); #endif // split large nodes size_t localSize4 = 256; size_t globalSize4 = ((activeN + 255) / 256) * 256; #if timing == 1 icl_start_timer(timer_sln); #endif icl_run_kernel(sln, 1, &globalSize4, &localSize4, NULL, NULL, 5, (size_t)0, (void *)nodelist, (size_t)0, (void *)activelist, (size_t)0, (void *)nextlist, (size_t)0, (void *)sizes, sizeof(UINT), &activeN); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_sln); #endif /////////////////////////////////////////////////////////////////////////////// // XXx replaced with segmented scan //globalSize = (activeN+1) * 256; #if timing == 1 icl_start_timer(timer_sortP); #endif #if DEVICE == ICL_CPU // sort particles to child nodes size_t localSize5 = 256; size_t globalSize5 = ((activeN + 255) / 256) * 256; icl_run_kernel(sortP, 1, &globalSize5, &localSize5, NULL, NULL, 5, (size_t)0, (void *)nodelist, (size_t)0, (void *)particlesD, (size_t)0, (void *)activelist, (size_t)0, (void *)nextlist, sizeof(UINT), &activeN ); #else // init scan_flag to 1 cl_int initFlag = 1; size_t np = (size_t)((nParticles + localSize4 -1 ) / localSize4) * localSize4; icl_run_kernel(memset_int_s, 1, &np, &localSize4, NULL, NULL, 3, (size_t)0, (void *)scan_flag, sizeof(cl_int), &initFlag, sizeof(UINT), &nParticles ); #if timing == 1 icl_start_timer(timer_prescan); #endif // pre-scan fills data0 and data1 with 1 and 0 whenever value < pivot localSize = chunk_size; // globalSize = activeN * 256; globalSize = maxNchunks * chunk_size; icl_run_kernel(preScan, 1, &globalSize, &localSize, NULL, NULL, 7, (size_t)0, (void *)nodelist, (size_t)0, (void *)chunks, (size_t)0, (void *)particlesD, (size_t)0, (void *)activelist, sizeof(UINT), &activeN, (size_t)0, (void 
*)scan_data, (size_t)0, (void *)scan_flag ); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_prescan); #endif #if timing == 1 icl_start_timer(timer_scan); #endif // scan for scan(scan_data, scan_flag, nParticles); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_scan); #endif // copy partially sorted data to the final icl_copy_buffer(particlesD, buffered_particles, sizeof(struct Particle) * nParticles, NULL, NULL); // swap(particlesD, buffered_particles); #if timing == 1 icl_start_timer(timer_postscan); #endif localSize = chunk_size; globalSize = maxNchunks * chunk_size; icl_run_kernel(postScan, 1, &globalSize, &localSize, NULL, NULL, 7, (size_t)0, (void *)nodelist, (size_t)0, (void *)chunks, (size_t)0, (void *)particlesD, (size_t)0, (void *)activelist, sizeof(UINT), &activeN, (size_t)0, (void *)buffered_particles, (size_t)0, (void *)scan_data ); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_postscan); #endif icl_finish(dev); #endif /////////////////////////////////////////////////////////////////////////////// #if timing == 1 icl_finish(dev); icl_stop_timer(timer_sortP); #endif // small node filtering size_t localSize6 = 256; size_t globalSize6 = ((activeN*2 + 255) / 256) * 256; #if timing == 1 icl_start_timer(timer_snf); #endif icl_run_kernel(snf, 1, &globalSize6, &localSize6, NULL, NULL, 4, (size_t)0, (void *)nodelist, (size_t)0, (void *)nextlist, (size_t)0, (void *)smalllist, (size_t)0, (void *)sizes //, sizeof(UINT), &nParticles ); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_snf); #endif // packing of nextlist size_t localSize7 = 1; size_t globalSize7 = 1; #if timing == 1 icl_start_timer(timer_pnl); #endif icl_run_kernel(pnl, 1, &globalSize7, &localSize7, NULL, NULL, 2, (size_t)0, (void *)nextlist, (size_t)0, (void *)sizes ); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_pnl); #endif // swap nextlist and activelist swap(&nextlist, &activelist); #if timing == 1 icl_start_timer(timer_ran); #endif // read size of next 
activelist set in kernel icl_read_buffer(sizes, CL_TRUE, sizeof(UINT), &activeN, NULL, NULL); icl_finish(dev); #if timing == 1 icl_stop_timer(timer_ran); #endif ++level; //printf("%d: ActiveN %d\n", level, activeN); time = icl_stop_timer(timer); } icl_release_kernel(init); icl_release_kernel(gp2c); icl_release_kernel(cBBox); icl_release_kernel(bBox); icl_release_kernel(sln); icl_release_kernel(sortP); icl_release_kernel(snf); icl_release_kernel(pnl); ////////////////////////////////////////////////////////////////////////// icl_release_kernel(preScan); icl_release_kernel(postScan); segmented_scan_release(); icl_release_buffers(3, scan_data, scan_flag, buffered_particles); #if timing == 1 printf("gp2c %f\ncBBox %f\nbBox %f\nsln %f\nsortP %f\nsnf %f\npnl %f\nran %f\n\n", timer_gp2c->current_time, timer_cBBox->current_time, timer_bBox->current_time, timer_sln->current_time, timer_sortP->current_time, timer_snf->current_time, timer_pnl->current_time, timer_ran->current_time); icl_release_timer(timer_gp2c); icl_release_timer(timer_cBBox); icl_release_timer(timer_bBox); icl_release_timer(timer_sln); icl_release_timer(timer_sortP); icl_release_timer(timer_snf); icl_release_timer(timer_pnl); printf("prescan %f\nscan %f\npostscan %f\n\n", timer_prescan->current_time, timer_scan->current_time, timer_postscan->current_time); icl_release_timer(timer_prescan); icl_release_timer(timer_scan); icl_release_timer(timer_postscan); #endif /* icl_read_buffer(nodelist, CL_TRUE, sizeof(struct Node) * 6000, tree->nodelist, NULL, NULL); printf("node: %d, left %d, right %d", tree->nodelist[0].particlesHigh - tree->nodelist[0].particlesLow, tree->nodelist[1].particlesHigh - tree->nodelist[1].particlesLow, tree->nodelist[2].particlesHigh - tree->nodelist[2].particlesLow); printBox(tree->nodelist[49].bounding_box); printBox(tree->nodelist[53].bounding_box); printBox(tree->nodelist[54].bounding_box); for(int i = 0; i < 6000; ++i) if(tree->nodelist[i].bounding_box.box[0].x != 0.0) 
printBox(tree->nodelist[i].bounding_box); */ //small nodes stage // preprocessSmallNodes(smalllist); icl_release_buffers(3, activelist, chunks, bboxes); icl_kernel* sasl = icl_create_kernel(dev, "kernel/swapActiveAndSmalllist.cl", "swapActiveAndSmalllist", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* ssn = icl_create_kernel(dev, "kernel/splitSmallNodes.cl", "splitSmallNodes", KERNEL_BUILD_MACRO, ICL_SOURCE); #if timing == 1 icl_timer* timer_ssn = icl_init_timer(ICL_MILLI); icl_timer* timer_sasl = icl_init_timer(ICL_MILLI); icl_timer* timer_rsn = icl_init_timer(ICL_MILLI); #endif size_t localSize8 = 1; size_t globalSize8 = 1; UINT setMaxLevel = 0; icl_run_kernel(sasl, 1, &globalSize8, &localSize8, NULL, NULL, 2, (size_t)0, (void *)sizes, sizeof(UINT), &level); // get number of small nodes icl_read_buffer(sizes, CL_TRUE, sizeof(UINT), &activeN, NULL, NULL); while(activeN != 0) { icl_start_timer(timer); // compute SVH and determine the split plane size_t localSize9 = 256; size_t globalSize9 = ((activeN + 255) / 256) * 256; #if timing == 1 icl_start_timer(timer_ssn); #endif icl_run_kernel(ssn, 1, &globalSize9, &localSize9, NULL, NULL, 5, (size_t)0, (void *)nodelist, (size_t)0, (void *)smalllist, (size_t)0, (void *)nextlist, (size_t)0, (void *)particlesD, (size_t)0, (void *)sizes); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_ssn); #endif size_t localSizeA = 1; size_t globalSizeA = 1; #if timing == 1 icl_start_timer(timer_sasl); #endif icl_run_kernel(sasl, 1, &globalSizeA, &localSizeA, NULL, NULL, 2, (size_t)0, (void *)sizes, sizeof(UINT), &setMaxLevel); #if timing == 1 icl_finish(dev); icl_stop_timer(timer_sasl); #endif swap(&nextlist, &smalllist); // read size of next activelist set in kernel #if timing == 1 icl_start_timer(timer_rsn); #endif icl_read_buffer(sizes, CL_TRUE, sizeof(UINT), &activeN, NULL, NULL); //printf("small size %d\n", activeN); icl_finish(dev); #if timing == 1 icl_stop_timer(timer_rsn); #endif time = icl_stop_timer(timer); } 
icl_release_buffer(smalllist); icl_release_buffer(nextlist); icl_release_kernel(sasl); icl_release_kernel(ssn); #if timing == 1 printf("ssn %f\nsasl %f\nrsn %f\n\n", timer_ssn->current_time, timer_sasl->current_time, timer_rsn->current_time); icl_release_timer(timer_ssn); icl_release_timer(timer_sasl); icl_release_timer(timer_rsn); #endif UINT s[5]; icl_read_buffer(sizes, CL_TRUE, sizeof(UINT) * 5, &s, NULL, NULL); icl_release_buffer(sizes); icl_kernel* upPass = icl_create_kernel(dev, "kernel/upPass.cl", "upPass", KERNEL_BUILD_MACRO, ICL_SOURCE); icl_kernel* downPass = icl_create_kernel(dev, "kernel/kdDownPass.cl", "downPass", KERNEL_BUILD_MACRO, ICL_SOURCE); #if timing == 1 icl_timer* timer_upPass = icl_init_timer(ICL_MILLI); icl_timer* timer_downPass = icl_init_timer(ICL_MILLI); icl_timer* timer_rt = icl_init_timer(ICL_MILLI); #endif UINT treeHeight = s[4]; printf("Tree height: %d\n", treeHeight); size_t localSizeB = 256; size_t globalSizeB = ((nNodes + 255) / 256) * 256; icl_start_timer(timer); #if timing == 1 icl_start_timer(timer_upPass); #endif for(int l = (int)treeHeight; l >= 0; --l) { icl_run_kernel(upPass, 1, &globalSizeB, &localSizeB, NULL, NULL, 4, (size_t)0, (void *)nodelist, (size_t)0, (void *)particlesD, sizeof(int), &l, sizeof(UINT), &nNodes); } #if timing == 1 icl_finish(dev); icl_stop_timer(timer_upPass); icl_start_timer(timer_downPass); #endif for(UINT l = 0; l <= treeHeight; ++l) { icl_run_kernel(downPass, 1, &globalSizeB, &localSizeB, NULL, NULL, 4, (size_t)0, (void *)nodelist, (size_t)0, (void *)treeD, sizeof(UINT), &l, sizeof(UINT), &nNodes); } icl_finish(dev); #if timing == 1 icl_stop_timer(timer_downPass); #endif time = icl_stop_timer(timer); icl_release_kernel(upPass); icl_release_kernel(downPass); #if timing == 1 icl_start_timer(timer_rt); #endif #if timing == 1 icl_finish(dev); icl_stop_timer(timer_rt); printf("upPass %f\ndownPass %f\nread Tree %f\n\n", timer_upPass->current_time, timer_downPass->current_time, timer_rt->current_time); 
icl_release_timer(timer_upPass); icl_release_timer(timer_downPass); icl_release_timer(timer_rt); #endif // struct Node* kdTree = (struct Node*)malloc(sizeof(struct Node) * nNodes); // icl_read_buffer(treeD, CL_TRUE, sizeof(struct Node) * nNodes, kdTree, NULL, NULL); // printf("%d", tree->nodelist[0].left_child); printf("\nTime: %f\n", time); icl_release_timer(timer); return treeHeight; }
int main(int argc, char* const argv[]) { set_cpu(the_cores[0]); #ifdef PRINT_OUTPUT fprintf(stderr, "The size of the data being tested: %lu\n",sizeof(data_type)); fprintf(stderr, "Number of entries per cache line: %lu\n",CACHE_LINE_SIZE / sizeof(data_t)); #endif struct option long_options[] = { // These options don't set a flag {"help", no_argument, NULL, 'h'}, {"entries", required_argument, NULL, 'e'}, {"duration", required_argument, NULL, 'd'}, {"pause", required_argument, NULL, 'p'}, {"num-threads", required_argument, NULL, 'n'}, {"seed", required_argument, NULL, 's'}, {NULL, 0, NULL, 0} }; int i, c; thread_data_t *data; pthread_t *threads; pthread_attr_t attr; barrier_t barrier; struct timeval start, end; struct timespec timeout; num_entries = DEFAULT_NUM_ENTRIES; num_threads = DEFAULT_NUM_THREADS; duration = DEFAULT_DURATION; seed = DEFAULT_SEED; op_pause = DEFAULT_PAUSE; sigset_t block_set; while(1) { i = 0; c = getopt_long(argc, argv, "he:d:p:n:s", long_options, &i); if(c == -1) break; if(c == 0 && long_options[i].flag == 0) c = long_options[i].val; switch(c) { case 0: /* Flag is automatically set */ break; case 'h': printf("lock stress test\n" "\n" "Usage:\n" " stress_test [options...]\n" "\n" "Options:\n" " -h, --help\n" " Print this message\n" " -e, --entires <int>\n" " Number of entries in the test (default=" XSTR(DEFAULT_NUM_LOCKS) ")\n" " -d, --duration <int>\n" " Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" " -p, --pause <int>\n" " Pause between consecutive atomic operations in cycles (default=" XSTR(DEFAULT_DURATION) ")\n" " -n, --num-threads <int>\n" " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" " -s, --seed <int>\n" " RNG seed (0=time-based, default=" XSTR(DEFAULT_SEED) ")\n" ); exit(0); case 'e': num_entries = atoi(optarg); break; case 'd': duration = atoi(optarg); break; case 'n': num_threads = atoi(optarg); break; case 'p': op_pause = atoi(optarg); break; case 's': seed = atoi(optarg); break; 
case '?': printf("Use -h or --help for help\n"); exit(0); default: exit(1); } } op_pause=op_pause/NOP_DURATION; num_entries = pow2roundup(num_entries); assert(duration >= 0); assert(num_entries >= 1); assert(num_threads > 0); #ifdef PRINT_OUTPUT printf("Number of entries : %d\n", num_entries); printf("Duration : %d\n", duration); printf("Number of threads : %d\n", num_threads); printf("Type sizes : int=%d/long=%d/ptr=%d\n", (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *)); #endif timeout.tv_sec = duration / 1000; timeout.tv_nsec = (duration % 1000) * 1000000; the_data = (data_t*)malloc(num_entries * sizeof(data_t)); for (i = 0; i < num_entries; i++) { the_data[i].data=0; } if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) { perror("malloc"); exit(1); } if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) { perror("malloc"); exit(1); } if (seed == 0) srand((int)time(NULL)); else srand(seed); stop = 0; /* Access set from all threads */ barrier_init(&barrier, num_threads + 1); pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for (i = 0; i < num_threads; i++) { #ifdef PRINT_OUTPUT printf("Creating thread %d\n", i); #endif data[i].id = i; data[i].num_operations = 0; data[i].seed = rand(); data[i].barrier = &barrier; if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) { fprintf(stderr, "Error creating thread\n"); exit(1); } } pthread_attr_destroy(&attr); /* Catch some signals */ if (signal(SIGHUP, catcher) == SIG_ERR || signal(SIGINT, catcher) == SIG_ERR || signal(SIGTERM, catcher) == SIG_ERR) { perror("signal"); exit(1); } /* Start threads */ barrier_cross(&barrier); #ifdef PRINT_OUTPUT printf("STARTING...\n"); #endif gettimeofday(&start, NULL); if (duration > 0) { nanosleep(&timeout, NULL); } else { sigemptyset(&block_set); sigsuspend(&block_set); } stop = 1; gettimeofday(&end, NULL); #ifdef PRINT_OUTPUT printf("STOPPING...\n"); #endif /* Wait for 
thread completion */ for (i = 0; i < num_threads; i++) { if (pthread_join(threads[i], NULL) != 0) { fprintf(stderr, "Error waiting for thread completion\n"); exit(1); } } duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); unsigned long operations = 0; for (i = 0; i < num_threads; i++) { #ifdef PRINT_OUTPUT printf("Thread %d\n", i); printf(" #operations : %lu\n", data[i].num_operations); #endif operations += data[i].num_operations; } #ifdef PRINT_OUTPUT printf("Duration : %d (ms)\n", duration); #endif printf("#operations : %lu (%f / s)\n", operations, operations * 1000.0 / duration); free((data_t*) the_data); free(threads); free(data); return 0; }
int main(int argc, char **argv) { set_cpu(the_cores[0]); assert(sizeof(clht_hashtable_t) == 2*CACHE_LINE_SIZE); struct option long_options[] = { // These options don't set a flag {"help", no_argument, NULL, 'h'}, {"duration", required_argument, NULL, 'd'}, {"num-threads", required_argument, NULL, 'n'}, {"range", required_argument, NULL, 'r'}, {"correctness", no_argument, NULL, 'c'}, {"num-buckets", required_argument, NULL, 'b'}, {"print-vals", required_argument, NULL, 'v'}, {"vals-pf", required_argument, NULL, 'f'}, {"obj-size", required_argument, NULL, 's'}, {NULL, 0, NULL, 0} }; int i, c; while(1) { i = 0; c = getopt_long(argc, argv, "hAf:d:i:n:r:s:u:m:a:l:p:b:v:f:c", long_options, &i); if(c == -1) break; if(c == 0 && long_options[i].flag == 0) c = long_options[i].val; switch(c) { case 0: /* Flag is automatically set */ break; case 'h': printf("intset -- STM stress test " "(linked list)\n" "\n" "Usage:\n" " intset [options...]\n" "\n" "Options:\n" " -h, --help\n" " Print this message\n" " -d, --duration <int>\n" " Test duration in milliseconds\n" " -n, --num-threads <int>\n" " Number of threads\n" " -r, --range <int>\n" " Range of integer values inserted in set\n" " -s, --obj-size <int>\n" " Size of the objects stored in the hash table\n" " -b, --num-buckets <int>\n" " Number of initial buckets (stronger than -l)\n" " -v, --print-vals <int>\n" " When using detailed profiling, how many values to print.\n" " -f, --val-pf <int>\n" " When using detailed profiling, how many values to keep track of.\n" ); exit(0); case 'd': duration = atoi(optarg); break; case 'n': num_threads = atoi(optarg); break; case 's': obj_size = atol(optarg); break; case 'v': print_vals_num = atoi(optarg); break; case 'f': pf_vals_num = pow2roundup(atoi(optarg)) - 1; break; case '?': default: printf("Use -h or --help for help\n"); exit(1); } } rand_max = num_elements; struct timeval start, end; struct timespec timeout; timeout.tv_sec = duration / 1000; timeout.tv_nsec = (duration % 1000) * 
1000000; printf("//duration: sec: %lu, ns: %lu\n", timeout.tv_sec, timeout.tv_nsec); stop = 0; /* Initialize the hashtable */ snap = (clht_snapshot_t*) memalign(CACHE_LINE_SIZE, CACHE_LINE_SIZE); assert(snap != NULL); /* Initializes the local data */ putting_succ = (ticks *) calloc(num_threads , sizeof(ticks)); putting_fail = (ticks *) calloc(num_threads , sizeof(ticks)); getting_succ = (ticks *) calloc(num_threads , sizeof(ticks)); getting_fail = (ticks *) calloc(num_threads , sizeof(ticks)); removing_succ = (ticks *) calloc(num_threads , sizeof(ticks)); removing_fail = (ticks *) calloc(num_threads , sizeof(ticks)); putting_count = (ticks *) calloc(num_threads , sizeof(ticks)); putting_count_succ = (ticks *) calloc(num_threads , sizeof(ticks)); getting_count = (ticks *) calloc(num_threads , sizeof(ticks)); getting_count_succ = (ticks *) calloc(num_threads , sizeof(ticks)); removing_count = (ticks *) calloc(num_threads , sizeof(ticks)); removing_count_succ = (ticks *) calloc(num_threads , sizeof(ticks)); pthread_t threads[num_threads]; pthread_attr_t attr; int rc; void *status; barrier_init(&barrier_global, num_threads + 1); barrier_init(&barrier, num_threads); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); thread_data_t* tds = (thread_data_t*) malloc(num_threads * sizeof(thread_data_t)); long t; for(t = 0; t < num_threads; t++) { tds[t].id = t; rc = pthread_create(&threads[t], &attr, test, tds + t); if (rc) { printf("ERROR; return code from pthread_create() is %d\n", rc); exit(-1); } } /* Free attribute and wait for the other threads */ pthread_attr_destroy(&attr); barrier_cross(&barrier_global); gettimeofday(&start, NULL); nanosleep(&timeout, NULL); stop = 1; gettimeofday(&end, NULL); duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); for(t = 0; t < num_threads; t++) { rc = pthread_join(threads[t], &status); if (rc) { 
printf("ERROR; return code from pthread_join() is %d\n", rc); exit(-1); } } free(tds); volatile ticks putting_suc_total = 1; volatile ticks putting_fal_total = 1; volatile ticks getting_suc_total = 1; volatile ticks getting_fal_total = 1; volatile ticks removing_suc_total = 1; volatile ticks removing_fal_total = 1; volatile uint64_t putting_count_total = 1; volatile uint64_t putting_count_total_succ = 1; volatile uint64_t getting_count_total = 1; volatile uint64_t getting_count_total_succ = 1; volatile uint64_t removing_count_total = 1; volatile uint64_t removing_count_total_succ = 1; for(t=0; t < num_threads; t++) { putting_suc_total += putting_succ[t]; putting_fal_total += putting_fail[t]; getting_suc_total += getting_succ[t]; getting_fal_total += getting_fail[t]; removing_suc_total += removing_succ[t]; removing_fal_total += removing_fail[t]; putting_count_total += putting_count[t]; putting_count_total_succ += putting_count_succ[t]; getting_count_total += getting_count[t]; getting_count_total_succ += getting_count_succ[t]; removing_count_total += removing_count[t]; removing_count_total_succ += removing_count_succ[t]; } if(putting_count_total == 0) { putting_suc_total = 0; putting_fal_total = 0; putting_count_total = 1; putting_count_total_succ = 2; } if(getting_count_total == 0) { getting_suc_total = 0; getting_fal_total = 0; getting_count_total = 1; getting_count_total_succ = 2; } if(removing_count_total == 0) { removing_suc_total = 0; removing_fal_total = 0; removing_count_total = 1; removing_count_total_succ = 2; } #if defined(COMPUTE_LATENCY) # if defined(DEBUG) printf("#thread get_suc get_fal put_suc put_fal rem_suc rem_fal\n"); fflush(stdout); # endif printf("%d\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n", num_threads, getting_suc_total / getting_count_total_succ, getting_fal_total / (getting_count_total - getting_count_total_succ), putting_suc_total / putting_count_total_succ, putting_fal_total / (putting_count_total - putting_count_total_succ), removing_suc_total / 
removing_count_total_succ, removing_fal_total / (removing_count_total - removing_count_total_succ) ); #endif #define LLU long long unsigned int int pr = (int) (putting_count_total_succ - removing_count_total_succ); int size_after = 0; for (i = 0; i < rand_max; i++) { size_after += (snap->map[i] == MAP_VALID); } printf("\n"); printf("#Maps | "); for (i = 0; i < rand_max; i++) { printf("#%d = %-5d | ", i, snap->map[i]); } printf("\n"); printf("#Keys | "); for (i = 0; i < rand_max; i++) { printf("#%d = %-5jd | ", i, key[i]); } printf("\n"); printf("#Vals | "); for (i = 0; i < rand_max; i++) { printf("#%d = %-5jd | ", i, (val[i]) ? *(size_t*) val[i] : -1); } printf("\n"); #if defined(DEBUG) printf("puts - rems : %d\n", pr); #endif /* assert(size_after == (pr)); */ if (size_after != pr) { printf("###### SIZE missmatch\n"); } printf(" : %-10s | %-10s | %-11s | %s\n", "total", "success", "succ %", "total %"); uint64_t total = putting_count_total + getting_count_total + removing_count_total; double putting_perc = 100.0 * (1 - ((double)(total - putting_count_total) / total)); double removing_perc = 100.0 * (1 - ((double)(total - removing_count_total) / total)); printf("puts: %-10llu | %-10llu | %10.1f%% | %.1f%%\n", (LLU) putting_count_total, (LLU) putting_count_total_succ, (1 - (double) (putting_count_total - putting_count_total_succ) / putting_count_total) * 100, putting_perc); printf("rems: %-10llu | %-10llu | %10.1f%% | %.1f%%\n", (LLU) removing_count_total, (LLU) removing_count_total_succ, (1 - (double) (removing_count_total - removing_count_total_succ) / removing_count_total) * 100, removing_perc); size_t all_total = putting_count_total + getting_count_total + removing_count_total; double throughput = (all_total) * 1000.0 / duration; printf("#txs tot (%zu\n", all_total); printf("#txs %-4d(%-10.0f = %.3f M\n", num_threads, throughput, throughput / 1e6); /* Last thing that main() should do */ //printf("Main: program completed. 
Exiting.\n"); pthread_exit(NULL); return 0; }
int main(int argc, char **argv) { set_cpu(the_cores[0]); struct option long_options[] = { // These options don't set a flag {"help", no_argument, NULL, 'h'}, {"accounts", required_argument, NULL, 'a'}, {"duration", required_argument, NULL, 'd'}, {"num-threads", required_argument, NULL, 'n'}, {"check", required_argument, NULL, 'c'}, {"deposit_perc", required_argument, NULL, 'e'}, {"servers", required_argument, NULL, 's'}, {"withdraws", required_argument, NULL, 'w'}, {NULL, 0, NULL, 0} }; bank_t *bank; int i, c; unsigned long reads, writes, updates; thread_data_t *data; pthread_t *threads; pthread_attr_t attr; barrier_t barrier; struct timeval start, end; struct timespec timeout; int nb_threads = DEFAULT_NUM_THREADS; int duration = DEFAULT_DURATION; int nb_accounts = DEFAULT_NB_ACCOUNTS; int balance_perc = DEFAULT_BALANCE_PERC; int deposit_perc = DEFAULT_DEPOSIT_PERC; int seed = DEFAULT_SEED; int withdraw_perc = DEFAULT_WITHDRAW_PERC; sigset_t block_set; while(1) { i = 0; c = getopt_long(argc, argv, "ha:c:d:n:r:e:s:w:W:j", long_options, &i); if(c == -1) break; if(c == 0 && long_options[i].flag == 0) c = long_options[i].val; switch(c) { case 0: /* Flag is automatically set */ break; case 'h': printf("bank -- lock stress test\n" "\n" "Usage:\n" " bank [options...]\n" "\n" "Options:\n" " -h, --help\n" " Print this message\n" " -a, --accounts <int>\n" " Number of accounts in the bank (default=" XSTR(DEFAULT_NB_ACCOUNTS) ")\n" " -d, --duraiton <int>\n" " Duration of the test in ms (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" " -n, --num-threads <int>\n" " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" " -c, --check <int>\n" " Percentage of check balance transactions (default=" XSTR(DEFAULT_BALANCE_PERC) ")\n" " -e, --deposit_perc <int>\n" " Percentage of deposit transactions (default=" XSTR(DEFAULT_DEPOSIT_PERC) ")\n" " -w, --withdraws <int>\n" " Percentage of withdraw_perc transactions (default=" XSTR(DEFAULT_WITHDRAW_PERC) ")\n" ); exit(0); case 
'a': nb_accounts = atoi(optarg); break; case 'd': duration = atoi(optarg); break; case 'n': nb_threads = atoi(optarg); break; case 'c': balance_perc = atoi(optarg); break; case 'e': deposit_perc = atoi(optarg); break; case 'w': withdraw_perc = atoi(optarg); break; case '?': printf("Use -h or --help for help\n"); exit(0); default: exit(1); } } assert(duration >= 0); assert(nb_accounts >= 2); assert(nb_threads > 0); assert(balance_perc >= 0 && withdraw_perc >= 0 && deposit_perc >= 0 && deposit_perc + balance_perc + withdraw_perc <= 100); nb_accounts = pow2roundup(nb_accounts); uint32_t missing = 100 - (deposit_perc + balance_perc + withdraw_perc); if (missing > 0) { balance_perc += missing; } printf("Nb accounts : %d\n", nb_accounts); printf("Num ops : %d\n", duration); printf("Nb threads : %d\n", nb_threads); printf("Check balance : %d\n", balance_perc); printf("Deposit : %d\n", deposit_perc); printf("Withdraws : %d\n", withdraw_perc); withdraw_perc += deposit_perc; balance_perc += withdraw_perc; timeout.tv_sec = duration / 1000; timeout.tv_nsec = (duration % 1000) * 1000000; if ((data = (thread_data_t *)malloc(nb_threads * sizeof(thread_data_t))) == NULL) { perror("malloc"); exit(1); } if ((threads = (pthread_t *)malloc(nb_threads * sizeof(pthread_t))) == NULL) { perror("malloc"); exit(1); } if (seed == 0) srand((int)time(NULL)); else srand(seed); bank = (bank_t *)malloc(sizeof(bank_t)); bank->accounts = (account_t *)malloc(nb_accounts * sizeof(account_t)); bank->size = nb_accounts; for (i = 0; i < bank->size; i++) { bank->accounts[i].number = i; bank->accounts[i].balance = 0; } gl.lock_data = 0; local_th_data = (local_data *)malloc(nb_threads*sizeof(local_data)); stop = 0; /* Init locks */ printf("Initializing locks\n"); the_locks = init_lock_array_global(nb_accounts, nb_threads); /* Access set from all threads */ barrier_init(&barrier, nb_threads + 1); pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for (i = 0; i < 
nb_threads; i++) { printf("Creating thread %d\n", i); data[i].id = i; data[i].nb_threads = nb_threads; data[i].nb_balance = 0; data[i].nb_deposit = 0; data[i].nb_withdraw = 0; data[i].balance_perc = balance_perc; data[i].deposit_perc = deposit_perc; data[i].withdraw_perc = withdraw_perc; data[i].seed = rand(); data[i].bank = bank; data[i].barrier = &barrier; if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) { fprintf(stderr, "Error creating thread\n"); exit(1); } } pthread_attr_destroy(&attr); /* Catch some signals */ if (signal(SIGHUP, catcher) == SIG_ERR || signal(SIGINT, catcher) == SIG_ERR || signal(SIGTERM, catcher) == SIG_ERR) { perror("signal"); exit(1); } /* Start threads */ barrier_cross(&barrier); printf("STARTING...\n"); gettimeofday(&start, NULL); if (duration > 0) { nanosleep(&timeout, NULL); } else { sigemptyset(&block_set); sigsuspend(&block_set); } stop = 1; gettimeofday(&end, NULL); printf("STOPPING...\n"); /* Wait for thread completion */ for (i = 0; i < nb_threads; i++) { if (pthread_join(threads[i], NULL) != 0) { fprintf(stderr, "Error waiting for thread completion\n"); exit(1); } } duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); reads = 0; writes = 0; updates = 0; for (i = 0; i < nb_threads; i++) { printf("Thread %d\n", i); printf(" #balance : %lu\n", data[i].nb_balance); printf(" #withdraw : %lu\n", data[i].nb_withdraw); printf(" #deposit : %lu\n", data[i].nb_deposit); updates += data[i].nb_withdraw; reads += data[i].nb_balance; writes += data[i].nb_deposit; } /* printf("Bank total : %d (expected: 0)\n", total(bank, 0)); */ printf("Duration : %d (ms)\n", duration); printf("#read txs : %lu ( %f / s)\n", reads, reads * 1000.0 / duration); printf("#write txs : %lu ( %f / s)\n", writes, writes * 1000.0 / duration); printf("#update txs : %lu ( %f / s)\n", updates, updates * 1000.0 / duration); printf("#txs : %lu ( %f / s)\n", reads + writes + updates, (reads + writes + 
updates) * 1000.0 / duration); /* Delete bank and accounts */ free(bank->accounts); free(bank); /* Cleanup locks */ //free_global(the_locks, nb_accounts); //free(threads); //free(data); return 0; }
bool ImageBufAlgo::make_texture (ImageBufAlgo::MakeTextureMode mode, const std::vector<std::string> &filenames, const std::string &_outputfilename, const ImageSpec &_configspec, std::ostream *outstream_ptr) { ASSERT (mode >= 0 && mode < ImageBufAlgo::_MakeTxLast); Timer alltime; ImageSpec configspec = _configspec; // const char *modenames[] = { "texture map", "shadow map", // "latlong environment map" }; std::stringstream localstream; // catch output when user doesn't want it std::ostream &outstream (outstream_ptr ? *outstream_ptr : localstream); double stat_readtime = 0; double stat_writetime = 0; double stat_resizetime = 0; double stat_miptime = 0; double stat_colorconverttime = 0; std::string filename = filenames[0]; if (! Filesystem::exists (filename)) { outstream << "maketx ERROR: \"" << filename << "\" does not exist\n"; return false; } std::string outputfilename = _outputfilename.length() ? _outputfilename : Filesystem::replace_extension (filename, ".tx"); // When was the input file last modified? std::time_t in_time = Filesystem::last_write_time (filename); // When in update mode, skip making the texture if the output already // exists and has the same file modification time as the input file. bool updatemode = configspec.get_int_attribute ("maketx:updatemode"); if (updatemode && Filesystem::exists (outputfilename) && (in_time == Filesystem::last_write_time (outputfilename))) { outstream << "maketx: no update required for \"" << outputfilename << "\"\n"; return true; } bool shadowmode = (mode == ImageBufAlgo::MakeTxShadow); bool envlatlmode = (mode == ImageBufAlgo::MakeTxEnvLatl); // Find an ImageIO plugin that can open the output file, and open it std::string outformat = configspec.get_string_attribute ("maketx:fileformatname", outputfilename); ImageOutput *out = ImageOutput::create (outformat.c_str()); if (! out) { outstream << "maketx ERROR: Could not find an ImageIO plugin to write " << outformat << " files:" << geterror() << "\n"; return false; } if (! 
out->supports ("tiles")) { outstream << "maketx ERROR: \"" << outputfilename << "\" format does not support tiled images\n"; return false; } ImageBuf src (filename); src.init_spec (filename, 0, 0); // force it to get the spec, not read // The cache might mess with the apparent data format. But for the // purposes of what we should output, figure it out now, before the // file has been read and cached. TypeDesc out_dataformat = src.spec().format; if (configspec.format != TypeDesc::UNKNOWN) out_dataformat = configspec.format; // We cannot compute the prman / oiio options until after out_dataformat // has been determined, as it's required (and can potentially change // out_dataformat too!) if (configspec.get_int_attribute("maketx:prman_options")) out_dataformat = set_prman_options (out_dataformat, configspec); else if (configspec.get_int_attribute("maketx:oiio_options")) out_dataformat = set_oiio_options (out_dataformat, configspec); // Read the full file locally if it's less than 1 GB, otherwise // allow the ImageBuf to use ImageCache to manage memory. bool read_local = (src.spec().image_bytes() < size_t(1024*1024*1024)); bool verbose = configspec.get_int_attribute ("maketx:verbose"); if (verbose) outstream << "Reading file: " << filename << std::endl; Timer readtimer; if (! src.read (0, 0, read_local)) { outstream << "maketx ERROR: Could not read \"" << filename << "\" : " << src.geterror() << "\n"; return false; } stat_readtime += readtimer(); // If requested - and we're a constant color - make a tiny texture instead // Only safe if the full/display window is the same as the data window. // Also note that this could affect the appearance when using "black" // wrap mode at runtime. 
std::vector<float> constantColor(src.nchannels()); bool isConstantColor = false; if (configspec.get_int_attribute("maketx:constant_color_detect") && src.spec().x == 0 && src.spec().y == 0 && src.spec().z == 0 && src.spec().full_x == 0 && src.spec().full_y == 0 && src.spec().full_z == 0 && src.spec().full_width == src.spec().width && src.spec().full_height == src.spec().height && src.spec().full_depth == src.spec().depth) { isConstantColor = ImageBufAlgo::isConstantColor (src, &constantColor[0]); if (isConstantColor) { // Reset the image, to a new image, at the tile size ImageSpec newspec = src.spec(); newspec.width = std::min (configspec.tile_width, src.spec().width); newspec.height = std::min (configspec.tile_height, src.spec().height); newspec.depth = std::min (configspec.tile_depth, src.spec().depth); newspec.full_width = newspec.width; newspec.full_height = newspec.height; newspec.full_depth = newspec.depth; std::string name = src.name() + ".constant_color"; src.reset(name, newspec); ImageBufAlgo::fill (src, &constantColor[0]); if (verbose) { outstream << " Constant color image detected. "; outstream << "Creating " << newspec.width << "x" << newspec.height << " texture instead.\n"; } } } int nchannels = configspec.get_int_attribute ("maketx:nchannels", -1); // If requested -- and alpha is 1.0 everywhere -- drop it. if (configspec.get_int_attribute("maketx:opaque_detect") && src.spec().alpha_channel == src.nchannels()-1 && nchannels <= 0 && ImageBufAlgo::isConstantChannel(src,src.spec().alpha_channel,1.0f)) { ImageBuf newsrc(src.name() + ".noalpha", src.spec()); ImageBufAlgo::setNumChannels (newsrc, src, src.nchannels()-1); src.copy (newsrc); if (verbose) { outstream << " Alpha==1 image detected. 
Dropping the alpha channel.\n"; } } // If requested - and we're a monochrome image - drop the extra channels if (configspec.get_int_attribute("maketx:monochrome_detect") && nchannels <= 0 && src.nchannels() == 3 && src.spec().alpha_channel < 0 && // RGB only ImageBufAlgo::isMonochrome(src)) { ImageBuf newsrc(src.name() + ".monochrome", src.spec()); ImageBufAlgo::setNumChannels (newsrc, src, 1); src.copy (newsrc); if (verbose) { outstream << " Monochrome image detected. Converting to single channel texture.\n"; } } // If we've otherwise explicitly requested to write out a // specific number of channels, do it. if ((nchannels > 0) && (nchannels != src.nchannels())) { ImageBuf newsrc(src.name() + ".channels", src.spec()); ImageBufAlgo::setNumChannels (newsrc, src, nchannels); src.copy (newsrc); if (verbose) { outstream << " Overriding number of channels to " << nchannels << "\n"; } } if (shadowmode) { // Some special checks for shadow maps if (src.spec().nchannels != 1) { outstream << "maketx ERROR: shadow maps require 1-channel images,\n" << "\t\"" << filename << "\" is " << src.spec().nchannels << " channels\n"; return false; } // Shadow maps only make sense for floating-point data. 
if (out_dataformat != TypeDesc::FLOAT && out_dataformat != TypeDesc::HALF && out_dataformat != TypeDesc::DOUBLE) out_dataformat = TypeDesc::FLOAT; } if (configspec.get_int_attribute("maketx:set_full_to_pixels")) { // User requested that we treat the image as uncropped or not // overscan ImageSpec &spec (src.specmod()); spec.full_x = spec.x = 0; spec.full_y = spec.y = 0; spec.full_z = spec.z = 0; spec.full_width = spec.width; spec.full_height = spec.height; spec.full_depth = spec.depth; } // Copy the input spec const ImageSpec &srcspec = src.spec(); ImageSpec dstspec = srcspec; bool orig_was_volume = srcspec.depth > 1 || srcspec.full_depth > 1; bool orig_was_crop = (srcspec.x > srcspec.full_x || srcspec.y > srcspec.full_y || srcspec.z > srcspec.full_z || srcspec.x+srcspec.width < srcspec.full_x+srcspec.full_width || srcspec.y+srcspec.height < srcspec.full_y+srcspec.full_height || srcspec.z+srcspec.depth < srcspec.full_z+srcspec.full_depth); bool orig_was_overscan = (srcspec.x < srcspec.full_x && srcspec.y < srcspec.full_y && srcspec.x+srcspec.width > srcspec.full_x+srcspec.full_width && srcspec.y+srcspec.height > srcspec.full_y+srcspec.full_height && (!orig_was_volume || (srcspec.z < srcspec.full_z && srcspec.z+srcspec.depth > srcspec.full_z+srcspec.full_depth))); // Make the output not a crop window if (orig_was_crop) { dstspec.x = 0; dstspec.y = 0; dstspec.z = 0; dstspec.width = srcspec.full_width; dstspec.height = srcspec.full_height; dstspec.depth = srcspec.full_depth; dstspec.full_x = 0; dstspec.full_y = 0; dstspec.full_z = 0; dstspec.full_width = dstspec.width; dstspec.full_height = dstspec.height; dstspec.full_depth = dstspec.depth; } if (orig_was_overscan) configspec.attribute ("wrapmodes", "black,black"); if ((dstspec.x < 0 || dstspec.y < 0 || dstspec.z < 0) && (out && !out->supports("negativeorigin"))) { // User passed negative origin but the output format doesn't // support it. Try to salvage the situation by shifting the // image into the positive range. 
if (dstspec.x < 0) { dstspec.full_x -= dstspec.x; dstspec.x = 0; } if (dstspec.y < 0) { dstspec.full_y -= dstspec.y; dstspec.y = 0; } if (dstspec.z < 0) { dstspec.full_z -= dstspec.z; dstspec.z = 0; } } // Make the output tiled, regardless of input dstspec.tile_width = configspec.tile_width ? configspec.tile_width : 64; dstspec.tile_height = configspec.tile_height ? configspec.tile_height : 64; dstspec.tile_depth = configspec.tile_depth ? configspec.tile_depth : 1; // Try to force zip (still can be overriden by configspec dstspec.attribute ("compression", "zip"); // Always prefer contiguous channels, unless overridden by configspec dstspec.attribute ("planarconfig", "contig"); // Default to black wrap mode, unless overridden by configspec dstspec.attribute ("wrapmodes", "black,black"); if (configspec.get_int_attribute ("maketx:ignore_unassoc")) dstspec.erase_attribute ("oiio:UnassociatedAlpha"); // Put a DateTime in the out file, either now, or matching the date // stamp of the input file (if update mode). time_t date; if (updatemode) date = in_time; // update mode: use the time stamp of the input else time (&date); // not update: get the time now dstspec.attribute ("DateTime", datestring(date)); std::string cmdline = configspec.get_string_attribute ("maketx:full_command_line"); if (! cmdline.empty()) { // Append command to image history std::string history = dstspec.get_string_attribute ("Exif:ImageHistory"); if (history.length() && ! 
Strutil::iends_with (history, "\n")) history += std::string("\n"); history += cmdline; dstspec.attribute ("Exif:ImageHistory", history); } bool prman_metadata = configspec.get_int_attribute ("maketx:prman_metadata"); if (shadowmode) { dstspec.attribute ("textureformat", "Shadow"); if (prman_metadata) dstspec.attribute ("PixarTextureFormat", "Shadow"); } else if (envlatlmode) { dstspec.attribute ("textureformat", "LatLong Environment"); configspec.attribute ("wrapmodes", "periodic,clamp"); if (prman_metadata) dstspec.attribute ("PixarTextureFormat", "Latlong Environment"); } else { dstspec.attribute ("textureformat", "Plain Texture"); if (prman_metadata) dstspec.attribute ("PixarTextureFormat", "Plain Texture"); } // FIXME -- should we allow tile sizes to reduce if the image is // smaller than the tile size? And when we do, should we also try // to make it bigger in the other direction to make the total tile // size more constant? // If --checknan was used and it's a floating point image, check for // nonfinite (NaN or Inf) values and abort if they are found. 
if (configspec.get_int_attribute("maketx:checknan") && (srcspec.format.basetype == TypeDesc::FLOAT || srcspec.format.basetype == TypeDesc::HALF || srcspec.format.basetype == TypeDesc::DOUBLE)) { int found_nonfinite = 0; ImageBufAlgo::parallel_image (boost::bind(check_nan_block, &src, _1, boost::ref(found_nonfinite)), OIIO::get_roi(dstspec)); if (found_nonfinite) { if (found_nonfinite > 3) outstream << "maketx ERROR: ...and Nan/Inf at " << (found_nonfinite-3) << " other pixels\n"; return false; } } // Fix nans/infs (if requested ImageBufAlgo::NonFiniteFixMode fixmode = ImageBufAlgo::NONFINITE_NONE; std::string fixnan = configspec.get_string_attribute("maketx:fixnan"); if (fixnan.empty() || fixnan == "none") { } else if (fixnan == "black") { fixmode = ImageBufAlgo::NONFINITE_BLACK; } else if (fixnan == "box3") { fixmode = ImageBufAlgo::NONFINITE_BOX3; } else { outstream << "maketx ERROR: Unknown --fixnan mode " << " fixnan\n"; return false; } int pixelsFixed = 0; if (!ImageBufAlgo::fixNonFinite (src, src, fixmode, &pixelsFixed)) { outstream << "maketx ERROR: Error fixing nans/infs.\n"; return false; } if (verbose && pixelsFixed>0) { outstream << " Warning: " << pixelsFixed << " nan/inf pixels fixed.\n"; } // Color convert the pixels, if needed, in place. If a color // conversion is required we will promote the src to floating point // (or there wont be enough precision potentially). 
Also, // independently color convert the constant color metadata ImageBuf * ccSrc = &src; // Ptr to cc'd src image ImageBuf colorBuffer; std::string incolorspace = configspec.get_string_attribute ("incolorspace"); std::string outcolorspace = configspec.get_string_attribute ("outcolorspace"); if (!incolorspace.empty() && !outcolorspace.empty() && incolorspace != outcolorspace) { if (src.spec().format != TypeDesc::FLOAT) { ImageSpec floatSpec = src.spec(); floatSpec.set_format(TypeDesc::FLOAT); colorBuffer.reset("bitdepth promoted", floatSpec); ccSrc = &colorBuffer; } Timer colorconverttimer; ColorConfig colorconfig; if (verbose) { outstream << " Converting from colorspace " << incolorspace << " to colorspace " << outcolorspace << std::endl; } if (colorconfig.error()) { outstream << "Error Creating ColorConfig\n"; outstream << colorconfig.geterror() << std::endl; return false; } ColorProcessor * processor = colorconfig.createColorProcessor ( incolorspace.c_str(), outcolorspace.c_str()); if (!processor || colorconfig.error()) { outstream << "Error Creating Color Processor." << std::endl; outstream << colorconfig.geterror() << std::endl; return false; } bool unpremult = configspec.get_int_attribute ("maketx:unpremult"); if (unpremult && verbose) outstream << " Unpremulting image..." << std::endl; if (!ImageBufAlgo::colorconvert (*ccSrc, src, processor, unpremult)) { outstream << "Error applying color conversion to image.\n"; return false; } if (isConstantColor) { if (!ImageBufAlgo::colorconvert (&constantColor[0], static_cast<int>(constantColor.size()), processor, unpremult)) { outstream << "Error applying color conversion to constant color.\n"; return false; } } ColorConfig::deleteColorProcessor(processor); processor = NULL; stat_colorconverttime += colorconverttimer(); } // Force float for the sake of the ImageBuf math dstspec.set_format (TypeDesc::FLOAT); // Handle resize to power of two, if called for if (configspec.get_int_attribute("maketx:resize") && ! 
shadowmode) { dstspec.width = pow2roundup (dstspec.width); dstspec.height = pow2roundup (dstspec.height); dstspec.full_width = dstspec.width; dstspec.full_height = dstspec.height; } bool do_resize = false; // Resize if we're up-resing for pow2 if (dstspec.width != srcspec.width || dstspec.height != srcspec.height || dstspec.full_depth != srcspec.full_depth) do_resize = true; // resize if the original was a crop if (orig_was_crop) do_resize = true; // resize if we're converting from non-border sampling to border sampling // (converting TO an OpenEXR environment map). if (envlatlmode && (Strutil::iequals(configspec.get_string_attribute("maketx:fileformatname"),"openexr") || Strutil::iends_with(outputfilename,".exr"))) do_resize = true; if (do_resize && orig_was_overscan && out && !out->supports("displaywindow")) { outstream << "maketx ERROR: format " << out->format_name() << " does not support separate display windows,\n" << " which is necessary when combining resizing" << " and an input image with overscan."; return false; } std::string filtername = configspec.get_string_attribute ("maketx:filtername", "box"); Filter2D *filter = setup_filter (filtername); if (! filter) { outstream << "maketx ERROR: could not make filter '" << filtername << "\n"; return false; } Timer resizetimer; ImageBuf dst ("temp", dstspec); ImageBuf *toplevel = &dst; // Ptr to top level of mipmap if (! do_resize) { // Don't need to resize if (dstspec.format == ccSrc->spec().format) { // Even more special case, no format change -- just use // the original copy. 
toplevel = ccSrc; } else { ImageBufAlgo::parallel_image (boost::bind(copy_block,&dst,ccSrc,_1), OIIO::get_roi(dstspec)); } } else { // Resize if (verbose) outstream << " Resizing image to " << dstspec.width << " x " << dstspec.height << std::endl; if (filtername == "box" && filter->width() == 1.0f) ImageBufAlgo::parallel_image (boost::bind(resize_block, &dst, ccSrc, _1, envlatlmode), OIIO::get_roi(dstspec)); else ImageBufAlgo::parallel_image (boost::bind(resize_block_HQ, &dst, ccSrc, _1, filter), OIIO::get_roi(dstspec)); } stat_resizetime += resizetimer(); // Update the toplevel ImageDescription with the sha1 pixel hash and constant color std::string desc = dstspec.get_string_attribute ("ImageDescription"); bool updatedDesc = false; // Eliminate any SHA-1 or ConstantColor hints in the ImageDescription. if (desc.size()) { desc = boost::regex_replace (desc, boost::regex("SHA-1=[[:xdigit:]]*[ ]*"), ""); static const char *fp_number_pattern = "([+-]?((?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?)))"; const std::string color_pattern = std::string ("ConstantColor=(\\[?") + fp_number_pattern + ",?)+\\]?[ ]*"; desc = boost::regex_replace (desc, boost::regex(color_pattern), ""); updatedDesc = true; } // The hash is only computed for the top mipmap level of pixel data. // Thus, any additional information that will effect the lower levels // (such as filtering information) needs to be manually added into the // hash. 
std::ostringstream addlHashData; addlHashData << filter->name() << " "; addlHashData << filter->width() << " "; std::string hash_digest = ImageBufAlgo::computePixelHashSHA1 (*toplevel, addlHashData.str()); if (hash_digest.length()) { if (desc.length()) desc += " "; desc += "SHA-1="; desc += hash_digest; if (verbose) outstream << " SHA-1: " << hash_digest << std::endl; updatedDesc = true; dstspec.attribute ("oiio:SHA-1", hash_digest); } if (isConstantColor) { std::ostringstream os; // Emulate a JSON array os << "["; for (unsigned int i=0; i<constantColor.size(); ++i) { if (i!=0) os << ","; os << constantColor[i]; } os << "]"; if (desc.length()) desc += " "; desc += "ConstantColor="; desc += os.str(); if (verbose) outstream << " ConstantColor: " << os.str() << std::endl; updatedDesc = true; dstspec.attribute ("oiio:ConstantColor", os.str()); } if (updatedDesc) { dstspec.attribute ("ImageDescription", desc); } if (configspec.get_float_attribute("fovcot") == 0.0f) configspec.attribute("fovcot", float(srcspec.full_width) / float(srcspec.full_height)); maketx_merge_spec (dstspec, configspec); // Write out, and compute, the mipmap levels for the speicifed image bool nomipmap = configspec.get_int_attribute ("maketx:nomipmap"); bool ok = write_mipmap (mode, *toplevel, dstspec, outputfilename, out, out_dataformat, !shadowmode && !nomipmap, filter, configspec, outstream, stat_writetime, stat_miptime); delete out; // don't need it any more // If using update mode, stamp the output file with a modification time // matching that of the input file. 
if (ok && updatemode) Filesystem::last_write_time (outputfilename, in_time); Filter2D::destroy (filter); if (verbose || configspec.get_int_attribute("maketx:stats")) { double all = alltime(); outstream << Strutil::format ("maketx run time (seconds): %5.2f\n", all);; outstream << Strutil::format (" file read: %5.2f\n", stat_readtime); outstream << Strutil::format (" file write: %5.2f\n", stat_writetime); outstream << Strutil::format (" initial resize: %5.2f\n", stat_resizetime); outstream << Strutil::format (" mip computation: %5.2f\n", stat_miptime); outstream << Strutil::format (" color convert: %5.2f\n", stat_colorconverttime); outstream << Strutil::format (" unaccounted: %5.2f\n", all-stat_readtime-stat_writetime-stat_resizetime-stat_miptime); size_t kb = Sysutil::memory_used(true) / 1024; outstream << Strutil::format ("maketx memory used: %5.1f MB\n", (double)kb/1024.0); } return ok; }
void check_opengl_features(void) { GL_vendor = xstrdup((char*)gl(GetString, GL_VENDOR)); GL_renderer = xstrdup((char*)gl(GetString, GL_RENDERER)); GL_version = xstrdup((char*)gl(GetString, GL_VERSION)); /* according to opengl spec, the version string of opengl and * glsl is * * <version number> <space> <vendor spec information> * * and <version number> is * * major.minor * * or * * major.minor.release * * */ /* build gl version */ int err; err = sscanf(GL_version, "%d.%d", &GL_major_version, &GL_minor_version); assert(err == 2); assert((GL_major_version > 0) && (GL_major_version <= 3)); assert(GL_minor_version > 0); GL_full_version = MKVER(GL_major_version, GL_minor_version); const char * tmp = (const char *)gl(GetString, GL_SHADING_LANGUAGE_VERSION); if (GL_POP_ERROR() != GL_NO_ERROR) { WARNING(OPENGL, "Doesn't support glsl\n"); GL_glsl_version = NULL; } else { GL_glsl_version = xstrdup(tmp); err = sscanf(GL_glsl_version, "%d.%d", &GLSL_major_version, &GLSL_minor_version); assert(err == 2); assert(GLSL_major_version > 0); assert(GLSL_minor_version > 0); GLSL_full_version = MKVER(GLSL_major_version, GLSL_minor_version); } VERBOSE(OPENGL, "OpenGL engine information:\n"); VERBOSE(OPENGL, "\tvendor: %s\n", GL_vendor); VERBOSE(OPENGL, "\trenderer: %s\n", GL_renderer); VERBOSE(OPENGL, "\tversion: %s\n", GL_version); VERBOSE(OPENGL, "\tglsl version: %s\n", GL_glsl_version); int x; gl(GetIntegerv, GL_SAMPLES, &x); VERBOSE(OPENGL, "\tSamples : %d\n", x); gl(GetIntegerv, GL_SAMPLE_BUFFERS, &x); VERBOSE(OPENGL, "\tSample buffers : %d\n", x); if (x > 0) gl(Enable, GL_MULTISAMPLE); if (GL_POP_ERROR()) WARNING(OPENGL, "platform does not support multisample\n"); gl(GetIntegerv, GL_MAX_TEXTURE_SIZE, &GL_max_texture_size); DEBUG(OPENGL, "system max texture size: %d\n", GL_max_texture_size); int conf_mts = conf_get_int("video.opengl.texture.maxsize", 0); if (conf_mts != 0) { conf_mts = pow2roundup(conf_mts); if (conf_mts < GL_max_texture_size) GL_max_texture_size = conf_mts; } 
DEBUG(OPENGL, "max texture size is set to %d\n", GL_max_texture_size); gl(GetIntegerv, GL_MAX_VERTEX_ATTRIBS, &GL_max_vertex_attribs); DEBUG(OPENGL, "max vertex attributes is set to %d\n", GL_max_vertex_attribs); build_extensions(); assert(GL_extensions_dict != NULL); GL_POP_ERROR(); #define verbose_feature(name, exp) do {\ if (exp) \ DEBUG(OPENGL, name " is enabled\n"); \ else \ DEBUG(OPENGL, name " is disabled\n"); \ } while(0) GL_texture_NPOT = check_extension("video.opengl.texture.enableNPOT", "GL_ARB_texture_non_power_of_two", NULL); verbose_feature("NPOT texture", GL_texture_NPOT); GL_texture_RECT = check_extension("video.opengl.texture.enableRECT", "GL_ARB_texture_rectangle", "GL_EXT_texture_rectangle", "GL_NV_texture_rectangle", NULL); verbose_feature("RECT texture", GL_texture_RECT); GL_texture_COMPRESSION = check_extension("video.opengl.texture.enableCOMPRESSION", "GL_ARB_texture_compression", NULL); verbose_feature("texture compression", GL_texture_COMPRESSION); GL_vertex_buffer_object = check_extension("video.opengl.enableVBO", "GL_ARB_vertex_buffer_object", NULL); GL_pixel_buffer_object = check_extension("video.opengl.enablePBO", "GL_ARB_pixel_buffer_object", NULL); GL_vertex_array_object = check_extension("video.opengl.enableVAO", "GL_ARB_vertex_array_object", NULL); #undef verbose_feature }