Пример #1
0
    TopBlock(double centerFreq, double sampleRate, double freqRes, double cycFreqRes, std::string fileName) :
        gr_top_block("Top Block"),
        Np(pow2roundup((int)sampleRate/freqRes)),
        L(Np/4),
        P(pow2roundup(sampleRate/cycFreqRes/L)),
        N(P*L),
        vector_length(N),
        source(osmosdr_make_source_c()), /* OsmoSDR Source */
        fileSource(gr_make_file_source(sizeof(float)*2, "/home/ylb/QAM16_44_1k.dat", false)),
        stv(gr_make_stream_to_vector(sizeof(float)*2, vector_length)), /* Stream to vector */
        /* autoFam - this does most of the interesting work */
        sink(make_autofam_sink(source, vector_length, centerFreq, sampleRate, freqRes, cycFreqRes, fileName))
    {

        /* Set up the OsmoSDR Source */
        source->set_sample_rate(sampleRate);
        source->set_center_freq(centerFreq);
        source->set_freq_corr(0.0);
        source->set_gain_mode(false);
        source->set_gain(30);
        source->set_if_gain(25.0);

        /* Set up the connections */
        connect(fileSource, 0, stv, 0);
        connect(stv, 0, sink, 0);
    }
Пример #2
0
void
test_int_helpers ()
{
    std::cout << "\ntest_int_helpers\n";
 
    // ispow2
    for (int i = 1; i < (1<<30); i *= 2) {
        OIIO_CHECK_ASSERT (ispow2(i));
        if (i > 1)
            OIIO_CHECK_ASSERT (! ispow2(i+1));
    }
    OIIO_CHECK_ASSERT (ispow2(int(0)));
    OIIO_CHECK_ASSERT (! ispow2(-1));
    OIIO_CHECK_ASSERT (! ispow2(-2));

    // ispow2, try size_t, which is unsigned
    for (size_t i = 1; i < (1<<30); i *= 2) {
        OIIO_CHECK_ASSERT (ispow2(i));
        if (i > 1)
            OIIO_CHECK_ASSERT (! ispow2(i+1));
    }
    OIIO_CHECK_ASSERT (ispow2((unsigned int)0));

    // pow2roundup
    OIIO_CHECK_EQUAL (pow2roundup(4), 4);
    OIIO_CHECK_EQUAL (pow2roundup(5), 8);
    OIIO_CHECK_EQUAL (pow2roundup(6), 8);
    OIIO_CHECK_EQUAL (pow2roundup(7), 8);
    OIIO_CHECK_EQUAL (pow2roundup(8), 8);

    // pow2rounddown
    OIIO_CHECK_EQUAL (pow2rounddown(4), 4);
    OIIO_CHECK_EQUAL (pow2rounddown(5), 4);
    OIIO_CHECK_EQUAL (pow2rounddown(6), 4);
    OIIO_CHECK_EQUAL (pow2rounddown(7), 4);
    OIIO_CHECK_EQUAL (pow2rounddown(8), 8);

    // round_to_multiple
    OIIO_CHECK_EQUAL (round_to_multiple(1, 5), 5);
    OIIO_CHECK_EQUAL (round_to_multiple(2, 5), 5);
    OIIO_CHECK_EQUAL (round_to_multiple(3, 5), 5);
    OIIO_CHECK_EQUAL (round_to_multiple(4, 5), 5);
    OIIO_CHECK_EQUAL (round_to_multiple(5, 5), 5);
    OIIO_CHECK_EQUAL (round_to_multiple(6, 5), 10);

    // round_to_multiple_of_pow2
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(1), 4), 4);
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(2), 4), 4);
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(3), 4), 4);
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(4), 4), 4);
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(5), 4), 8);

    // round_to_multiple_of_pow2
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(1), size_t(4)), 4);
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(2), size_t(4)), 4);
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(3), size_t(4)), 4);
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(4), size_t(4)), 4);
    OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(5), size_t(4)), 8);
}
int main(int argc, char* const argv[]) {
    //place thread on the first cpu
    set_cpu(0);
    //initialize the custom memory allocator
    ssalloc_init();
    pthread_t *threads;
    pthread_attr_t attr;
    barrier_t barrier;
    pthread_mutex_t init_lock;
    struct timeval start, end;
    struct timespec timeout;

    thread_data_t *data;
    sigset_t block_set;

    //initially, set parameters to their default values
    num_threads = DEFAULT_NUM_THREADS;
    seed=DEFAULT_SEED;
    max_key=DEFAULT_RANGE;
    updates=DEFAULT_UPDATES;
    finds=DEFAULT_READS;
    //inserts=DEFAULT_INSERTS;
    //removes=DEFAULT_REMOVES;
    duration=DEFAULT_DURATION;

    //now read the parameters in case the user provided values for them 
    //we use getopt, the same skeleton may be used for other bechmarks,
    //though the particular parameters may be different
    struct option long_options[] = {
        // These options don't set a flag
        {"help",                      no_argument,       NULL, 'h'},
        {"duration",                  required_argument, NULL, 'd'},
        {"range",                     required_argument, NULL, 'r'},
        {"initial",                     required_argument, NULL, 'i'},
        {"num-threads",               required_argument, NULL, 'n'},
        {"updates",             required_argument, NULL, 'u'},
        {"seed",                      required_argument, NULL, 's'},
        {NULL, 0, NULL, 0}
    };

    int i,c;

    //actually get the parameters form the command-line
    while(1) {
        i = 0;
        c = getopt_long(argc, argv, "hd:n:l:u:i:r:s", long_options, &i);

        if(c == -1)
            break;

        if(c == 0 && long_options[i].flag == 0)
            c = long_options[i].val;

        switch(c) {
            case 0:
                /* Flag is automatically set */
                break;
            case 'h':
                printf("lock stress test\n"
                        "\n"
                        "Usage:\n"
                        "  stress_test [options...]\n"
                        "\n"
                        "Options:\n"
                        "  -h, --help\n"
                        "        Print this message\n"
                        "  -d, --duration <int>\n"
                        "        Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n"
                        "  -u, --updates <int>\n"
                        "        Percentage of update operations (default=" XSTR(DEFAULT_UPDATES) ")\n"
                        "  -r, --range <int>\n"
                        "        Key range (default=" XSTR(DEFAULT_RANGE) ")\n"
                        "  -n, --num-threads <int>\n"
                        "        Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n"
                        "  -s, --seed <int>\n"
                        "        RNG seed (0=time-based, default=" XSTR(DEFAULT_SEED) ")\n"
                      );
                exit(0);
            case 'd':
                duration = atoi(optarg);
                break;
            case 'u':
                updates = atoi(optarg);
                finds = 100 - updates;
                break;
            case 'r':
                max_key = atoi(optarg);
                break;
            case 'i':
                break;
            case 'l':
                break;
            case 'n':
                num_threads = atoi(optarg);
                break;
            case 's':
                seed = atoi(optarg);
                break;
            case '?':
                printf("Use -h or --help for help\n");
                exit(0);
            default:
                exit(1);
        }
    }

    max_key--;
    //we round the max key up to the nearest power of 2, which makes our random key generation more efficient
    max_key = pow2roundup(max_key)-1;

    //initialization of the tree
    root = bst_initialize(num_threads);

    //initialize the data which will be passed to the threads
    if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) {
        perror("malloc");
        exit(1);
    }

    if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) {
        perror("malloc");
        exit(1);
    }

    if (seed == 0)
        srand((int)time(NULL));
    else
        srand(seed);

    //flag signaling the threads until when to run
    *running = 1;

    //global barrier initialization (used to start the threads at the same time)
    barrier_init(&barrier, num_threads + 1);
    pthread_mutex_init(&init_lock, NULL);
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    timeout.tv_sec = duration / 1000;
    timeout.tv_nsec = (duration % 1000) * 1000000;
    

    //set the data for each thread and create the threads
    for (i = 0; i < num_threads; i++) {
        data[i].id = i;
        data[i].num_operations = 0;
        data[i].total_time=0;
        data[i].num_insert=0;
        data[i].num_remove=0;
        data[i].num_search=0;
        data[i].num_add = max_key/(2 * num_threads); 
        if (i< ((max_key/2)%num_threads)) data[i].num_add++;
        data[i].seed = rand();
        data[i].barrier = &barrier;
        data[i].init_lock = &init_lock;
        if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) {
            fprintf(stderr, "Error creating thread\n");
            exit(1);
        }
    }
    pthread_attr_destroy(&attr);

    /* Catch some signals */
    if (signal(SIGHUP, catcher) == SIG_ERR ||
            signal(SIGINT, catcher) == SIG_ERR ||
            signal(SIGTERM, catcher) == SIG_ERR) {
        perror("signal");
        exit(1);
    }

    // seeds = seed_rand();
    // skey_t key;
    // for (i=0;i<max_key/2;++i) {
    //     key = my_random(&seeds[0],&seeds[1],&seeds[2]) & max_key;
    //     //we make sure the insert was effective (as opposed to just updating an existing entry)
    //     if (bst_add(key, root, 0)!=TRUE) {
    //         i--;
    //     }
    // }

    // bst_print(root);


    /* Start threads */
    barrier_cross(&barrier);
    gettimeofday(&start, NULL);
    if (duration > 0) {
        //sleep for the duration of the experiment
        nanosleep(&timeout, NULL);
    } else {
        sigemptyset(&block_set);
        sigsuspend(&block_set);
    }

    //signal the threads to stop
    *running = 0;
    gettimeofday(&end, NULL);

    /* Wait for thread completion */
    for (i = 0; i < num_threads; i++) {
        if (pthread_join(threads[i], NULL) != 0) {
            fprintf(stderr, "Error waiting for thread completion\n");
            exit(1);
        }
    }
    DDPRINT("threads finshed\n",NULL);
    //compute the exact duration of the experiment
    duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000);
    
    //bst_print(root);

    unsigned long operations = 0;
    ticks total_ticks = 0;
    long reported_total = 1; //the tree contains two initial dummy nodes, INF1 and INF2
    //report some experiment statistics
    for (i = 0; i < num_threads; i++) {
        printf("Thread %d\n", i);
        printf("  #operations   : %lu\n", data[i].num_operations);
        printf("  #inserts   : %lu\n", data[i].num_insert);
        printf("  #removes   : %lu\n", data[i].num_remove);
        operations += data[i].num_operations;
        total_ticks += data[i].total_time;
        reported_total = reported_total + data[i].num_add + data[i].num_insert - data[i].num_remove;
    }

    printf("Duration      : %d (ms)\n", duration);
    printf("#txs     : %lu (%f / s)\n", operations, operations * 1000.0 / duration);
    //printf("Operation latency %lu\n", total_ticks / operations);
    //make sure the tree is correct
    printf("Expected size: %ld Actual size: %lu\n",reported_total,bst_size(root));

    free(threads);
    free(data);

    return 0;

}
Пример #4
0
size_t
ht_status(clht_t* h, int resize_increase, int just_print)
{
  if (TRYLOCK_ACQ(&h->status_lock) && !resize_increase)
    {
      return 0;
    }

  clht_hashtable_t* hashtable = h->ht;
  uint64_t num_buckets = hashtable->num_buckets;
  volatile bucket_t* bucket = NULL;
  size_t size = 0;
  int expands = 0;
  int expands_max = 0;

  uint64_t bin;
  for (bin = 0; bin < num_buckets; bin++)
    {
      bucket = hashtable->table + bin;

      int expands_cont = -1;
      expands--;
      uint32_t j;
      do
	{
	  expands_cont++;
	  expands++;
	  for (j = 0; j < ENTRIES_PER_BUCKET; j++)
	    {
	      if (bucket->key[j] > 0)
		{
		  size++;
		}
	    }

	  bucket = bucket->next;
	}
      while (bucket != NULL);

      if (expands_cont > expands_max)
	{
	  expands_max = expands_cont;
	}
    }

  double full_ratio = 100.0 * size / ((hashtable->num_buckets) * ENTRIES_PER_BUCKET);

  if (just_print)
    {
      printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / full%%: %8.4f%% / expands: %4d / max expands: %2d\n",
	     99, hashtable->num_buckets, size, full_ratio, expands, expands_max);
    }
  else
    {
      if (full_ratio > 0 && full_ratio < CLHT_PERC_FULL_HALVE)
	{
	  printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / full%%: %8.4f%% / expands: %4d / max expands: %2d\n",
		 clht_gc_get_id(), hashtable->num_buckets, size, full_ratio, expands, expands_max);
	  ht_resize_pes(h, 0, 33);
	}
      else if ((full_ratio > 0 && full_ratio > CLHT_PERC_FULL_DOUBLE) || expands_max > CLHT_MAX_EXPANSIONS ||
	       resize_increase)
	{
	  int inc_by = (full_ratio / CLHT_OCCUP_AFTER_RES);
	  int inc_by_pow2 = pow2roundup(inc_by);

	  printf("[STATUS-%02d] #bu: %7zu / #elems: %7zu / full%%: %8.4f%% / expands: %4d / max expands: %2d\n",
		 clht_gc_get_id(), hashtable->num_buckets, size, full_ratio, expands, expands_max);
	  if (inc_by_pow2 == 1)
	    {
	      inc_by_pow2 = 2;
	    }
	  ht_resize_pes(h, 1, inc_by_pow2);
	}
    }

  if (!just_print)
    {
      clht_gc_collect(h);
    }

  TRYLOCK_RLS(h->status_lock);
  return size;
}
Пример #5
0
UINT buildTree(icl_buffer *nodelist, icl_buffer *particlesD, icl_buffer *treeD, UINT nParticles, icl_device* dev)
{
	UINT level = 1;
	UINT nNodes = nParticles * 2 - 1;

	icl_timer* timer = icl_init_timer(ICL_MILLI);
//	void icl_start_timer(icl_timer* timer);
	double time = 0;

	// overapproximate size of temporal lists
/*	struct Node** activelist = (struct Node**)malloc(nParticles * sizeof(struct Node*)); UINT activeN = 0;
	struct Node** smalllist = (struct Node**)malloc(nParticles * sizeof(struct Node*)); UINT smallN = 0;
	struct Node** nextlist = (struct Node**)malloc(nParticles * sizeof(struct Node*)); UINT nextN = 0;*/
	icl_buffer* activelist = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(NodeId) * nParticles);
	icl_buffer* smalllist = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(NodeId) * nParticles);
	icl_buffer* nextlist = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(NodeId) * nParticles);
	icl_buffer* sizes = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(UINT) * 5); // holds the current size of each of 3 buffers:
		// 0 activelist
		// 1 nodelist
		// 2 smalllist
		// 3 old max level
		// 4 new max level

	UINT maxNchunks = ((nParticles / fmin(T, chunk_size)) * 2) -1;
//	assert(maxNchunks <= 256 && "adapt implementation"); // TODO allow more than 256 chunks per node
	icl_buffer* chunks = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(struct Chunk) * maxNchunks);
	icl_buffer* bboxes = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(struct BBox) * maxNchunks);

	size_t localSize = 1;
	size_t globalSize = 1;

/*
struct Particle* particles = (struct Particle*)malloc(3000 * sizeof(struct Particle));
icl_read_buffer(particlesD, CL_TRUE, sizeof(struct Particle) * 3000, &particles[0], NULL, NULL);
printf("%f %f %f\n", particles[0].pos.x, particles[0].pos.y, particles[0].pos.z);
*/
	// compile OpenCL kernels
	icl_kernel* init = icl_create_kernel(dev, "kernel/init.cl", "init", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* resetChunks = icl_create_kernel(dev, "kernel/init.cl", "memset_chunks", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* gp2c = icl_create_kernel(dev, "kernel/groupToChunks.cl", "groupParticlesIntoChunks", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* cBBox = icl_create_kernel(dev, "kernel/chunkedBBox.cl", "chunkedBBox", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* bBox = icl_create_kernel(dev, "kernel/bBox.cl", "bBox", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* sln = icl_create_kernel(dev, "kernel/splitLargeNodes.cl", "splitLargeNodes", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* sortP = icl_create_kernel(dev, "kernel/sortParticlesToChilds.cl", "sortParticlesToChilds", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* snf = icl_create_kernel(dev, "kernel/smallNodeFiltering.cl", "smallNodeFiltering", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* pnl = icl_create_kernel(dev, "kernel/packNextlist.cl", "packNextlist", KERNEL_BUILD_MACRO, ICL_SOURCE);	

	//////////////////////////////////////////////////////////////////////////
	icl_kernel* preScan  = icl_create_kernel(dev, "kernel/sortP_prescan.cl", "sortP_prescan_chunked", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* postScan = icl_create_kernel(dev, "kernel/sortP_postscan.cl", "sortP_postscan_chunked", KERNEL_BUILD_MACRO, ICL_SOURCE);
	segmented_scan_init(nParticles, dev, KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* memset_int_s  = icl_create_kernel(dev, "kernel/init.cl", "memset_int_s", KERNEL_BUILD_MACRO, ICL_SOURCE);

	// approach with segmented scan
	icl_buffer *scan_data = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(int) * nParticles);
	icl_buffer *scan_flag = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(int) * nParticles);
	icl_buffer* buffered_particles = icl_create_buffer(dev, CL_MEM_READ_WRITE, sizeof(struct Particle) * nParticles);


#if timing == 1
	icl_timer* timer_gp2c =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_cBBox =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_bBox =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_sln =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_sortP =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_snf =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_pnl =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_ran =  icl_init_timer(ICL_MILLI);


	icl_timer* timer_prescan =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_scan =  icl_init_timer(ICL_MILLI);
	icl_timer* timer_postscan =  icl_init_timer(ICL_MILLI);

#endif
	//add root node to the activelist and initialize size lists
	icl_run_kernel(init, 1, &globalSize, &localSize, NULL, NULL, 3,
			(size_t)0, (void *)activelist,
			(size_t)0, (void *)sizes,
			(size_t)0, (void *)particlesD);

	UINT activeN = 1;

	icl_finish(dev);

	// smallest power of 2 bigger or equal to maxxNchnunks
	UINT pow2maxNchunks = pow2roundup(maxNchunks);
	// processLargeNode
	while(activeN != 0)
	{

		icl_start_timer(timer);
		// group triangles into chunks
		size_t localSize1 = min(pow2maxNchunks, 256);

#if timing == 1
		icl_start_timer(timer_gp2c);
#endif
		size_t globalSize1 = ((maxNchunks + localSize1 -1) / localSize1) * localSize1;
		// reset chunks
		icl_run_kernel(resetChunks, 1, &globalSize1, &localSize1, NULL, NULL, 2,
				(size_t)0, (void *)chunks,
				sizeof(UINT), &maxNchunks);

		globalSize1 = localSize1 * activeN;
		// split every node in chunk of chunk_size
		icl_run_kernel(gp2c, 1, &globalSize1, &localSize1, NULL, NULL, 4,
				(size_t)0, (void *)nodelist,
				(size_t)0, (void *)activelist,
				(size_t)0, (void *)chunks,
				sizeof(UINT), &activeN);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_gp2c);
#endif

		// compute per chunk bounding box
		size_t localSize2 = chunk_size;
		size_t globalSize2 = maxNchunks * chunk_size;
#if timing == 1
		icl_start_timer(timer_cBBox);
#endif

		icl_run_kernel(cBBox, 1, &globalSize2, &localSize2, NULL, NULL, 5,
				(size_t)0, (void *)nodelist,
				(size_t)0, (void *)activelist,
				(size_t)0, (void *)particlesD,
				(size_t)0, (void *)chunks,
				(size_t)0, (void *)bboxes);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_cBBox);
#endif

		// compute per node bounding box
		size_t localSize3 = min(pow2maxNchunks, 256);
		size_t globalSize3 = localSize3 * activeN;
#if timing == 1
		icl_start_timer(timer_bBox);
#endif
		icl_run_kernel(bBox, 1, &globalSize3, &localSize3, NULL, NULL, 4,
						(size_t)0, (void *)nodelist,
						(size_t)0, (void *)activelist,
						(size_t)0, (void *)bboxes,
						sizeof(UINT), &activeN);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_bBox);
#endif

		// split large nodes
		size_t localSize4 = 256;
		size_t globalSize4 = ((activeN + 255) / 256) * 256;
#if timing == 1
		icl_start_timer(timer_sln);
#endif
		icl_run_kernel(sln, 1, &globalSize4, &localSize4, NULL, NULL, 5,
						(size_t)0, (void *)nodelist,
						(size_t)0, (void *)activelist,
						(size_t)0, (void *)nextlist,
						(size_t)0, (void *)sizes,
						sizeof(UINT), &activeN);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_sln);
#endif

		///////////////////////////////////////////////////////////////////////////////
		// XXx replaced with segmented scan

		//globalSize = (activeN+1) * 256;
#if timing == 1
		icl_start_timer(timer_sortP);
#endif


#if DEVICE == ICL_CPU 
		// sort particles to child nodes
		size_t localSize5 = 256;
		size_t globalSize5 = ((activeN + 255) / 256) * 256;
		icl_run_kernel(sortP, 1, &globalSize5, &localSize5, NULL, NULL, 5,
			(size_t)0, (void *)nodelist,
			(size_t)0, (void *)particlesD,
			(size_t)0, (void *)activelist,
			(size_t)0, (void *)nextlist,
			sizeof(UINT), &activeN
		);
#else		
		// init scan_flag to 1
		cl_int initFlag = 1;
		size_t np = (size_t)((nParticles + localSize4 -1 ) / localSize4) * localSize4;
		icl_run_kernel(memset_int_s, 1, &np, &localSize4, NULL, NULL, 3,
			(size_t)0, (void *)scan_flag,
			sizeof(cl_int), &initFlag,
			sizeof(UINT), &nParticles
		);

#if timing == 1
		icl_start_timer(timer_prescan);
#endif
		// pre-scan fills data0 and data1 with 1 and 0 whenever value < pivot
		localSize = chunk_size;
//		globalSize = activeN * 256;
		globalSize = maxNchunks * chunk_size;
		icl_run_kernel(preScan, 1, &globalSize, &localSize, NULL, NULL, 7,
			(size_t)0, (void *)nodelist,
			(size_t)0, (void *)chunks,
			(size_t)0, (void *)particlesD,
			(size_t)0, (void *)activelist,
			sizeof(UINT), &activeN,
			(size_t)0, (void *)scan_data,
			(size_t)0, (void *)scan_flag						
		);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_prescan);
#endif

#if timing == 1
		icl_start_timer(timer_scan);
#endif
		// scan for
		scan(scan_data, scan_flag, nParticles);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_scan);
#endif
		// copy partially sorted data to the final
		icl_copy_buffer(particlesD, buffered_particles, sizeof(struct Particle) * nParticles, NULL, NULL);
//		swap(particlesD, buffered_particles);
#if timing == 1
		icl_start_timer(timer_postscan);
#endif

		localSize = chunk_size;
		globalSize = maxNchunks * chunk_size;
		icl_run_kernel(postScan, 1, &globalSize, &localSize, NULL, NULL, 7,
			(size_t)0, (void *)nodelist,
			(size_t)0, (void *)chunks,
			(size_t)0, (void *)particlesD,
			(size_t)0, (void *)activelist,
			sizeof(UINT), &activeN,

			(size_t)0, (void *)buffered_particles,
			(size_t)0, (void *)scan_data						
		);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_postscan);
#endif

icl_finish(dev);
#endif
		///////////////////////////////////////////////////////////////////////////////

#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_sortP);
#endif
		
		// small node filtering
		size_t localSize6 = 256;
		size_t globalSize6 = ((activeN*2 + 255) / 256) * 256;
#if timing == 1
		icl_start_timer(timer_snf);
#endif
		icl_run_kernel(snf, 1, &globalSize6, &localSize6, NULL, NULL, 4,
			(size_t)0, (void *)nodelist,
			(size_t)0, (void *)nextlist,
			(size_t)0, (void *)smalllist,
			(size_t)0, (void *)sizes
			//, sizeof(UINT), &nParticles
		);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_snf);
#endif

		// packing of nextlist
		size_t localSize7 = 1;
		size_t globalSize7 = 1;
#if timing == 1
		icl_start_timer(timer_pnl);
#endif
		icl_run_kernel(pnl, 1, &globalSize7, &localSize7, NULL, NULL, 2,
			(size_t)0, (void *)nextlist,
			(size_t)0, (void *)sizes
		);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_pnl);
#endif

		// swap nextlist and activelist
		swap(&nextlist, &activelist);

#if timing == 1
		icl_start_timer(timer_ran);
#endif
		// read size of next activelist set in kernel
		icl_read_buffer(sizes, CL_TRUE, sizeof(UINT), &activeN, NULL, NULL);
		icl_finish(dev);
#if timing == 1
		icl_stop_timer(timer_ran);
#endif

		++level;
//printf("%d: ActiveN %d\n", level, activeN);
		time = icl_stop_timer(timer);

	}

	icl_release_kernel(init);
	icl_release_kernel(gp2c);
	icl_release_kernel(cBBox);
	icl_release_kernel(bBox);
	icl_release_kernel(sln);
	icl_release_kernel(sortP);
	icl_release_kernel(snf);
	icl_release_kernel(pnl);
//////////////////////////////////////////////////////////////////////////
	icl_release_kernel(preScan);
	icl_release_kernel(postScan);
	segmented_scan_release();

	icl_release_buffers(3, scan_data, scan_flag, buffered_particles);

#if timing == 1
	printf("gp2c %f\ncBBox %f\nbBox %f\nsln  %f\nsortP %f\nsnf %f\npnl %f\nran %f\n\n",
			timer_gp2c->current_time,
			timer_cBBox->current_time,
			timer_bBox->current_time,
			timer_sln->current_time,
			timer_sortP->current_time,
			timer_snf->current_time,
			timer_pnl->current_time,
			timer_ran->current_time);
	icl_release_timer(timer_gp2c);
	icl_release_timer(timer_cBBox);
	icl_release_timer(timer_bBox);
	icl_release_timer(timer_sln);
	icl_release_timer(timer_sortP);
	icl_release_timer(timer_snf);
	icl_release_timer(timer_pnl);

	printf("prescan %f\nscan %f\npostscan %f\n\n", timer_prescan->current_time, timer_scan->current_time, timer_postscan->current_time);

	icl_release_timer(timer_prescan);
	icl_release_timer(timer_scan);
	icl_release_timer(timer_postscan);
#endif

/*
icl_read_buffer(nodelist, CL_TRUE, sizeof(struct Node) * 6000, tree->nodelist, NULL, NULL);
printf("node: %d, left %d, right %d", tree->nodelist[0].particlesHigh - tree->nodelist[0].particlesLow,
		tree->nodelist[1].particlesHigh - tree->nodelist[1].particlesLow, tree->nodelist[2].particlesHigh - tree->nodelist[2].particlesLow);

printBox(tree->nodelist[49].bounding_box);
printBox(tree->nodelist[53].bounding_box);
printBox(tree->nodelist[54].bounding_box);


for(int i = 0; i < 6000; ++i)
	if(tree->nodelist[i].bounding_box.box[0].x != 0.0)
		printBox(tree->nodelist[i].bounding_box);
*/
	//small nodes stage
//	preprocessSmallNodes(smalllist);
	icl_release_buffers(3, activelist, chunks, bboxes);
	icl_kernel* sasl = icl_create_kernel(dev, "kernel/swapActiveAndSmalllist.cl", "swapActiveAndSmalllist", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* ssn = icl_create_kernel(dev, "kernel/splitSmallNodes.cl", "splitSmallNodes", KERNEL_BUILD_MACRO, ICL_SOURCE);

#if timing == 1
	icl_timer* timer_ssn = icl_init_timer(ICL_MILLI);
	icl_timer* timer_sasl = icl_init_timer(ICL_MILLI);
	icl_timer* timer_rsn = icl_init_timer(ICL_MILLI);
#endif

	size_t localSize8 = 1;
	size_t globalSize8 = 1;
	UINT setMaxLevel = 0;
	icl_run_kernel(sasl, 1, &globalSize8, &localSize8, NULL, NULL, 2,
					(size_t)0, (void *)sizes,
					sizeof(UINT), &level);
	// get number of small nodes
	icl_read_buffer(sizes, CL_TRUE, sizeof(UINT), &activeN, NULL, NULL);

	while(activeN != 0)
	{
		icl_start_timer(timer);
		// compute SVH and determine the split plane
		size_t localSize9 = 256;
		size_t globalSize9 = ((activeN + 255) / 256) * 256;
#if timing == 1
		icl_start_timer(timer_ssn);
#endif
		icl_run_kernel(ssn, 1, &globalSize9, &localSize9, NULL, NULL, 5,
						(size_t)0, (void *)nodelist,
						(size_t)0, (void *)smalllist,
						(size_t)0, (void *)nextlist,
						(size_t)0, (void *)particlesD,
						(size_t)0, (void *)sizes);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_ssn);
#endif

		size_t localSizeA = 1;
		size_t globalSizeA = 1;
#if timing == 1
		icl_start_timer(timer_sasl);
#endif
		icl_run_kernel(sasl, 1, &globalSizeA, &localSizeA, NULL, NULL, 2,
						(size_t)0, (void *)sizes,
						sizeof(UINT), &setMaxLevel);
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_sasl);
#endif

		swap(&nextlist, &smalllist);

		// read size of next activelist set in kernel
#if timing == 1
		icl_start_timer(timer_rsn);
#endif
		icl_read_buffer(sizes, CL_TRUE, sizeof(UINT), &activeN, NULL, NULL);
//printf("small size %d\n", activeN);
		icl_finish(dev);
#if timing == 1
		icl_stop_timer(timer_rsn);
#endif
		time = icl_stop_timer(timer);
	}

	icl_release_buffer(smalllist);
	icl_release_buffer(nextlist);

	icl_release_kernel(sasl);
	icl_release_kernel(ssn);

#if timing == 1
	printf("ssn %f\nsasl %f\nrsn %f\n\n", timer_ssn->current_time, timer_sasl->current_time, timer_rsn->current_time);
	icl_release_timer(timer_ssn);
	icl_release_timer(timer_sasl);
	icl_release_timer(timer_rsn);
#endif

	UINT s[5];
	icl_read_buffer(sizes, CL_TRUE, sizeof(UINT) * 5, &s, NULL, NULL);
	icl_release_buffer(sizes);

	icl_kernel* upPass = icl_create_kernel(dev, "kernel/upPass.cl", "upPass", KERNEL_BUILD_MACRO, ICL_SOURCE);
	icl_kernel* downPass = icl_create_kernel(dev, "kernel/kdDownPass.cl", "downPass", KERNEL_BUILD_MACRO, ICL_SOURCE);

#if timing == 1
	icl_timer* timer_upPass = icl_init_timer(ICL_MILLI);
	icl_timer* timer_downPass = icl_init_timer(ICL_MILLI);
	icl_timer* timer_rt = icl_init_timer(ICL_MILLI);
#endif

	UINT treeHeight = s[4];
	printf("Tree height: %d\n", treeHeight);

	size_t localSizeB = 256;
	size_t globalSizeB = ((nNodes + 255) / 256) * 256;
	icl_start_timer(timer);
#if timing == 1
		icl_start_timer(timer_upPass);
#endif

	for(int l = (int)treeHeight; l >= 0; --l)
	{
		icl_run_kernel(upPass, 1, &globalSizeB, &localSizeB, NULL, NULL, 4,
						(size_t)0, (void *)nodelist,
						(size_t)0, (void *)particlesD,
						sizeof(int), &l,
						sizeof(UINT), &nNodes);
	}
#if timing == 1
		icl_finish(dev);
		icl_stop_timer(timer_upPass);
		icl_start_timer(timer_downPass);
#endif
	for(UINT l = 0; l <= treeHeight; ++l)
	{
		icl_run_kernel(downPass, 1, &globalSizeB, &localSizeB, NULL, NULL, 4,
						(size_t)0, (void *)nodelist,
						(size_t)0, (void *)treeD,
						sizeof(UINT), &l,
						sizeof(UINT), &nNodes);
	}
	icl_finish(dev);
#if timing == 1
	icl_stop_timer(timer_downPass);
#endif
	time = icl_stop_timer(timer);

	icl_release_kernel(upPass);
	icl_release_kernel(downPass);

#if timing == 1
	icl_start_timer(timer_rt);
#endif

#if timing == 1
	icl_finish(dev);
	icl_stop_timer(timer_rt);

	printf("upPass %f\ndownPass %f\nread Tree %f\n\n", timer_upPass->current_time, timer_downPass->current_time, timer_rt->current_time);
	icl_release_timer(timer_upPass);
	icl_release_timer(timer_downPass);
	icl_release_timer(timer_rt);
#endif

	//	struct Node* kdTree = (struct Node*)malloc(sizeof(struct Node) * nNodes);
//	icl_read_buffer(treeD, CL_TRUE, sizeof(struct Node) * nNodes, kdTree, NULL, NULL);
//	printf("%d", tree->nodelist[0].left_child);

	printf("\nTime: %f\n", time);
	icl_release_timer(timer);

	return treeHeight;
}
Пример #6
0
int main(int argc, char* const argv[])
{
    set_cpu(the_cores[0]);
#ifdef PRINT_OUTPUT
    fprintf(stderr, "The size of the data being tested: %lu\n",sizeof(data_type));
    fprintf(stderr, "Number of entries per cache line: %lu\n",CACHE_LINE_SIZE / sizeof(data_t));
#endif
    struct option long_options[] = {
            // These options don't set a flag
            {"help",                      no_argument,       NULL, 'h'},
            {"entries",                   required_argument, NULL, 'e'},
            {"duration",                  required_argument, NULL, 'd'},
            {"pause",                     required_argument, NULL, 'p'},
            {"num-threads",               required_argument, NULL, 'n'},
            {"seed",                      required_argument, NULL, 's'},
            {NULL, 0, NULL, 0}
    };

    int i, c;
    thread_data_t *data;
    pthread_t *threads;
    pthread_attr_t attr;
    barrier_t barrier;
    struct timeval start, end;
    struct timespec timeout;

    num_entries = DEFAULT_NUM_ENTRIES;
    num_threads = DEFAULT_NUM_THREADS;
    duration = DEFAULT_DURATION;
    seed = DEFAULT_SEED;
    op_pause = DEFAULT_PAUSE;

    sigset_t block_set;

    while(1) {
        i = 0;
        c = getopt_long(argc, argv, "he:d:p:n:s", long_options, &i);

        if(c == -1)
            break;

        if(c == 0 && long_options[i].flag == 0)
            c = long_options[i].val;

        switch(c) {
        case 0:
            /* Flag is automatically set */
            break;
        case 'h':
            printf("lock stress test\n"
                    "\n"
                    "Usage:\n"
                    "  stress_test [options...]\n"
                    "\n"
                    "Options:\n"
                    "  -h, --help\n"
                    "        Print this message\n"
                    "  -e, --entires <int>\n"
                    "        Number of entries in the test (default=" XSTR(DEFAULT_NUM_LOCKS) ")\n"
                    "  -d, --duration <int>\n"
                    "        Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n"
                    "  -p, --pause <int>\n"
                    "        Pause between consecutive atomic operations in cycles (default=" XSTR(DEFAULT_DURATION) ")\n"
                    "  -n, --num-threads <int>\n"
                    "        Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n"
                    "  -s, --seed <int>\n"
                    "        RNG seed (0=time-based, default=" XSTR(DEFAULT_SEED) ")\n"
            );
            exit(0);
        case 'e':
            num_entries = atoi(optarg);
            break;
        case 'd':
            duration = atoi(optarg);
            break;
        case 'n':
            num_threads = atoi(optarg);
            break;
        case 'p':
            op_pause = atoi(optarg);
            break;
        case 's':
            seed = atoi(optarg);
            break;
        case '?':
            printf("Use -h or --help for help\n");
            exit(0);
        default:
            exit(1);
        }
    }

    op_pause=op_pause/NOP_DURATION;
    num_entries = pow2roundup(num_entries);

    assert(duration >= 0);
    assert(num_entries >= 1);
    assert(num_threads > 0);

#ifdef PRINT_OUTPUT
    printf("Number of entries   : %d\n", num_entries);
    printf("Duration       : %d\n", duration);
    printf("Number of threads     : %d\n", num_threads);
    printf("Type sizes     : int=%d/long=%d/ptr=%d\n",
            (int)sizeof(int),
            (int)sizeof(long),
            (int)sizeof(void *));
#endif
    timeout.tv_sec = duration / 1000;
    timeout.tv_nsec = (duration % 1000) * 1000000;


    the_data = (data_t*)malloc(num_entries * sizeof(data_t));
    for (i = 0; i < num_entries; i++) {
        the_data[i].data=0;
    }

    if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) {
        perror("malloc");
        exit(1);
    }

    if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) {
        perror("malloc");
        exit(1);
    }

    if (seed == 0)
        srand((int)time(NULL));
    else
        srand(seed);

    stop = 0;
    /* Access set from all threads */
    barrier_init(&barrier, num_threads + 1);
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    for (i = 0; i < num_threads; i++) {
#ifdef PRINT_OUTPUT
        printf("Creating thread %d\n", i);
#endif
        data[i].id = i;
        data[i].num_operations = 0;
        data[i].seed = rand();
        data[i].barrier = &barrier;
        if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) {
            fprintf(stderr, "Error creating thread\n");
            exit(1);
        }
    }
    pthread_attr_destroy(&attr);

    /* Catch some signals */
    if (signal(SIGHUP, catcher) == SIG_ERR ||
            signal(SIGINT, catcher) == SIG_ERR ||
            signal(SIGTERM, catcher) == SIG_ERR) {
        perror("signal");
        exit(1);
    }

    /* Start threads */
    barrier_cross(&barrier);

#ifdef PRINT_OUTPUT
    printf("STARTING...\n");
#endif
    gettimeofday(&start, NULL);
    if (duration > 0) {
        nanosleep(&timeout, NULL);
    } else {
        sigemptyset(&block_set);
        sigsuspend(&block_set);
    }
    stop = 1;
    gettimeofday(&end, NULL);
#ifdef PRINT_OUTPUT
    printf("STOPPING...\n");
#endif

    /* Wait for thread completion */
    for (i = 0; i < num_threads; i++) {
        if (pthread_join(threads[i], NULL) != 0) {
            fprintf(stderr, "Error waiting for thread completion\n");
            exit(1);
        }
    }

    duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000);

    unsigned long operations = 0;
    for (i = 0; i < num_threads; i++) {
#ifdef PRINT_OUTPUT
        printf("Thread %d\n", i);
        printf("  #operations   : %lu\n", data[i].num_operations);
#endif
        operations += data[i].num_operations;
    }

#ifdef PRINT_OUTPUT
    printf("Duration      : %d (ms)\n", duration);
#endif
    printf("#operations     : %lu (%f / s)\n", operations, operations * 1000.0 / duration);


    free((data_t*) the_data);
    free(threads);
    free(data);


    return 0;
}
Пример #7
0
int
main(int argc, char **argv) 
{
  set_cpu(the_cores[0]);
    
  assert(sizeof(clht_hashtable_t) == 2*CACHE_LINE_SIZE);

  struct option long_options[] = {
    // These options don't set a flag
    {"help",                      no_argument,       NULL, 'h'},
    {"duration",                  required_argument, NULL, 'd'},
    {"num-threads",               required_argument, NULL, 'n'},
    {"range",                     required_argument, NULL, 'r'},
    {"correctness",               no_argument, NULL, 'c'},
    {"num-buckets",               required_argument, NULL, 'b'},
    {"print-vals",                required_argument, NULL, 'v'},
    {"vals-pf",                   required_argument, NULL, 'f'},
    {"obj-size",                  required_argument, NULL, 's'},
    {NULL, 0, NULL, 0}
  };

  int i, c;
  while(1) 
    {
      i = 0;
      c = getopt_long(argc, argv, "hAf:d:i:n:r:s:u:m:a:l:p:b:v:f:c", long_options, &i);
		
      if(c == -1)
	break;
		
      if(c == 0 && long_options[i].flag == 0)
	c = long_options[i].val;
		
      switch(c) 
	{
	case 0:
	  /* Flag is automatically set */
	  break;
	case 'h':
	  printf("intset -- STM stress test "
		 "(linked list)\n"
		 "\n"
		 "Usage:\n"
		 "  intset [options...]\n"
		 "\n"
		 "Options:\n"
		 "  -h, --help\n"
		 "        Print this message\n"
		 "  -d, --duration <int>\n"
		 "        Test duration in milliseconds\n"
		 "  -n, --num-threads <int>\n"
		 "        Number of threads\n"
		 "  -r, --range <int>\n"
		 "        Range of integer values inserted in set\n"
		 "  -s, --obj-size <int>\n"
		 "        Size of the objects stored in the hash table\n"
		 "  -b, --num-buckets <int>\n"
		 "        Number of initial buckets (stronger than -l)\n"
		 "  -v, --print-vals <int>\n"
		 "        When using detailed profiling, how many values to print.\n"
		 "  -f, --val-pf <int>\n"
		 "        When using detailed profiling, how many values to keep track of.\n"
		 );
	  exit(0);
	case 'd':
	  duration = atoi(optarg);
	  break;
	case 'n':
	  num_threads = atoi(optarg);
	  break;
	case 's':
	  obj_size = atol(optarg);
	  break;
	case 'v':
	  print_vals_num = atoi(optarg);
	  break;
	case 'f':
	  pf_vals_num = pow2roundup(atoi(optarg)) - 1;
	  break;
	case '?':
	default:
	  printf("Use -h or --help for help\n");
	  exit(1);
	}
    }


  rand_max = num_elements;
    
  struct timeval start, end;
  struct timespec timeout;
  timeout.tv_sec = duration / 1000;
  timeout.tv_nsec = (duration % 1000) * 1000000;
    
  printf("//duration: sec: %lu, ns: %lu\n", timeout.tv_sec, timeout.tv_nsec);
  stop = 0;
    
  /* Initialize the hashtable */

  snap = (clht_snapshot_t*) memalign(CACHE_LINE_SIZE, CACHE_LINE_SIZE);
  assert(snap != NULL);

  /* Initializes the local data */
  putting_succ = (ticks *) calloc(num_threads , sizeof(ticks));
  putting_fail = (ticks *) calloc(num_threads , sizeof(ticks));
  getting_succ = (ticks *) calloc(num_threads , sizeof(ticks));
  getting_fail = (ticks *) calloc(num_threads , sizeof(ticks));
  removing_succ = (ticks *) calloc(num_threads , sizeof(ticks));
  removing_fail = (ticks *) calloc(num_threads , sizeof(ticks));
  putting_count = (ticks *) calloc(num_threads , sizeof(ticks));
  putting_count_succ = (ticks *) calloc(num_threads , sizeof(ticks));
  getting_count = (ticks *) calloc(num_threads , sizeof(ticks));
  getting_count_succ = (ticks *) calloc(num_threads , sizeof(ticks));
  removing_count = (ticks *) calloc(num_threads , sizeof(ticks));
  removing_count_succ = (ticks *) calloc(num_threads , sizeof(ticks));
    
  pthread_t threads[num_threads];
  pthread_attr_t attr;
  int rc;
  void *status;
    
  barrier_init(&barrier_global, num_threads + 1);
  barrier_init(&barrier, num_threads);
    
  /* Initialize and set thread detached attribute */
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    
  thread_data_t* tds = (thread_data_t*) malloc(num_threads * sizeof(thread_data_t));

  long t;
  for(t = 0; t < num_threads; t++)
    {
      tds[t].id = t;
      rc = pthread_create(&threads[t], &attr, test, tds + t);
      if (rc)
	{
	  printf("ERROR; return code from pthread_create() is %d\n", rc);
	  exit(-1);
	}
        
    }
    
  /* Free attribute and wait for the other threads */
  pthread_attr_destroy(&attr);
    
  barrier_cross(&barrier_global);
  gettimeofday(&start, NULL);
  nanosleep(&timeout, NULL);

  stop = 1;
  gettimeofday(&end, NULL);
  duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000);
    
  for(t = 0; t < num_threads; t++) 
    {
      rc = pthread_join(threads[t], &status);
      if (rc) 
	{
	  printf("ERROR; return code from pthread_join() is %d\n", rc);
	  exit(-1);
	}
    }

  free(tds);
    
  volatile ticks putting_suc_total = 1;
  volatile ticks putting_fal_total = 1;
  volatile ticks getting_suc_total = 1;
  volatile ticks getting_fal_total = 1;
  volatile ticks removing_suc_total = 1;
  volatile ticks removing_fal_total = 1;
  volatile uint64_t putting_count_total = 1;
  volatile uint64_t putting_count_total_succ = 1;
  volatile uint64_t getting_count_total = 1;
  volatile uint64_t getting_count_total_succ = 1;
  volatile uint64_t removing_count_total = 1;
  volatile uint64_t removing_count_total_succ = 1;
    
  for(t=0; t < num_threads; t++) 
    {
      putting_suc_total += putting_succ[t];
      putting_fal_total += putting_fail[t];
      getting_suc_total += getting_succ[t];
      getting_fal_total += getting_fail[t];
      removing_suc_total += removing_succ[t];
      removing_fal_total += removing_fail[t];
      putting_count_total += putting_count[t];
      putting_count_total_succ += putting_count_succ[t];
      getting_count_total += getting_count[t];
      getting_count_total_succ += getting_count_succ[t];
      removing_count_total += removing_count[t];
      removing_count_total_succ += removing_count_succ[t];
    }

  if(putting_count_total == 0) 
    {
      putting_suc_total = 0;
      putting_fal_total = 0;
      putting_count_total = 1;
      putting_count_total_succ = 2;
    }
    
  if(getting_count_total == 0) 
    {
      getting_suc_total = 0;
      getting_fal_total = 0;
      getting_count_total = 1;
      getting_count_total_succ = 2;
    }
    
  if(removing_count_total == 0) 
    {
      removing_suc_total = 0;
      removing_fal_total = 0;
      removing_count_total = 1;
      removing_count_total_succ = 2;
    }
    
#if defined(COMPUTE_LATENCY)
#  if defined(DEBUG)
  printf("#thread get_suc get_fal put_suc put_fal rem_suc rem_fal\n"); fflush(stdout);
#  endif
  printf("%d\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
	 num_threads,
	 getting_suc_total / getting_count_total_succ,
	 getting_fal_total / (getting_count_total - getting_count_total_succ),
	 putting_suc_total / putting_count_total_succ,
	 putting_fal_total / (putting_count_total - putting_count_total_succ),
	 removing_suc_total / removing_count_total_succ,
	 removing_fal_total / (removing_count_total - removing_count_total_succ)
	 );
#endif

    
#define LLU long long unsigned int

  int pr = (int) (putting_count_total_succ - removing_count_total_succ);
  int size_after = 0;

  for (i = 0; i < rand_max; i++)
    {
      size_after += (snap->map[i] == MAP_VALID);
    }
  printf("\n");
  printf("#Maps     | ");
  for (i = 0; i < rand_max; i++)
    {
      printf("#%d = %-5d | ", i, snap->map[i]);
    }
  printf("\n");
  printf("#Keys     | ");
  for (i = 0; i < rand_max; i++)
    {
      printf("#%d = %-5jd | ", i, key[i]);
    }
  printf("\n");
  printf("#Vals     | ");
  for (i = 0; i < rand_max; i++)
    {
      printf("#%d = %-5jd | ", i, (val[i]) ? *(size_t*) val[i] : -1);
    }
  printf("\n");

#if defined(DEBUG)
  printf("puts - rems  : %d\n", pr);
#endif
  /* assert(size_after == (pr)); */
  if (size_after != pr)
    {
      printf("######                                                                                      SIZE missmatch\n");
    }


  printf("    : %-10s | %-10s | %-11s | %s\n", "total", "success", "succ %", "total %");
  uint64_t total = putting_count_total + getting_count_total + removing_count_total;
  double putting_perc = 100.0 * (1 - ((double)(total - putting_count_total) / total));
  double removing_perc = 100.0 * (1 - ((double)(total - removing_count_total) / total));
  printf("puts: %-10llu | %-10llu | %10.1f%% | %.1f%%\n", (LLU) putting_count_total, 
	 (LLU) putting_count_total_succ,
	 (1 - (double) (putting_count_total - putting_count_total_succ) / putting_count_total) * 100,
	 putting_perc);
  printf("rems: %-10llu | %-10llu | %10.1f%% | %.1f%%\n", (LLU) removing_count_total, 
	 (LLU) removing_count_total_succ,
	 (1 - (double) (removing_count_total - removing_count_total_succ) / removing_count_total) * 100,
	 removing_perc);


  size_t all_total = putting_count_total + getting_count_total + removing_count_total;
  double throughput = (all_total) * 1000.0 / duration;
  printf("#txs tot (%zu\n", all_total);
  printf("#txs %-4d(%-10.0f = %.3f M\n", num_threads, throughput, throughput / 1e6);
    
    
  /* Last thing that main() should do */
  //printf("Main: program completed. Exiting.\n");
  pthread_exit(NULL);
    
  return 0;
}
Пример #8
0
int 
main(int argc, char **argv)
{
    set_cpu(the_cores[0]);
  struct option long_options[] = {
    // These options don't set a flag
    {"help",                      no_argument,       NULL, 'h'},
    {"accounts",                  required_argument, NULL, 'a'},
    {"duration",                  required_argument, NULL, 'd'},
    {"num-threads",               required_argument, NULL, 'n'},
    {"check",                     required_argument, NULL, 'c'},
    {"deposit_perc",              required_argument, NULL, 'e'},
    {"servers",                   required_argument, NULL, 's'},
    {"withdraws",            required_argument, NULL, 'w'},
    {NULL, 0, NULL, 0}
  };

  bank_t *bank;
  int i, c;
  unsigned long reads, writes, updates;
  thread_data_t *data;
  pthread_t *threads;
  pthread_attr_t attr;
  barrier_t barrier;
  struct timeval start, end;
  struct timespec timeout;
  int nb_threads = DEFAULT_NUM_THREADS;
  int duration = DEFAULT_DURATION;
  int nb_accounts = DEFAULT_NB_ACCOUNTS;
  int balance_perc = DEFAULT_BALANCE_PERC;
  int deposit_perc = DEFAULT_DEPOSIT_PERC;
  int seed = DEFAULT_SEED;
  int withdraw_perc = DEFAULT_WITHDRAW_PERC;

  sigset_t block_set;

  while(1) 
    {
      i = 0;
      c = getopt_long(argc, argv, "ha:c:d:n:r:e:s:w:W:j", long_options, &i);

      if(c == -1)
	break;

      if(c == 0 && long_options[i].flag == 0)
	c = long_options[i].val;

      switch(c) {
      case 0:
	/* Flag is automatically set */
	break;
      case 'h':
	printf("bank -- lock stress test\n"
	       "\n"
	       "Usage:\n"
	       "  bank [options...]\n"
	       "\n"
	       "Options:\n"
	       "  -h, --help\n"
	       "        Print this message\n"
	       "  -a, --accounts <int>\n"
	       "        Number of accounts in the bank (default=" XSTR(DEFAULT_NB_ACCOUNTS) ")\n"
	       "  -d, --duraiton <int>\n"
	       "        Duration of the test in ms (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n"
	       "  -n, --num-threads <int>\n"
	       "        Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n"
	       "  -c, --check <int>\n"
	       "        Percentage of check balance transactions (default=" XSTR(DEFAULT_BALANCE_PERC) ")\n"
	       "  -e, --deposit_perc <int>\n"
	       "        Percentage of deposit transactions (default=" XSTR(DEFAULT_DEPOSIT_PERC) ")\n"
	       "  -w, --withdraws <int>\n"
	       "        Percentage of withdraw_perc transactions (default=" XSTR(DEFAULT_WITHDRAW_PERC) ")\n"
	       );
	exit(0);
      case 'a':
	nb_accounts = atoi(optarg);
	break;
      case 'd':
	duration = atoi(optarg);
	break;
      case 'n':
	nb_threads = atoi(optarg);
	break;
      case 'c':
	balance_perc = atoi(optarg);
	break;
      case 'e':
	deposit_perc = atoi(optarg);
	break;
      case 'w':
	withdraw_perc = atoi(optarg);
	break;
      case '?':
	printf("Use -h or --help for help\n");
	exit(0);
      default:
	exit(1);
      }
    }

  assert(duration >= 0);
  assert(nb_accounts >= 2);
  assert(nb_threads > 0);
  assert(balance_perc >= 0 && withdraw_perc >= 0
	 && deposit_perc >= 0
	 && deposit_perc + balance_perc + withdraw_perc <= 100);

  nb_accounts = pow2roundup(nb_accounts);

  uint32_t missing = 100 - (deposit_perc + balance_perc + withdraw_perc);
  if (missing > 0)
    {
      balance_perc += missing;
    }

  printf("Nb accounts    : %d\n", nb_accounts);
  printf("Num ops        : %d\n", duration);
  printf("Nb threads     : %d\n", nb_threads);
  printf("Check balance  : %d\n", balance_perc);
  printf("Deposit        : %d\n", deposit_perc);
  printf("Withdraws      : %d\n", withdraw_perc);

  withdraw_perc += deposit_perc;
  balance_perc += withdraw_perc;

  timeout.tv_sec = duration / 1000;
  timeout.tv_nsec = (duration % 1000) * 1000000;

  if ((data = (thread_data_t *)malloc(nb_threads * sizeof(thread_data_t))) == NULL) {
    perror("malloc");
    exit(1);
  }
  if ((threads = (pthread_t *)malloc(nb_threads * sizeof(pthread_t))) == NULL) {
    perror("malloc");
    exit(1);
  }

  if (seed == 0)
    srand((int)time(NULL));
  else
    srand(seed);

  bank = (bank_t *)malloc(sizeof(bank_t));
  bank->accounts = (account_t *)malloc(nb_accounts * sizeof(account_t));
  bank->size = nb_accounts;
  for (i = 0; i < bank->size; i++) {
    bank->accounts[i].number = i;
    bank->accounts[i].balance = 0;
  }

  gl.lock_data = 0;

  local_th_data = (local_data *)malloc(nb_threads*sizeof(local_data));

  stop = 0;
  /* Init locks */
  printf("Initializing locks\n");
  the_locks = init_lock_array_global(nb_accounts, nb_threads);

  /* Access set from all threads */
  barrier_init(&barrier, nb_threads + 1);
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
  for (i = 0; i < nb_threads; i++) {
    printf("Creating thread %d\n", i);
    data[i].id = i;
    data[i].nb_threads = nb_threads;
    data[i].nb_balance = 0;
    data[i].nb_deposit = 0;
    data[i].nb_withdraw = 0;
    data[i].balance_perc = balance_perc;
    data[i].deposit_perc = deposit_perc;
    data[i].withdraw_perc = withdraw_perc;

    data[i].seed = rand();
    data[i].bank = bank;
    data[i].barrier = &barrier;
    if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) {
      fprintf(stderr, "Error creating thread\n");
      exit(1);
    }
  }
  pthread_attr_destroy(&attr);

  /* Catch some signals */
  if (signal(SIGHUP, catcher) == SIG_ERR ||
      signal(SIGINT, catcher) == SIG_ERR ||
      signal(SIGTERM, catcher) == SIG_ERR) {
    perror("signal");
    exit(1);
  }


  /* Start threads */
  barrier_cross(&barrier);

  printf("STARTING...\n");
  gettimeofday(&start, NULL);
  if (duration > 0) {
    nanosleep(&timeout, NULL);
  } else {
    sigemptyset(&block_set);
    sigsuspend(&block_set);
  }
  stop = 1;
  gettimeofday(&end, NULL);
  printf("STOPPING...\n");

  /* Wait for thread completion */
  for (i = 0; i < nb_threads; i++) {
    if (pthread_join(threads[i], NULL) != 0) {
      fprintf(stderr, "Error waiting for thread completion\n");
      exit(1);
    }
  }

  duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000);
  reads = 0;
  writes = 0;
  updates = 0;
  for (i = 0; i < nb_threads; i++) {
    printf("Thread %d\n", i);
    printf("  #balance    : %lu\n", data[i].nb_balance);
    printf("  #withdraw   : %lu\n", data[i].nb_withdraw);
    printf("  #deposit    : %lu\n", data[i].nb_deposit);
    updates += data[i].nb_withdraw;
    reads += data[i].nb_balance;
    writes += data[i].nb_deposit;
  }
  /* printf("Bank total    : %d (expected: 0)\n", total(bank, 0)); */
  printf("Duration      : %d (ms)\n", duration);
  printf("#read txs     : %lu ( %f / s)\n", reads, reads * 1000.0 / duration);
  printf("#write txs    : %lu ( %f / s)\n", writes, writes * 1000.0 / duration);
  printf("#update txs   : %lu ( %f / s)\n", updates, updates * 1000.0 / duration);
  printf("#txs          : %lu ( %f / s)\n", reads + writes + updates, (reads + writes + updates) * 1000.0 / duration);
  /* Delete bank and accounts */
  free(bank->accounts);
  free(bank);

  /* Cleanup locks */
  //free_global(the_locks, nb_accounts);

  //free(threads);
  //free(data);

  return 0;
}
Пример #9
0
bool
ImageBufAlgo::make_texture (ImageBufAlgo::MakeTextureMode mode,
                            const std::vector<std::string> &filenames,
                            const std::string &_outputfilename,
                            const ImageSpec &_configspec,
                            std::ostream *outstream_ptr)
{
    ASSERT (mode >= 0 && mode < ImageBufAlgo::_MakeTxLast);
    Timer alltime;
    ImageSpec configspec = _configspec;
//    const char *modenames[] = { "texture map", "shadow map",
//                                "latlong environment map" };
    std::stringstream localstream; // catch output when user doesn't want it
    std::ostream &outstream (outstream_ptr ? *outstream_ptr : localstream);

    double stat_readtime = 0;
    double stat_writetime = 0;
    double stat_resizetime = 0;
    double stat_miptime = 0;
    double stat_colorconverttime = 0;

    std::string filename = filenames[0];
    if (! Filesystem::exists (filename)) {
        outstream << "maketx ERROR: \"" << filename << "\" does not exist\n";
        return false;
    }
    std::string outputfilename = _outputfilename.length() ? _outputfilename
        : Filesystem::replace_extension (filename, ".tx");

    // When was the input file last modified?
    std::time_t in_time = Filesystem::last_write_time (filename);

    // When in update mode, skip making the texture if the output already
    // exists and has the same file modification time as the input file.
    bool updatemode = configspec.get_int_attribute ("maketx:updatemode");
    if (updatemode && Filesystem::exists (outputfilename) &&
        (in_time == Filesystem::last_write_time (outputfilename))) {
        outstream << "maketx: no update required for \"" 
                  << outputfilename << "\"\n";
        return true;
    }

    bool shadowmode = (mode == ImageBufAlgo::MakeTxShadow);
    bool envlatlmode = (mode == ImageBufAlgo::MakeTxEnvLatl);

    // Find an ImageIO plugin that can open the output file, and open it
    std::string outformat = configspec.get_string_attribute ("maketx:fileformatname",
                                                             outputfilename);
    ImageOutput *out = ImageOutput::create (outformat.c_str());
    if (! out) {
        outstream 
            << "maketx ERROR: Could not find an ImageIO plugin to write " 
            << outformat << " files:" << geterror() << "\n";
        return false;
    }
    if (! out->supports ("tiles")) {
        outstream << "maketx ERROR: \"" << outputfilename
                  << "\" format does not support tiled images\n";
        return false;
    }

    ImageBuf src (filename);
    src.init_spec (filename, 0, 0); // force it to get the spec, not read

    // The cache might mess with the apparent data format.  But for the 
    // purposes of what we should output, figure it out now, before the
    // file has been read and cached.
    TypeDesc out_dataformat = src.spec().format;

    if (configspec.format != TypeDesc::UNKNOWN)
        out_dataformat = configspec.format;
    
    // We cannot compute the prman / oiio options until after out_dataformat
    // has been determined, as it's required (and can potentially change 
    // out_dataformat too!)
    if (configspec.get_int_attribute("maketx:prman_options"))
        out_dataformat = set_prman_options (out_dataformat, configspec);
    else if (configspec.get_int_attribute("maketx:oiio_options"))
        out_dataformat = set_oiio_options (out_dataformat, configspec);

    // Read the full file locally if it's less than 1 GB, otherwise
    // allow the ImageBuf to use ImageCache to manage memory.
    bool read_local = (src.spec().image_bytes() < size_t(1024*1024*1024));

    bool verbose = configspec.get_int_attribute ("maketx:verbose");
    if (verbose)
        outstream << "Reading file: " << filename << std::endl;
    Timer readtimer;
    if (! src.read (0, 0, read_local)) {
        outstream 
            << "maketx ERROR: Could not read \"" 
            << filename << "\" : " << src.geterror() << "\n";
        return false;
    }
    stat_readtime += readtimer();
    
    // If requested - and we're a constant color - make a tiny texture instead
    // Only safe if the full/display window is the same as the data window.
    // Also note that this could affect the appearance when using "black"
    // wrap mode at runtime.
    std::vector<float> constantColor(src.nchannels());
    bool isConstantColor = false;
    if (configspec.get_int_attribute("maketx:constant_color_detect") &&
        src.spec().x == 0 && src.spec().y == 0 && src.spec().z == 0 &&
        src.spec().full_x == 0 && src.spec().full_y == 0 &&
        src.spec().full_z == 0 && src.spec().full_width == src.spec().width &&
        src.spec().full_height == src.spec().height &&
        src.spec().full_depth == src.spec().depth) {
        isConstantColor = ImageBufAlgo::isConstantColor (src, &constantColor[0]);
        if (isConstantColor) {
            // Reset the image, to a new image, at the tile size
            ImageSpec newspec = src.spec();
            newspec.width  = std::min (configspec.tile_width, src.spec().width);
            newspec.height = std::min (configspec.tile_height, src.spec().height);
            newspec.depth  = std::min (configspec.tile_depth, src.spec().depth);
            newspec.full_width  = newspec.width;
            newspec.full_height = newspec.height;
            newspec.full_depth  = newspec.depth;
            std::string name = src.name() + ".constant_color";
            src.reset(name, newspec);
            ImageBufAlgo::fill (src, &constantColor[0]);
            if (verbose) {
                outstream << "  Constant color image detected. ";
                outstream << "Creating " << newspec.width << "x" << newspec.height << " texture instead.\n";
            }
        }
    }
    
    int nchannels = configspec.get_int_attribute ("maketx:nchannels", -1);

    // If requested -- and alpha is 1.0 everywhere -- drop it.
    if (configspec.get_int_attribute("maketx:opaque_detect") &&
          src.spec().alpha_channel == src.nchannels()-1 &&
          nchannels <= 0 &&
          ImageBufAlgo::isConstantChannel(src,src.spec().alpha_channel,1.0f)) {
        ImageBuf newsrc(src.name() + ".noalpha", src.spec());
        ImageBufAlgo::setNumChannels (newsrc, src, src.nchannels()-1);
        src.copy (newsrc);
        if (verbose) {
            outstream << "  Alpha==1 image detected. Dropping the alpha channel.\n";
        }
    }

    // If requested - and we're a monochrome image - drop the extra channels
    if (configspec.get_int_attribute("maketx:monochrome_detect") &&
          nchannels <= 0 &&
          src.nchannels() == 3 && src.spec().alpha_channel < 0 &&  // RGB only
          ImageBufAlgo::isMonochrome(src)) {
        ImageBuf newsrc(src.name() + ".monochrome", src.spec());
        ImageBufAlgo::setNumChannels (newsrc, src, 1);
        src.copy (newsrc);
        if (verbose) {
            outstream << "  Monochrome image detected. Converting to single channel texture.\n";
        }
    }

    // If we've otherwise explicitly requested to write out a
    // specific number of channels, do it.
    if ((nchannels > 0) && (nchannels != src.nchannels())) {
        ImageBuf newsrc(src.name() + ".channels", src.spec());
        ImageBufAlgo::setNumChannels (newsrc, src, nchannels);
        src.copy (newsrc);
        if (verbose) {
            outstream << "  Overriding number of channels to " << nchannels << "\n";
        }
    }
    
    if (shadowmode) {
        // Some special checks for shadow maps
        if (src.spec().nchannels != 1) {
            outstream << "maketx ERROR: shadow maps require 1-channel images,\n"
                      << "\t\"" << filename << "\" is " 
                      << src.spec().nchannels << " channels\n";
            return false;
        }
        // Shadow maps only make sense for floating-point data.
        if (out_dataformat != TypeDesc::FLOAT &&
              out_dataformat != TypeDesc::HALF &&
              out_dataformat != TypeDesc::DOUBLE)
            out_dataformat = TypeDesc::FLOAT;
    }

    if (configspec.get_int_attribute("maketx:set_full_to_pixels")) {
        // User requested that we treat the image as uncropped or not
        // overscan
        ImageSpec &spec (src.specmod());
        spec.full_x = spec.x = 0;
        spec.full_y = spec.y = 0;
        spec.full_z = spec.z = 0;
        spec.full_width = spec.width;
        spec.full_height = spec.height;
        spec.full_depth = spec.depth;
    }

    // Copy the input spec
    const ImageSpec &srcspec = src.spec();
    ImageSpec dstspec = srcspec;
    bool orig_was_volume = srcspec.depth > 1 || srcspec.full_depth > 1;
    bool orig_was_crop = (srcspec.x > srcspec.full_x ||
                          srcspec.y > srcspec.full_y ||
                          srcspec.z > srcspec.full_z ||
                          srcspec.x+srcspec.width < srcspec.full_x+srcspec.full_width ||
                          srcspec.y+srcspec.height < srcspec.full_y+srcspec.full_height ||
                          srcspec.z+srcspec.depth < srcspec.full_z+srcspec.full_depth);
    bool orig_was_overscan = (srcspec.x < srcspec.full_x &&
                              srcspec.y < srcspec.full_y &&
                              srcspec.x+srcspec.width > srcspec.full_x+srcspec.full_width &&
                              srcspec.y+srcspec.height > srcspec.full_y+srcspec.full_height &&
                              (!orig_was_volume || (srcspec.z < srcspec.full_z &&
                                                    srcspec.z+srcspec.depth > srcspec.full_z+srcspec.full_depth)));
    // Make the output not a crop window
    if (orig_was_crop) {
        dstspec.x = 0;
        dstspec.y = 0;
        dstspec.z = 0;
        dstspec.width = srcspec.full_width;
        dstspec.height = srcspec.full_height;
        dstspec.depth = srcspec.full_depth;
        dstspec.full_x = 0;
        dstspec.full_y = 0;
        dstspec.full_z = 0;
        dstspec.full_width = dstspec.width;
        dstspec.full_height = dstspec.height;
        dstspec.full_depth = dstspec.depth;
    }
    if (orig_was_overscan)
        configspec.attribute ("wrapmodes", "black,black");

    if ((dstspec.x < 0 || dstspec.y < 0 || dstspec.z < 0) &&
        (out && !out->supports("negativeorigin"))) {
        // User passed negative origin but the output format doesn't
        // support it.  Try to salvage the situation by shifting the
        // image into the positive range.
        if (dstspec.x < 0) {
            dstspec.full_x -= dstspec.x;
            dstspec.x = 0;
        }
        if (dstspec.y < 0) {
            dstspec.full_y -= dstspec.y;
            dstspec.y = 0;
        }
        if (dstspec.z < 0) {
            dstspec.full_z -= dstspec.z;
            dstspec.z = 0;
        }
    }

    // Make the output tiled, regardless of input
    dstspec.tile_width  = configspec.tile_width  ? configspec.tile_width  : 64;
    dstspec.tile_height = configspec.tile_height ? configspec.tile_height : 64;
    dstspec.tile_depth  = configspec.tile_depth  ? configspec.tile_depth  : 1;

    // Try to force zip (still can be overriden by configspec
    dstspec.attribute ("compression", "zip");
    // Always prefer contiguous channels, unless overridden by configspec
    dstspec.attribute ("planarconfig", "contig");
    // Default to black wrap mode, unless overridden by configspec
    dstspec.attribute ("wrapmodes", "black,black");

    if (configspec.get_int_attribute ("maketx:ignore_unassoc"))
        dstspec.erase_attribute ("oiio:UnassociatedAlpha");

    // Put a DateTime in the out file, either now, or matching the date
    // stamp of the input file (if update mode).
    time_t date;
    if (updatemode)
        date = in_time;  // update mode: use the time stamp of the input
    else
        time (&date);    // not update: get the time now
    dstspec.attribute ("DateTime", datestring(date));

    std::string cmdline = configspec.get_string_attribute ("maketx:full_command_line");
    if (! cmdline.empty()) {
        // Append command to image history
        std::string history = dstspec.get_string_attribute ("Exif:ImageHistory");
        if (history.length() && ! Strutil::iends_with (history, "\n"))
            history += std::string("\n");
        history += cmdline;
        dstspec.attribute ("Exif:ImageHistory", history);
    }

    bool prman_metadata = configspec.get_int_attribute ("maketx:prman_metadata");
    if (shadowmode) {
        dstspec.attribute ("textureformat", "Shadow");
        if (prman_metadata)
            dstspec.attribute ("PixarTextureFormat", "Shadow");
    } else if (envlatlmode) {
        dstspec.attribute ("textureformat", "LatLong Environment");
        configspec.attribute ("wrapmodes", "periodic,clamp");
        if (prman_metadata)
            dstspec.attribute ("PixarTextureFormat", "Latlong Environment");
    } else {
        dstspec.attribute ("textureformat", "Plain Texture");
        if (prman_metadata)
            dstspec.attribute ("PixarTextureFormat", "Plain Texture");
    }

    // FIXME -- should we allow tile sizes to reduce if the image is
    // smaller than the tile size?  And when we do, should we also try
    // to make it bigger in the other direction to make the total tile
    // size more constant?

    // If --checknan was used and it's a floating point image, check for
    // nonfinite (NaN or Inf) values and abort if they are found.
    if (configspec.get_int_attribute("maketx:checknan") &&
                    (srcspec.format.basetype == TypeDesc::FLOAT ||
                     srcspec.format.basetype == TypeDesc::HALF ||
                     srcspec.format.basetype == TypeDesc::DOUBLE)) {
        int found_nonfinite = 0;
        ImageBufAlgo::parallel_image (boost::bind(check_nan_block, &src, _1, boost::ref(found_nonfinite)),
                                      OIIO::get_roi(dstspec));
        if (found_nonfinite) {
            if (found_nonfinite > 3)
                outstream << "maketx ERROR: ...and Nan/Inf at "
                          << (found_nonfinite-3) << " other pixels\n";
            return false;
        }
    }
    
    // Fix nans/infs (if requested
    ImageBufAlgo::NonFiniteFixMode fixmode = ImageBufAlgo::NONFINITE_NONE;
    std::string fixnan = configspec.get_string_attribute("maketx:fixnan");
    if (fixnan.empty() || fixnan == "none") { }
    else if (fixnan == "black") { fixmode = ImageBufAlgo::NONFINITE_BLACK; }
    else if (fixnan == "box3") { fixmode = ImageBufAlgo::NONFINITE_BOX3; }
    else {
        outstream << "maketx ERROR: Unknown --fixnan mode " << " fixnan\n";
        return false;
    }
    
    int pixelsFixed = 0;
    if (!ImageBufAlgo::fixNonFinite (src, src, fixmode, &pixelsFixed)) {
        outstream << "maketx ERROR: Error fixing nans/infs.\n";
        return false;
    }
    
    if (verbose && pixelsFixed>0) {
        outstream << "  Warning: " << pixelsFixed << " nan/inf pixels fixed.\n";
    }
    
    
    
    // Color convert the pixels, if needed, in place.  If a color
    // conversion is required we will promote the src to floating point
    // (or there wont be enough precision potentially).  Also,
    // independently color convert the constant color metadata
    ImageBuf * ccSrc = &src;    // Ptr to cc'd src image
    ImageBuf colorBuffer;
    std::string incolorspace = configspec.get_string_attribute ("incolorspace");
    std::string outcolorspace = configspec.get_string_attribute ("outcolorspace");
    if (!incolorspace.empty() && !outcolorspace.empty() && incolorspace != outcolorspace) {
        if (src.spec().format != TypeDesc::FLOAT) {
            ImageSpec floatSpec = src.spec();
            floatSpec.set_format(TypeDesc::FLOAT);
            colorBuffer.reset("bitdepth promoted", floatSpec);
            ccSrc = &colorBuffer;
        }
        
        Timer colorconverttimer;
        ColorConfig colorconfig;
        if (verbose) {
            outstream << "  Converting from colorspace " << incolorspace 
                      << " to colorspace " << outcolorspace << std::endl;
        }
        
        if (colorconfig.error()) {
            outstream << "Error Creating ColorConfig\n";
            outstream << colorconfig.geterror() << std::endl;
            return false;
        }
        
        ColorProcessor * processor = colorconfig.createColorProcessor (
            incolorspace.c_str(), outcolorspace.c_str());
        
        if (!processor || colorconfig.error()) {
            outstream << "Error Creating Color Processor." << std::endl;
            outstream << colorconfig.geterror() << std::endl;
            return false;
        }
        
        bool unpremult = configspec.get_int_attribute ("maketx:unpremult");
        if (unpremult && verbose)
            outstream << "  Unpremulting image..." << std::endl;
        
        if (!ImageBufAlgo::colorconvert (*ccSrc, src, processor, unpremult)) {
            outstream << "Error applying color conversion to image.\n";
            return false;
        }
        
        if (isConstantColor) {
            if (!ImageBufAlgo::colorconvert (&constantColor[0],
                static_cast<int>(constantColor.size()), processor, unpremult)) {
                outstream << "Error applying color conversion to constant color.\n";
                return false;
            }
        }

        ColorConfig::deleteColorProcessor(processor);
        processor = NULL;
        stat_colorconverttime += colorconverttimer();
    }

    // Force float for the sake of the ImageBuf math
    dstspec.set_format (TypeDesc::FLOAT);

    // Handle resize to power of two, if called for
    if (configspec.get_int_attribute("maketx:resize")  &&  ! shadowmode) {
        dstspec.width = pow2roundup (dstspec.width);
        dstspec.height = pow2roundup (dstspec.height);
        dstspec.full_width = dstspec.width;
        dstspec.full_height = dstspec.height;
    }

    bool do_resize = false;
    // Resize if we're up-resing for pow2
    if (dstspec.width != srcspec.width || dstspec.height != srcspec.height ||
          dstspec.full_depth != srcspec.full_depth)
        do_resize = true;
    // resize if the original was a crop
    if (orig_was_crop)
        do_resize = true;
    // resize if we're converting from non-border sampling to border sampling
    // (converting TO an OpenEXR environment map).
    if (envlatlmode && 
        (Strutil::iequals(configspec.get_string_attribute("maketx:fileformatname"),"openexr") ||
         Strutil::iends_with(outputfilename,".exr")))
        do_resize = true;

    if (do_resize && orig_was_overscan &&
        out && !out->supports("displaywindow")) {
        outstream << "maketx ERROR: format " << out->format_name()
                  << " does not support separate display windows,\n"
                  << "              which is necessary when combining resizing"
                  << " and an input image with overscan.";
        return false;
    }

    std::string filtername = configspec.get_string_attribute ("maketx:filtername", "box");
    Filter2D *filter = setup_filter (filtername);
    if (! filter) {
        outstream << "maketx ERROR: could not make filter '" << filtername << "\n";
        return false;
    }

    Timer resizetimer;
    ImageBuf dst ("temp", dstspec);
    ImageBuf *toplevel = &dst;    // Ptr to top level of mipmap
    if (! do_resize) {
        // Don't need to resize
        if (dstspec.format == ccSrc->spec().format) {
            // Even more special case, no format change -- just use
            // the original copy.
            toplevel = ccSrc;
        } else {
            ImageBufAlgo::parallel_image (boost::bind(copy_block,&dst,ccSrc,_1),
                                          OIIO::get_roi(dstspec));
        }
    } else {
        // Resize
        if (verbose)
            outstream << "  Resizing image to " << dstspec.width 
                      << " x " << dstspec.height << std::endl;
        if (filtername == "box" && filter->width() == 1.0f)
            ImageBufAlgo::parallel_image (boost::bind(resize_block, &dst, ccSrc, _1, envlatlmode),
                                          OIIO::get_roi(dstspec));
        else
            ImageBufAlgo::parallel_image (boost::bind(resize_block_HQ, &dst, ccSrc, _1, filter),
                                          OIIO::get_roi(dstspec));
    }
    stat_resizetime += resizetimer();

    
    // Update the toplevel ImageDescription with the sha1 pixel hash and constant color
    std::string desc = dstspec.get_string_attribute ("ImageDescription");
    bool updatedDesc = false;
    
    // Eliminate any SHA-1 or ConstantColor hints in the ImageDescription.
    if (desc.size()) {
        desc = boost::regex_replace (desc, boost::regex("SHA-1=[[:xdigit:]]*[ ]*"), "");
        static const char *fp_number_pattern =
            "([+-]?((?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?)))";
        const std::string color_pattern =
            std::string ("ConstantColor=(\\[?") + fp_number_pattern + ",?)+\\]?[ ]*";
        desc = boost::regex_replace (desc, boost::regex(color_pattern), "");
        updatedDesc = true;
    }
    
    // The hash is only computed for the top mipmap level of pixel data.
    // Thus, any additional information that will effect the lower levels
    // (such as filtering information) needs to be manually added into the
    // hash.
    std::ostringstream addlHashData;
    addlHashData << filter->name() << " ";
    addlHashData << filter->width() << " ";
    
    std::string hash_digest = ImageBufAlgo::computePixelHashSHA1 (*toplevel,
        addlHashData.str());
    if (hash_digest.length()) {
        if (desc.length())
            desc += " ";
        desc += "SHA-1=";
        desc += hash_digest;
        if (verbose)
            outstream << "  SHA-1: " << hash_digest << std::endl;
        updatedDesc = true;
        dstspec.attribute ("oiio:SHA-1", hash_digest);
    }
    
    if (isConstantColor) {
        std::ostringstream os; // Emulate a JSON array
        os << "[";
        for (unsigned int i=0; i<constantColor.size(); ++i) {
            if (i!=0) os << ",";
            os << constantColor[i];
        }
        os << "]";
        
        if (desc.length())
            desc += " ";
        desc += "ConstantColor=";
        desc += os.str();
        if (verbose)
            outstream << "  ConstantColor: " << os.str() << std::endl;
        updatedDesc = true;
        dstspec.attribute ("oiio:ConstantColor", os.str());
    }
    
    if (updatedDesc) {
        dstspec.attribute ("ImageDescription", desc);
    }


    if (configspec.get_float_attribute("fovcot") == 0.0f)
        configspec.attribute("fovcot", float(srcspec.full_width) / 
                                       float(srcspec.full_height));


    maketx_merge_spec (dstspec, configspec);

    // Write out, and compute, the mipmap levels for the speicifed image
    bool nomipmap = configspec.get_int_attribute ("maketx:nomipmap");
    bool ok = write_mipmap (mode, *toplevel, dstspec, outputfilename,
                            out, out_dataformat, !shadowmode && !nomipmap,
                            filter, configspec, outstream,
                            stat_writetime, stat_miptime);
    delete out;  // don't need it any more

    // If using update mode, stamp the output file with a modification time
    // matching that of the input file.
    if (ok && updatemode)
        Filesystem::last_write_time (outputfilename, in_time);

    Filter2D::destroy (filter);

    if (verbose || configspec.get_int_attribute("maketx:stats")) {
        double all = alltime();
        outstream << Strutil::format ("maketx run time (seconds): %5.2f\n", all);;

        outstream << Strutil::format ("  file read:       %5.2f\n", stat_readtime);
        outstream << Strutil::format ("  file write:      %5.2f\n", stat_writetime);
        outstream << Strutil::format ("  initial resize:  %5.2f\n", stat_resizetime);
        outstream << Strutil::format ("  mip computation: %5.2f\n", stat_miptime);
        outstream << Strutil::format ("  color convert:   %5.2f\n", stat_colorconverttime);
        outstream << Strutil::format ("  unaccounted:     %5.2f\n",
                                      all-stat_readtime-stat_writetime-stat_resizetime-stat_miptime);
        size_t kb = Sysutil::memory_used(true) / 1024;
        outstream << Strutil::format ("maketx memory used: %5.1f MB\n",
                                      (double)kb/1024.0);
    }

    return ok;
}
Пример #10
0
void
check_opengl_features(void)
{
	GL_vendor = xstrdup((char*)gl(GetString, GL_VENDOR));
	GL_renderer = xstrdup((char*)gl(GetString, GL_RENDERER));
	GL_version = xstrdup((char*)gl(GetString, GL_VERSION));
	/* according to opengl spec, the version string of opengl and
	 * glsl is 
	 *
	 * <version number> <space> <vendor spec information>
	 *
	 * and <version number> is
	 *
	 * major.minor
	 *
	 * or
	 *
	 * major.minor.release
	 *
	 * */

	/* build gl version */
	int err;
	err = sscanf(GL_version, "%d.%d", &GL_major_version, &GL_minor_version);
	assert(err == 2);
	assert((GL_major_version > 0) && (GL_major_version <= 3));
	assert(GL_minor_version > 0);
	GL_full_version = MKVER(GL_major_version, GL_minor_version);

	const char * tmp = (const char *)gl(GetString, GL_SHADING_LANGUAGE_VERSION);
	if (GL_POP_ERROR() != GL_NO_ERROR) {
		WARNING(OPENGL, "Doesn't support glsl\n");
		GL_glsl_version = NULL;
	} else {
		GL_glsl_version = xstrdup(tmp);

		err = sscanf(GL_glsl_version, "%d.%d", &GLSL_major_version, &GLSL_minor_version);
		assert(err == 2);
		assert(GLSL_major_version > 0);
		assert(GLSL_minor_version > 0);
		GLSL_full_version = MKVER(GLSL_major_version, GLSL_minor_version);
	}

	VERBOSE(OPENGL, "OpenGL engine information:\n");
	VERBOSE(OPENGL, "\tvendor: %s\n", GL_vendor);
	VERBOSE(OPENGL, "\trenderer: %s\n", GL_renderer);
	VERBOSE(OPENGL, "\tversion: %s\n", GL_version);
	VERBOSE(OPENGL, "\tglsl version: %s\n", GL_glsl_version);

	int x;
	gl(GetIntegerv, GL_SAMPLES, &x);
	VERBOSE(OPENGL, "\tSamples : %d\n", x);
	gl(GetIntegerv, GL_SAMPLE_BUFFERS, &x);
	VERBOSE(OPENGL, "\tSample buffers : %d\n", x);
	if (x > 0)
		gl(Enable, GL_MULTISAMPLE);
	if (GL_POP_ERROR())
		WARNING(OPENGL, "platform does not support multisample\n");

	gl(GetIntegerv, GL_MAX_TEXTURE_SIZE, &GL_max_texture_size);
	DEBUG(OPENGL, "system max texture size: %d\n", GL_max_texture_size);
	int conf_mts = conf_get_int("video.opengl.texture.maxsize", 0);
	if (conf_mts != 0) {
		conf_mts = pow2roundup(conf_mts);
		if (conf_mts < GL_max_texture_size)
			GL_max_texture_size = conf_mts;
	}
	DEBUG(OPENGL, "max texture size is set to %d\n", GL_max_texture_size);

	gl(GetIntegerv, GL_MAX_VERTEX_ATTRIBS, &GL_max_vertex_attribs);
	DEBUG(OPENGL, "max vertex attributes is set to %d\n", GL_max_vertex_attribs);

	build_extensions();
	assert(GL_extensions_dict != NULL);
	GL_POP_ERROR();

#define verbose_feature(name, exp) do {\
	if (exp)	\
		DEBUG(OPENGL, name " is enabled\n");	\
	else		\
		DEBUG(OPENGL, name " is disabled\n");	\
	} while(0)

	GL_texture_NPOT = check_extension("video.opengl.texture.enableNPOT",
			"GL_ARB_texture_non_power_of_two",
			NULL);
	verbose_feature("NPOT texture", GL_texture_NPOT);

	GL_texture_RECT = check_extension("video.opengl.texture.enableRECT",
			"GL_ARB_texture_rectangle",
			"GL_EXT_texture_rectangle",
			"GL_NV_texture_rectangle",
			NULL);
	verbose_feature("RECT texture", GL_texture_RECT);

	GL_texture_COMPRESSION = check_extension("video.opengl.texture.enableCOMPRESSION",
			"GL_ARB_texture_compression",
			NULL);
	verbose_feature("texture compression", GL_texture_COMPRESSION);

	GL_vertex_buffer_object = check_extension("video.opengl.enableVBO",
			"GL_ARB_vertex_buffer_object",
			NULL);
	GL_pixel_buffer_object = check_extension("video.opengl.enablePBO",
			"GL_ARB_pixel_buffer_object",
			NULL);
	GL_vertex_array_object = check_extension("video.opengl.enableVAO",
			"GL_ARB_vertex_array_object",
			NULL);
#undef verbose_feature
}