Exemplo n.º 1
0
/* Output procedures **********************/
/*
 * Output callback for "r": forwards the value _V to _put_int under the
 * label "r". The cdata argument is accepted but not used here.
 */
void inc_O_r(void* cdata, _integer _V) {
   (void)cdata; /* not needed by this callback */
   _put_int("r", _V);
}
/* Main procedure *************************/
/*
 * Driver: allocates one inc context, then loops forever. Each iteration
 * prints a step banner (or a separating newline when stdin is not a
 * terminal), reads the input "n" with _get_int, feeds it to the context
 * and runs one inc_step.
 * The loop has no exit condition in this function, so the trailing
 * return is never reached from here.
 */
int main(){
   /* Steps completed so far; the banner shows the upcoming step number */
   int step_count = 0;
   /* Context allocation */
   struct inc_ctx* ctx = inc_new_ctx(NULL);

   /* Remember whether stdin (fd 0) is a terminal */
   ISATTY = isatty(0);

   /* Main loop */
   for (;;) {
      if (ISATTY) {
         printf("## STEP %d ##########\n", step_count + 1);
      } else if (step_count != 0) {
         /* No separator before the very first step */
         printf("\n");
      }
      fflush(stdout);
      step_count++;

      inc_I_n(ctx, _get_int("n"));
      inc_step(ctx);
   }
   return 1;
}
Exemplo n.º 2
0
/*
 * OpenACC atomic-operation test driver.
 *
 * Fills a[] and b[], then three times runs an accelerator region that
 * computes c[i] = a[i] + b[i] while adding inc_step() to d[0] once per
 * element. Afterwards the sum is recomputed on the host and compared
 * element by element, and d[0] is compared against 3*SIZE*inc_step().
 *
 * Returns 0 when the run completes, or -1 as soon as a c[i] differs from
 * the host-computed value. The outcome of the d[0] check is only
 * reported on stderr; it does not affect the return value.
 */
int main()
{
    int i;

    float a[SIZE];
    float b[SIZE];
    float c[SIZE];
    float seq[SIZE];

    #ifdef __NVCUDA__
    acc_init( acc_device_nvcuda );
    #endif 
    #ifdef __NVOPENCL__
    acc_init( acc_device_nvocl );
    acc_list_devices_spec( acc_device_nvocl );
    #endif 


    // Initialize the input vectors and clear the output vector.
    for (i = 0; i < SIZE; ++i) {
        //B
            a[i] = (float)i ;
            b[i] = (float)2*i;
            c[i] = 0.0f;
    }// B

    unsigned long long int tic, toc;
    // Compute vector Add
    int d[1]={0};
    int k;
    // NOTE(review): the OpenACC specification describes `atomic capture`
    // as also storing the old/new value into a second variable; the guarded
    // statement below is a plain update. Confirm whether `atomic update`
    // was intended, or whether the target compiler accepts this form.
    for(k=0; k<3; k++){
        //C
        printf("Calculation on GPU ... ");
        tic = clock();
        #pragma acc data pcopyin(a[0:SIZE],b[0:SIZE]) pcopyout(c[0:SIZE]) pcopy(d[0:1])
        {
            # pragma acc kernels 
            {
                #pragma acc loop independent 
                {
                    for (i = 0; i < SIZE; ++i) {
                        #pragma acc atomic capture
                        {
                            d[0]+=inc_step();
                        } 
                        c[i] = a[i] + b[i] ;
                    }
                }
            }
        }
        toc = clock();
        // clock() counts in units of 1/CLOCKS_PER_SEC seconds; convert to
        // milliseconds explicitly instead of assuming one tick is 1 us.
        printf(" %6.4f ms\n",(double)(toc-tic)*1000.0/CLOCKS_PER_SEC);
    }

    // ****************
    // double-check the OpenACC result sequentially on the host
    // ****************
    // Perform the add
    printf("Calculation on CPU ... ");
    tic = clock();
    for (i = 0; i < SIZE; ++i) {
            seq[i] = a[i] + b[i] ;
            if(c[i]!= seq[i]) {
                fprintf(stderr,"Error %d %16.10f!=%16.10f \n", i, c[i], seq[i]);
                return -1;
            }
    }
    toc = clock();
    printf(" %6.4f ms\n",(double)(toc-tic)*1000.0/CLOCKS_PER_SEC);
    // Three passes over SIZE elements, each adding inc_step() to d[0].
    printf("atomic sum> %d (should be %d)\n", d[0], 3*SIZE*inc_step());
    if(d[0]==3*SIZE*inc_step()){
        fprintf(stderr,"OpenACC atomic operation test was successful!\n");
    }else{
        fprintf(stderr,"OpenACC atomic operation test failed!\n");
    }
    return 0;
}
Exemplo n.º 3
0
/*
 * Account for one block just written to fd and, once a full batch of
 * fw->blocks_per_delay blocks has been written, time the batch and
 * adjust the flow state.
 *
 * fd: descriptor being written; it is synced with fdatasync() and its
 *     cached pages are advised away before the batch time is computed.
 * fw: flow bookkeeping; counters, state and (when fw->progress is set)
 *     the on-screen progress line are updated in place.
 *
 * The system calls are made outside assert() so that they still run
 * when the file is compiled with NDEBUG; only the check on their result
 * is compiled out in that case.
 */
static void measure(int fd, struct flow *fw)
{
	long delay;
	int rc;

	fw->written_blocks++;
	fw->total_written += fw->block_size;

	/* Nothing to measure until the current batch is complete. */
	if (fw->written_blocks < fw->blocks_per_delay)
		return;

	rc = fdatasync(fd);
	assert(!rc);
	rc = gettimeofday(&fw->t2, NULL);
	assert(!rc);
	/* Help the kernel to help us. */
	rc = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
	assert(!rc);
	(void)rc;	/* silence set-but-unused warnings under NDEBUG */
	delay = delay_ms(&fw->t1, &fw->t2);

	switch (fw->state) {
	case FW_INC:
		if (delay > fw->delay_ms) {
			/* Overshot the target: search between the previous
			 * half step and the current batch size.
			 */
			move_to_search(fw,
				fw->blocks_per_delay - fw->step / 2,
				fw->blocks_per_delay);
		} else if (delay < fw->delay_ms) {
			inc_step(fw);
		} else
			move_to_steady(fw);
		break;

	case FW_DEC:
		if (delay > fw->delay_ms) {
			dec_step(fw);
		} else if (delay < fw->delay_ms) {
			/* Undershot the target: search between the current
			 * batch size and half a step above it.
			 */
			move_to_search(fw, fw->blocks_per_delay,
				fw->blocks_per_delay + fw->step / 2);
		} else
			move_to_steady(fw);
		break;

	case FW_SEARCH:
		/* Stop searching once the [bpd1, bpd2] range is narrow. */
		if (fw->bpd2 - fw->bpd1 <= 3) {
			move_to_steady(fw);
			break;
		}

		/* Bisect: keep the half of the range on the target's side. */
		if (delay > fw->delay_ms) {
			fw->bpd2 = fw->blocks_per_delay;
			fw->blocks_per_delay = (fw->bpd1 + fw->bpd2) / 2;
		} else if (delay < fw->delay_ms) {
			fw->bpd1 = fw->blocks_per_delay;
			fw->blocks_per_delay = (fw->bpd1 + fw->bpd2) / 2;
		} else
			move_to_steady(fw);
		break;

	case FW_STEADY:
		update_mean(fw);

		if (delay <= fw->delay_ms) {
			move_to_inc(fw);
		}
		else if (fw->blocks_per_delay > 1) {
			/* Never shrink a batch below one block. */
			move_to_dec(fw);
		}
		break;

	default:
		assert(0);
	}

	if (fw->progress) {
		/* Instantaneous speed. */
		double inst_speed =
			(double)fw->blocks_per_delay * fw->block_size * 1000 /
			fw->delay_ms;
		const char *unit = adjust_unit(&inst_speed);
		double percent;
		/* The following shouldn't be necessary, but sometimes
		 * the initial free space isn't exactly reported
		 * by the kernel; this issue has been seen on Macs.
		 */
		if (fw->total_size < fw->total_written)
			fw->total_size = fw->total_written;
		percent = (double)fw->total_written * 100 / fw->total_size;
		erase(fw->erase);
		/* Remember how many characters were printed, for erase(). */
		fw->erase = printf("%.2f%% -- %.2f %s/s",
			percent, inst_speed, unit);
		assert(fw->erase > 0);
		if (fw->measurements > 0)
			fw->erase += pr_time(
				(fw->total_size - fw->total_written) /
				get_avg_speed(fw));
		fflush(stdout);
	}

	start_measurement(fw);
}
Exemplo n.º 4
0
/*
 * Calls move_to_inc_at_start(fw) and then immediately performs one
 * inc_step(fw); the order of the two calls is significant.
 * NOTE(review): presumably this puts fw into the increasing state and
 * applies the first increment right away; confirm against
 * move_to_inc_at_start() and inc_step().
 */
static inline void move_to_inc(struct flow *fw)
{
	move_to_inc_at_start(fw);
	inc_step(fw);
}