/*
 * Driver for the OpenMP dissemination barrier.
 *
 * Usage: ./dissemination num_threads num_barriers
 *
 * Initialises the barrier for num_threads threads, then runs an OpenMP
 * parallel region in which every thread passes through the barrier
 * num_barriers times and reports the total wall-clock time it spent
 * inside dissemination_barrier().
 *
 * Exits with status -1 when the argument count is wrong or when the
 * parsed values are out of range (num_threads < 1 or num_barriers < 0).
 */
int main (int argc, char **argv)
{
	int threads, num_barrier;

	if (argc != 3) {
		printf("Syntax:\n./dissemination num_threads num_barriers\n");
		exit(-1);
	}

	threads = atoi(argv[1]);
	num_barrier = atoi(argv[2]);

	/*
	 * atoi() yields 0 for non-numeric text, so a bad thread count lands
	 * here as well. A non-positive value must not reach the barrier
	 * initialiser or omp_set_num_threads(): OpenMP leaves the behaviour
	 * for a non-positive argument implementation-defined.
	 */
	if (threads < 1 || num_barrier < 0) {
		fprintf(stderr,
			"num_threads must be >= 1 and num_barriers must be >= 0\n");
		printf("Syntax:\n./dissemination num_threads num_barriers\n");
		exit(-1);
	}

	dissemination_barrier_init(threads);

	// Serial code
	printf("Serial section\n");
	omp_set_num_threads(threads);

	/*
	 * NOTE(review): the barrier was initialised for `threads`
	 * participants; this presumably requires the runtime to actually
	 * deliver that many threads to the team below -- confirm.
	 */
	#pragma omp parallel default(none) shared(num_barrier)
	{
		// Parallel section: everything declared here is private per thread.
		int num_threads = omp_get_num_threads();
		int thread_num = omp_get_thread_num();
		double total_time = 0;
		int j;

		for (j = 0; j < num_barrier; j++) {
			double time1, time2;

			printf("Hello World from thread %d of %d.\n", thread_num, num_threads);

			// Time only the barrier call itself.
			time1 = omp_get_wtime();
			dissemination_barrier();
			time2 = omp_get_wtime();

			total_time += time2 - time1;

			printf("Bye-Bye World from thread %d of %d.\n", thread_num, num_threads);
		}

		printf("Time spent in barrier(s) by thread %d is %f\n", thread_num, total_time);
	}

	printf("Back in the serial section again\n");
	return 0;
}
/*
 * Benchmark driver for the MPI dissemination barrier.
 *
 * Every process executes N outer iterations, each consisting of
 * BARRIERS_PER_LOOP consecutive calls to dissemination_barrier().
 * The process with rank 0 in MPI_COMM_WORLD measures the wall-clock time
 * of the whole loop with gettimeofday() and prints a summary with the
 * total run time (seconds) and the average time per barrier (microseconds).
 *
 * Returns 0 after MPI_Finalize().
 */
int main (int argc, char ** argv)
{
  enum { BARRIERS_PER_LOOP = 5 };

  MPI_Init(&argc, &argv);

  /* Sense flag handed to every barrier call by address. */
  bool pid_sense = true;
  int myrank;
  dissemination_barrier_init();
  int N=10000;
  struct timeval tv1, tv2;   /* only filled in and read on rank 0 */
  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);

  /*
   * Gate on the rank obtained above. The original tested `rank`, which is
   * not declared in this function, while `myrank` was fetched and unused.
   */
  if(myrank == 0)
  {
    gettimeofday(&tv1, NULL);
  }

  for(int i=0;i<N;i++)
  {
    for(int b=0;b<BARRIERS_PER_LOOP;b++)
    {
      dissemination_barrier(&pid_sense);
    }
  }

  if(myrank == 0)
  {
    gettimeofday(&tv2, NULL);

    /* Elapsed time in microseconds. */
    const double total_time = (double) (tv2.tv_usec - tv1.tv_usec)
                            + (double) (tv2.tv_sec - tv1.tv_sec)*1000000;

    /*
     * NOTE(review): P is not declared in this function; presumably it is
     * the process count defined elsewhere in the file -- confirm.
     */
    printf("\nSUMMARY:\nNumber of processes: %d\n Total run-time for %d "
            "loops with %d barriers per loop: %fs\n"
            "The average time per barrier: %fus\n",
            P, N, BARRIERS_PER_LOOP, total_time/1000000,
            (double)(total_time/(N*BARRIERS_PER_LOOP)));
  }

  dissemination_barrier_finish();
  MPI_Finalize();
  return 0;
}