/*
 * Interactive menu loop of the mutex e-learning tutorial.
 *
 * Each pass runs the "cls" shell command, prints the banner and menu, reads a
 * numeric choice from stdin and dispatches on it:
 *   1 -> definemutex()
 *   2 -> guidencetorunmutex()
 *   3 -> runmutex()
 *   4 -> print the farewell message and return immediately
 *   anything else -> print the wrong-choice message
 * After choices other than 4 the user is asked whether to continue; the loop
 * repeats while the key returned by getch() is 'y' or 'Y', otherwise the
 * farewell message is printed and the function returns.
 */
void mainmutexmodule()
{
	int mutexchoice;
	char mutexrepeat;
	do
	{
		system("cls");
		printf("\n================================================== WELCOME TO MUTEX E - LEARNING TUTORIAL =================================\n");
	    printf("\n                                                       WISH YOU A HAPPY E - LEARNING \n\n");
		printf("\n1.WHAT IS MUTEX?\n\n2.INSTRUCTIONS AND LOGIC OF MODULE (READ IT RECOMMENDED\n\n3.HOW A MUTEX WORKS?\n\n4.EXIT\n\nENTER YOUR CHOICE\n\n");
		if (scanf("%d",&mutexchoice) != 1)
		{
			/*
			 * No number could be read (non-numeric text or end of input).
			 * Without this check mutexchoice would be used uninitialized and
			 * the offending characters would stay in stdin, failing every
			 * later read as well. Discard the rest of the line and fall
			 * through to the wrong-choice branch.
			 */
			int discarded;
			while ((discarded = getchar()) != '\n' && discarded != EOF)
			{
			}
			mutexchoice = 0;
		}
		switch(mutexchoice)
		{
			case 1:
				system("cls");
				definemutex();
				break;
			case 2:
			    system("cls");
				guidencetorunmutex();
				break;
			case 3:
			    system("cls");
				runmutex();
				break;
			case 4:
				printf("\n\n\t\t\tTHANKYOU FOR USING MUTEX E-LEARNING TUTORIAL\n\n");
			    return;
			default:
			    printf("\n\nYOU ENTERED WRONG CHOICE\n");
				break;
		}
		printf("\n\nDO YOU WANNA CONTINUE ABOUT MUTEX (Y/N) :\n\n");
		mutexrepeat=getch();
	}while(mutexrepeat=='y'||mutexrepeat=='Y');
	printf("\n\n\t\t\tTHANKYOU FOR USING MUTEX E-LEARNING TUTORIAL\n\n");
}
/*
 * Benchmark driver.
 *
 * 1. Parses the command line into the file-level settings (min_threads,
 *    max_threads, threads_step, duration, loops, locktype). Options with a
 *    value use the form --name=value; the remaining options each set one bit
 *    of locktype. An unknown argument prints a message and exits with 1.
 *    If no operation bit was selected, all of them are enabled.
 * 2. Validates the settings that would otherwise cause out-of-bounds array
 *    access, division by zero or a loop that never advances.
 * 3. Lays out one my_counter per thread twice: once at roundup()-ed offsets
 *    ("align") and once densely packed ("compact").
 * 4. Calibrates `iter` by repeatedly calling runmutex() on align[0] until
 *    `duration` seconds (measured with now(), used as microseconds) elapsed.
 * 5. For every selected operation and every thread count in
 *    [min_threads, max_threads) stepping by threads_step, runs `loops`
 *    timed rounds of `threads` threads and prints mean time, standard
 *    deviation, relative deviation and throughput.
 *
 * Returns 0 on success; exits with status 1 on invalid arguments, allocation
 * failure or thread creation/join failure.
 */
int
main(int argc, char** argv)
{
  int i;
  const char *name;
  unsigned long long sum, sum2;

  /*
   * sizeof("--opt") counts the terminating NUL, so it equals the length of
   * "--opt=": it serves both as the compare length and as the offset of the
   * value that follows the '='.
   */
  for (i = 1; i<argc; i++)
  {
    if (strncmp(argv[i], "--threads=", sizeof("--threads")) == 0)
    {
      min_threads = atoi(argv[i] + sizeof("--threads"));
      max_threads = min_threads + 1;
    }
    else if (strncmp(argv[i], "--threads_lo=", sizeof("--threads_lo")) == 0)
    {
      min_threads = atoi(argv[i] + sizeof("--threads_lo"));
      if (max_threads <= min_threads)
        max_threads = min_threads + 1;
    }
    else if (strncmp(argv[i], "--threads_hi=", sizeof("--threads_hi")) == 0)
    {
      max_threads = atoi(argv[i] + sizeof("--threads_hi"));
      if (min_threads >= max_threads)
        min_threads = max_threads - 1;
      if (min_threads <= 0)
        min_threads = 1;
    }
    else if (strncmp(argv[i], "--threads_step=", sizeof("--threads_step")) == 0)
    {
      threads_step = atoi(argv[i] + sizeof("--threads_step"));
    }
    else if (strncmp(argv[i], "--time=", sizeof("--time")) == 0)
    {
      duration = atoi(argv[i] + sizeof("--time"));
    }
    else if (strncmp(argv[i], "--loops=", sizeof("--loops")) == 0)
    {
      loops = atoi(argv[i] + sizeof("--loops"));
    }
    else if (strcmp(argv[i], "--mutex_align") == 0)
    {
      locktype |= 1 << 0;
    }
    else if (strcmp(argv[i], "--mutex_non_align") == 0)
    {
      locktype |= 1 << 1;
    }
    else if (strcmp(argv[i], "--spin_align") == 0)
    {
      locktype |= 1 << 2;
    }
    else if (strcmp(argv[i], "--spin_non_align") == 0)
    {
      locktype |= 1 << 3;
    }
    else if (strcmp(argv[i], "--lock_xadd") == 0)
    {
      locktype |= 1 << 4;
    }
    else if (strcmp(argv[i], "--xadd") == 0)
    {
      locktype |= 1 << 5;
    }
    else if (strcmp(argv[i], "--gcc_sync_fetch_and_add") == 0)
    {
      locktype |= 1 << 6;
    }
    else if (strcmp(argv[i], "--add_mb") == 0)
    {
      locktype |= 1 << 7;
    }
    else if (strcmp(argv[i], "--add") == 0)
    {
      locktype |= 1 << 8;
    }
    else if (strcmp(argv[i], "--nop") == 0)
    {
      locktype |= 1 << 9;
    }
    else
    {
      printf("unknown argument >%s<\n", argv[i]); fflush(stdout);
      exit(1);
    }
  }

  /* No operation selected: enable every bit so all of them are run. */
  if (locktype == 0)
    locktype = -1;

  /*
   * Reject settings the code below cannot handle:
   *  - max_threads sizes the per-thread arrays (MAX_THREADS entries) and
   *    align[0] is always used for calibration;
   *  - a thread count below 1 would divide iter by zero;
   *  - a step below 1 would never terminate the thread-count loop;
   *  - loops below 1 would divide the statistics by zero.
   */
  if (min_threads < 1 || max_threads < 1 || max_threads > MAX_THREADS)
  {
    fprintf(stderr, "invalid thread range: bounds must lie within 1..%d\n",
            (int)MAX_THREADS);
    exit(1);
  }
  if (threads_step < 1)
  {
    fprintf(stderr, "invalid --threads_step: must be >= 1\n");
    exit(1);
  }
  if (loops < 1)
  {
    fprintf(stderr, "invalid --loops: must be >= 1\n");
    exit(1);
  }
  if (duration < 0)
  {
    fprintf(stderr, "invalid --time: must be >= 0\n");
    exit(1);
  }

  printf("sizeof(pthread_mutex_t): %zu\n", sizeof(pthread_mutex_t)); fflush(stdout);

  /*
   * Keep the raw allocation separately: the pointer handed out for the
   * aligned layout is rounded up and can no longer be passed to free().
   */
  void *raw0 = malloc(max_threads*roundup(sizeof(my_counter))+2*ALIGN);
  char* ptr1 = (char*)malloc(max_threads * sizeof(my_counter));
  if (raw0 == 0 || ptr1 == 0)
  {
    fprintf(stderr, "out of memory allocating counters\n");
    free(raw0);
    free(ptr1);
    exit(1);
  }
  char* ptr0 = (char*)roundup((size_t)raw0);

  struct my_counter *align[MAX_THREADS];
  struct my_counter *compact[MAX_THREADS];

  for (i = 0; i<max_threads; i++)
  {
    align[i] = (struct my_counter*)(ptr0 + i * roundup(sizeof(my_counter)));
    compact[i] = (struct my_counter*)(ptr1 + i * sizeof(my_counter));
  }

  /**
   * calibrate iteration by using mutex_align
   */
  {
    pthread_mutex_init(&align[0]->mutex, 0);
    printf("calibrating..."); fflush(stdout);
    unsigned long long start = now();
    /* Multiply in 64 bits so long durations cannot overflow int. */
    unsigned long long deadline = start + 1000000ULL * duration;
    iter = 0;
    thr_iter = 100000;
    do
    {
      runmutex(align[0]);
      iter += thr_iter;
    } while (now() < deadline);
    pthread_mutex_destroy(&align[0]->mutex);
    printf("done. using %llu lock/unlock pairs\n", iter); fflush(stdout);
  }

  for (int lt = 0; lt < OP_CNT; lt++)
  {
    if ((locktype & (1 << lt)) == 0)
      continue;

    pthread_t thr_id[MAX_THREADS];
    struct my_counter ** base = 0;

    /* name stays 0 when the operation is compiled out; it is then skipped. */
    name = 0;
    switch(lt){
    case 0:
      name = "mutex_align";
      runfunc = runmutex;
      base = align;
      break;
    case 1:
      name = "mutex_non_align";
      runfunc = runmutex;
      base = compact;
      break;
    case 2:
#ifdef HAVE_SPINLOCK
      name = "spin_align";
      runfunc = runspin;
      base = align;
#endif
      break;
    case 3:
#ifdef HAVE_SPINLOCK
      name = "spin_non_align";
      runfunc = runspin;
      base = compact;
#endif
      break;
    case 4:
#ifdef HAVE_XADD_MP
      name = "lock_xadd";
      runfunc = runatomic_mp;
      base = align;
#endif
      break;
    case 5:
#ifdef HAVE_XADD_UP
      name = "xadd";
      runfunc = runatomic_up;
      base = align;
#endif
      break;
    case 6:
#ifdef HAVE_GCC_INTRINSICS
      name = "gcc_sync_fetch_and_add";
      runfunc = rungcc_sync_fetch_and_add;
      base = align;
#endif
      break;
    case 7:
      name = "add_mb";
      runfunc = runaddmb;
      base = align;
      break;
    case 8:
      name = "add";
      runfunc = runadd;
      base = align;
      break;
    case 9:
      name = "nop";
      runfunc = runnop;
      base = align;
      break;
    default:
      break;
    }

    if (name == 0)
      continue;

    /* Reset every counter and set up the lock object this operation uses. */
    for (i = 0; i<max_threads; i++)
    {
      base[i]->counter = 0;
      switch(lt){
      case 0:
      case 1:
        pthread_mutex_init(&base[i]->mutex, 0);
        break;
      case 2:
      case 3:
        init(&base[i]->spinlock);
        break;
      default:
        /* Remaining operations need no lock object. */
        break;
      }
    }

    for (int t = min_threads; t < max_threads; t += threads_step)
    {
      int threads = t;
      /* Split the calibrated amount of work evenly across the threads. */
      thr_iter = (iter / threads);

      sum = 0;
      sum2 = 0;

      printf("%s threads: %d ", name, threads); fflush(stdout);

      for (int nn = 0; nn<loops; nn++)
      {
        unsigned long long start = now();

        for (i = 0; i<threads; i++)
        {
          void * arg = base[i];
          if (pthread_create(thr_id + i, 0, runfunc, arg) != 0)
          {
            /* Joining an id that was never created is undefined; bail out. */
            fprintf(stderr, "\npthread_create failed for thread %d\n", i);
            exit(1);
          }
        }

        for (i = 0; i<threads; i++)
        {
          void * arg;
          if (pthread_join(thr_id[i], &arg) != 0)
          {
            fprintf(stderr, "\npthread_join failed for thread %d\n", i);
            exit(1);
          }
        }

        unsigned long long stop = now();
        unsigned long long diff = (stop - start);
        sum += diff;
        sum2 += (diff * diff);
      }

      unsigned long long nloops = loops;

      /*
       * Variance of the per-round times, E[x^2] - E[x]^2, computed in
       * floating point so nothing is truncated. The n/(n-1) factor turns it
       * into the sample variance; the previous hard-coded "10/9" was an
       * integer division that evaluated to 1 and therefore did nothing.
       */
      double n = (double)nloops;
      double mean = (double)sum / n;
      double var = (double)sum2 / n - mean * mean;
      if (var < 0)
        var = 0; /* guard against rounding slightly below zero */
      if (nloops > 1)
        var *= n / (n - 1);
      double stddev = sqrt(var);

      /* Relative deviation in percent; 0 when no time was measured at all. */
      unsigned stddev_pct = sum ? (unsigned)((100 * stddev * n) / (double)sum) : 0;

      unsigned long long mops = (iter * nloops) / (sum ? sum : 1);
      unsigned long long ns = ((1000 * sum) / iter) / nloops;

      if (threads == 1)
      {
        printf("time: %llu (us) stddev: %u %u%% mops: %llu ns/op: %llu\n",
               sum / nloops,
               (unsigned)stddev,
               stddev_pct,
               mops,
               ns);
      }
      else
      {
        printf("time: %llu (us) stddev: %u %u%% mops: %llu\n",
               sum / nloops,
               (unsigned)stddev,
               stddev_pct,
               mops);
      }
    }

    /* Only the pthread mutexes are torn down here. */
    for (i = 0; i<max_threads; i++)
    {
      switch(lt){
      case 0:
      case 1:
        pthread_mutex_destroy(&base[i]->mutex);
        break;
      default:
        break;
      }
    }
  }

  free(raw0);
  free(ptr1);
  return 0;
}