/************************************************************************
  PopWork() : Removes the first task from the shared task stack and hands
            : it back through *task.
            :
            : task    - out parameter; set to the popped TaskElement when
            :           0 is returned, left untouched when DONE is returned.
            : returns - 0 if a task was popped, DONE once NumWaiting has
            :           reached NUM_PROCS (every process found the stack
            :           empty), i.e. the sorting is complete.
            :
            : All accesses to TaskStack / TaskStackTop / NumWaiting other
            : than the spin loop happen with TaskStackLock held.
 *************************************************************************/
int PopWork(TaskElement **task)
{
    BLOCKLOCK(&gMem->TaskStackLock);
    while(gMem->TaskStackTop == 0) {
        if(++gMem->NumWaiting == NUM_PROCS) {   /* Check for empty stack */
            /* This caller is the last one to arrive at an empty stack.
               Making TaskStackTop non-zero lets the processes spinning in
               the else-branch below leave their wait loop. */
            gMem->TaskStackTop = -1;
            FREELOCK(&gMem->TaskStackLock);
            return(DONE);
        }
        else {
            /* Wait for some work to get pushed on... */
            /* The lock is dropped while spinning so that another process
               can change TaskStackTop. */
            FREELOCK(&gMem->TaskStackLock);
            START_SPIN;
            while(gMem->TaskStackTop == 0) ;
            END_AGG;
            ACQ_MEMBAR;

            BLOCKLOCK(&gMem->TaskStackLock);
            /* NumWaiting == NUM_PROCS means the wake-up came from the
               termination marker above, not from newly pushed work. */
            if(gMem->NumWaiting == NUM_PROCS) {
                FREELOCK(&gMem->TaskStackLock);
                return(DONE);
            }
            /* No longer waiting; the outer loop re-checks TaskStackTop
               because the stack may already be empty again. */
            (gMem->NumWaiting)--;
        }
    }
    /* Unlink the head element of the task list. */
    *task = gMem->TaskStack;
    gMem->TaskStack=gMem->TaskStack->next;
    gMem->TaskStackTop--;
    FREELOCK(&gMem->TaskStackLock);
    return(0);
}
/*
 * rw_destroy: dispose of the reader/writer lock `rw`.
 *
 * Applies FREELOCK to the lock and then passes the object obtained via
 * RUMPRW(rw) to rumpuser_rw_destroy().
 * NOTE(review): FREELOCK and RUMPRW are macros defined outside this view;
 * what exactly they do here should be confirmed against their definitions.
 */
void
rw_destroy(krwlock_t *rw)
{

	FREELOCK(rw);
	rumpuser_rw_destroy(RUMPRW(rw));
}
/*
 * mutex_destroy: dispose of the mutex `mtx`.
 *
 * Applies FREELOCK to the mutex and then passes the object obtained via
 * RUMPMTX(mtx) to rumpuser_mutex_destroy().
 * NOTE(review): FREELOCK and RUMPMTX are macros defined outside this view;
 * what exactly they do here should be confirmed against their definitions.
 */
void
mutex_destroy(kmutex_t *mtx)
{

	FREELOCK(mtx);
	rumpuser_mutex_destroy(RUMPMTX(mtx));
}
/************************************************************************
  PushWork() : Takes an unused TaskElement from this process's local
             : stack of free element indices, stores i and j in it as
             : left/right, and pushes it on the shared task stack.
             :
             : i, j - stored in the element's left and right fields.
             :
             : The local stack (LocalTaskStack / LocalStackTop) is touched
             : without a lock; the shared stack is only modified while
             : TaskStackLock is held.
 *************************************************************************/
void PushWork(int i,int j)
{
    TaskElement *te;
    int a;

    /* Check BEFORE indexing: with an empty local stack (LocalStackTop < 0)
       the pop below would read LocalTaskStack[-1]. */
    if (LocalStackTop < 0)
    {
        printf("!!!!!!!FAILURE IN LOCAL STACK TOP!!!\n\n\n");
        HALT(-1);
        return;     /* do not touch the stack if HALT ever returns */
    }

    /* LocalTaskStack holds indices into gMem->tasks. */
    te = gMem->tasks + LocalTaskStack[LocalStackTop--];

    /* Unchanged from the original: consuming the last free index is
       reported as a failure as well. */
    if (LocalStackTop < 0)
    {
        printf("!!!!!!!FAILURE IN LOCAL STACK TOP!!!\n\n\n");
        HALT(-1);
    }

    /* Fill in the element before it becomes visible to other processes. */
    te->left=i;
    te->right=j;
    GETLOCK(&gMem->TaskStackLock);
    ANNOUNCE("\t\t\tI have got the lock -- ", whoami);
    a=gMem->TaskStackTop++;     /* value before the increment, for the trace below */

    /* Link the element in as the new head of the shared list. */
    te->next=gMem->TaskStack;
    gMem->TaskStack=te;

    FREELOCK(&gMem->TaskStackLock);
    ANNOUNCE("\t\t\tI have released the lock -- ",whoami);
    ANNOUNCE("\t\t\tMy TaskStackTop was -- ",a);

    if(LocalStackTop < 0)
    {
        printf("Local TaskStackTop negative!\n");
    }
}
/************************************************************************
  root_main() : Initialisation carried out by the root process:
              :   - fills the local stack of free task-element indices,
              :   - resets the shared task-stack bookkeeping,
              :   - fills gMem->A with 0..size-1 and shuffles it,
              :   - pushes the initial work item (0, size-1).
 *************************************************************************/
void root_main()
{
    int i,j;

    LocalStackTop = -1;
    /* whoami is always zero here */
    /* Indices are stored in descending order; the lower bound
       whoami * chunk itself is excluded by the strict comparison. */
    for (i = (whoami+1) * chunk -1; i > whoami * chunk; i--)    {
        LocalTaskStack[++LocalStackTop] = i;
    }

    printf("\nInitializing values\n");
    FREELOCK(&gMem->TaskStackLock);
    gMem->waitqhead = gMem->waitqtail = gMem->waitqcount = 0;
    gMem->TaskStackTop = 0;
    gMem->NumWaiting = 0;

    printf("Initializing data\n");
    /* initialize the data */
    for(i=0; i<size; i++)
        gMem->A[i] = i;

    printf("Shuffling data\n");
    /* Swap every position with a randomly chosen one. */
    for(i=0; i<size; i++)   {
        j=random() % size;
        SWAP(gMem->A,i,j);
    }
    if(show_array) print_array(gMem->A);

    /* Put the work in */
    PushWork_startup(0, size-1);
}
/************************************************************************
  main() : Program entry point.
         :   1. parses the command line (-p, -s, -b, -B, -d, -v, -H),
         :   2. allocates the shared GlobalMemory and distributes it,
         :   3. lets the root process (whoami == 0) initialise the data,
         :   4. forks NUM_PROCS-1 further processes,
         :   5. runs Worker() between two tree barriers,
         :   6. lets the root process run do_cleanup().
         :
         : returns - 0; exits with 0 after printing the help text and
         :           with -1 when -p is given a value below 1.
 *************************************************************************/
int main(int argc, char **argv)
{
    int i;
    int slot;
    int c;
    extern char *optarg;
    extern int optopt;
    int ctrproc;
    int start;
    int proc_size;
    char name[32];

    /*********************************************************************
      Parse the command line
      ********************************************************************/
    collect_info = 1;
    MEMSYS_OFF;                             /* in the initialization phase */
    while ((c = getopt(argc, argv, "p:s:b:BdvH")) != -1) {
        switch(c) {
        case 'p':
            NUM_PROCS = atoi(optarg);
            if (NUM_PROCS < 1) {
                printf("P must be >= 1\n");
                exit(-1);
            }
            break;

        case 's':
            size = atoi(optarg);
            break;

        case 'b':
            BubbleThresh = atoi(optarg);
            break;

        case 'B':
            bubble = 1;
            break;
        case 'd':
            show_array = 1;
            break;
        case 'v':
            verify = 1;
            break;
        default:
            /* getopt() rejected the option: optarg is not meaningful here
               (it may be NULL); the offending character is in optopt. */
            printf("Bad option : %c \n",optopt);
            /* fallthrough */
        case 'H':
            /* 'H' is the letter registered in the option string above. */
            printf( "\t\t\tQS - OPTIONS\n");
            printf( "\tp - Number of processors\n");
            printf( "\ts - Size of array\n");
            printf( "\tb - Bubble threshold\n");
            printf( "\tB - Bubble\n");
            printf( "\td - Display output \n");
            printf( "\tv - Verify results \n");
            printf( "\tH - Help\n");
            exit(0);
        }
    }

    StatClearAll();         /* clear the stats */
    /**********************************************************************/
    /* Use shmalloc to allocate memory from the shared address space, also
       use AssociateAddrNode to determine the distribution of the shared
       memory among the various processors */
    /**********************************************************************/
    gMem = (GlobalMemory *) shmalloc(sizeof(GlobalMemory));
    ctrproc = NUM_PROCS/2  + (int)(sqrt((double)NUM_PROCS)/2);
    /* choose a "middle-point" in the mesh network */

    /* associate the task stack variables to a processor easily accessible */
    /* The node argument is clamped to NUM_PROCS-1 in both calls. */
#if !defined(SPATIAL)
    AssociateAddrNode((void *)&(gMem->TaskStackLock),
                      (void *)(&(gMem->NumWaiting)+1),
                      ctrproc>=NUM_PROCS-1? NUM_PROCS-1 :  ctrproc+1,"lock");
    AssociateAddrNode((void *)&(gMem->TaskStackTop),
                      (void *)(&(gMem->TaskStackTop)+1),
                      ctrproc>=NUM_PROCS-1 ? NUM_PROCS-1 : ctrproc,"top");
#endif

    /************** associate the task queue among all processors *********/
    chunk = MAX_TASK_QUEUE / NUM_PROCS;
    for (i=0; i< NUM_PROCS; i++)   {
#if !defined(SPATIAL)
        AssociateAddrNode(&gMem->tasks[i*chunk],
                          &gMem->tasks[(i+1)*chunk],
                          i,"tasks");
#endif
    }
    LocalStackTop = -1;
    FREELOCK(&gMem->TaskStackLock);
    /* Rounds up so that NUM_PROCS pieces of proc_size cover the array. */
    /* NOTE(review): NUM_PROCS * proc_size exceeds size, so the last
       range below ends past A[size]; confirm A is declared large enough. */
    proc_size = (size + NUM_PROCS)/NUM_PROCS;

    /*************** associate the array among all processors **************/
    start = 0;
    strcpy(name,"Array chunks");
    for(i=0; i<NUM_PROCS; i++) {
        printf("going to call Associate address node\n");
#if !defined(SPATIAL)&&!defined(DO_PREF)
        AssociateAddrNode(&gMem->A[start],
                          &gMem->A[start+ proc_size],
                          i, name);
#endif
        start = start+proc_size;
    }

    printf( "QS - OPTIONS\n");
    printf( "\tp - Number of processors \t%d\n", NUM_PROCS);
    printf( "\ts - Size of array\t\t%d\n",size);
    printf( "\tb - Bubble threshold \t\t%d\n",BubbleThresh);
    printf( "\tB - Bubble \t\t\t%d\n",bubble);
    printf( "\td - Display output\t\t%d\n",show_array);
    printf( "\tv - Verify results\t\t%d\n",verify);

    /* The work which the root process has to do */
    whoami=0;
    root_main();                  /* initialization by the root process */
    endphase(phase);              /* end of initialization phase */
    TreeBarInit(&tree,NUM_PROCS); /* initialize tree barrier */

    MEMSYS_ON;

    /********************************************************************/
    /* Forking processes :  Create a process for each of the processors */
    /********************************************************************/

    /* NOTE(review): the result of fork() is only compared with 0, so a
       failed fork() goes unnoticed although the barrier was initialised
       for NUM_PROCS participants. */
    for(i=0; i<NUM_PROCS-1; i++) {
        if(fork() == 0)     {
            /* Child: set up its own stack of free task-element indices,
               then leave the loop so that it does not fork itself. */
            /* NOTE(review): whoami is used as an index multiplier below,
               so getpid() is presumably expected to yield a small
               processor number in this environment -- confirm. */
            whoami =  getpid();
            LocalStackTop = -1;
            for (slot = (whoami+1) * chunk -1; slot > whoami * chunk; slot--) {
                LocalTaskStack[++LocalStackTop] = slot;
            }
            break;
        }
    }

    /******************* Barrier after initialization *******************/
    printf("Before barrier %d\n",whoami);
    TreeBarrier(&tree,whoami);
    printf("Starting Process %d\n",whoami);
    if (whoami == 0)     {
        StatReportAll();
        StatClearAll();
    }

    newphase(++phase);
    Worker();   /**** Call the main procedure which we have to execute ****/
    endphase(phase);

    /**************** Barrier after finishing the sort ******************/
    printf("Coming out of worker %d\n",whoami);
    TreeBarrier(&tree,whoami);
    MEMSYS_OFF;
    if (whoami == 0)     {
        StatReportAll();
        StatClearAll();
    }

    /*************************** Cleanup phase ***************************/
    newphase(++phase);
    if(whoami== 0)     do_cleanup();
    endphase(phase);
    if (whoami == 0)     {
        StatReportAll();
        StatClearAll();
    }

    return 0;
}