/*
 * enqueue: first phase of an enqueue request issued by thread `pid`.
 *
 * The visible part publishes `arg` in announce[pid], updates this thread's
 * toggle vectors, atomically adds the thread's toggle bank into the shared
 * `enqueuers` vector and then backs off (yield or busy-wait) before the
 * remainder of the operation.
 *
 * NOTE(review): only the beginning of this function is visible here -- the
 * matching #endif and the rest of the body are missing from this view, so
 * the comments below describe the visible statements only.
 */
void enqueue(ArgVal arg, int pid) {
    ToggleVector diffs, l_toggles;
    pointer_t ldw, mod_dw, tmp_sp;
    int i, j, enq_counter, prefix, mybank;
    EnqState *mod_sp, *lsp_data;
    Node *node, *llist;
	
    // Publish the request argument in this thread's announce slot.
    // No explicit barrier: a Fetch&Add instruction follows soon.
    announce[pid] = arg;
    mybank = TVEC_GET_BANK_OF_BIT(pid);     // bank of the toggle vector that holds bit `pid`
    TVEC_REVERSE_BIT(&my_enq_bit, pid);     // presumably flips bit `pid` of my_enq_bit -- confirm against the TVEC implementation
    TVEC_NEGATIVE_BANK(&enq_toggle, &enq_toggle, mybank);  // in-place update of bank `mybank` of enq_toggle (source and destination are the same vector)
    // Select the slot in enq_pool at index pid * LOCAL_POOL_SIZE + enq_local_index.
    mod_sp = &enq_pool[pid * LOCAL_POOL_SIZE + enq_local_index];
    TVEC_ATOMIC_ADD_BANK(&enqueuers, &enq_toggle, mybank);            // atomically add bank `mybank` of enq_toggle into `enqueuers` (intended to toggle pid's bit there); Fetch&Add acts as a full write-barrier
#if N_THREADS > USE_CPUS
    // More threads than CPUs: yield the processor when the random draw exceeds 4.
    if (simRandomRange(1, N_THREADS) > 4)
        sched_yield();
#else
    // Otherwise back off by spinning; `k` is volatile so the empty loop is not optimised away.
    volatile int k;
    int backoff_limit;

    // Spin only when the random draw exceeds 1.
    if (simRandomRange(1, N_THREADS) > 1) {
        // Spin count is drawn between backoff/2 and backoff.
        backoff_limit =  simRandomRange(backoff >> 1, backoff);
        for (k = 0; k < backoff_limit; k++)
            ;
    }
inline static void *Execute(void* Arg) {
    long i, rnum, mybank;
    volatile long j;
    long id = (long) Arg;
    volatile ToggleVector lactive_set;
    ToggleVector mystate;

    fastRandomSetSeed(id + 1);
    BarrierWait(&bar);
    if (id == 0)
        d1 = getTimeMillis();
    TVEC_SET_ZERO(&mystate);
    TVEC_SET_BIT(&mystate, id);
    mybank = TVEC_GET_BANK_OF_BIT(id);
    for (i = 0; i < RUNS; i++) {
        mystate = TVEC_NEGATIVE(mystate);
        TVEC_ATOMIC_ADD_BANK(&active_set, &mystate, mybank);
        rnum = fastRandomRange(1, MAX_WORK);
        for (j = 0; j < rnum; j++)
            ;
        lactive_set = active_set;
        rnum = fastRandomRange(1, MAX_WORK);
        for (j = 0; j < rnum; j++)
            ;
    }
    return NULL;
}
/*
 * SimQueueThreadStateInit: prepare the per-thread state `th_state` for the
 * thread whose id is `pid`.
 *
 * Resets the local indices and backoff, records the toggle-vector bank that
 * holds bit `pid`, initialises the node pool for objects of sizeof(Node), and
 * builds the enqueue-side and dequeue-side toggle vectors from a mask with
 * bit `pid` set.
 *
 * `queue` is not referenced in this function.
 */
void SimQueueThreadStateInit(SimQueueStruct *queue, SimQueueThreadState *th_state, int pid) {
    /* Scalar bookkeeping. */
    th_state->backoff = 1;
    th_state->enq_local_index = 0;
    th_state->deq_local_index = 0;
    th_state->mybank = TVEC_GET_BANK_OF_BIT(pid);

    /* Per-thread pool handing out Node-sized objects. */
    init_pool(&th_state->pool_node, sizeof(Node));

    /* Enqueue side: the toggle is the negation of a mask holding bit `pid`. */
    TVEC_SET_ZERO(&th_state->my_enq_bit);
    TVEC_SET_ZERO(&th_state->enq_toggle);
    TVEC_SET_ZERO(&th_state->mask);
    TVEC_SET_BIT(&th_state->mask, pid);
    TVEC_REVERSE_BIT(&th_state->my_enq_bit, pid);
    th_state->enq_toggle = TVEC_NEGATIVE(th_state->mask);

    /* Dequeue side: same construction; the mask is rebuilt from scratch. */
    TVEC_SET_ZERO(&th_state->my_deq_bit);
    TVEC_SET_ZERO(&th_state->deq_toggle);
    TVEC_SET_ZERO(&th_state->mask);
    TVEC_SET_BIT(&th_state->mask, pid);
    TVEC_REVERSE_BIT(&th_state->my_deq_bit, pid);
    th_state->deq_toggle = TVEC_NEGATIVE(th_state->mask);
}