void enqueue(ArgVal arg, int pid) {
    ToggleVector diffs, l_toggles;
    pointer_t ldw, mod_dw, tmp_sp;
    int i, j, enq_counter, prefix, mybank;
    EnqState *mod_sp, *lsp_data;
    Node *node, *llist;
	
    announce[pid] = arg; // A Fetch&Add instruction follows soon, thus a barrier is needless
    mybank = TVEC_GET_BANK_OF_BIT(pid);
    TVEC_REVERSE_BIT(&my_enq_bit, pid);
    TVEC_NEGATIVE_BANK(&enq_toggle, &enq_toggle, mybank);
    mod_sp = &enq_pool[pid * LOCAL_POOL_SIZE + enq_local_index];
    TVEC_ATOMIC_ADD_BANK(&enqueuers, &enq_toggle, mybank);            // toggle pid's bit in a_toggles, Fetch&Add acts as a full write-barrier
#if N_THREADS > USE_CPUS
    if (simRandomRange(1, N_THREADS) > 4)
        sched_yield();
#else
    volatile int k;
    int backoff_limit;

    if (simRandomRange(1, N_THREADS) > 1) {
        backoff_limit =  simRandomRange(backoff >> 1, backoff);
        for (k = 0; k < backoff_limit; k++)
            ;
    }
/*
 * Initializes the queue for the calling thread.
 *
 * The thread with pid 0 additionally sets up the shared state (enq_sp,
 * deq_sp, the enqueuers/dequeuers vectors and the initial pool entries)
 * and issues a FullFence() once done. Every thread, pid 0 included, then
 * prepares its own enqueue/dequeue toggle vectors and its node pool.
 *
 * pid: index of the calling thread.
 */
void SHARED_OBJECT_INIT(int pid) {
    if (pid == 0) {
        // Pool slot used for the initial enqueue/dequeue states; it lies at
        // index LOCAL_POOL_SIZE * N_THREADS in both pools.
        const int init_slot = LOCAL_POOL_SIZE * N_THREADS;
        pointer_t initial;

        // Both shared pointers start at the same slot with sequence 0.
        initial.struct_data.index = init_slot;
        initial.struct_data.seq = 0L;
        enq_sp = initial;
        deq_sp = initial;

        // No thread has announced an operation yet.
        TVEC_SET_ZERO((ToggleVector *)&enqueuers);
        TVEC_SET_ZERO((ToggleVector *)&dequeuers);

        // Initial enqueue state: every link refers to the guard node.
        TVEC_SET_ZERO((ToggleVector *) &enq_pool[init_slot].applied);
        enq_pool[init_slot].link_a = &guard;
        enq_pool[init_slot].link_b = null;
        enq_pool[init_slot].ptr = &guard;

        // Initial dequeue state: also refers to the guard node.
        TVEC_SET_ZERO((ToggleVector *) &deq_pool[init_slot].applied);
        deq_pool[init_slot].ptr = &guard;
#ifdef DEBUG
        enq_pool[init_slot].counter = 0L;
        deq_pool[init_slot].counter = 0L;
#endif
        MAX_BACK *= 100;
        FullFence();
    }

    // Enqueue side: my_enq_bit gets pid's bit flipped from zero, and
    // enq_toggle becomes TVEC_NEGATIVE of a mask holding only pid's bit.
    TVEC_SET_ZERO(&mask);
    TVEC_SET_ZERO(&my_enq_bit);
    TVEC_SET_ZERO(&enq_toggle);
    TVEC_REVERSE_BIT(&my_enq_bit, pid);
    TVEC_SET_BIT(&mask, pid);
    enq_toggle = TVEC_NEGATIVE(mask);

    init_pool(&pool_node, sizeof(Node));

    // Dequeue side: same construction, reusing mask as scratch.
    TVEC_SET_ZERO(&mask);
    TVEC_SET_ZERO(&my_deq_bit);
    TVEC_SET_ZERO(&deq_toggle);
    TVEC_REVERSE_BIT(&my_deq_bit, pid);
    TVEC_SET_BIT(&mask, pid);
    deq_toggle = TVEC_NEGATIVE(mask);
}
/*
 * Initializes the per-thread state used with a SimQueue.
 *
 * queue:    not referenced by this routine.
 * th_state: per-thread state to initialize; all fields written here are
 *           overwritten unconditionally.
 * pid:      index of the owning thread; selects the bit used in the
 *           toggle vectors and the bank stored in th_state->mybank.
 */
void SimQueueThreadStateInit(SimQueueStruct *queue, SimQueueThreadState *th_state, int pid) {
    (void)queue;

    // Plain scalar bookkeeping.
    th_state->backoff = 1;
    th_state->enq_local_index = 0;
    th_state->deq_local_index = 0;
    th_state->mybank = TVEC_GET_BANK_OF_BIT(pid);

    // Enqueue side: my_enq_bit gets pid's bit flipped from zero, and
    // enq_toggle becomes TVEC_NEGATIVE of a mask holding only pid's bit.
    TVEC_SET_ZERO(&th_state->mask);
    TVEC_SET_ZERO(&th_state->my_enq_bit);
    TVEC_SET_ZERO(&th_state->enq_toggle);
    TVEC_REVERSE_BIT(&th_state->my_enq_bit, pid);
    TVEC_SET_BIT(&th_state->mask, pid);
    th_state->enq_toggle = TVEC_NEGATIVE(th_state->mask);

    init_pool(&th_state->pool_node, sizeof(Node));

    // Dequeue side: same construction, reusing mask as scratch.
    TVEC_SET_ZERO(&th_state->mask);
    TVEC_SET_ZERO(&th_state->my_deq_bit);
    TVEC_SET_ZERO(&th_state->deq_toggle);
    TVEC_REVERSE_BIT(&th_state->my_deq_bit, pid);
    TVEC_SET_BIT(&th_state->mask, pid);
    th_state->deq_toggle = TVEC_NEGATIVE(th_state->mask);
}
void SimQueueEnqueue(SimQueueStruct *queue, SimQueueThreadState *th_state, ArgVal arg, int pid) {
    ToggleVector diffs, l_toggles;
    pointer_t ldw, mod_dw, tmp_sp;
    int i, j, enq_counter, prefix;
    EnqState *mod_sp, *lsp_data;
    Node *node, *llist;

    queue->announce[pid] = arg; // A Fetch&Add instruction follows soon, thus a barrier is needless
    TVEC_REVERSE_BIT(&th_state->my_enq_bit, pid);
    TVEC_NEGATIVE_BANK(&th_state->enq_toggle, &th_state->enq_toggle, th_state->mybank);
    mod_sp = &queue->enq_pool[pid * LOCAL_POOL_SIZE + th_state->enq_local_index];
    TVEC_ATOMIC_ADD_BANK(&queue->enqueuers, &th_state->enq_toggle, th_state->mybank);            // toggle pid's bit in a_toggles, Fetch&Add acts as a full write-barrier
#if N_THREADS > USE_CPUS
    if (fastRandomRange(1, N_THREADS) > 4)
        resched();
#else
    volatile int k;
    int backoff_limit;

    if (fastRandomRange(1, N_THREADS) > 1) {
        backoff_limit =  fastRandomRange(th_state->backoff >> 1, th_state->backoff);
        for (k = 0; k < backoff_limit; k++)
            ;
    }