Пример #1
0
/**
 * @brief Acquire a cross lock from an fth thread (callable from fth thread ONLY).
 *
 * Every contended step is a try-operation followed by fthYield(0), so the
 * calling fth thread keeps yielding instead of blocking inside a pthread call.
 *
 * @param cross <IN> cross lock structure pointer
 * @param write <IN> Nonzero for write lock, zero for read lock
 */
void fthXLock(XLock_t *cross, int write) {

    // Step 1: claim the fth-side gate by swinging fthLock from 0 to 1
    for (;;) {
        if (__sync_val_compare_and_swap(&cross->fthLock, 0, 1) == 0) {
            break;
        }
        fthYield(0);
    }

    // Step 2: take the queueing lock; it should be free unless another
    // fthread is racing us for it, in which case we yield and retry
    for (;;) {
        if (pthread_rwlock_trywrlock(&cross->qLock) == 0) {
            break;
        }
        fthYield(0);
    }

    // Step 3: the queueing lock is ours, so the fth gate can be dropped
    (void) __sync_fetch_and_sub(&cross->fthLock, 1);

    // Step 4: holding the queueing lock makes everyone else line up behind
    // us while we poll for the real lock in the requested mode
    while ((write ? pthread_rwlock_trywrlock(&cross->lock)
                  : pthread_rwlock_tryrdlock(&cross->lock)) != 0) {
        fthYield(0);                         // Let everyone else run
    }

    // Step 5: the full lock is held; let the next waiter into the queue
    pthread_rwlock_unlock(&cross->qLock);

}
Пример #2
0
/**
 * Initial fth thread of the test.
 *
 * Takes lock1 (mode argument 1), allocates and initializes the sparse lock
 * table `slt`, starts one threadRoutine1 and then 100 threadRoutine2 threads
 * (resumed with arguments 0..99), and finally releases lock1.
 *
 * @param arg  thread argument; not used here
 */
void initThread(uint64_t arg) {
    enum { LOCK_TABLE_ENTRIES = 16, STACK_BYTES = 4096, NUM_ROUTINE2 = 100 };

    fthWaitEl_t *held = fthLock(&lock1, 1, NULL);

    slt = FTH_MALLOC(FTH_SPARSE_LOCK_TABLE_SIZE(LOCK_TABLE_ENTRIES));
    fthSparseLockTableInit(slt, LOCK_TABLE_ENTRIES);

    // One threadRoutine1 first, then give it a chance to run
    fthResume(fthSpawn(&threadRoutine1, STACK_BYTES), 0);
    fthYield(100);

    // Each threadRoutine2 receives its index as its argument
    int n = 0;
    while (n < NUM_ROUTINE2) {
        fthResume(fthSpawn(&threadRoutine2, STACK_BYTES), n);
        n++;
    }

    fthYield(1000);
    fthUnlock(held);

}
Пример #3
0
/**
 * Look up 2*numBlocks block keys in the home directory.
 *
 * For the first numBlocks keys an entry must exist and the calling fth thread
 * must be at the head of the entry's request queue; the thread dequeues
 * itself and, if another thread is now at the head, resumes it and yields.
 * Keys numBlocks..2*numBlocks-1 are only looked up and released.
 */
void testget()
{
    uint64_t cguid = 1;
    SDF_container_type_t ctype = SDF_BLOCK_CONTAINER;
    int blockNum = 0;

    while (blockNum < numBlocks*2) {
        local_key_t *lkey = get_local_block_key(blockNum);
        DirEntry *entry = HomeDir_get(homedir, cguid, ctype, lkey);

        if (blockNum < numBlocks) {
            plat_assert_always(entry);

            // Queue manipulation happens under the request-queue lock
            fthWaitEl_t *qGuard = reqq_lock(entry->q);
            fthThread_t *top = reqq_peek(entry->q);
            plat_assert_always(top == fthSelf());
            fthThread_t *self = reqq_dequeue(entry->q);
            plat_assert_always(self == fthSelf());
            reqq_unlock(entry->q, qGuard);

            // Hand over to whoever is queued behind us, if anyone
            top = reqq_peek(entry->q);
            if (top != NULL) {
                fthResume(top, 0);
                printf("Thread 1: yielding after setting thread 2 to run for block=%u\n", blockNum);
                fthYield(1);
            }
        }

        free_local_key(lkey);
        blockNum++;
    }

    printf("Thread 1: Got %d blocks from the directory\n", numBlocks);
}
Пример #4
0
/**
 * pthread entry point that brings up an fth scheduler for the order test.
 *
 * Initializes fth, spawns the two receivers and the response collector,
 * sleeps 500 microseconds, spawns the sender, then runs the scheduler.
 *
 * @param arg  unused
 * @return always a null pointer
 */
void * OrderTestFthPthreadRoutine(void *arg) {
    enum { FTH_STACK_BYTES = 16384 };
    fthThread_t *sched;

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d FTH threads firing up\n", myid);

    /* Init a scheduler */
    sched = fthInit();

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d FTH scheduler has initialized\n", myid);

    /* Receivers and the response collector are queued before the sender */
    fthResume(fthSpawn(&fthThreadReceiver1, FTH_STACK_BYTES), 1);
    fthResume(fthSpawn(&fthThreadReceiver2, FTH_STACK_BYTES), 2);
    fthResume(fthSpawn(&sdf_msg_resp_gc, FTH_STACK_BYTES), (uint64_t)myid);

    usleep(500);

    fthResume(fthSpawn(&fthThreadSender, FTH_STACK_BYTES), 0);

    fthStartScheduler(sched);
    fthYield(1); /* let them run */

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nFTH scheduler halted\n");

    return NULL;

}
Пример #5
0
/**
 * Single-writer phase of the mailbox test.
 *
 * Checks the eligible queue holds 2 entries and the mailbox is empty, posts
 * the values 0..WRITENUM-1 to `mbox` (yielding after each post), then spawns
 * FTHWRITER fth_mbox_test_7 threads and one fth_mbox_test_8 thread and records
 * the highest thread id used in `last_fth_id`.
 *
 * @param arg  id of this fth thread, used only in the printed messages
 */
void fth_mbox_test_5(uint64_t arg) {
    int index;
    /* arg is 64-bit: print it with a matching conversion instead of %d */
    unsigned long long self_id = (unsigned long long) arg;

    printf("you are in fth %llu\n", self_id);
#ifdef MULTIQ_SCHED
    ASSERT(get_eligibleQ_size(fthBase()->eligibleQ[0].head) == 2, "the size of eligibleQ. ")
#else
    ASSERT(get_eligibleQ_size(fthBase()->eligibleQ.head) == 2, "the size of eligibleQ. ")
#endif // MULTIQ_SCHED


    ASSERT(get_mailQ_size(mbox.mailQ.head) == 0, "empty mail box. ")
    for (index = 0; index < WRITENUM; index++) {
        fthMboxPost(&mbox, index);
        printf("!<Note>:fth %llu post data %d\n", self_id, index);
        fthYield(1);
    }

    printf("@@@fth %llu spawn new threads to mornitor multiple write single read:\n", self_id);
    post_data = 0;
    /* Writers get ids 8 .. 8+FTHWRITER-1; the reader gets the next id */
    for (index = 0; index < FTHWRITER; index++) {
        fthResume(fthSpawn(fth_mbox_test_7, 4096), index + 8);
    }
    fthResume(fthSpawn(fth_mbox_test_8, 4096), FTHWRITER + 8);
    last_fth_id = FTHWRITER + 8;

}
Пример #6
0
/**
 * Writer thread for the multi-writer/multi-reader mailbox phase.
 *
 * Posts the current value of the shared counter `post_data` to `mbox`
 * WRITENUM times, incrementing the counter after each post and yielding.
 *
 * @param arg  id of this fth thread, used only in the printed messages
 */
void fth_mbox_test_9(uint64_t arg) {
    int index;
    /* arg is 64-bit: print it with a matching conversion instead of %d */
    unsigned long long self_id = (unsigned long long) arg;

    printf("you are in fth %llu\n", self_id);
    for (index = 0; index < WRITENUM; index++) {
        fthMboxPost(&mbox, post_data);
        printf("!<Note>:fth %llu post data %d\n", self_id, post_data ++);
        fthYield(1);
    }
}
Пример #7
0
/**
 * Single-writer phase that feeds one waiting reader, then starts the next phase.
 *
 * Asserts exactly one thread is parked on the mailbox, posts 6347 followed by
 * the values 0..29 (yielding after every post), then spawns fth_mbox_test_5
 * with id 5 and FTHREADER fth_mbox_test_6 readers with ids starting at 6.
 *
 * @param arg  id of this fth thread, used only in the printed messages
 */
void fth_mbox_test_4(uint64_t arg) {
    int index;
    /* arg is 64-bit: print it with a matching conversion instead of %d */
    unsigned long long self_id = (unsigned long long) arg;

    printf("fth %llu: entry is %s\n", self_id, __FUNCTION__);
    ASSERT(get_threadQ_size(mbox.threadQ.head) == 1, "one fth in threadQ. ")
    fthMboxPost(&mbox, 6347);
    fthYield(1);

    for (index = 0; index < 30; index++) {
        fthMboxPost(&mbox, index);
        printf("!<Note>:fth %llu post %d\n", self_id, index);
        fthYield(1);
    }
    printf("@@@fth %llu spawn new threads to mornitor single write multiple read:\n", self_id);
    fthResume(fthSpawn(fth_mbox_test_5, 4096), 5);
    for (index = 0; index < FTHREADER; index++) {
        fthResume(fthSpawn(fth_mbox_test_6, 4096), index + 6);
    }

}
Пример #8
0
/**
 * Reader thread for the multi-writer/multi-reader mailbox phase.
 *
 * Waits on `mbox` READNUM times, printing each value received and yielding
 * after each one, then calls fthKill(1) once the loop has run to completion.
 *
 * @param arg  id of this fth thread, used only in the printed messages
 */
void fth_mbox_test_10(uint64_t arg) {
    int index, data;
    /* arg is 64-bit: print it with a matching conversion instead of %d */
    unsigned long long self_id = (unsigned long long) arg;

    printf("you are in fth %llu\n", self_id);
    for (index = 0; index < READNUM; index++) {
        printf("fth %llu want to get data:\n", self_id);
        data = fthMboxWait(&mbox);
        printf("fth %llu get data %d\n", self_id, data);
        fthYield(1);
    }
    if (index == READNUM) {
        fthKill(1);
    }
}
Пример #9
0
/**
 * Single reader of the multi-writer phase; afterwards starts the final phase.
 *
 * Reads READNUM values from `mbox` using the READNUM value in effect on
 * entry, then redefines FTHWRITER/FTHREADER/WRITENUM/READNUM (only if they
 * were already defined) for the code that follows, checks that the eligible
 * queue and the mailbox are empty, and spawns FTHWRITER fth_mbox_test_9
 * writers and FTHREADER fth_mbox_test_10 readers with ids following
 * `last_fth_id`.
 *
 * @param arg  id of this fth thread, used only in the printed messages
 */
void fth_mbox_test_8(uint64_t arg) {
    int index, data;
    /* arg is 64-bit: print it with a matching conversion instead of %d */
    unsigned long long self_id = (unsigned long long) arg;

    printf("you are in fth %llu\n", self_id);
    for (index = 0; index < READNUM; index++) {
        printf("fth %llu want to get data:\n", self_id);
        data = fthMboxWait(&mbox);
        printf("fth %llu get data %d\n", self_id, data);
    }

    /*
     * From this point on (including the rest of the translation unit) the
     * test sizes are replaced by the values below.
     */
#ifdef FTHWRITER
#undef FTHWRITER
#define FTHWRITER 2
#endif

#ifdef FTHREADER
#undef FTHREADER
#define FTHREADER 2
#endif


#ifdef WRITENUM
#undef WRITENUM
#define WRITENUM 4
#endif

#ifdef READNUM
#undef READNUM
#define READNUM  4
#endif


    fthYield(1);

    printf("@@@fth %llu spawn new threads to mornitor multiple writer multiple reader:\n", self_id);
#ifdef MULTIQ_SCHED
    ASSERT(get_eligibleQ_size(fthBase()->eligibleQ[0].head) == 0, "the size of eligibleQ. ")
#else
    ASSERT(get_eligibleQ_size(fthBase()->eligibleQ.head) == 0, "the size of eligibleQ. ")
#endif // MULTIQ_SCHED

    ASSERT(get_mailQ_size(mbox.mailQ.head) == 0, "empty mail box. ")
    post_data = 0;
    for (index = 0; index < FTHWRITER; index++) {
        fthResume(fthSpawn(fth_mbox_test_9, 4096), index + last_fth_id +1);
    }
    last_fth_id += FTHWRITER;
    for (index = 0; index < FTHREADER; index++) {
        fthResume(fthSpawn(fth_mbox_test_10, 4096), index + last_fth_id +1);
    }
    last_fth_id += FTHREADER;


}
Пример #10
0
/**
 * Response "garbage collector" fth thread.
 *
 * Announces itself, then yields until `mysync` becomes nonzero and returns.
 * It does not read from the SDF_RESPONSES queue: the loop that used to drain
 * it sat behind an unconditional return and could never execute, so it has
 * been removed together with its unused locals.
 *
 * @param arg  thread argument; not used here
 */
static void sdf_msg_resp_gc(uint64_t arg) {

    int i = 0;

    printf("FTH Thread starting %s\n", __func__);

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d Starting garbage resp collector %d\n", myid, i);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Now yielding waiting for mysync\n", myid);
    while (!mysync) {
        fthYield(1);
    }

    /*
     * FIXME: draining the SDF_RESPONSES queue was added here at one point,
     * but it really doesn't matter since the queue will just wrap around.
     * The thread therefore ends as soon as mysync is set.
     */
}
Пример #11
0
/**
 * Write `size` bytes from `pbuf` to the flash device at `offset` using POSIX
 * AIO, yielding the calling fth thread while the request is in progress.
 *
 * @param pdev    flash device; its paio_state supplies the file descriptor
 * @param pbuf    source buffer
 * @param offset  byte offset in the device file
 * @param size    number of bytes to write
 * @return FLASH_EOK when exactly `size` bytes were written; FLASH_EAGAIN if
 *         the request could not be queued, completed with an error, or
 *         transferred a different number of bytes
 */
int clipper_aio_write_flash(struct flashDev *pdev, char *pbuf, uint64_t offset, uint64_t size)
{
    struct aiocb           acb;
    int                    errstatus;
    ssize_t                ret;
    clipper_aio_state_t   *pcas;

    pcas = (clipper_aio_state_t *) pdev->paio_state;

    (void) memset((void *) &acb, 0, (size_t) sizeof(acb));
    acb.aio_fildes     = pcas->fildes;
    acb.aio_lio_opcode = LIO_WRITE; // ignored
    acb.aio_reqprio    = 0; // no priority change
    acb.aio_buf        = pbuf;
    acb.aio_nbytes     = size;
    acb.aio_sigevent.sigev_notify = SIGEV_NONE;
    acb.aio_offset     = offset;

    if (aio_write(&acb) != 0) {
        fprintf(stderr, "aio_write failed\n");
        return(FLASH_EAGAIN);
    }

    /* Poll for completion, yielding to other fth threads between checks */
    for (;;) {
        fthYield(1);  // xxxzzz try different values here

        errstatus = aio_error(&acb);
        if (errstatus == 0) {
            break;
        }
        if (errstatus != EINPROGRESS) {
            fprintf(stderr, "aio_write failed with errstatus=%d ('%s')\n", errstatus, plat_strerror(errstatus));
            return(FLASH_EAGAIN);
        }
    }

    /*
     * ret is signed (ssize_t): compare it explicitly as unsigned and print
     * it with %zd so a negative status is not shown as a huge number.
     */
    ret = aio_return(&acb);
    if ((uint64_t) ret != size) {
        fprintf(stderr, "aio_write failed with return status %zd, errno: '%s' (%d)\n", ret, plat_strerror(errno), errno);
        return(FLASH_EAGAIN);
    }

    return(FLASH_EOK);
}
Пример #12
0
/**
 * Read `size` bytes from the flash device at `offset` into `pbuf` using POSIX
 * AIO, yielding the calling fth thread while the request is in progress.
 *
 * @param pdev    flash device; its paio_state supplies the file descriptor
 * @param pbuf    destination buffer
 * @param offset  byte offset in the device file
 * @param size    number of bytes to read
 * @return 0 when exactly `size` bytes were read; otherwise nonzero:
 *         1 if the request could not be queued, the aio_error() code if the
 *         request failed, or, for a short transfer, the transferred count
 *         (EIO when that count would itself be 0, so that a zero-byte read
 *         can never be mistaken for success)
 */
int clipper_aio_read_flash(struct flashDev *pdev, char *pbuf, uint64_t offset, uint64_t size)
{
    struct aiocb           acb;
    int                    errstatus;
    ssize_t                ret;
    clipper_aio_state_t   *pcas;

    pcas = (clipper_aio_state_t *) pdev->paio_state;

    (void) memset((void *) &acb, 0, (size_t) sizeof(acb));
    acb.aio_fildes     = pcas->fildes;
    acb.aio_lio_opcode = LIO_READ; // ignored
    acb.aio_reqprio    = 0; // no priority change
    acb.aio_buf        = pbuf; // xxxzzz what are alignment requirements here?
    acb.aio_nbytes     = size;
    acb.aio_sigevent.sigev_notify = SIGEV_NONE;
    acb.aio_offset     = offset;

    if (aio_read(&acb) != 0) {
        return(1);
    }

    /* Poll for completion, yielding to other fth threads between checks */
    for (;;) {
        fthYield(1);  // xxxzzz try different values here

        errstatus = aio_error(&acb);
        if (errstatus == 0) {
            break;
        }
        if (errstatus != EINPROGRESS) {
            return(errstatus);
        }
    }

    ret = aio_return(&acb);
    if ((uint64_t) ret != size) {
        /*
         * Short transfer. The historical return value is the count itself,
         * but a count of 0 (e.g. reading at end of file) would read as
         * success to the caller, so map that case to EIO.
         */
        int rc = (int) ret;
        return(rc != 0 ? rc : EIO);
    }

    return(0);
}
/**
 * Worker fth thread for the home-directory test.
 *
 * Takes the next sequence number from `g_seq`, prints the directory
 * statistics, and runs testcreate, testget or testremove depending on the
 * sequence number modulo 3. The thread that brings `threads_done` up to
 * nthreads * ncores prints the total and calls fthKill(222).
 *
 * @param arg  thread argument; not used here
 */
void testRoutine1(uint64_t arg) {
    enum { STATS_BUF_LEN = 1024 };
    char str[STATS_BUF_LEN];

    int seq = __sync_fetch_and_add(&g_seq, 1);
    printf("\n%d fth begins\n", seq);

    HomeDir_printStats(homedir, str, STATS_BUF_LEN);
    printf("%s\n", str);

    /* Spread the three operations round-robin over the sequence numbers */
    switch (seq % 3) {
    case 0:
        testcreate(seq);
        break;
    case 1:
        testget(seq);
        break;
    case 2:
        testremove(seq);
        break;
    default:
        break;
    }

    /* Last thread to finish shuts the schedulers down */
    if (__sync_add_and_fetch(&threads_done, 1) == nthreads * ncores) {
        printf("\nTotal Iterations Completed: %d\n", nthreads * ncores);
        fthKill(222);
    }

    printf("\n%d ends\n", seq);
    fthYield(1);
}
Пример #14
0
/**
 * Mailbox consumer fth thread.
 *
 * Waits for one element from `xmbox`, adds 17 times its `mail` value to
 * `checksum`, and frees it. The thread started with arg == 9 additionally
 * polls the mailbox with ptofMboxTry (yielding between attempts) until a
 * second element arrives, and adds 23 times its `mail` value to `checksum`.
 *
 * @param arg  thread argument; the value 9 selects the extra polling phase
 */
void threadRoutine2(uint64_t arg) {

    printf("Thread 2 start - %"PRIu64" number %i\n", arg, __sync_fetch_and_add(&threadCount, 1));

    /* Blocking receive of the first element */
    mail_sp_t mailShmem = shmem_cast(mail_sp, ptofMboxWait(xmbox));
    mail_t *mail = mail_sp_rwref(&mail, mailShmem);

    printf("Thread 2 released got MB element - %i/%"PRIu64"\n", mail->mail, arg);
    (void) __sync_fetch_and_add(&checksum, 17 * mail->mail);

    mail_sp_rwrelease(&mail);
    mail_sp_free(mailShmem);

    if (arg != 9) {
        return;
    }

    /* Non-blocking receive: spin with yields until something shows up */
    int numTries = 0;
    for (;;) {
        mailShmem = shmem_cast(mail_sp, ptofMboxTry(xmbox));
        if (!mail_sp_is_null(mailShmem)) {
            break;
        }
        numTries++;
        fthYield(0);
    }

    mail_t *polled = mail_sp_rwref(&polled, mailShmem);
    printf("Thread 2 try got MB element after %i tries - %i/%"PRIu64"\n", numTries, polled->mail, arg);
    (void) __sync_fetch_and_add(&checksum, 23 * polled->mail);

    mail_sp_rwrelease(&polled);
    mail_sp_free(mailShmem);

    return;
}
/**
 * Receiver fth thread for one protocol (queue pair index) of the messaging test.
 *
 * Waits until mysync[ptl] == 1, then receives messages from
 * q_pair[ptl]->q_out, releasing each receive buffer, until msgCount messages
 * have been handled. When FLAG is enabled each message is additionally
 * echoed back as a RESP_ONE response and the send acknowledgement is awaited.
 * On exit the thread increments fthCount under ssync->spin.
 *
 * @param arg  protocol / queue-pair index this receiver serves
 */
void fthThreadSinglePtlPressSeqRecver(uint64_t arg) {
    int ret, ct = 0;
    uint64_t aresp = 0, ptl;
    struct sdf_msg *recv_msg = NULL, *send_msg = NULL;
    vnode_t node;
    printf("node %d, fth thread receiver %li starting %s\n", myid, arg, __func__);
    fflush(stdout);

    int localpn, actmask;
    uint32_t numprocs;

#if FLAG
    /* Mailbox used to wait for the acknowledgement of each response sent */
    sdf_fth_mbx_t fthmbx;
    fthMbox_t ackmbox1;
    fthmbx.actlvl = SACK_ONLY_FTH;
    fthmbx.abox = &ackmbox1;
    fthmbx.rbox = NULL;
    fthMboxInit(&ackmbox1);
#endif

    ptl = arg;
    /* Work out which node responses go to: ourselves, or the peer node */
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);
    if (localrank == localpn) {
        node = localpn;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
		     "\nNode %d: FASTPATH_TEST node %d myid %d\n", 
                     myid, node, myid);
    }
    else {
        node = local_get_pnode(localrank, localpn, numprocs);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
	             "\nNode %d: my pnode is  %d\n", 
                     localrank, node);
	fflush(stdout);
    }
    /* Do not start receiving until mysync[ptl] has been set to 1 */
    while (mysync[ptl] != 1) {
         fthYield(1);
    }

    for (;;) {
        
        recv_msg = sdf_msg_receive(q_pair[ptl]->q_out, 0, B_TRUE);

        /* NOTE(review): this "Waiting" message is logged after the receive returned */
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Waiting for messages q_pair %p loop %d\n",
                     myid, q_pair[ptl], ct);

        /* NOTE(review): recv_msg is dereferenced here without a NULL check */
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Got One *msg %p sn %d dn %d proto %d type %d"
                     " akrpmbx %p\n", myid, 
                     recv_msg, recv_msg->msg_src_vnode, recv_msg->msg_dest_vnode,
	             recv_msg->msg_dest_service, recv_msg->msg_type,
                     recv_msg->akrpmbx);

#if 0
    if(recv_msg) {
        uint32_t d = recv_msg->msg_dest_service;
        printf("node %d, receiver #%li recvs protocol#%d message from sender\n", myid, ptl, d);
        local_printmsg_payload(recv_msg, TSZE, myid);
    }
    else {
        printf("!!node %d, receiver #%li recvs protocol#%d meessage from sender failed\n", myid, ptl, recv_msg->msg_dest_service);
    }    
#endif
       
#if FLAG
        /* Echo the payload back as a response and wait for the send ack */
        send_msg = (struct sdf_msg *) sdf_msg_alloc(recv_msg->msg_len);
        memcpy(send_msg->msg_payload, recv_msg->msg_payload, recv_msg->msg_len - sizeof(sdf_msg_t));
        
        struct sdf_resp_mbx rhkey;
        struct sdf_resp_mbx *ptrkey = &rhkey;

        strncpy(rhkey.mkey, MSG_DFLT_KEY, (MSG_KEYSZE - 1));
        rhkey.mkey[MSG_KEYSZE - 1] = '\0';
        rhkey.akrpmbx_from_req = NULL;
        rhkey.rbox = NULL;
        
        ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, ptl, myid, ptl, RESP_ONE, &fthmbx,
                        sdf_msg_get_response(recv_msg, ptrkey));
        fthMboxWait(&ackmbox1);
#endif

        // release the receive buffer back to the sdf messaging thread
        /* The status stored in ret is not examined afterwards */
        ret = sdf_msg_free_buff(recv_msg);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Send Buff Freed aresp %ld loop %d\n", 
                     myid, aresp, ct);
        ct++;
        
        if(ct == msgCount) break;

        /* 
         * Simple exit mechanism, worker threads will just quit when predefined msgcnt 
         * has been reached in the sender thread
         */
    }
    printf("@@node %d, receiver #%li, receive message finished, receive %d times\n", myid, ptl, ct);
    
    /* Report completion: fthCount is shared, so update it under the spin lock */
    FTH_SPIN_LOCK(&ssync->spin);
    fthCount ++;
    FTH_SPIN_UNLOCK(&ssync->spin);

    printf("node: %d, protocol: %li, mysync[ptl]: %d, fthCount: %d\n", myid, ptl, mysync[ptl], fthCount);
    fthYield(1);
    
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: WORKER FTH exiting - loop %d mysync %d\n", myid, ct, mysync[ptl]);
    
}
/**
 * @brief synchronized create_shard/write/read/delete/delete_shard operations
 *
 * Scenario driven through the replication test framework:
 *  1. start the framework and all nodes, create a shard on node 1;
 *  2. write and read back "google:1" on node 1;
 *  3. crash node 2, write "google:2" and "google:3", read "google:2",
 *     delete "google:3", all on node 1;
 *  4. restart node 2, wait, and query its last sequence number;
 *  5. page through the iteration cursors of node 1 (10 per request) and
 *     fetch the object behind every cursor;
 *  6. shut the framework down, release everything and kill the scheduler.
 *
 * The test metadata allocation is asserted immediately after it is made,
 * before the pointer is handed to replication_test_meta_init().
 *
 * @param args  pointer to the struct replication_test_framework, passed as
 *              an integer; the framework is freed before this function ends
 */
void
user_operations_cursor_test(uint64_t args) {
    struct replication_test_framework *test_framework =
            (struct replication_test_framework *)args;
    SDF_boolean_t op_ret = SDF_FALSE;
    struct SDF_shard_meta *shard_meta = NULL;
    SDF_replication_props_t *replication_props = NULL;
    int failed = 0;
    uint64_t seqno = 0;
    SDF_shardid_t shard_id = 2;
    vnode_t node_id = 1;
    struct timeval now;
    struct timeval when;
    /* timeval incre */
    struct timeval incre;

    void *data_read;
    size_t data_read_len;

    uint64_t          seqno_start, seqno_len, seqno_max;
    int               i;
    int               ncursors;
    it_cursor_t      *pit;
    resume_cursor_t  *prc = NULL;
    char              skey[1024];
    SDF_time_t        exptime;
    SDF_time_t        createtime;
    int               key_len;
    size_t            data_len;
    void             *pdata;
    int               resume_cursor_size = 0;
    char             *pcur;

    /* Each run of the test works on a fresh shard id */
    shard_id = __sync_add_and_fetch(&test_framework->max_shard_id, 1);
    char *key;
    char *data;

    /* Verify the allocation before the pointer is used for anything */
    failed = !plat_calloc_struct(&meta);
    plat_assert(!failed);
    replication_test_meta_init(meta);

    /* Assure test_framework is started?! */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "start test_framework");
    rtfw_start(test_framework);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "test_framework started\n");

    /* Start all nodes */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "start nodes");
    rtfw_start_all_nodes(test_framework);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "nodes started");

    failed = !plat_calloc_struct(&replication_props);
    plat_assert(!failed);
    rtfw_set_default_replication_props(&test_framework->config, replication_props);
    shard_meta = rtfw_init_shard_meta(&test_framework->config,
                                      1 /* first_node */,
                                      shard_id
                                      /* shard_id, in real system generated by generate_shard_ids() */,
                                      replication_props);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n**************************************************\n"
                 "                  create shard sync                 "
                 "\n**************************************************");
    op_ret = rtfw_create_shard_sync(test_framework, 1, shard_meta);
    plat_assert(op_ret == SDF_SUCCESS);

    /* - write on node 1, key:google:1, data:Sebstian:1 */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n**************************************************\n"
                 "                 write object sync                  "
                 "\n**************************************************");
    plat_asprintf(&key, "google:%d", 1);
    plat_asprintf(&data, "Sebstian:%d", 1);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "write key:%s, key_len:%u, data:%s, data_len:%u",
                 key, (int)(strlen(key)), data, (int)(strlen(data)));
    /* Lengths passed to the framework include the terminating NUL */
    op_ret = rtfw_write_sync(test_framework,
                             shard_id /* shard */, 1 /* node */,
                             meta /* test_meta */,
                             key, strlen(key)+1, data, strlen(data)+1);
    plat_assert(op_ret == SDF_SUCCESS);
    plat_free(key);
    plat_free(data);

    /* - read on node 1, key:google:1 */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n**************************************************\n"
                 "                  read object sync                  "
                 "\n**************************************************");
    replication_test_framework_read_data_free_cb_t free_cb =
        replication_test_framework_read_data_free_cb_create(PLAT_CLOSURE_SCHEDULER_ANY_OR_SYNCHRONOUS,
                                                            &rtfw_read_free,
                                                            test_framework);
    plat_asprintf(&key, "google:%d", 1);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "KEY:%s, key_len:%d", key, (int)strlen(key));

    op_ret = rtfw_read_sync(test_framework, shard_id /* shard */, node_id /* node */, key, strlen(key) + 1,
                            &data_read, &data_read_len, &free_cb);
    plat_free(key);
    plat_assert(op_ret == SDF_SUCCESS);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "read data:%s, data_len:%d", (char *)data_read, (int)data_read_len);
    plat_free(data_read);

    /* crash node 2 */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n**************************************************\n"
                 "                  crash node 2 sync                  "
                 "\n**************************************************");
    rtfw_crash_node_sync(test_framework, 2);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "crash node:%"PRIu32" complete", 2);

    /**
     * write on node 1, key2: google:2, data2: Sebstian:2,
     * key3: google:3, data3: Sebstian:3
     */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n**************************************************\n"
                 "                 write object sync                  "
                 "\n**************************************************");
    plat_asprintf(&key, "google:%d", 2);
    plat_asprintf(&data, "Sebstian:%d", 2);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "write key:%s, key_len:%u, data:%s, data_len:%u",
                 key, (int)(strlen(key)), data, (int)(strlen(data)));
    op_ret = rtfw_write_sync(test_framework,
                             shard_id /* shard */, 1 /* node */,
                             meta /* test_meta */,
                             key, strlen(key)+1, data, strlen(data)+1);
    plat_assert(op_ret == SDF_SUCCESS);
    plat_free(key);
    plat_free(data);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n**************************************************\n"
                 "                 write object sync                  "
                 "\n**************************************************");
    plat_asprintf(&key, "google:%d", 3);
    plat_asprintf(&data, "Sebstian:%d", 3);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "write key:%s, key_len:%u, data:%s, data_len:%u",
                 key, (int)(strlen(key)), data, (int)(strlen(data)));
    op_ret = rtfw_write_sync(test_framework,
                             shard_id /* shard */, 1 /* node */,
                             meta /* test_meta */,
                             key, strlen(key)+1, data, strlen(data)+1);
    plat_assert(op_ret == SDF_SUCCESS);
    plat_free(key);
    plat_free(data);


    /* read on node 1, key2: google:2 */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n**************************************************\n"
                 "                  read object sync                  "
                 "\n**************************************************");
    plat_asprintf(&key, "google:%d", 2);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "KEY:%s, key_len:%d", key, (int)strlen(key));

    op_ret = rtfw_read_sync(test_framework, shard_id /* shard */, 1 /* node */, key, strlen(key) + 1,
                            &data_read, &data_read_len, &free_cb);
    plat_free(key);
    plat_assert(op_ret == SDF_SUCCESS);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "read data:%s, data_len:%d", (char *)data_read, (int)data_read_len);
    plat_free(data_read);


    /* delete from node 1, key3: google3 */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n***************************************************\n"
                 "                  delete object sync                 "
                 "\n***************************************************");
    plat_asprintf(&key, "google:%d", 3);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "KEY:%s, key_len:%d", key, (int)(strlen(key)));

    op_ret = rtfw_delete_sync(test_framework, shard_id /* shard */, 1 /* node */, key, strlen(key)+1);
    plat_assert(op_ret == SDF_SUCCESS);
    plat_free(key);

    /* restart node 2 */
    op_ret = rtfw_start_node(test_framework, 2);
    plat_assert(op_ret == SDF_SUCCESS);

    /* block a while: 10 seconds of framework time, then SLEEP_US more */
    now = test_framework->now;
    incre.tv_sec = 10;
    incre.tv_usec = 0;
    timeradd(&now, &incre, &when);
    rtfw_block_until(test_framework, (const struct timeval)when);
    rtfw_sleep_usec(test_framework, SLEEP_US);

    /* get last seqno from node 2 */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n************************************************************\n"
                 "                 get latest seqno from node 2                "
                 "\n************************************************************");

    op_ret = rtfw_get_last_seqno_sync(test_framework, 2, shard_id, &seqno);

    /* Failure here is only reported, not asserted */
    if (op_ret == SDF_SUCCESS) {
        plat_log_msg(LOG_ID, LOG_CAT, LOG_INFO, "get_last_seqno succeeded! (seqno=%"PRIu64")", seqno);
    } else {
        plat_log_msg(LOG_ID, LOG_CAT, LOG_INFO, "get_last_seqno failed!");
    }

    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n************************************************************\n"
                 "                        get iteration cursors                  "
                 "\n************************************************************");

    /*
     * Page through the cursors. The first request passes no resume cursor;
     * each later request resumes from the cursor embedded in the previous
     * reply. The loop ends on the first failure or empty reply.
     * NOTE(review): the reply buffers (pit) are never released here --
     * confirm who owns them.
     */
    prc = NULL;
    resume_cursor_size = 0;
    while (1) {
        replication_test_framework_read_data_free_cb_t free_cb =
            replication_test_framework_read_data_free_cb_create(PLAT_CLOSURE_SCHEDULER_ANY_OR_SYNCHRONOUS,
                                                                &rtfw_read_free,
                                                                test_framework);

        seqno_start        = 0;
        seqno_len          = 10;
        seqno_max          = UINT64_MAX - 1;
        op_ret = rtfw_get_cursors_sync(test_framework, shard_id, node_id,
                                       seqno_start, seqno_len, seqno_max,
                                       (void *) prc, resume_cursor_size,
                                       (void **) &pit, &data_len, &free_cb);

        if (op_ret != SDF_SUCCESS) {
            plat_log_msg(LOG_ID, LOG_CAT, LOG_INFO, "get_iteration_cursors failed!");
            break;
        } else {
            ncursors = pit->cursor_count;
            if (ncursors == 0) {
                break;
            }
            prc = &(pit->resume_cursor);
            resume_cursor_size = sizeof(resume_cursor_t);
            plat_assert(data_len == (sizeof(it_cursor_t) + seqno_len*pit->cursor_len));
            plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "get_iteration_cursors succeeded (%d cursors returned)!", ncursors);

            /* Cursors are packed back to back, cursor_len bytes each */
            pcur = pit->cursors;
            for (i = 0; i < ncursors; i++) {
                replication_test_framework_read_data_free_cb_t free_cb =
                replication_test_framework_read_data_free_cb_create(PLAT_CLOSURE_SCHEDULER_ANY_OR_SYNCHRONOUS,
                                                                    &rtfw_read_free,
                                                                    test_framework);
                op_ret = rtfw_get_by_cursor_sync(test_framework, shard_id, node_id,
                                                 (void *) pcur, pit->cursor_len,
                                                 skey, 1024, &key_len,
                                                 &exptime, &createtime, &seqno,
                                                 &pdata, &data_len, &free_cb);
                pcur += pit->cursor_len;

                if (op_ret == SDF_SUCCESS) {
                    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                                 "get_by_cursor: %s, key_len:%u, data:%s, data_len:%u,"
                                 "seqno: %"PRIu64", exptime:%"PRIu32", createtime:%"PRIu32"",
                                 skey, key_len, (char *)pdata, (unsigned)data_len,
                                 seqno, exptime, createtime);
                    plat_free(pdata);
                } else {
                    plat_log_msg(LOG_ID, LOG_CAT, LOG_INFO, "get_by_cursor failed!");
                }
            }
        }
    }
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n************************************************************\n"
                 "                  Test framework shutdown                       "
                 "\n************************************************************");
    rtfw_shutdown_sync(test_framework);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG,
                 "\n************************************************************\n"
                 "                  Test framework sync summary                 "
                 "\n************************************************************");
    plat_free(meta);
    plat_free(replication_props);
    plat_free(shard_meta);

    /* Terminate scheduler if idle_thread exit */
    while (test_framework->timer_dispatcher) {
        fthYield(-1);
    }
    plat_free(test_framework);

    fthKill(1);
}
/**
 * Sender fth thread for one protocol (queue pair index) of the messaging test.
 *
 * Determines the destination node, bumps mysync[ptl] to release the matching
 * receiver, then sends msgCount REQ_FLUSH messages of TSZE bytes, waiting on
 * the ack mailbox after every send. Afterwards it increments fthCount, waits
 * until fthCount reaches FTHS, and calls fthKill(1).
 *
 * @param arg  protocol / queue-pair index this sender uses
 */
void
fthThreadMultiPtlSglNodeSender(uint64_t arg) {
    int l = 0;
    vnode_t node;
    struct sdf_msg *send_msg = NULL;
    
    msg_type_t type = REQ_FLUSH;

    /* Ack mailbox: posted to once per sdf_msg_send (waited on below) */
    /* NOTE(review): only actlvl and abox are set here; any other fthmbx
     * fields are left uninitialized -- confirm sdf_msg_send ignores them */
    sdf_fth_mbx_t fthmbx;
    fthMbox_t ackmbox;
    fthmbx.actlvl = SACK_ONLY_FTH;
    fthMboxInit(&ackmbox);
    fthmbx.abox = &ackmbox;
    
    printf("node %d, fth thread sender starting  %s: number of msgs to send = %d\n", myid, __func__, msgCount);
    fflush(stdout);

    int localpn, actmask;
    uint32_t numprocs;
    uint64_t ptl = arg;
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: numprocs %d active_procs mask 0x%x active_mask 0x%x\n", 
                 localrank, numprocs, localpn, actmask);
    /* Single process: send to node 0; otherwise look up the peer node */
    if (numprocs == 1) {
        node = 0;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
		     "\nNode %d: FASTPATH_TEST node %d myid %d\n", 
                     myid, node, myid);
    }
    else {
        int i;
        node = local_get_pnode(localrank, localpn, numprocs);
	printf("Node %d: %s my pnode is  %d\n", localrank, __func__, node);
	fflush(stdout);
	for (i = 0; i < numprocs; i++) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                         "\nNode %d: %s cluster_node[%d] = %d\n", 
                         localrank, __func__, i, cluster_node[i]);
            fflush(stdout);
        }
    }
   
    printf("In sender\n");
    /* Without a queue pair for this protocol nothing can be sent */
    if(q_pair[ptl] == NULL) {
        fprintf(stderr, "%s: sdf_create_queue_pair %li failed\n", __func__, ptl);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: EXITING completed sending %d messages\n", 
                     myid, l);
        return;
    }
    
     // right now we are not using shmem for these buffers

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: created queue pair %p sn %d dn %d ss %li ds %li maxcnt %d\n",
                 myid, q_pair[ptl], myid, myid == 0 ? 1 : 0,
                 ptl, ptl, msgCount);
    
    /* Signal readiness for this protocol; mysync is shared, so lock */
    FTH_SPIN_LOCK(&ssync->spin);
    mysync[ptl]++;
    FTH_SPIN_UNLOCK(&ssync->spin);
    
    for (l = 0; l < msgCount; ++ l) {
        printf("Send\n");
        int ret;

        send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE); 
        /* NOTE(review): on allocation failure this only logs; send_msg is
         * still passed on below -- confirm the FATAL log does not return */
        if (send_msg == NULL) {
             plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                          "sdf_msg_alloc(TSZE) failed\n");
        }
        
        local_setmsg_payload(send_msg, TSZE, myid, l);
        type = REQ_FLUSH;
        
        /* Send, then block until the ack mailbox is posted */
        ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, ptl, myid, ptl, type, &fthmbx, NULL);
        fthMboxWait(&ackmbox);   
#if 0 
        if(myid == 0)
            printf("node %d, sender #%d sends %d times, sdf_msg_send return %d, message contents %c-%d\n", myid, ptl, l + 1, ret, l + 65, ptl);
        else
            printf("node %d, sender #%d sends %d times, sdf_msg_send return %d, message contents %c-%d\n", myid, ptl, l + 1, ret, l + 97, ptl);
#endif  
	if (ret != 0 )
            process_ret(ret, ptl, type, myid);

        fthYield(1);
    }

    /* Report completion: fthCount is shared, so update it under the spin lock */
    FTH_SPIN_LOCK(&ssync->spin);
    fthCount ++;
    FTH_SPIN_UNLOCK(&ssync->spin);

    printf("@@node %d, sender #%li sends %li protocol message finished, send %d times\n", myid, ptl, ptl, l);
    /* Wait for every participating fth thread to report completion */
    while (fthCount != FTHS) {
        fthYield(100);
    }

    printf("node %d, sender %li kill the scheduler.\n", myid, ptl); 

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: EXITING completed sending %d messages\n", 
                 myid, l);
    fthKill(1);
}
/**
 * @brief Receiver fth thread for the multi-protocol, single-node test.
 *
 * Yields until mysync[ptl] becomes nonzero, then receives and frees
 * msgCount messages from q_pair[ptl]->q_out.  Afterwards it increments
 * fthCount under the ssync spin lock, yields until fthCount equals FTHS
 * and finally calls fthKill(1).
 *
 * @param arg <IN> protocol index; selects the mysync[] and q_pair[] slots
 */
void fthThreadMultiPtlSglNodeRecver(uint64_t arg) {
    const uint64_t ptl = arg;
    uint64_t aresp = 0;
    struct sdf_msg *incoming = NULL;
    vnode_t node;
    int received = 0;
    int free_rc;
    int localpn, actmask;
    uint32_t numprocs;

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nnode %d, fth thread receiver %li starting %s\n",
                 myid, arg, __func__);
    fflush(stdout);

    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);

    /* node is only reported in the trace output below */
    if (localrank != localpn) {
        node = local_get_pnode(localrank, localpn, numprocs);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: my pnode is  %d\n",
                     localrank, node);
        fflush(stdout);
    } else {
        node = localpn;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: FASTPATH_TEST node %d myid %d\n", myid, node, myid);
    }

    sdf_msg_startmsg(myid, 0, NULL);

    /* Spin (cooperatively) until the sender has flagged this protocol */
    while (mysync[ptl] == 0) {
        fthYield(1);
        printf("Yield\n");
    }

    /* Receive-and-release loop; the body always runs at least once */
    do {
        printf("Receive\n");
        incoming = sdf_msg_receive(q_pair[ptl]->q_out, 0, B_TRUE);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Waiting for messages q_pair %p loop %d\n",
                     myid, q_pair[ptl], received);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Got One *msg %p sn %d dn %d proto %d type %d"
                     " akrpmbx %p\n", myid,
                     incoming, incoming->msg_src_vnode, incoming->msg_dest_vnode,
                     incoming->msg_dest_service, incoming->msg_type,
                     incoming->akrpmbx);

        /* hand the buffer back; the return code is not examined */
        free_rc = sdf_msg_free_buff(incoming);
        (void) free_rc;

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Send Buff Freed aresp %ld loop %d\n",
                     myid, aresp, received);
        received++;
    } while (received != msgCount);

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\n@@node %d, receiver #%li, receive message finished, receive %d times\n",
                 myid, ptl, received);

    /* Announce completion to the other fth threads */
    FTH_SPIN_LOCK(&ssync->spin);
    fthCount ++;
    FTH_SPIN_UNLOCK(&ssync->spin);

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nnode: %d, protocol: %li, fthCount:%d\n",
                 myid, ptl, fthCount);

    /* Hold here until every participating thread has checked in */
    while (fthCount != FTHS) {
        fthYield(100);
    }

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: WORKER FTH exiting - loop %d\n", myid, received);
    fthKill(1);
}
/**
 * @brief Sender fth thread for the multi-node node-to-node test.
 *
 * Creates the queue pair for the destination node given by arg, publishes
 * its queues through info->queue_pair_info, bumps mysync and then sends
 * msgCount REQ_FLUSH messages, waiting on the ack mailbox after each send
 * (and on the response mailbox when FLAG is set).  Finally increments
 * fthCount, yields until fthCount equals 2 and calls fthKill(1).
 *
 * @param arg <IN> destination node id, also the index into q_pair[]
 */
void
fthThreadMultiNodeTntotnSender(uint64_t arg) {
    int l = 0;
    vnode_t node;
    struct sdf_msg *send_msg = NULL;
    
    msg_type_t type = REQ_FLUSH;

    sdf_fth_mbx_t fthmbx;
    fthMbox_t ackmbox, respmbox;
    fthMboxInit(&ackmbox);
    fthmbx.abox = &ackmbox;

#if FLAG
    fthMboxInit(&respmbox);
    fthmbx.rbox = &respmbox;
    fthmbx.actlvl = SACK_BOTH_FTH;
#else
    fthmbx.rbox = NULL;
    fthmbx.actlvl = SACK_ONLY_FTH;
#endif


    printf("node %d, fth thread sender starting  %s: number of msgs to send = %d\n", myid, __func__, msgCount);
    fflush(stdout);

    int localpn, actmask;
    uint32_t numprocs;
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node,  &actmask);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: numprocs %d active_procs mask 0x%x active_mask 0x%x\n", localrank, numprocs, localpn, actmask);
    node = arg;//0: 2| 1: 3|2: 0|3: 1

    q_pair[node] = sdf_create_queue_pair(myid, node, ptl, ptl, SDF_WAIT_FTH);

    /* Validate the queue pair BEFORE touching q_in/q_out */
    if(q_pair[node] == NULL) {
        fprintf(stderr, "%s: sdf_create_queue_pair %li failed\n", __func__, ptl);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: EXITING completed sending %d messages - mysync %d\n", myid, l, mysync);
        return;
    }

    info->queue_pair_info->queue_add[0] = q_pair[node]->q_in;
    info->queue_pair_info->queue_add[1] = q_pair[node]->q_out;
    info->queue_pair_info->queue_pair_type = ptl;

    /* Tell the receiver side that the queue pair now exists */
    FTH_SPIN_LOCK(&ssync->spin);
    mysync ++;
    FTH_SPIN_UNLOCK(&ssync->spin); 

    //sdf_msg_startmsg(myid, 0, NULL);
    printf("******************prepare for send*******************\n");
    for (l = 0; l < msgCount; ++l) {
        int ret;

        send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE); 
        if (send_msg == NULL) {
             /* NOTE(review): execution continues with a NULL buffer after
              * this log unless the FATAL level itself aborts - confirm */
             plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL, "sdf_msg_alloc(TSZE) failed\n");
        }
        local_setmsg_mc_payload(send_msg, TSZE, myid, l, msgCount, ptl); 

        type = REQ_FLUSH;

        ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, ptl, myid, ptl, type, &fthmbx, NULL);
        
        /* get the ack when sending success. */
        fthMboxWait(&ackmbox);   
#if FLAG
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,"node %d, wait and get the response message from pnode.\n", myid);
        /* get the response when receive message success. */
        sdf_msg_t * msg = (sdf_msg_t *)fthMboxWait(&respmbox);
        /* NOTE(review): this overwrites the send status held in ret, so the
         * check below sees the free status when FLAG is set - confirm intent */
        ret = sdf_msg_free_buff(msg);
#endif
	if (ret != 0 )
            process_ret(ret, ptl, type, myid);

        fthYield(1);
    }

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,"@@node %d, sender type#%li sends protocol#%li message finished, send %d times\n", myid, ptl, ptl, l);
    FTH_SPIN_LOCK(&ssync->spin);
    fthCount ++;
    FTH_SPIN_UNLOCK(&ssync->spin);

    /* Wait for the peer thread to finish as well before stopping fth */
    while (fthCount != 2) fthYield(10);

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,"node %d, sender type%li kill the scheduler.\n", myid, ptl);  
    fthKill(1);
}
/**
 * @brief Receiver fth thread that takes a single message from q_pair[arg].
 *
 * Yields until mysync is nonzero.  When arg is SDF_SYSTEM, SDF_DEBUG or
 * GOODBYE nothing is received; otherwise exactly one message is received
 * from q_pair[arg]->q_out and its buffer is freed.  mysync is then
 * incremented under the ssync spin lock and the thread returns.
 *
 * @param arg <IN> index into q_pair[] identifying the queue to read
 */
void fthThreadMultiRecver(uint64_t arg) {
    int ret, ct = 0;
    uint64_t aresp = 0;
    struct sdf_msg *recv_msg = NULL;
    vnode_t node;

    printf("node %d, fth thread receiver %li starting %s\n", myid, arg, __func__);
    fflush(stdout);

    int localpn, actmask;
    uint32_t numprocs;
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);
    /* node is only used for the trace output below */
    if (localrank == localpn) {
        node = localpn;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
		     "\nNode %d: FASTPATH_TEST node %d myid %d\n", myid, node, myid);
    }
    else {
        node = local_get_pnode(localrank, localpn, numprocs);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
	             "\nNode %d: my pnode is  %d\n", 
                     localrank, node);
	fflush(stdout);
    }

    /* Wait until a sender has signalled that its queue pair exists */
    while (!mysync) {
         fthYield(1);
    }

    for (;;) {
        
        /* These service ids are not received on here */
        if(arg == SDF_SYSTEM || arg == SDF_DEBUG || arg == GOODBYE)
            break;    
        else
            recv_msg = sdf_msg_receive(q_pair[arg]->q_out, 0, B_TRUE);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Waiting for messages q_pair %p loop %d\n",
                     myid, q_pair[arg], ct);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Got One *msg %p sn %d dn %d proto %d type %d"
                     " akrpmbx %p\n", myid, 
                     recv_msg, recv_msg->msg_src_vnode, recv_msg->msg_dest_vnode,
	             recv_msg->msg_dest_service, recv_msg->msg_type,
                     recv_msg->akrpmbx);

#if 0
        printf("node %d, receiver %d recv message from sender\n", myid, arg);
        local_printmsg_payload(recv_msg, TSZE, myid);
#endif

        /* return the buffer; the status is not examined */
        ret = sdf_msg_free_buff(recv_msg);
        (void) ret;

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Send Buff Freed aresp %ld loop %d\n", 
                     myid, aresp, ct);
        ct++;        
        /* a single message is all this thread handles */
        if(ct == 1) break;
    }
    printf("@@node %d, receiver %li, receive message finished, receive %d times\n", myid, arg, ct); 
    FTH_SPIN_LOCK(&ssync->spin);
    mysync++;
    FTH_SPIN_UNLOCK(&ssync->spin);
    fthYield(1);
    /* Report the real receive count (was a never-updated counter) */
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: WORKER FTH exiting - loop %d mysync %d\n", myid, ct, mysync);    
}
Пример #21
0
/**
 * @brief Sender fth thread for the SDF_CONSISTENCY protocol test.
 *
 * Creates the forward and reverse SDF_CONSISTENCY queue pairs, publishes
 * the forward pair through info->queue_pair_info, sets mysync to 1 and
 * starts messaging.  It then sends maxcnt REQ_FLUSH messages; after each
 * send it waits on the ack mailbox and then on the response mailbox,
 * freeing the response buffer.  After the loop a GOODBYE is sent with
 * sdf_msg_say_bye(), the thread yields until mysync reaches 3 and calls
 * fthKill(5).
 *
 * @param arg <IN> only printed in the start-up message
 */
static void fthThreadSender(uint64_t arg) {
    int i, l;
    uint64_t aresp;
    struct sdf_msg *send_msg = NULL;
    vnode_t node;
    serviceid_t protocol = SDF_CONSISTENCY;
    serviceid_t my_protocol = SDF_CONSISTENCY;
    msg_type_t type = REQ_FLUSH;
    sdf_fth_mbx_t fthmbx;
    fthMbox_t *fthmbxtst;
    int debug = 0;
    int maxcnt;

#if SENDTWO
    maxcnt = 5;
#elif SHORTTEST
    maxcnt = SHORTTEST;
#else
    maxcnt = 990000;
#endif

    fthmbxtst = &respmbox;

    fthMboxInit(&ackmbox);
    fthMboxInit(&respmbox);

    fthmbx.actlvl = SACK_BOTH_FTH;
    fthmbx.abox = &ackmbox;
    fthmbx.rbox = &respmbox;

    printf("FTH Thread starting %s Number of msgs to send = %d arg in %li\n",
            __func__, maxcnt, arg);
    fflush(stdout);

    if (DBGP) {
        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: fth mb info fthmbxtst %p rbox %p abox %p lvl %d maxcnt %d\n",
                myid, fthmbxtst, fthmbx.rbox, fthmbx.abox, SACK_BOTH_FTH,
                maxcnt);
    }

    /* node is the destination node */
    int localpn, actmask;
    uint32_t numprocs;
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);
    if (numprocs == 1) {
        node = 0;
    }
    else {
        node = local_get_pnode(localrank, localpn, numprocs);
        printf("Node %d: %s my pnode is  %d\n", localrank, __func__, node);
        fflush(stdout);
        for (i = 0; i < numprocs; i++) {
            printf("Node %d: %s cluster_node[%d] = %d\n", localrank, __func__, i, cluster_node[i]);
            fflush(stdout);
        }
    }
    /* you only init this once but share the q_pairs among the other threads here */

    q_pair_CONSISTENCY = local_create_myqpairs(SDF_CONSISTENCY, myid, node);

    /* Check the result BEFORE reading q_in/q_out from it */
    if (q_pair_CONSISTENCY == NULL) {
        fprintf(stderr, "%s: sdf_create_queue_pair failed\n", __func__);
        return;
    }

    //Add Later////////////////////
    info->queue_pair_info->queue_add[0] = q_pair_CONSISTENCY->q_in;
    info->queue_pair_info->queue_add[1] = q_pair_CONSISTENCY->q_out;
    info->queue_pair_info->queue_pair_type = SDF_CONSISTENCY;

    q_pair_revert_CONSISTENCY = local_create_myqpairs(SDF_CONSISTENCY, node,
            myid);
    
    if (q_pair_revert_CONSISTENCY == NULL) {
        fprintf(stderr, "%s: sdf_create_queue_pair failed\n", __func__);
        return;
    }
    //self queue locating
    //     q_pair_local_CONSISTENCY = local_create_myqpairs(SDF_CONSISTENCY, myid, myid);
    //     if (q_pair_local_CONSISTENCY == NULL) {
    //         fprintf(stderr, "%s: sdf_create_queue_pair failed\n", __func__);
    //         return;
    //     }
    //
    //     q_pair_local_RESPONSES = local_create_myqpairs(SDF_RESPONSES, myid, myid);
    //     if (q_pair_local_RESPONSES == NULL) {
    //         fprintf(stderr, "%s: sdf_create_queue_pair failed\n", __func__);
    //         return;
    //     }

    plat_log_msg(
            PLAT_LOG_ID_INITIAL,
            LOG_CAT,
            PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: created queue pair %p sn %d dn %d ss %d ds %d maxcnt %d\n",
            myid, q_pair_CONSISTENCY, myid, myid == 0 ? 1 : 0, SDF_CONSISTENCY,
            SDF_CONSISTENCY, maxcnt);

    /* main loop will send SDF_CONSISTENCY protocol messages till maxcnt is reached
     * this sleeps on both mailboxes ack and resp based on the lvl dictated
     */

    FTH_SPIN_LOCK(&ssync->spin);
    mysync = 1;
    FTH_SPIN_UNLOCK(&ssync->spin);
    /* let the msg thread do its thing */

    sdf_msg_startmsg(myid, 0, NULL);

    if (myid == 1) {
        /* debug is a manual switch; with 0 the hold loop below is skipped */
        debug = 0;
        if (debug) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: DEBUG --- NOT SENDING MESSAGES FROM HERE", myid);
            while (debug)
                fthYield(100); /* Don't send messages from node one for now */
        }
    }

    for (l = 0; l < maxcnt; ++l) {
        printf("Send:%d\n", l);
        sdf_msg_t *msg;
        unsigned char *m;
        int ret;

        /* create the buffer on every loop to check the buffer release func */
        if (UNEXPT_TEST) {
            send_msg = (struct sdf_msg *) sdf_msg_alloc(8192);
            /* sizeof of the pointer is what gets printed here; %zu matches
             * the size_t result */
            printf("Node %d: %s BIG Message Alloc %zu\n", myid, __func__,
                    sizeof((struct sdf_msg *) send_msg));
        } else {
            send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE);
        }
        if (send_msg == NULL) {
            /* FIXME should default to an error  */
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                    "sdf_msg_alloc(TSZE) failed\n");
            /* return ((void *)1); */
        }

        /* stamp every payload byte with the (truncated) loop counter */
        for (i = 0; i < TSZE; ++i)
            send_msg->msg_payload[i] = (unsigned char) l;

        type = REQ_FLUSH;

        if (UNEXPT_TEST) {
            ret = sdf_msg_send((struct sdf_msg *) send_msg, 8192, node,
                    protocol, myid, my_protocol, type, &fthmbx, NULL);
        } else {
            ret = sdf_msg_send((struct sdf_msg *) send_msg, TSZE, node,
                    protocol, myid, my_protocol, type, &fthmbx, NULL);
        }

        if (ret != 0)
            process_ret(ret, protocol, type, myid);

        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Msg %d Posted ret %d proto %d type %d Now Sleep on Ack Mbox\n",
                myid, l, ret, protocol, type);

        debug = 0;
        if (debug)
            printf("Node %d: %s STOPPING FOR DEBUG %d\n", myid, __func__, debug);
        while (debug)
            ;

        aresp = fthMboxWait(&ackmbox);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Send Buff loop num %d Ack Freed aresp %ld\n", myid,
                l, aresp);

        if (!fthmbx.actlvl) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: actvl %d\n", myid, fthmbx.actlvl);
            plat_assert(fthmbx.actlvl >= 1);
        } else {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: Sleeping on RESP %d  fth mailbox %p loop %d\n",
                    myid, l, &respmbox, l);
            /*
             * Sleep on the mailbox waiting to get a properly directed response message
             */
            msg = (sdf_msg_t *) fthMboxWait(&respmbox);
            m = (unsigned char *) msg;

            plat_log_msg(
                    PLAT_LOG_ID_INITIAL,
                    LOG_CAT,
                    PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: RESP %d msg %p seq %lu sn %d dn %d proto %d type %d loop %d\n",
                    myid, l, msg, msg->msg_conversation, msg->msg_src_vnode,
                    msg->msg_dest_vnode, msg->msg_dest_service, msg->msg_type,
                    l);
            /*
             * Print out the buffer contents that we just got
             */
#if SHOWBUFF
            for (i = 0; i < 256; i++) {
                printf(" %02x", *m);
                m++;
                if ((i % 16) == 15) {
                    putchar('\n');
                    fflush(stdout);
                }
            }
#endif
            /* release the receive buffer back to the sdf messaging thread */
            ret = sdf_msg_free_buff(msg);

            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: Returned Buff %d ret %d\n", myid, l, ret);

            fthYield(1); /* we yield to give others a chance to do some work */

        }
    }
    printf("fthYield 100 before\n");

    //send over, it is time to tell receiver you are ready to finalize
    msg_type_t say_goodbye = GOODBYE;
    printf("Before Bye\n");
    int ret = sdf_msg_say_bye(node, protocol, myid, my_protocol, say_goodbye,
            &fthmbx, TSZE);
    printf("Afterbye Bye\n");
    if (ret != 0) {
        process_ret(ret, protocol, type, myid);
    }


    /* hold until mysync has been advanced to at least 3 elsewhere */
    while (mysync < 3)
        fthYield(100);
    printf("fthYield 100 after\n");
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: EXITING completed sending %d messages - mysync %d\n",
            myid, l, mysync);
    fthKill(5); // Kill off FTH
}
/**
 * @brief synchronized create_shard/write/read/delete operations on node 1,
 * followed by a crash of node 1 and a framework shutdown
 *
 * Writes and reads back key1, writes and deletes key2, crashes node 1,
 * sleeps for twice the configured lease, shuts the framework down, frees
 * everything allocated here (including the framework itself) and kills
 * the fth scheduler.
 *
 * @param args <IN> struct replication_test_framework pointer passed as an
 * integer
 */
void
rt_mix_write_delete_entry(uint64_t args) {
    struct replication_test_framework *test_framework =
            (struct replication_test_framework *)args;

    SDF_boolean_t op_ret;
    struct SDF_shard_meta *shard_meta = NULL;
    SDF_replication_props_t *replication_props = NULL;
    int failed;
    SDF_shardid_t shard_id;
    replication_test_framework_read_data_free_cb_t free_cb;


    shard_id = __sync_add_and_fetch(&test_framework->max_shard_id, 1);
    char key1[] = "key1";
    char key2[] = "key2";
    char *key;
    size_t key_len;
    char *data;
    /* first generated value, kept so it can be released during cleanup */
    char *first_data = NULL;
    void *data_out;
    size_t data_len_out;
    int data_generation;

    /* NOTE(review): meta has no declaration inside this function; it is
     * presumably declared at file scope - confirm */
    failed = !plat_calloc_struct(&meta);
    plat_assert(!failed);
    replication_test_meta_init(meta);

    /* Assure test_framework is started?! */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "start test_framework");
    rtfw_start(test_framework);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "test_framework started\n");

    /* Start all nodes */
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "start nodes");
    rtfw_start_all_nodes(test_framework);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "nodes started");

    failed = !plat_calloc_struct(&replication_props);
    plat_assert(!failed);
    rtfw_set_default_replication_props(&test_framework->config,
                                       replication_props);
    shard_meta = rtfw_init_shard_meta(&test_framework->config, 1 /* first */,
                                      shard_id
                                      /* shard_id, in real system generated by generate_shard_ids() */,
                                      replication_props);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "create on node 1");
    op_ret = rtfw_create_shard_sync(test_framework, 1, shard_meta);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "create on node 1 complete");
    plat_assert(op_ret == SDF_SUCCESS);


    data_generation = 0;

    /* key1: write, then read back and compare */
    plat_asprintf(&data, "data_%s_%d", key1, data_generation);
    key = key1;
    key_len = strlen(key) + 1;
    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "write on node 1 key:%s, key_len:%u, data:%s, data_len:%u",
                 key, (int)(strlen(key)), data, (int)(strlen(data)));
    op_ret = rtfw_write_sync(test_framework, shard_id /* shard */, 1 /* node */,
                             meta /* test_meta */, key, key_len, data,
                             strlen(data)+1);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "write on node 1 complete");
    plat_assert(op_ret == SDF_SUCCESS);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "read on node 1");
    op_ret = rtfw_read_sync(test_framework, shard_id, 1 /* node */, key,
                            key_len, &data_out, &data_len_out, &free_cb);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "read on node 1 complete");
    plat_assert(op_ret == SDF_SUCCESS);
    plat_assert(strcmp(data_out, data) == 0);
    plat_closure_apply(replication_test_framework_read_data_free_cb, &free_cb,
                       data_out, data_len_out);

    /* key2: write, then delete.  Remember the first buffer before data is
     * pointed at a new allocation so that it is not leaked. */
    ++ data_generation;
    first_data = data;
    plat_asprintf(&data, "data_%s_%d", key2, data_generation);
    key = key2;
    key_len = strlen(key) + 1;
    plat_log_msg(LOG_ID, LOG_CAT, LOG_TRACE,
                 "write on node 1 key:%s, key_len:%u, data:%s, data_len:%u",
                 key, (int)(strlen(key)), data, (int)(strlen(data)));
    op_ret = rtfw_write_sync(test_framework, shard_id /* shard */, 1 /* node */,
                             meta /* test_meta */, key, key_len, data,
                             strlen(data)+1);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "write on node 1 complete");
    plat_assert(op_ret == SDF_SUCCESS);

    key = key2;
    key_len = strlen(key) + 1;
    op_ret = rtfw_delete_sync(test_framework, shard_id, 1, key, key_len);
    plat_assert(op_ret == SDF_SUCCESS);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "crash node 1");
    op_ret = rtfw_crash_node_sync(test_framework, 1);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "crash node 1 complete");
    plat_assert(op_ret == SDF_SUCCESS);

    /* Sleep through the lease until switchover happens */
    rtfw_sleep_usec(test_framework,
                    test_framework->config.replicator_config.lease_usecs * 2);

    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "shutdown");
    rtfw_shutdown_sync(test_framework);
    plat_log_msg(LOG_ID, LOG_CAT, LOG_DBG, "shutdown complete");

    plat_free(first_data);
    plat_free(data);
    plat_free(meta);
    plat_free(replication_props);
    plat_free(shard_meta);

    /* Terminate scheduler if idle_thread exit */
    while (test_framework->timer_dispatcher) {
        fthYield(-1);
    }
    plat_free(test_framework);

    fthKill(1);
}
Пример #23
0
/**
 * @brief Sender fth thread for the SDF_CONSISTENCY protocol test with
 * payload verification.
 *
 * Creates the SDF_CONSISTENCY queue pair, sets mysync to 1 and sends
 * maxcnt REQ_FLUSH messages whose payload bytes are all l % 256.  After
 * each send it waits on the ack mailbox, then on the response mailbox,
 * checks the first 256 response payload bytes against l % 256 and frees
 * the response buffer.  When SENDTWO is set a second LOCK_RESP message is
 * sent and a second response consumed per iteration.  Finally the thread
 * yields until mysync equals 3 and calls fthKill(5).
 *
 * @param arg <IN> only printed in the start-up message
 */
static void fthThreadSender(uint64_t arg) {
    int i, l;
    uint64_t aresp;
    struct sdf_msg *send_msg= NULL;
    vnode_t node;
    serviceid_t protocol = SDF_CONSISTENCY;
    serviceid_t my_protocol = SDF_CONSISTENCY;
    msg_type_t type = REQ_FLUSH;
    sdf_fth_mbx_t fthmbx;
    fthMbox_t *fthmbxtst;
    int debug = 0;
    int maxcnt;
#if SENDTWO
    maxcnt = 5;
#elif SHORTTEST
    maxcnt = SHORTTEST;
#else
    maxcnt = 990000;
#endif

    fthmbxtst = &respmbox;

    fthMboxInit(&ackmbox);
    fthMboxInit(&respmbox);

    fthmbx.actlvl = SACK_BOTH_FTH;
    fthmbx.abox = &ackmbox;
    fthmbx.rbox = &respmbox;

    /* arg is 64 bits wide: print it with a matching conversion */
    printf("FTH Thread starting %s Number of msgs to send = %d arg in %lu\n",
            __func__, maxcnt, (unsigned long) arg);
    fflush(stdout);

    if (DBGP) {
        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: fth mb info fthmbxtst %p rbox %p abox %p lvl %d maxcnt %d\n",
                myid, fthmbxtst, fthmbx.rbox, fthmbx.abox, SACK_BOTH_FTH,
                maxcnt);
    }

    /* node is the destination node */

    if (FASTPATH_TEST) {
        node = myid;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: FASTPATH_TEST node %d myid %d\n", myid, node, myid);
    } else {
        node = myid == 0 ? 1 : 0;
    }

    /* you only init this once but share the q_pairs among the other threads here */

    q_pair_CONSISTENCY = local_create_myqpairs(SDF_CONSISTENCY, myid, node);

    if (q_pair_CONSISTENCY == NULL) {
        fprintf(stderr, "%s: sdf_create_queue_pair failed\n", __func__);
        return;
    }
    /* right now we are not using shmem for these buffers */

    plat_log_msg(
            PLAT_LOG_ID_INITIAL,
            LOG_CAT,
            PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: created queue pair %p sn %d dn %d ss %d ds %d maxcnt %d\n",
            myid, q_pair_CONSISTENCY, myid, myid == 0 ? 1 : 0, SDF_CONSISTENCY,
            SDF_CONSISTENCY, maxcnt);

    /* main loop will send SDF_CONSISTENCY protocol messages till maxcnt is reached 
     * this sleeps on both mailboxes ack and resp based on the lvl dictated
     */
    if (myid == 1) {
        /* debug is a manual switch; with 0 the hold loop below is skipped */
        debug = 0;
        if (debug) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: DEBUG --- NOT SENDING MESSAGES FROM HERE", myid);
            while (debug)
                fthYield(100); /* Don't send messages from node one for now */
        }
    }
    FTH_SPIN_LOCK(&ssync->spin);
    printf("getting spinlock mysync %d\n", mysync);
    fflush(stdout);
    mysync = 1;
    printf("unlock spinlock mysync %d\n", mysync);
    fflush(stdout);
    FTH_SPIN_UNLOCK(&ssync->spin);

    for (l = 0; l < maxcnt; ++l) {
        sdf_msg_t *msg;
        unsigned char *m;
        int ret;

        /* create the buffer on every loop to check the buffer release func */
        if (UNEXPT_TEST) {
            send_msg = (struct sdf_msg *) sdf_msg_alloc(8192);
            /* sizeof of the pointer is what gets printed here; %zu matches
             * the size_t result */
            printf("Node %d: %s BIG Message Alloc %zu\n", myid, __func__,
                    sizeof((struct sdf_msg *)send_msg));
        } else {
            send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE);
        }
        if (send_msg == NULL) {
            /* FIXME should default to an error  */
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                    "sdf_msg_alloc(TSZE) failed\n");
            /* return ((void *)1); */
        }

        /* every payload byte carries the loop counter modulo 256 */
        for (i = 0; i < TSZE; ++i)
            send_msg->msg_payload[i] = (unsigned char) (l % 256);

        /* 
         * Send 2 CONSISTENCY messages with different types to track if we define SENDTWO
         */

        type = REQ_FLUSH;

        printf("norman: before send\n");
        if (UNEXPT_TEST) {
            ret = sdf_msg_send((struct sdf_msg *)send_msg, 8192, node,
                    protocol, myid, my_protocol, type, &fthmbx, NULL);
        } else {
            ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node,
                    protocol, myid, my_protocol, type, &fthmbx, NULL);
        }
        printf("norman: after send\n");

        if (ret != 0)
            process_ret(ret, protocol, type);

        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Msg %d Posted ret %d proto %d type %d Now Sleep on Ack Mbox\n",
                myid, l, ret, protocol, type);

        debug = 0;
        if (debug)
            printf("Node %d: %s STOPPING FOR DEBUG %d\n", myid, __func__, debug);
        while (debug)
            ;

        aresp = fthMboxWait(&ackmbox);
        printf("norman: after wait for ack \n");
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Send Buff loop num %d Ack Freed aresp %ld\n", myid,
                l, aresp);

#if SENDTWO

        /* create the buffer on every loop to check the buffer release func */
        send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE);
        if (send_msg == NULL) {
            /* FIXME should default to an error  */
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                    "sdf_msg_alloc(TSZE) failed\n");
            /* return ((void *)1); */
        }

        type = LOCK_RESP;

        ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, protocol,
                myid, my_protocol, type, &fthmbx, NULL);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: 2nd Msg Posted ret %d protocol %d type %d Sleep on Ack loop %d\n",
                myid, ret, protocol, type, l);

        if (ret != 0)
        process_ret(ret, protocol, type);

        aresp = fthMboxWait(&ackmbox);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: 2nd Send Buff Freed aresp 0x%lx\n", myid, aresp);
#endif

        if (!fthmbx.actlvl) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: actvl %d\n", myid, fthmbx.actlvl);
            plat_assert(fthmbx.actlvl >= 1);
        } else {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: Sleeping on RESP %d  fth mailbox %p loop %d\n",
                    myid, l, &respmbox, l);
            /* 
             * Sleep on the mailbox waiting to get a properly directed response message
             */
            printf("norman: before wait for resp\n");
            msg = (sdf_msg_t *) fthMboxWait(&respmbox);
            printf("norman: after wait for resp\n");
            m = (unsigned char *)msg->msg_payload;
            
            plat_log_msg(
                    PLAT_LOG_ID_INITIAL,
                    LOG_CAT,
                    PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: RESP %d msg %p seq %lu sn %d dn %d proto %d type %d loop %d\n",
                    myid, l, msg, msg->msg_conversation, msg->msg_src_vnode,
                    msg->msg_dest_vnode, msg->msg_dest_service, msg->msg_type,
                    l);
            /*
             * Dump the first 256 payload bytes and stop at the first byte
             * that does not match the expected l % 256 pattern
             */
#if 1
            for (i = 0; i < 256; i++) {
                if((*m)!=l%256)
                {
                    printf("ERROR: !!!!!!!!!!!!!!!!!!!!!! error %dth seq %d, should be %d\n", 
                            i, *m, l%256);
                    sleep(2);
                    break;
                }
                printf(" %02x", *m);
                m++;
                if ((i % 16) == 15) {
                    putchar('\n');
                    fflush(stdout);
                }
            }
#endif
            /* release the receive buffer back to the sdf messaging thread */
            ret = sdf_msg_free_buff(msg);

            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: Returned Buff %d ret %d\n", myid, l, ret);
#if SENDTWO
            msg = (sdf_msg_t *) fthMboxWait(&respmbox);

            /* here the dump starts at the message itself, not the payload */
            m = (unsigned char *)msg;
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: msg %p snode %d dnode %d proto %d type %d loop %d\n",
                    myid, msg, msg->msg_src_vnode, msg->msg_dest_vnode,
                    msg->msg_dest_service, msg->msg_type, l);

            for (i = 0; i < 256; i++) {
                printf(" %02x", *m);
                m++;
                if ((i % 16) == 15) {
                    putchar('\n');
                    fflush(stdout);
                }
            }

            /* release the receive buffer back to the sdf messaging thread */
            ret = sdf_msg_free_buff(msg);

            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: Returned Buff %d ret %d\n", myid, l, ret);
#endif

            fthYield(1); /* we yield to give others a chance to do some work */

        }
    }
    printf("fthYield 100 before\n");
    /* hold until mysync has been set to 3 elsewhere */
    while (mysync != 3)
        fthYield(100);
    printf("fthYield 100 after\n");
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: EXITING completed sending %d messages - mysync %d\n",
            myid, l, mysync);
    fthKill(5); // Kill off FTH
}
Пример #24
0
/*
 * Responder fth thread.
 *
 * Waits until mysync is set, then loops forever: receives a message from
 * q_pair_CONSISTENCY->q_out, hex-dumps its first 256 bytes, and posts a copy
 * of it to `node` on the SDF_RESPONSES service, waiting on ackmbx2 for the
 * send ack before taking the next message.
 *
 * @param arg <IN> fth thread argument (not used by this routine)
 */
static void fthThreadReceiver2(uint64_t arg) {
    int i, ret, ct = 0;
    uint64_t aresp;
    struct sdf_msg *send_msg = NULL, *recv_msg = NULL;
    vnode_t node;
    serviceid_t protocol = SDF_RESPONSES;
    serviceid_t my_protocol = SDF_RESPONSES;
    msg_type_t type = RESP_TWO;
    sdf_fth_mbx_t fthmbx;

    /* Only the send ack is requested; it is posted to ackmbx2. */
    fthmbx.actlvl = SACK_ONLY_FTH;
    fthmbx.abox = &ackmbx2;
    fthMboxInit(&ackmbx2);

    unsigned char feedbackseq = 0;
    printf("FTH Thread starting %s\n", __func__);

    /* Destination is ourselves in the fastpath test, the other node otherwise. */
    if (FASTPATH_TEST) {
        node = myid;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: FASTPATH_TEST node %d myid %d\n", myid, node, myid);
    } else {
        node = myid == 0 ? 1 : 0;
    }

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Found queue pair %p sn %d dn %d ss %d ds %d loop %d\n",
            myid, q_pair_RESPONSES, myid, (myid == 0 ? 1 : 0), SDF_RESPONSES,
            SDF_RESPONSES, ct);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Now yielding waiting for mysync\n", myid);

    /* Need to yield till all queues have been created */
    while (!mysync)
        fthYield(1);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Ready To Accept 1st MESSAGE sack lvl %d loop %d\n\n\n",
            myid, fthmbx.actlvl, ct);

    for (;;) {

        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Waiting for messages q_pair_CONSISTENCY %p loop %d\n",
                myid, q_pair_CONSISTENCY, ct);

        printf("norman: before receive data in %s\n", __func__);
        recv_msg = sdf_msg_receive(q_pair_CONSISTENCY->q_out, 0, B_TRUE);
        printf("norman: after receive data in %s\n", __func__);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Got One *msg %p sn %d dn %d proto %d type %d"
                    " akrpmbx %p\n", myid, recv_msg, recv_msg->msg_src_vnode,
                recv_msg->msg_dest_vnode, recv_msg->msg_dest_service,
                recv_msg->msg_type, recv_msg->akrpmbx);

        /* The first payload byte is the sequence value echoed back below. */
        feedbackseq = recv_msg->msg_payload[0];
#if 1
        /* Dump the first 256 bytes of the message, 16 per line. */
        unsigned char *m = (unsigned char *)recv_msg;
        for (i = 0; i < 256; i++) {
            printf(" %02x", *m);
            m++;
            if ((i % 16) == 15) {
                printf("  myid %d", myid);
                putchar('\n');
                fflush(stdout);
            }
        }
#endif
        fthYield(1); /* let's give it up here */

        send_msg = (struct sdf_msg *) sdf_msg_alloc(recv_msg->msg_len);
        if (send_msg == NULL) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "sdf_msg_alloc(recv_msg->msg_len) failed\n");
            /*
             * No buffer to reply with: hand the receive buffer back and stop
             * instead of writing through a NULL pointer.
             */
            ret = sdf_msg_free_buff(recv_msg);
            return;
        }

        for (i = 0; i < TSZE; ++i)
            send_msg->msg_payload[i] = (unsigned char) feedbackseq;

        /*
         * NOTE(review): this copies the whole received message over send_msg,
         * which appears to overwrite the payload fill above - confirm intent.
         */
        memcpy(send_msg, recv_msg, recv_msg->msg_len);

        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Posting reply MSG dn %d ds %d sn %d ss %d type %d loop %d\n",
                myid, node, protocol, myid, my_protocol, type, ct);

        ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, protocol,
                myid, my_protocol, type, &fthmbx,
                sdf_msg_get_response_mbx(recv_msg));
        if (ret != 0)
            process_ret(ret, protocol, type);

        /* release the receive buffer back to the sdf messaging thread */
        ret = sdf_msg_free_buff(recv_msg);

        /* Block until the messaging layer acks the send. */
        aresp = fthMboxWait(&ackmbx2);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Send Buff Freed aresp %ld loop %d\n", myid, aresp,
                ct);
        ct++;

/*        if (SHORTTEST)
            break;*/

        /* Simple exit mechanism, worker threads will just quit when predefined msgcnt 
         * has been reached in the sender thread
         */
    }
    /* Only reached if an exit condition is added to the loop above. */
    mysync++;
    fthYield(1);
    /* Report the number of messages handled, not the hex-dump index. */
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: WORKER FTH exiting - loop %d\n mysync %d", myid, ct,
            mysync);
}
/*
 * Sender fth thread for a single protocol.
 *
 * Creates the queue pair for protocol `arg`, starts the messaging engine and
 * sends msgCount REQ_FLUSH messages to the peer node, waiting for the ack of
 * every send (and, when FLAG is set, for the response as well). Afterwards it
 * waits until fthCount reaches 2 * FTHRECVERS and kills the fth scheduler.
 *
 * @param arg <IN> protocol number, used as the q_pair / mysync index
 */
void
fthThreadSinglePtlPressSeqSender(uint64_t arg) {
    int l = 0;
    vnode_t node;
    struct sdf_msg *send_msg = NULL;

    msg_type_t type = REQ_FLUSH;

    sdf_fth_mbx_t fthmbx;
    fthMbox_t ackmbox, respmbox;
    fthMboxInit(&ackmbox);
    fthmbx.abox = &ackmbox;

#if FLAG
    /* Ask for both the send ack and the peer's response. */
    fthMboxInit(&respmbox);
    fthmbx.rbox = &respmbox;
    fthmbx.actlvl = SACK_BOTH_FTH;
#else
    fthmbx.rbox = NULL;
    fthmbx.actlvl = SACK_ONLY_FTH;
#endif


    printf("node %d, fth thread sender starting  %s: number of msgs to send = %d\n", myid, __func__, msgCount);
    fflush(stdout);

    int localpn, actmask;
    uint32_t numprocs;
    uint64_t ptl = arg;
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node,  &actmask);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: numprocs %d active_procs mask 0x%x active_mask 0x%x\n", 
                  localrank, numprocs, localpn, actmask);

    if (numprocs == 1) {
        /* Single process: send to ourselves. */
        node = 0;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: FASTPATH_TEST node %d myid %d\n", 
                     myid, node, myid);
    }
    else {
        node = local_get_pnode(localrank, localpn, numprocs);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: %s my pnode is  %d\n", 
                     localrank, __func__, node);
        fflush(stdout);
        for (int i = 0; i < numprocs; i++) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                         "\nNode %d: %s cluster_node[%d] = %d\n", 
                         localrank, __func__, i, cluster_node[i]);
            fflush(stdout);
        }
    }

    // you only init this once but share the q_pairs among the other threads here 
    q_pair[ptl] = local_create_myqpairs(ptl, myid, node);
    /* Validate the queue pair BEFORE touching its members. */
    if (q_pair[ptl] == NULL) {
        fprintf(stderr, "%s: sdf_create_queue_pair %li failed\n", __func__, ptl);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: EXITING completed sending %d messages\n",
                     myid, l);
        return;
    }
    info->queue_pair_info->queue_add[0] = q_pair[ptl]->q_in;
    info->queue_pair_info->queue_add[1] = q_pair[ptl]->q_out;
    info->queue_pair_info->queue_pair_type = ptl;

    // right now we are not using shmem for these buffers

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: created queue pair %p sn %d dn %d ss %li ds %li maxcnt %d\n",
                 myid, q_pair[ptl], myid, node, ptl, ptl, msgCount);


    /* Signal that this protocol's queue pair exists. */
    FTH_SPIN_LOCK(&ssync->spin);
    mysync[ptl] = 1;
    FTH_SPIN_UNLOCK(&ssync->spin);

    // let the msg thread do its thing
    sdf_msg_startmsg(myid, 0, NULL);

    /* 
     * main loop will send protocol `ptl` messages till msgCount is reached 
     * this sleeps on both mailboxes ack and resp based on the lvl dictated
     */

    for (l = 0; l < msgCount; ++l) {
        int ret;

        // create the buffer on every loop to check the buffer release func
        send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE); 
        if (send_msg == NULL) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                         "sdf_msg_alloc(TSZE) failed\n");
            /*
             * Stop sending rather than fill a NULL buffer; fall through to
             * the normal shutdown handshake below.
             */
            break;
        }
        local_setmsg_mc_payload(send_msg, TSZE, myid, l, msgCount, ptl); 

        /* 
         * Send messages with different types
         */ 
        type = REQ_FLUSH;

        ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, ptl, myid, ptl, type, &fthmbx, NULL);
        /* get the ack when sending success. */
        fthMboxWait(&ackmbox);   
#if FLAG
        printf("node %d, wait and get the response message from pnode.\n", myid);
        /* get the response when receive message success. */
        sdf_msg_t * msg = (sdf_msg_t *)fthMboxWait(&respmbox);
        ret = sdf_msg_free_buff(msg);
#endif
        if (ret != 0 )
            process_ret(ret, ptl, type, myid);

        fthYield(1);
    }

    printf("@@node %d, sender #%li sends %li protocol message finished, send %d times\n", myid, ptl, ptl, l);

    /* Announce completion, then wait for every sender/receiver to finish. */
    FTH_SPIN_LOCK(&ssync->spin);
    fthCount ++;
    FTH_SPIN_UNLOCK(&ssync->spin);

    while (fthCount != 2 * FTHRECVERS) {
        fthYield(100);
    }

    printf("node %d, sender %li kill the scheduler.\n", myid, ptl); 
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: EXITING completed sending %d messages - mysync %d\n", 
                 myid, l, mysync[ptl]);

    fthKill(1);
}
/*
 * Sender fth thread for the "multiprotocol" n(sender) -> n(receiver) test.
 * The message type is arbitrary for now; here it is REQ_FLUSH.
 *
 * One sender (and one receiver) is created per protocol. This sender posts
 * msgCount messages on protocol `arg`. Every send waits for its ack; the send
 * number msgBp additionally waits for a response from the receiver and then
 * releases that response buffer. Afterwards the thread waits until fthCount
 * reaches 2 * FTHRECVERS and kills the fth scheduler.
 *
 * @param arg <IN> protocol number, used as the q_pair / mysync index
 */
void
fthThreadMultiPtlQuaSender(uint64_t arg) {
    int l = 0;
    vnode_t node;
    struct sdf_msg *send_msg = NULL;

    msg_type_t type = REQ_FLUSH;

    sdf_fth_mbx_t fthmbx, fthmbx_bp;
    fthMbox_t ackmbox, ackmbox_bp, respmbox_bp;

    uint64_t ptl = arg; 
    // fthmbx: the sender only receives the ack when a send succeeds
    fthmbx.actlvl = SACK_ONLY_FTH;
    fthMboxInit(&ackmbox);
    fthmbx.abox = &ackmbox;

    // fthmbx_bp: the sender receives the ack and also a response from the receiver
    fthmbx_bp.actlvl = SACK_BOTH_FTH;
    fthMboxInit(&ackmbox_bp);
    fthMboxInit(&respmbox_bp);
    fthmbx_bp.abox = &ackmbox_bp;
    fthmbx_bp.rbox = &respmbox_bp; 

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nnode %d, fth thread sender starting  %s: number of msgs to send = %d\n", 
                myid, __func__, msgCount);
    fflush(stdout);

    int localpn, actmask;
    uint32_t numprocs;
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: numprocs %d active_procs mask 0x%x active_mask 0x%x\n", 
                  localrank, numprocs, localpn, actmask);
    if (numprocs == 1) {
        /* Single process: send to ourselves. */
        node = 0;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: FASTPATH_TEST node %d myid %d\n", myid, node, myid);
    }
    else {
        int i;
        node = local_get_pnode(localrank, localpn, numprocs);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: %s my pnode is  %d\n", 
                    localrank, __func__, node);
        fflush(stdout);
        for (i = 0; i < numprocs; i++) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                        "\nNode %d: %s cluster_node[%d] = %d\n", 
                        localrank, __func__, i, cluster_node[i]);
            fflush(stdout);
        }
    }

    q_pair[ptl] = local_create_myqpairs(ptl, myid, node);

    /*
     * The code below causes a segmentation fault, so it is disabled.
     */

#if 0
    info->queue_pair_info->queue_add[0] = q_pair[node]->q_in;
    info->queue_pair_info->queue_add[1] = q_pair[node]->q_out;
    info->queue_pair_info->queue_pair_type = SDF_CONSISTENCY;
    if(q_pair[ptl] == NULL) {
        fprintf(stderr, "%s: sdf_create_queue_pair %li failed\n", __func__, ptl);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: EXITING completed sending %d messages\n", 
                     myid, l);
        return;
    }
    
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: created queue pair %p sn %d dn %d ss %li ds %li maxcnt %d\n",
                 myid, q_pair[ptl], myid, myid == 0 ? 1 : 0,
                 ptl, ptl, msgCount);
#endif

    /* Signal that this protocol's queue pair exists. */
    FTH_SPIN_LOCK(&ssync->spin);
    mysync[ptl] = 1;
    FTH_SPIN_UNLOCK(&ssync->spin);

    //sdf_msg_startmsg(myid, 0, NULL);

    for (l = 0; l < msgCount; ++l) {
        int ret, i;
        sdf_msg_t *msg;

        send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE); 

        if (send_msg == NULL) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                         "sdf_msg_alloc(TSZE) failed\n");
            /*
             * Stop sending rather than fill a NULL buffer; fall through to
             * the normal shutdown handshake below.
             */
            break;
        }

        /* Every payload byte carries the global sequence value of this send. */
        for (i = 0; i < TSZE; ++i)
            send_msg->msg_payload[i] = ptl * msgCount + l;

        type = REQ_FLUSH;

        if(l == msgBp-1) {
            /* Breakpoint send: wait for the ack AND the receiver's response. */
            ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, ptl, myid, ptl, type, &fthmbx_bp, NULL);
            fthMboxWait(&ackmbox_bp);
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                        "\nnode %d, sender %li waiting for #%d times response.\n", 
                        myid, ptl, msgBp);

            msg = (sdf_msg_t *)fthMboxWait(&respmbox_bp);
#if 0
    if(msg) {
        uint32_t d = msg->msg_src_service;
        printf("node %d, sender #%d recvs response protocol#%d message from recver, at bp %d\n", myid, ptl, d, msgBp);
        char *m = (char *)(msg->msg_payload);
        for (i = 0; i < TSZE; i++) {
            printf("%d-%d-%d  ", *m, d, myid);//the style of data is "number-protocol-node"
            m++;
            if ((i % 16) == 15) {
                printf("  myid %d", myid);
                putchar('\n');
                fflush(stdout);
            }
        }
    }
    else {
        printf("!!node %d, sender #%li recvs response protocol#%d meessage from recver failed\n", myid, ptl, msg->msg_src_service);
    }
#endif
            /* Release the response buffer. */
            ret = sdf_msg_free_buff(msg); 
        }
        else {
            /* Ordinary send: only the ack is awaited. */
            ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, ptl, myid, ptl, type, &fthmbx, NULL);
            fthMboxWait(&ackmbox);
        }

        if(myid == 0) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                        "\nnode %d ,sender #%li sends %d times, message contents %li-%li-%d\n", 
                        myid, ptl, l + 1, ptl * msgCount + l, ptl, myid);
        }
        else {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                        "\nnode %d, sender #%li sends %d times, message contents %li-%li-%d\n", 
                        myid, ptl, l + 1, ptl *msgCount + l, ptl, myid);
        } 
        if (ret != 0 )
            process_ret(ret, ptl, type, myid);

        fthYield(1);
    }
    /* After the loop, l already equals the number of messages sent. */
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\n@@node %d, sender #%li sends %li protocol message finished, send %d times\n", 
                myid, ptl, ptl, l);

    /* Announce completion, then wait for every sender/receiver to finish. */
    FTH_SPIN_LOCK(&ssync->spin);
    fthCount ++;
    FTH_SPIN_UNLOCK(&ssync->spin);

    while (fthCount != 2 * FTHRECVERS) {
        fthYield(100);
    }
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nnode %d, sender %li kill the scheduler.\n", 
                 myid, ptl); 

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: EXITING completed sending %d messages - mysync %d\n", 
                 myid, l, mysync[ptl]);
    fthKill(1);
}
Пример #27
0
/*
 * Responder fth thread.
 *
 * Waits until mysync is set, then receives messages from
 * q_pair_CONSISTENCY->q_out and echoes each payload back to `node` on the
 * SDF_RESPONSES service, waiting on ackmbx2 for the send ack. The loop ends
 * once recv_ct reaches 10, or when a GOODBYE message arrives after that point.
 * On exit, if other receivers remain (endsync), a GOODBYE is sent for them.
 *
 * @param arg <IN> fth thread argument (not used by this routine)
 */
static void fthThreadReceiver2(uint64_t arg) {
    int ret;
    uint64_t aresp;
    struct sdf_msg *send_msg = NULL, *recv_msg = NULL;
    vnode_t node;
    serviceid_t protocol = SDF_RESPONSES;
    serviceid_t my_protocol = SDF_RESPONSES;
    msg_type_t type = RESP_TWO;
    sdf_fth_mbx_t fthmbx;

    /* Only the send ack is requested; it is posted to ackmbx2. */
    fthmbx.actlvl = SACK_ONLY_FTH;
    fthmbx.abox = &ackmbx2;
    fthMboxInit(&ackmbx2);

    printf("FTH Thread starting %s\n", __func__);

    /* Destination is ourselves in the fastpath test, the other node otherwise. */
    if (FASTPATH_TEST) {
        node = myid;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: FASTPATH_TEST node %d myid %d\n", myid, node, myid);
    } else {
        node = myid == 0 ? 1 : 0;
    }

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Found queue pair %p sn %d dn %d ss %d ds %d loop %d\n",
            myid, q_pair_RESPONSES, myid, (myid == 0 ? 1 : 0), SDF_RESPONSES,
            SDF_RESPONSES, recv_ct);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Now yielding waiting for mysync\n", myid);

    /* Need to yield till all queues have been created */
    while (!mysync)
        fthYield(1);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Ready To Accept 1st MESSAGE sack lvl %d loop %d\n\n\n",
            myid, fthmbx.actlvl, recv_ct);

    for (;;) {
        printf("Mysync = %d\n", mysync);
        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Waiting for messages q_pair_CONSISTENCY %p loop %d\n",
                myid, q_pair_CONSISTENCY, recv_ct);

        recv_msg = sdf_msg_receive(q_pair_CONSISTENCY->q_out, 0, B_TRUE);
        /*
         * NOTE(review): the GOODBYE buffer is not handed back with
         * sdf_msg_free_buff() on this path - confirm whether it should be.
         */
        if (recv_msg->msg_type == GOODBYE && recv_ct >= 10) {
            break;
        }
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Got One *msg %p sn %d dn %d proto %d type %d"
                    " akrpmbx %p\n", myid, recv_msg, recv_msg->msg_src_vnode,
                recv_msg->msg_dest_vnode, recv_msg->msg_dest_service,
                recv_msg->msg_type, recv_msg->akrpmbx);

#if SHOWBUFF
        /* Dump the first 256 bytes of the message, 16 per line. */
        int i;
        unsigned char *m = (unsigned char *)recv_msg;
        for (i = 0; i < 256; i++) {
            printf(" %02x", *m);
            m++;
            if ((i % 16) == 15) {
                printf("  myid %d", myid);
                putchar('\n');
                fflush(stdout);
            }
        }
#endif
        fthYield(1); /* let's give it up here */

        send_msg = (struct sdf_msg *) sdf_msg_alloc(recv_msg->msg_len);
        if (send_msg == NULL) {
            fprintf(stderr, "sdf_msg_alloc(recv_msg->msg_len) failed\n");
            /* Hand the receive buffer back before bailing out. */
            ret = sdf_msg_free_buff(recv_msg);
            return;
        }

        //         for (i = 0; i < TSZE; ++i)
        //             send_msg->msg_payload[i] = (unsigned char) 0x55;

        /* Echo the payload only: msg_len minus the sdf_msg header size. */
        memcpy(send_msg->msg_payload, recv_msg->msg_payload, recv_msg->msg_len - sizeof(struct sdf_msg));

        /* Scratch response-mailbox key handed to sdf_msg_get_response(). */
        struct sdf_resp_mbx rhkey;
        struct sdf_resp_mbx *ptrkey = &rhkey;

        strncpy(rhkey.mkey, MSG_DFLT_KEY, (MSG_KEYSZE - 1));
        rhkey.mkey[MSG_KEYSZE - 1] = '\0';
        rhkey.akrpmbx_from_req = NULL;
        rhkey.rbox = NULL;

        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Posting reply MSG dn %d ds %d sn %d ss %d type %d loop %d\n",
                myid, node, protocol, myid, my_protocol, type, recv_ct);

        ret = sdf_msg_send((struct sdf_msg *) send_msg, TSZE, node, protocol,
                myid, my_protocol, type, &fthmbx, sdf_msg_get_response(
                        recv_msg, ptrkey));
        if (ret != 0)
            process_ret(ret, protocol, type, myid);

        /* Block until the messaging layer acks the send. */
        aresp = fthMboxWait(&ackmbx2);
        /* release the receive buffer back to the sdf messaging thread */
        ret = sdf_msg_free_buff(recv_msg);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Send Buff Freed aresp %ld loop %d\n", myid, aresp,
                recv_ct);
        recv_ct++;
        printf("Thread 2 receive:%d\n", recv_ct);
        /* Simple exit mechanism, worker threads will just quit when predefined msgcnt
         * has been reached in the sender thread
         */
        if (recv_ct >= 10) {
            printf("Thread 2 break\n");
            break;
        }

    }

    if ((--endsync) != 0) {
        // there are still some receivers left, we need to notify them
        msg_type_t say_goodbye = GOODBYE;
        ret = sdf_msg_say_bye(myid, SDF_CONSISTENCY, node, SDF_CONSISTENCY,
                say_goodbye, &fthmbx, TSZE);
        if (ret != 0) {
            process_ret(ret, protocol, type, myid);
        }
    }
    mysync++;
    fthYield(1);
    /* Report the number of messages handled, not a stale dump index. */
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: WORKER FTH exiting - loop %d\n mysync %d", myid, recv_ct,
            mysync);
}
/*
 * Receiver fth thread for the multi-node test.
 *
 * Waits until mysync is set, then receives msgCount messages from
 * q_pair[arg]->q_out, printing each payload. When FLAG is set, every message
 * is echoed back to the sending node as RESP_ONE and the send ack is awaited.
 * Afterwards the thread bumps fthCount, waits for it to reach 2 and kills the
 * fth scheduler.
 *
 * @param arg <IN> node number, used as the q_pair index and reply destination
 */
void fthThreadMultiNodeTntotnRecver(uint64_t arg) {
    int ret, ct = 0;
    uint64_t aresp = 0;
    struct sdf_msg *recv_msg = NULL, *send_msg = NULL;
    vnode_t node;
    printf("node %d, fth thread receiver type#%li starting %s\n", myid, ptl, __func__);
    fflush(stdout);

    int localpn, actmask;
    uint32_t numprocs;

#if FLAG
    /* Only the send ack is requested for the echoed replies. */
    sdf_fth_mbx_t fthmbx;
    fthMbox_t ackmbox1;
    fthmbx.actlvl = SACK_ONLY_FTH;
    fthmbx.abox = &ackmbox1;
    fthmbx.rbox = NULL;
    fthMboxInit(&ackmbox1);
#endif

    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);
    if (localrank); // keep compiler happy
    node = arg;

    /* Yield until the queues have been created. */
    while (!mysync) fthYield(1);

    for (;;) {
        recv_msg = sdf_msg_receive(q_pair[node]->q_out, 0, B_TRUE);

        /* Check the result BEFORE any field of the message is read. */
        if (recv_msg == NULL) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                         "!!node %d, receiver type#%li failed to receive a message from sender %d\n",
                         myid, ptl, node);
            break;
        }

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Waiting for messages q_pair %p loop %d\n", myid, q_pair[node], ct);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Got One *msg %p sn %d dn %d proto %d type %d"
                     " akrpmbx %p\n", myid, recv_msg, recv_msg->msg_src_vnode, recv_msg->msg_dest_vnode,
                     recv_msg->msg_dest_service, recv_msg->msg_type, recv_msg->akrpmbx);

        uint32_t d = recv_msg->msg_dest_service;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,"node %d, receiver type#%li recvs protocol#%d type message from sender %d\n", myid, ptl, d, node);
        local_printmsg_payload(recv_msg, TSZE, myid);

#if FLAG
        send_msg = (struct sdf_msg *) sdf_msg_alloc(recv_msg->msg_len);
        if (send_msg == NULL) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                         "sdf_msg_alloc(recv_msg->msg_len) failed\n");
            /* Hand the receive buffer back and stop receiving. */
            ret = sdf_msg_free_buff(recv_msg);
            break;
        }
        /*
         * Copy the payload only: msg_len is treated as including the sdf_msg
         * header, as in fthThreadReceiver2 - TODO confirm against the sdf_msg
         * definition.
         */
        memcpy(send_msg->msg_payload, recv_msg->msg_payload, recv_msg->msg_len - sizeof(struct sdf_msg));

        /* Scratch response-mailbox key handed to sdf_msg_get_response(). */
        struct sdf_resp_mbx rhkey;
        struct sdf_resp_mbx *ptrkey = &rhkey;

        strncpy(rhkey.mkey, MSG_DFLT_KEY, (MSG_KEYSZE - 1));
        rhkey.mkey[MSG_KEYSZE - 1] = '\0';
        rhkey.akrpmbx_from_req = NULL;
        rhkey.rbox = NULL;

        ret = sdf_msg_send((struct sdf_msg *)send_msg, TSZE, node, ptl, myid, ptl, RESP_ONE, &fthmbx,
                        sdf_msg_get_response(recv_msg, ptrkey));
        fthMboxWait(&ackmbox1);
#endif

        // release the receive buffer back to the sdf messaging thread
        ret = sdf_msg_free_buff(recv_msg);

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Send Buff Freed aresp %ld loop %d\n", myid, aresp, ct);
        ct++;
        printf("Recv:%d\n", ct);

        if(ct == msgCount) break;

    }//end of for statement
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,"@@node %d, receiver type#%li, receive message finished, receive %d times\n", myid, ptl, ct);

    /* Announce completion, then wait for the peer thread before shutdown. */
    FTH_SPIN_LOCK(&ssync->spin);
    fthCount ++;
    FTH_SPIN_UNLOCK(&ssync->spin);

    while (fthCount != 2) fthYield(10);

    fthKill(1);
}
Пример #29
0
static void fthThreadSender(uint64_t arg) {
    int i, l;
    struct sdf_msg *send_msg = NULL;
    vnode_t node;
    serviceid_t protocol = SDF_CONSISTENCY;
    serviceid_t my_protocol = SDF_CONSISTENCY;
    msg_type_t type = REQ_FLUSH;
    sdf_fth_mbx_t fthmbx;
    fthMbox_t *fthmbxtst;
    int debug = 0;
    int maxcnt, ret;

#if SENDTWO
    maxcnt = 5;
#elif SHORTTEST
    maxcnt = SHORTTEST;
#else
    maxcnt = 990000;
#endif

    fthmbxtst = &respmbox;

    fthMboxInit(&ackmbox);
    fthMboxInit(&respmbox);

    fthmbx.actlvl = SACK_BOTH_FTH;
    fthmbx.abox = &ackmbox;
    fthmbx.rbox = &respmbox;

    printf("FTH Thread starting %s Number of msgs to send = %d arg in %li\n",
            __func__, maxcnt, arg);
    fflush(stdout);

    if (DBGP) {
        plat_log_msg(
                PLAT_LOG_ID_INITIAL,
                LOG_CAT,
                PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: fth mb info fthmbxtst %p rbox %p abox %p lvl %d maxcnt %d\n",
                myid, fthmbxtst, fthmbx.rbox, fthmbx.abox, SACK_BOTH_FTH,
                maxcnt);
    }

    /* node is the destination node */
    int localpn, actmask;
    uint32_t numprocs;
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);
    if (numprocs == 1) {
        node = 0;
    }
    else {
        node = local_get_pnode(localrank, localpn, numprocs);
        printf("Node %d: %s my pnode is  %d\n", localrank, __func__, node);
        fflush(stdout);
        for (i = 0; i < numprocs; i++) {
            printf("Node %d: %s cluster_node[%d] = %d\n", localrank, __func__, i, cluster_node[i]);
            fflush(stdout);
        }
    }

    /* you only init this once but share the q_pairs among the other threads here */

    q_pair_CONSISTENCY = local_create_myqpairs(SDF_CONSISTENCY, myid, node);
    info->queue_pair_info->queue_add[0] = q_pair_CONSISTENCY->q_in;
    info->queue_pair_info->queue_add[1] = q_pair_CONSISTENCY->q_out;
    info->queue_pair_info->queue_pair_type = SDF_CONSISTENCY;
    if (q_pair_CONSISTENCY == NULL) {
        fprintf(stderr, "%s: sdf_create_queue_pair failed\n", __func__);
        return;
    }
    /* right now we are not using shmem for these buffers */

    plat_log_msg(
            PLAT_LOG_ID_INITIAL,
            LOG_CAT,
            PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: created queue pair %p sn %d dn %d ss %d ds %d maxcnt %d\n",
            myid, q_pair_CONSISTENCY, myid, node, SDF_CONSISTENCY,
            SDF_CONSISTENCY, maxcnt);

    /* main loop will send SDF_CONSISTENCY protocol messages till maxcnt is reached
     * this sleeps on both mailboxes ack and resp based on the lvl dictated
     */
    if (myid == 1) {
        debug = 0;
        if (debug) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                    "\nNode %d: DEBUG --- NOT SENDING MESSAGES FROM HERE", myid);
            while (debug)
                fthYield(100); /* Don't send mesages from node one for now */
        }
    }
    FTH_SPIN_LOCK(&ssync->spin);
    mysync = 1;
    FTH_SPIN_UNLOCK(&ssync->spin);

    // start the message engine
    sdf_msg_startmsg(myid, 0, NULL);

    for (l = 0; l < maxcnt; ++l) {
        printf("A new %d round\n", l);
        /* create the buffer on every loop to check the buffer release func */
        if (UNEXPT_TEST) {
            send_msg = (struct sdf_msg *) sdf_msg_alloc(8192);
            printf("Node %d: %s BIG Message Alloc %li\n", myid, __func__,
                    sizeof((struct sdf_msg *) send_msg));
        } else {
            send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE);
        }
        if (send_msg == NULL) {
            /* FIXME should default to an error  */
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                    "sdf_msg_alloc(TSZE) failed\n");
            /* return ((void *)1); */
        }

        for (i = 0; i < TSZE; ++i)
            send_msg->msg_payload[i] = (unsigned char) i;

        /*
         * Send 2 CONSISTENCY messages with different types to track if we define SENDTWO
         */

        type = REQ_FLUSH;

        if (UNEXPT_TEST) {
            ret = sdf_msg_send((struct sdf_msg *) send_msg, 8192, node,
                    protocol, myid, my_protocol, type, &fthmbx, NULL);
        } else {
            ret = sdf_msg_send((struct sdf_msg *) send_msg, TSZE, node,
                    protocol, myid, my_protocol, type, &fthmbx, NULL);
        }
        while (ret != 0) {
            /* just print the alerts wrt the queues */
            ret = process_ret(ret, protocol, type, myid);
	    if (ret == QUEUE_NOQUEUE) {break;}
            ret = sdf_msg_send((struct sdf_msg *) send_msg, TSZE, node,
                    protocol, myid, my_protocol, type, &fthmbx, NULL);
	}
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: Msg %d Posted ret %d proto %d type %d loop %d\n",
                     myid, l, ret, protocol, type, l);
    }


    /* at the end here we just do a simple test to fail a post, should print the error message
     * and we just exit this test */
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: SEND QUEUE TEST - Posting to a non existent queue ds %d loop %d\n",
                 myid, SDF_METADATA, l);
    my_protocol = SDF_METADATA; /* set a bogus protocol that has no associated queue */
    ret = sdf_msg_send((struct sdf_msg *) send_msg, TSZE, node,
                    protocol, myid, my_protocol, type, &fthmbx, NULL);
    while (ret != 0) {
        /* just print the alerts wrt the queues */
        ret = process_ret(ret, protocol, type, myid);
        if (ret == 2) {break;}
        ret = sdf_msg_send((struct sdf_msg *) send_msg, TSZE, node,
                            protocol, myid, my_protocol, type, &fthmbx, NULL);
	}

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: Done with SEND QUEUE TEST complete total msgs posted %d\n",
                     myid, l);
    mysync++;
    sleep(1);
    fthYield(100);
    fthKill(5); // Kill off FTH
}
/**
 * @brief fth sender thread: creates one queue pair per protocol and posts
 *        one REQ_FLUSH message per protocol index.
 *
 * Sequence:
 *   1. Query the node status and choose the destination node (node 0 when
 *      only a single process is reported, otherwise local_get_pnode()).
 *   2. Create a queue pair for every protocol below SDF_PROTOCOL_COUNT except
 *      SDF_SYSTEM, SDF_DEBUG and GOODBYE, recording each in q_pair[] and in
 *      the shared info->queue_pair_info structure.
 *   3. Set mysync to 1 under ssync->spin and call sdf_msg_startmsg().
 *   4. For l in [0, msgCount) (skipping the same three protocol values) send
 *      one TSZE-sized message using l as both source and destination service.
 *   5. Yield until mysync reaches 17, then kill the fth scheduler.
 *
 * On queue pair creation failure or message allocation failure the thread
 * logs the problem, calls fthKill(1) and returns.
 *
 * @param arg <IN> unused
 */
void
fthThreadMultiSender(uint64_t arg) {
    int i = 0, l = 0;
    vnode_t node;
    struct sdf_msg *send_msg = NULL;
    msg_type_t type = REQ_FLUSH;

    (void) arg;

    printf("node %d, fth thread sender starting  %s: number of msgs to send = %d\n", myid, __func__, msgCount);
    fflush(stdout);

    int localpn, actmask;
    uint32_t numprocs;
    int localrank = sdf_msg_nodestatus(&numprocs, &localpn, cluster_node, &actmask);
    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: numprocs %d active_procs mask 0x%x active_mask 0x%x\n", 
                 localrank, numprocs, localpn, actmask);

    if (numprocs == 1) {
        /* Single process: send to ourselves through node 0 */
        node = 0;
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: FASTPATH_TEST node %d myid %d\n", myid, node, myid);
    } else {
        node = local_get_pnode(localrank, localpn, numprocs);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: %s my pnode is  %d\n", 
                     localrank, __func__, node);
        fflush(stdout);
        for (i = 0; (uint32_t) i < numprocs; i++) {
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                         "\nNode %d: %s cluster_node[%d] = %d\n", 
                         localrank, __func__, i, cluster_node[i]);
            fflush(stdout);
        }
    }

    for (i = 0; i < SDF_PROTOCOL_COUNT; i++) {
        /* No queue pairs are created for these protocols */
        if (i == SDF_SYSTEM || i == SDF_DEBUG || i == GOODBYE) {
            q_pair[i] = NULL;
            continue;
        }

        q_pair[i] = local_create_myqpairs(i, myid, node);

        /* Validate BEFORE touching q_pair[i]->...; a failed create is NULL */
        if (q_pair[i] == NULL) {
            fprintf(stderr, "%s: sdf_create_queue_pair %d failed\n", __func__, i);
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                         "\nNode %d: EXITING completed sending %d messages - mysync %d\n", 
                         myid, l, mysync);
            fthKill(1);
            return;
        }

        info->queue_pair_info->queue_add[0] = q_pair[i]->q_in;
        info->queue_pair_info->queue_add[1] = q_pair[i]->q_out;
        info->queue_pair_info->queue_pair_type = i;

        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                     "\nNode %d: created queue pair %p sn %d dn %d ss %d ds %d maxcnt %d\n",
                     myid, q_pair[i], myid, node, i, i, msgCount);
    }

    FTH_SPIN_LOCK(&ssync->spin);
    mysync = 1;
    FTH_SPIN_UNLOCK(&ssync->spin);

    sdf_msg_startmsg(myid, 0, NULL); /* could be moved to the main method */

    for (l = 0; l < msgCount; ++l) {
        int ret;

        /* Do not send on protocols that have no queue pair */
        if (l == SDF_SYSTEM || l == SDF_DEBUG || l == GOODBYE) {
            continue;
        }

        send_msg = (struct sdf_msg *) sdf_msg_alloc(TSZE);
        if (send_msg == NULL) {
            /* Cannot build or send a message without a buffer: bail out */
            plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_FATAL,
                         "sdf_msg_alloc(TSZE) failed\n");
            fthKill(1);
            return;
        }

        local_setmsg_payload(send_msg, TSZE, myid, l);
        /* l doubles as the source and destination service (protocol) id */
        ret = sdf_msg_send(send_msg, TSZE, node, l, myid, l, type, NULL, NULL);
        if (ret != 0) {
            process_ret(ret, l, type, myid);
        }

        fthYield(1);
    }

    /* Wait for the other threads to check in: senders + receivers = 17 */
    while (mysync != 17) {
        fthYield(100);
    }

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: EXITING completed sending %d messages - mysync %d\n", 
                 myid, l, mysync);
    fthKill(1);
}