/**
 *******************************************************************************
 * \brief
 *   move a job from QUEUED to RUNNING and issue the operation its flags select
 *
 * \details
 *   exactly one of SET/GET/EXISTS/DEL is expected to be set in pCB->flags;
 *   they are tested in that order and the first match wins. If none is set
 *   the test is failed through EXPECT_TRUE(0).
 *
 * \param pCB  control block of the job to dispatch
 ******************************************************************************/
static void kv_async_dispatch(async_CB_t *pCB)
{
    uint32_t updated = pCB->flags;

    /* the job leaves the queue and becomes active */
    updated = (updated & ~KV_ASYNC_CB_QUEUED) | KV_ASYNC_CB_RUNNING;
    pCB->flags = updated;

    if (pCB->flags & KV_ASYNC_CB_SET)
    {
        KV_TRC_IO(pFT, "DISPATCH: SET: %p", pCB);
        kv_async_SET_KEY(pCB);
        return;
    }

    if (pCB->flags & KV_ASYNC_CB_GET)
    {
        KV_TRC_IO(pFT, "DISPATCH: GET: %p", pCB);
        kv_async_GET_KEY(pCB);
        return;
    }

    if (pCB->flags & KV_ASYNC_CB_EXISTS)
    {
        KV_TRC_IO(pFT, "DISPATCH: EXI: %p", pCB);
        kv_async_EXISTS_KEY(pCB);
        return;
    }

    if (pCB->flags & KV_ASYNC_CB_DEL)
    {
        KV_TRC_IO(pFT, "DISPATCH: DEL: %p", pCB);
        kv_async_DEL_KEY(pCB);
        return;
    }

    /* no operation flag set: should not be here */
    EXPECT_TRUE(0);
}
/**
 *******************************************************************************
 * \brief
 *   issue an async get for the db entry at index pCB->len_i
 *
 * \details
 *   the control block address doubles as the operation tag and is recorded
 *   in pCB->tag before the call. The value is read into pCB->gvalue. A
 *   return of EAGAIN re-queues the job through kv_async_q_retry(); any other
 *   return is expected to be 0.
 *
 * \param pCB  control block of the job
 ******************************************************************************/
static void kv_async_GET_KEY(async_CB_t *pCB)
{
    const uint64_t cb_tag = (uint64_t)pCB;
    int32_t        status = 0;

    KV_TRC_IO(pFT, "GET_KEY: %p, %" PRIx64 " %d", pCB, cb_tag, pCB->len_i);

    pCB->tag = cb_tag;

    status = ark_get_async_cb(pCB->ark,
                              pCB->db[pCB->len_i].klen,
                              pCB->db[pCB->len_i].key,
                              pCB->db[pCB->len_i].vlen,
                              pCB->gvalue,
                              0,
                              pCB->cb,
                              cb_tag);

    if (status == EAGAIN)
    {
        /* could not be accepted right now, try again later */
        kv_async_q_retry(pCB);
        return;
    }

    EXPECT_EQ(0, status);
}
/** ******************************************************************************* * \brief * callback function for set/get/exists/del ******************************************************************************/ static void kv_async_cb(int errcode, uint64_t dt, int64_t res) { async_CB_t *pCB = (async_CB_t*)dt; kv_t *p_kv = NULL; uint64_t tag = (uint64_t)pCB; if (pCB == NULL) { KV_TRC_FFDC(pFT, "FFDC: pCB NULL"); return; } if (pCB->b_mark != B_MARK) { KV_TRC_FFDC(pFT, "FFDC: B_MARK FAILURE %p: %"PRIx64"", pCB, pCB->b_mark); return; } if (pCB->e_mark != E_MARK) { KV_TRC_FFDC(pFT, "FFDC: E_MARK FAILURE %p: %"PRIx64"", pCB, pCB->e_mark); return; } if (EBUSY == errcode) {kv_async_q_retry(pCB); goto done;} if (IS_GTEST) { EXPECT_EQ(0, errcode); EXPECT_EQ(tag, pCB->tag); } p_kv = pCB->db + pCB->len_i; ++pCB->len_i; if (pCB->flags & KV_ASYNC_CB_SET) { KV_TRC_IO(pFT, "KV_ASYNC_CB_SET, %p %d %d", pCB, pCB->len_i, pCB->len); if (0 != errcode) printf("ark_set failed, errcode=%d\n", errcode); if (tag != pCB->tag) printf("ark_set bad tag\n"); if (res != p_kv->vlen) printf("ark_set bad vlen\n"); if (IS_GTEST) { EXPECT_EQ(res, p_kv->vlen);} /* end of db len sequence, move to next step */ if (pCB->len_i == pCB->len) { if (pCB->flags & KV_ASYNC_CB_WRITE_PERF) { pCB->len_i = 0; kv_async_perf_done(pCB); goto done; } pCB->len_i = 0; pCB->flags &= ~KV_ASYNC_CB_SET; pCB->flags |= KV_ASYNC_CB_GET; kv_async_GET_KEY(pCB); goto done; } kv_async_SET_KEY(pCB); goto done; } else if (pCB->flags & KV_ASYNC_CB_GET) { uint32_t miscompare = memcmp(p_kv->value, pCB->gvalue, p_kv->vlen); KV_TRC_IO(pFT, "KV_ASYNC_CB_GET, %p %d %d", pCB, pCB->len_i, pCB->len); if (0 != errcode) printf("ark_get failed, errcode=%d\n", errcode); if (tag != pCB->tag) printf("ark_get bad tag\n"); if (res != p_kv->vlen) printf("ark_get bad vlen\n"); if (IS_GTEST) { EXPECT_EQ(0, miscompare);} /* end of db len sequence, move to next step */ if (pCB->len_i == pCB->len) { if (pCB->flags & KV_ASYNC_CB_READ_PERF) { pCB->len_i = 
0; kv_async_perf_done(pCB); goto done; } pCB->len_i = 0; pCB->flags &= ~KV_ASYNC_CB_GET; pCB->flags |= KV_ASYNC_CB_EXISTS; kv_async_EXISTS_KEY(pCB); goto done; } kv_async_GET_KEY(pCB); goto done; } else if (pCB->flags & KV_ASYNC_CB_EXISTS) { KV_TRC_IO(pFT, "KV_ASYNC_CB_EXISTS, %p %d %d", pCB, pCB->len_i, pCB->len); if (0 != errcode) printf("ark_exists failed,errcode=%d\n",errcode); if (tag != pCB->tag) printf("ark_exists bad tag\n"); if (res != p_kv->vlen) printf("ark_exists bad vlen\n"); if (IS_GTEST) { EXPECT_EQ(res, p_kv->vlen);} /* if end of db len sequence, move to next step */ if (pCB->len_i == pCB->len) { pCB->len_i = 0; pCB->flags &= ~KV_ASYNC_CB_EXISTS; if (pCB->flags & KV_ASYNC_CB_SGD) { pCB->flags |= KV_ASYNC_CB_DEL; kv_async_DEL_KEY(pCB); goto done; } else if (pCB->flags & KV_ASYNC_CB_REPLACE) { /* make sure we don't shutdown before we have replaced once */ if (pCB->replace && pCB->flags & KV_ASYNC_CB_SHUTDOWN) { pCB->flags |= KV_ASYNC_CB_DEL; kv_async_DEL_KEY(pCB); goto done; } pCB->replace = TRUE; if (0 != pCB->regen(pCB->db, pCB->len, pCB->regen_len)) { printf("regen failure, fatal\n"); KV_TRC_FFDC(pFT, "FFDC: regen failure"); memset(pCB, 0, sizeof(async_CB_t)); goto done; } pCB->flags |= KV_ASYNC_CB_SET; kv_async_SET_KEY(pCB); goto done; } else { /* should not be here */ EXPECT_TRUE(0); } } kv_async_EXISTS_KEY(pCB); goto done; } else if (pCB->flags & KV_ASYNC_CB_DEL) { KV_TRC_IO(pFT, "KV_ASYNC_CB_DEL, %p i:%d len:%d", pCB, pCB->len_i,pCB->len); if (0 != errcode) printf("ark_del failed, errcode=%d\n",errcode); if (tag != pCB->tag) printf("ark_del bad tag\n"); if (res != p_kv->vlen) printf("ark_del bad vlen\n"); if (IS_GTEST) { EXPECT_EQ(res, p_kv->vlen);} /* end of db len sequence, move to next step */ if (pCB->len_i == pCB->len) { if (pCB->flags & KV_ASYNC_CB_SHUTDOWN) { if (!(pCB->flags & KV_ASYNC_CB_MULTI_CTXT_IO)) { kv_db_destroy(pCB->db, pCB->len); } if (pCB->gvalue) free(pCB->gvalue); memset(pCB, 0, sizeof(async_CB_t)); KV_TRC_IO(pFT, 
"LOOP_DONE: %p", pCB); goto done; } KV_TRC_IO(pFT, "NEXT_LOOP, %p", pCB); pCB->flags &= ~KV_ASYNC_CB_DEL; pCB->flags |= KV_ASYNC_CB_SET; pCB->len_i = 0; kv_async_SET_KEY(pCB); goto done; } kv_async_DEL_KEY(pCB); goto done; } else { /* should not be here */ EXPECT_TRUE(0); } done: return; }
/** ******************************************************************************* * \brief ******************************************************************************/ void kv_async_run_jobs(void) { async_CB_t *pCB = NULL; uint32_t ctxt_running = 0; uint32_t jobs_running = 0; uint32_t i = 0; uint32_t next = 0; uint32_t elapse = 0; uint32_t inject = 0; uint32_t secs = 0; uint32_t log_interval = 600; uint64_t ops = 0; uint64_t ios = 0; uint32_t tops = 0; uint32_t tios = 0; uint32_t perf = 0; KV_TRC(pFT, "ASYNC START: 0 minutes"); if (!(pCTs->pCBs->flags & KV_ASYNC_CB_RUNNING)) start = time(0); next = log_interval; do { ctxt_running = FALSE; if (elapse > next) { KV_TRC(pFT, "ASYNC RUNNING: %d elapsed minutes", elapse/60); next += log_interval; } for (i=0; i<KV_ASYNC_MAX_CONTEXTS; i++) { if (! (pCTs[i].flags & KV_ASYNC_CT_RUNNING)) continue; jobs_running = kv_async_dispatch_jobs(i); if (!jobs_running) { pCTs[i].flags &= ~KV_ASYNC_CT_RUNNING; pCTs[i].flags |= KV_ASYNC_CT_DONE; KV_TRC(pFT, "ASYNC DONE ctxt %d %x", i, pCTs[i].flags); continue; } else { ctxt_running = TRUE; } elapse = time(0) - start; if (elapse >= inject && pCTs[i].flags & KV_ASYNC_CT_ERROR_INJECT) { KV_TRC_FFDC(pFT, "FFDC: INJECT ERRORS"); FVT_KV_INJECT_READ_ERROR; FVT_KV_INJECT_WRITE_ERROR; FVT_KV_INJECT_ALLOC_ERROR; ++inject; } if (elapse >= pCTs[i].secs) { for (pCB=pCTs[i].pCBs;pCB<pCTs[i].pCBs+KV_ASYNC_JOB_Q;pCB++) { if ((pCB->flags & KV_ASYNC_CB_RUNNING || pCB->flags & KV_ASYNC_CB_QUEUED) && (!(pCB->flags & KV_ASYNC_CB_SHUTDOWN)) ) { pCB->flags |= KV_ASYNC_CB_SHUTDOWN; KV_TRC_IO(pFT, "SHUTDOWN pCB %p (%d >= %d)", pCB, elapse, pCTs[i].secs); } } } usleep(100); } } while (ctxt_running); stop = time(0); secs = stop - start; KV_TRC(pFT, "ASYNC RUNNING DONE: %d minutes", elapse/60); /* log cleanup, since the first ark_delete closes the log file */ for (i=0; i<KV_ASYNC_MAX_CONTEXTS; i++) { if (pCTs[i].flags & KV_ASYNC_CT_DONE) KV_TRC(pFT, "ASYNC CLEANUP: ctxt:%d ark:%p", i, pCTs[i].ark); } /* check 
for MULTI_CTXT_IO, destroy common kv dbs */ for (pCB=pCTs->pCBs;pCB<pCTs->pCBs+KV_ASYNC_JOB_Q;pCB++) { if (pCB->flags & KV_ASYNC_CB_MULTI_CTXT_IO) { kv_db_destroy(pCB->db, pCB->len); } } for (i=0; i<KV_ASYNC_MAX_CONTEXTS; i++) { /* if this context didn't run any I/O */ if (! (pCTs[i].flags & KV_ASYNC_CT_DONE)) continue; pCTs[i].flags &= ~KV_ASYNC_CT_DONE; /* if perf then don't delete the ark here */ if (pCTs[i].flags & KV_ASYNC_CT_PERF) { perf = TRUE; continue; } (void)ark_stats(pCTs[i].ark, &ops, &ios); tops += (uint32_t)ops; tios += (uint32_t)ios; KV_TRC(pFT, "PERF ark%p ops:%"PRIu64" ios:%"PRIu64"", pCTs[i].ark, ops, ios); EXPECT_EQ(0, ark_delete(pCTs[i].ark)); } if (!perf) { tops = tops / secs; tios = tios / secs; printf("op/s:%d io/s:%d secs:%d\n", tops, tios, secs); KV_TRC(pFT, "PERF op/s:%d io/s:%d secs:%d", tops, tios, secs); } }
/**
 *******************************************************************************
 * \brief
 *   read or write a list of blocks and wait for all of them to finish
 *
 * \details
 *   block i of blist is transferred to/from addr + (i * ea->bsize).
 *   For a memory store each block is a memcpy. Otherwise the blocks are
 *   issued through cblk_aread/cblk_awrite in batches of at most max_ops
 *   commands, and each batch is harvested with cblk_aresult before the next
 *   batch is issued. After an issue error no more IO is issued, but the
 *   commands already issued in the batch are still harvested.
 *
 * \param ea     store to access; the EA lock is taken for read on entry and
 *               released before returning
 * \param op     ARK_EA_READ to read, any other value writes
 * \param addr   caller buffer of at least len blocks
 * \param blist  block list; a_tag of each entry is used for the async tag
 * \param len    number of entries in blist
 * \param nthrs  number of threads sharing the async command slots; values
 *               less than 1 are treated as 1
 *
 * \return 0 on success, else the errno of the first failure (EIO if errno
 *         was not set)
 ******************************************************************************/
int ea_async_io(EA *ea, int op, void *addr, ark_io_list_t *blist,
                int64_t len, int nthrs)
{
    int64_t   i        = 0;
    int64_t   j        = 0;
    int64_t   comps    = 0;
    int       num      = 0;
    int       max_ops  = 0;
    int       io_errno = 0;
    void     *m_rc     = NULL;
    int       rc       = 0;
    int       a_rc     = 0;
    uint64_t  status   = 0;
    uint8_t  *p_addr   = NULL;
    uint8_t  *m_addr   = NULL;
    char     *ot       = NULL;

    ARK_SYNC_EA_READ(ea);

    if (op == ARK_EA_READ) {ot="IO_RD";}
    else                   {ot="IO_WR";}

    if ( ea->st_type == EA_STORE_TYPE_MEMORY)
    {
        // Loop through the block list to issue the IO
        for (i = 0; i < len; i++)
        {
            p_addr = ((uint8_t*)addr) + (i * ea->bsize);

            // For in-memory Store, we issue the memcpy
            // and wait for the return, no async here.
            m_addr = ea->st_memory + (blist[i].blkno * ea->bsize);

            if (op == ARK_EA_READ) {m_rc = memcpy(p_addr, m_addr, ea->bsize);}
            else                   {m_rc = memcpy(m_addr, p_addr, ea->bsize);}

            if (check_sched_error_injects(op)) {m_rc=NULL;}
            if (check_harv_error_injects(op))  {m_rc=NULL;}

            if (m_rc == NULL)
            {
                // errno may be unset (e.g. an injected error), make sure
                // the failure is still reported to the caller
                rc = errno ? errno : EIO;
                break;
            }
        }
    }
    else
    {
        // divide up the cmd slots among the threads and go 3 less, but
        // always allow at least one cmd so the loop below makes progress
        if (nthrs < 1) {nthrs = 1;}
        max_ops = (ARK_EA_BLK_ASYNC_CMDS / nthrs) - 3;
        if (max_ops < 1) {max_ops = 1;}

        // Loop through the block list to issue the IO
        while ((comps < len) && (rc == 0))
        {
            for (i = comps, num = 0;
                 (i < len) && (num < max_ops);
                 i++, num++)
            {
                p_addr = ((uint8_t*)addr) + (i * ea->bsize);

                // Do an async op for a single block and tell the block
                // layer to wait if there are no available command
                // blocks. Upon return, we can either get an error
                // (rc == -1), the data will be available (rc == number
                // of blocks read), or IO has been scheduled (rc == 0).
                if (op == ARK_EA_READ)
                {
                    rc = cblk_aread(ea->st_flash, p_addr, blist[i].blkno, 1,
                                    &(blist[i].a_tag), NULL,
                                    CBLK_ARW_WAIT_CMD_FLAGS);
                }
                else
                {
                    rc = cblk_awrite(ea->st_flash, p_addr, blist[i].blkno, 1,
                                     &(blist[i].a_tag), NULL,
                                     CBLK_ARW_WAIT_CMD_FLAGS);
                }

                if (check_sched_error_injects(op)) {rc=-1;}

                // save errno before tracing, the trace may change it
                io_errno = errno;

                KV_TRC_IO(pAT, "%s:  id:%d blkno:%"PRIi64" rc:%d",
                          ot, ea->st_flash, blist[i].blkno, rc);

                if ( rc == -1 )
                {
                    // Error was encountered.  Don't issue any more IO
                    rc = io_errno ? io_errno : EIO;
                    KV_TRC_FFDC(pAT, "IO_ERR: cblk_aread/awrite failed, "
                                     "blkno:%"PRIi64" tag:%d, errno = %d",
                                blist[i].blkno, blist[i].a_tag, io_errno);
                    break;
                }

                // Data has already been returned so we don't need to
                // wait for the response below
                if ( rc > 0 )
                {
                    blist[i].a_tag = -1;
                    rc = 0;
                }
            }

            // For as many IOs that were issued, we loop to see if we
            // need to wait for the response or the data has already
            // been returned.
            for (j = comps; j < i; j++)
            {
                // Data has already been read
                if (blist[j].a_tag == -1)
                {
                    continue;
                }

                do
                {
                    a_rc = cblk_aresult(ea->st_flash, &(blist[j].a_tag),
                                        &status, CBLK_ARESULT_BLOCKING);

                    if (check_harv_error_injects(op)) {a_rc=-1;}

                    // There was an error, check to see if we haven't
                    // encountered an error previously and if not, then
                    // set rc.  Continue processing so that we harvest
                    // all outstanding responses
                    if (a_rc == -1)
                    {
                        io_errno = errno;
                        if (rc == 0)
                        {
                            rc = io_errno ? io_errno : EIO;
                        }
                        KV_TRC_IO(pAT, "IO_ERR: id:%d blkno:%ld status:%ld a_rc:%d",
                                  ea->st_flash, blist[j].blkno, status, a_rc);
                    }
                    else
                    {
                        KV_TRC_IO(pAT, "IO_CMP: id:%d blkno:%ld status:%ld a_rc:%d",
                                  ea->st_flash, blist[j].blkno, status, a_rc);
                    }

                    // If a_rc is 0, that means we got interrupted somehow
                    // so we need to retry the operation.
                } while (a_rc == 0);
            }

            // If we start another loop, start off where we finished
            // in this loop.
            comps = i;
        }
    }

    ARK_SYNC_EA_UNLOCK(ea);

    return rc;
}
// if successful returns length of value void ark_exist_start(_ARK *_arkp, int tid, tcb_t *tcbp) { scb_t *scbp = &(_arkp->poolthreads[tid]); rcb_t *rcbp = &(_arkp->rcbs[tcbp->rtag]); tcb_t *iotcbp = &(_arkp->tcbs[rcbp->ttag]); iocb_t *iocbp = &(_arkp->iocbs[rcbp->ttag]); ark_io_list_t *bl_array = NULL; int32_t rc = 0; // Now that we have the hash entry, get the block // that holds the control information for the entry. tcbp->hblk = HASH_LBA(HASH_GET(_arkp->ht, rcbp->pos)); // If there is no control block for this hash // entry, then the key is not present in the hash. // Set the error if ( tcbp->hblk == 0 ) { KV_TRC_FFDC(pAT, "rc = ENOENT key %p, klen %"PRIu64" ttag:%d", rcbp->key, rcbp->klen, tcbp->ttag); rcbp->res = -1; rcbp->rc = ENOENT; tcbp->state = ARK_CMD_DONE; goto ark_exist_start_err; } // Set up the in-buffer to read in the hash bucket // that contains the key tcbp->blen = bl_len(_arkp->bl, tcbp->hblk); rc = bt_growif(&(tcbp->inb), &(tcbp->inb_orig), &(tcbp->inblen), (tcbp->blen * _arkp->bsize)); if (rc != 0) { KV_TRC_FFDC(pAT, "bt_growif failed tcbp:%p ttag:%d", tcbp, tcbp->ttag); rcbp->res = -1; rcbp->rc = rc; tcbp->state = ARK_CMD_DONE; goto ark_exist_start_err; } // Create a chain of blocks to be passed to be read bl_array = bl_chain(_arkp->bl, tcbp->hblk, tcbp->blen); if (bl_array == NULL) { KV_TRC_FFDC(pAT, "bl_chain failed tcbp:%p ttag:%d", tcbp, tcbp->ttag); rcbp->rc = ENOMEM; rcbp->res = -1; tcbp->state = ARK_CMD_DONE; goto ark_exist_start_err; } scbp->poolstats.io_cnt += tcbp->blen; KV_TRC_IO(pAT, "read hash entry ttag:%d", tcbp->ttag); ea_async_io_init(_arkp, ARK_EA_READ, (void *)tcbp->inb, bl_array, tcbp->blen, 0, tcbp->ttag, ARK_EXIST_FINISH); if (ea_async_io_schedule(_arkp, tid, iotcbp, iocbp) && ea_async_io_harvest (_arkp, tid, iotcbp, iocbp, rcbp)) { ark_exist_finish(_arkp, tid, tcbp); } ark_exist_start_err: return; }
/**
 *******************************************************************************
 * \brief
 *   issue the IOs of an iocb, from block iocbp->start up to iocbp->nblks
 *
 * \details
 *   memory store: each block is copied with memcpy and counted as issued.
 *   hw store: each block is issued with cblk_aread/cblk_awrite; a command
 *   that completes immediately is counted as issued and completed and its
 *   a_tag is set to -1. On EAGAIN the loop stops without failing the iocb;
 *   on any other error iocbp->io_error is set (EIO if errno was not set).
 *   In all cases the iotcb state becomes ARK_IO_HARVEST and iocbp->start is
 *   left at the first block which was not issued.
 *
 * \return TRUE if no block handled by this call was left pending or failed,
 *         else FALSE
 ******************************************************************************/
int ea_async_io_schedule(_ARK *_arkp, int32_t tid, tcb_t *iotcbp, iocb_t *iocbp)
{
    EA       *store      = iocbp->ea;
    int32_t   all_done   = TRUE;
    int32_t   issue_rc   = 0;
    void     *copied     = 0;
    int64_t   blk        = 0;
    uint8_t  *caller_buf = NULL;
    uint8_t  *store_buf  = NULL;
    char     *op_name    = NULL;

    KV_TRC_IO(pAT, "IO_BEG: SCHEDULE_START: tid:%d ttag:%d start:%"PRIu64" "
                   "nblks:%"PRIu64" issT:%d cmpT:%d",
              tid, iocbp->tag, iocbp->start, iocbp->nblks,
              iocbp->issT, iocbp->cmpT);

    ARK_SYNC_EA_READ(iocbp->ea);

    op_name = (iocbp->op == ARK_EA_READ) ? "IO_RD" : "IO_WR";

    for (blk = iocbp->start; blk < iocbp->nblks; ++blk)
    {
        /* position of this block in the caller buffer */
        caller_buf = ((uint8_t *)(iocbp->addr)) + (blk * store->bsize);

        if (store->st_type != EA_STORE_TYPE_MEMORY)
        {
            /* r/w to hw; an injected error replaces the real call */
            if (check_sched_error_injects(iocbp->op))
            {
                issue_rc = -1;
            }
            else if (iocbp->op == ARK_EA_READ)
            {
                issue_rc = cblk_aread(store->st_flash, caller_buf,
                                      iocbp->blist[blk].blkno, 1,
                                      &(iocbp->blist[blk].a_tag), NULL, 0);
            }
            else
            {
                issue_rc = cblk_awrite(store->st_flash, caller_buf,
                                       iocbp->blist[blk].blkno, 1,
                                       &(iocbp->blist[blk].a_tag), NULL, 0);
            }

            if (issue_rc < 0)
            {
                all_done = FALSE;

                if (errno == EAGAIN)
                {
                    /* return, and an ark thread will re-schedule this iocb */
                    KV_TRC_DBG(pAT,"IO:     RW_EAGAIN: tid:%d ttag:%d "
                                   "blkno:%"PRIi64"",
                               tid, iocbp->tag, iocbp->blist[blk].blkno);
                    break;
                }

                /* any other failure fails the iocb */
                KV_TRC_FFDC(pAT,"IO_ERR: tid:%d ttag:%d blkno:%"PRIi64""
                                " errno:%d",
                            tid, iocbp->tag, iocbp->blist[blk].blkno, errno);
                if (!errno) {KV_TRC_FFDC(pAT, "IO:     UNSET_ERRNO"); errno=EIO;}
                iocbp->io_error = errno;
                break;
            }

            if (issue_rc == 0)
            {
                /* cmd accepted, completion has to be harvested */
                ++iocbp->issT;
                all_done = FALSE;
            }
            else
            {
                KV_TRC_IO(pAT,"IO_CMP: IMMEDIATE: tid:%d ttag:%d a_tag:%d "
                              "blkno:%"PRIi64"",
                          tid, iocbp->tag,
                          iocbp->blist[blk].a_tag, iocbp->blist[blk].blkno);
                ++iocbp->issT;
                ++iocbp->cmpT;
                iocbp->blist[blk].a_tag = -1; /* mark as harvested */
            }
        }
        else
        {
            store_buf = store->st_memory +
                        (iocbp->blist[blk].blkno * store->bsize);

            if (ARK_EA_READ == iocbp->op)
            {
                copied = memcpy(caller_buf, store_buf, store->bsize);
            }
            else
            {
                copied = memcpy(store_buf, caller_buf, store->bsize);
            }

            if (check_sched_error_injects(iocbp->op)) {copied = NULL;}

            /* a failed (or injected as failed) copy fails the iocb */
            if (copied == NULL)
            {
                all_done = FALSE;
                KV_TRC_FFDC(pAT,"IO_ERR: tid:%d ttag:%d blkno:%"PRIi64""
                                " errno:%d",
                            tid, iocbp->tag, iocbp->blist[blk].blkno, errno);
                if (!errno) {KV_TRC_FFDC(pAT, "IO:     UNSET_ERRNO"); errno=EIO;}
                iocbp->io_error = errno;
                break;
            }

            ++iocbp->issT;
            iocbp->blist[blk].a_tag = blk;
        }

        KV_TRC_IO(pAT, "%s:  tid:%2d ttag:%4d a_tag:%4d blkno:%5"PRIi64"",
                  op_name, tid, iocbp->tag,
                  iocbp->blist[blk].a_tag, iocbp->blist[blk].blkno);
    }

    iotcbp->state = ARK_IO_HARVEST;
    iocbp->start  = blk;

    ARK_SYNC_EA_UNLOCK(iocbp->ea);

    return all_done;
}
/**
 *******************************************************************************
 * \brief
 *   collect completions for the IOs issued so far for an iocb and decide
 *   the next state of its iotcb
 *
 * \details
 *   memory store IOs were already done at schedule time and are counted as
 *   completed here. For a hw store each cmd not yet marked harvested
 *   (a_tag != -1) is polled with cblk_aresult; the scan stops at the first
 *   cmd which is not complete. An error is recorded in iocbp->io_error
 *   (EIO if errno was not set) and the cmd is still counted as completed.
 *
 *   outcome:
 *     - error and every issued cmd completed: the request is failed
 *       (res=-1, rc=io_error), state ARK_CMD_DONE, blist is freed
 *     - error with cmds outstanding: nothing changes, harvest again
 *     - all nblks completed: state ARK_IO_DONE, blist is freed
 *     - blocks still to be issued: state ARK_IO_SCHEDULE
 *     - otherwise: nothing changes, harvest again
 *
 * \return TRUE if the IOs for the iocb are successfully completed, else FALSE
 ******************************************************************************/
int ea_async_io_harvest(_ARK   *_arkp,
                        int32_t tid,
                        tcb_t  *iotcbp,
                        iocb_t *iocbp,
                        rcb_t  *iorcbp)
{
    EA       *store     = iocbp->ea;
    int32_t   cmd       = 0;
    int32_t   poll_rc   = 0;
    uint64_t  status    = 0;
    scb_t    *pool      = &(_arkp->poolthreads[tid]);
    queue_t  *req_q     = pool->rqueue;
    queue_t  *task_q    = pool->tqueue;
    queue_t  *io_q      = pool->ioqueue;

    for (cmd = 0; cmd < iocbp->issT; ++cmd)
    {
        if (EA_STORE_TYPE_MEMORY != store->st_type)
        {
            /* skip previously harvested cmd */
            if (iocbp->blist[cmd].a_tag == -1) {continue;}

            poll_rc = cblk_aresult(store->st_flash,
                                   &(iocbp->blist[cmd].a_tag), &status, 0);
        }
        else
        {
            /* the copy was done in the schedule function, so it is complete */
            poll_rc = 1;
        }

        if (check_harv_error_injects(iocbp->op)) {poll_rc = -1;}

        if (poll_rc == 0)
        {
            KV_TRC_DBG(pAT,"IO:     WAIT_NOT_CMP: tid:%d ttag:%d a_tag:%d "
                           "blkno:%"PRIi64"",
                       tid, iocbp->tag,
                       iocbp->blist[cmd].a_tag, iocbp->blist[cmd].blkno);
            ++iocbp->hmissN;

            /* if nothing to do and the first harvest missed, usleep */
            if (queue_empty(req_q)      &&
                queue_empty(task_q)     &&
                queue_count(io_q) <= 8  &&
                iocbp->hmissN == 1      &&
                _arkp->ea->st_type != EA_STORE_TYPE_MEMORY)
            {
                usleep(50);
                KV_TRC_DBG(pAT,"IO:     USLEEP");
            }
            break;
        }

        if (poll_rc > 0)
        {
            KV_TRC_IO(pAT,"IO_CMP: tid:%2d ttag:%4d a_tag:%4d blkno:%5"PRIi64"",
                      tid, iocbp->tag,
                      iocbp->blist[cmd].a_tag, iocbp->blist[cmd].blkno);
        }
        else
        {
            KV_TRC_FFDC(pAT, "IO_ERR: tid:%d ttag:%d errno=%d",
                        tid, iocbp->tag, errno);
            if (!errno) {KV_TRC_FFDC(pAT, "UNSET_ERRNO"); errno=EIO;}
            iocbp->io_error = errno;
        }

        /* success or failure, this cmd is finished */
        ++iocbp->cmpT;
        iocbp->blist[cmd].a_tag = -1; /* mark as harvested */
    }

    if (iocbp->io_error)
    {
        if (iocbp->issT != iocbp->cmpT)
        {
            /* IOs outstanding, harvest the remaining IOs for this iocb */
            KV_TRC_FFDC(pAT,"IO:    ERROR_RE_HARVEST: tid:%d ttag:%d "
                            "iocbp->issT:%d iocbp->cmpT:%d",
                        tid, iocbp->tag, iocbp->issT, iocbp->cmpT);
            return FALSE;
        }

        /* every issued cmd (success or fail) is complete, fail the iocb */
        iorcbp->res   = -1;
        iorcbp->rc    = iocbp->io_error;
        iotcbp->state = ARK_CMD_DONE;
        am_free(iocbp->blist);
        KV_TRC_FFDC(pAT, "IO:     ERROR_DONE: tid:%d ttag:%d rc:%d",
                    tid, iocbp->tag, iorcbp->rc);
        return FALSE;
    }

    if (iocbp->cmpT == iocbp->nblks)
    {
        /* all IO has completed successfully for this iocb, done */
        am_free(iocbp->blist);
        iotcbp->state = ARK_IO_DONE;
        KV_TRC_IO(pAT, "IO_END: SUCCESS tid:%d ttag:%d cmpT:%d",
                  tid, iocbp->tag, iocbp->cmpT);
        return TRUE;
    }

    if (iocbp->issT < iocbp->nblks)
    {
        /* more blks need an IO, schedule */
        iotcbp->state = ARK_IO_SCHEDULE;
        KV_TRC_IO(pAT,"IO:     RE_SCHEDULE: tid:%d ttag:%d "
                      "iocbp->issT:%d iocbp->nblks:%"PRIi64" ",
                  tid, iocbp->tag, iocbp->issT, iocbp->nblks);
        return FALSE;
    }

    /* all IOs have been issued but not all are completed, do harvest */
    KV_TRC_IO(pAT,"IO:     RE_HARVEST: tid:%d ttag:%d "
                  "iocbp->cmpT:%d iocbp->issT:%d",
              tid, iocbp->tag, iocbp->cmpT, iocbp->issT);
    return FALSE;
}