/**
 *******************************************************************************
 * \brief
 *   Reallocate an IV so that it can hold n * m bits and refresh its header.
 *
 *   The allocation is sizeof(IV) plus divup(n * m, 64) 64-bit words. On
 *   success the header fields n, m, bits, words, mask and bar are rewritten;
 *   mask gets the low m bits set and bar is 64 - m.
 *
 * \param piv  IV handed to am_realloc (presumably realloc-like: on failure the
 *             original block is left untouched - confirm against am_realloc)
 * \param n    first size factor (presumably the number of entries)
 * \param m    second size factor (presumably the bit width of one entry)
 *
 * \return the pointer returned by am_realloc, or NULL with errno = ENOMEM when
 *         n * m does not fit in 64 bits or am_realloc fails
 ******************************************************************************/
IV *iv_resize(IV *piv, uint64_t n, uint64_t m)
{
    const uint64_t all_ones = ~(uint64_t)0;
    uint64_t       bits     = 0;
    uint64_t       words    = 0;
    uint64_t       bytes    = 0;
    IV            *iv       = NULL;

    // n * m must not wrap, otherwise the buffer would be silently undersized;
    // an oversized request is reported like any other allocation failure
    if (m == 0 || n <= all_ones / m)
    {
        bits  = n * m;
        words = divup(bits, 64);
        bytes = sizeof(IV) + words * sizeof(uint64_t);
        iv    = am_realloc(piv, bytes);
    }

    if (iv == NULL)
    {
        errno = ENOMEM;
        KV_TRC_FFDC(pAT, "FFDC: iv %p n %"PRIu64" m %"PRIu64", errno = %d",
                    piv, n, m, errno);
    }
    else
    {
        iv->n     = n;
        iv->m     = m;
        iv->bits  = bits;
        iv->words = words;

        if (m < 64)
        {
            iv->mask   = 1;
            iv->mask <<= m;
            iv->mask  -= 1;
        }
        else
        {
            // shifting a 64-bit value by 64 or more is undefined behaviour;
            // a width of 64 selects every bit
            iv->mask = all_ones;
        }

        // NOTE(review): wraps for m > 64; presumably callers keep m <= 64
        iv->bar = 64 - m;
    }

    KV_TRC_DBG(pAT, "iv %p n %"PRIu64" m %"PRIu64"", piv, n, m);
    return iv;
}
/**
 *******************************************************************************
 * \brief
 *   Flag an async control block for retry: the RUNNING bit is cleared and the
 *   QUEUED bit is set, then the result is written back to pCB->flags.
 *
 * \param pCB  control block whose flags are updated
 ******************************************************************************/
static void kv_async_q_retry(async_CB_t *pCB)
{
    /* compute the replacement flag word up front, store it after the trace */
    uint32_t requeued = (pCB->flags & ~KV_ASYNC_CB_RUNNING) | KV_ASYNC_CB_QUEUED;

    KV_TRC_DBG(pFT, "Q_RETRY %p", pCB);

    pCB->flags = requeued;
}
/**
 *******************************************************************************
 * \brief
 *   Issue an async delete for the key at pCB->db[pCB->len_i].
 *
 *   The control block address is used as the tag and stored in pCB->tag before
 *   ark_del_async_cb is called. EAGAIN re-queues the control block through
 *   kv_async_q_retry; any other return code is checked against 0 with
 *   EXPECT_EQ.
 *
 * \param pCB  control block describing the ark, key database and callback
 ******************************************************************************/
static void kv_async_DEL_KEY(async_CB_t *pCB)
{
    uint64_t tag = (uint64_t)pCB;
    int32_t  rc  = 0;

    KV_TRC_DBG(pFT, "DEL_KEY: %p, %" PRIx64 "", pCB, tag);

    pCB->tag = tag;

    rc = ark_del_async_cb(pCB->ark,
                          pCB->db[pCB->len_i].klen,
                          pCB->db[pCB->len_i].key,
                          pCB->cb,
                          tag);

    /* the ark could not accept the request right now: queue it for retry */
    if (EAGAIN == rc)
    {
        kv_async_q_retry(pCB);
        return;
    }

    EXPECT_EQ(0, rc);
}
/**
 *******************************************************************************
 * \brief
 *   Issue the IO for every block of an iocb, from iocbp->start up to
 *   iocbp->nblks.
 *
 *   For a memory store each block is copied with memcpy and counted as issued.
 *   Otherwise each block is submitted with cblk_aread/cblk_awrite:
 *     - status 0  : command issued, completion is collected later
 *     - status >0 : command completed immediately, counted as issued+completed
 *     - status <0 : EAGAIN stops the loop so the iocb can be re-scheduled;
 *                   any other errno is recorded in iocbp->io_error
 *   The loop stops at the first failure. On exit the task state is always set
 *   to ARK_IO_HARVEST and iocbp->start records the block to resume from.
 *
 * \param _arkp   not referenced by this function
 * \param tid     thread id, used for tracing only
 * \param iotcbp  task control block whose state is advanced
 * \param iocbp   IO control block describing the transfer
 *
 * \return TRUE if all IOs for the iocb are successfully completed, else FALSE
 ******************************************************************************/
int ea_async_io_schedule(_ARK *_arkp, int32_t tid, tcb_t *iotcbp, iocb_t *iocbp)
{
    EA      *store    = iocbp->ea;
    int32_t  all_done = TRUE;
    int32_t  status   = 0;
    void    *copied   = 0;
    int64_t  blk      = 0;
    uint8_t *buf      = NULL;
    uint8_t *mem      = NULL;
    uint8_t *dst      = NULL;
    uint8_t *src      = NULL;
    char    *op_name  = NULL;

    KV_TRC_IO(pAT, "IO_BEG: SCHEDULE_START: tid:%d ttag:%d start:%"PRIu64" "
                   "nblks:%"PRIu64" issT:%d cmpT:%d",
              tid, iocbp->tag, iocbp->start, iocbp->nblks,
              iocbp->issT, iocbp->cmpT);

    // paired with ARK_SYNC_EA_UNLOCK at the end of the function
    ARK_SYNC_EA_READ(iocbp->ea);

    op_name = (iocbp->op == ARK_EA_READ) ? "IO_RD" : "IO_WR";

    for (blk = iocbp->start; blk < iocbp->nblks; blk++)
    {
        // caller buffer address for this block, same for both store types
        buf = ((uint8_t *)(iocbp->addr)) + (blk * store->bsize);

        if (store->st_type == EA_STORE_TYPE_MEMORY)
        {
            mem = store->st_memory + (iocbp->blist[blk].blkno * store->bsize);

            // a read fills the caller buffer, a write fills the store
            if (ARK_EA_READ == iocbp->op) {dst = buf; src = mem;}
            else                          {dst = mem; src = buf;}

            copied = memcpy(dst, src, store->bsize);

            if (check_sched_error_injects(iocbp->op)) {copied = NULL;}

            if (copied != NULL)
            {
                ++iocbp->issT;
                iocbp->blist[blk].a_tag = blk;
            }
            else
            {
                // the copy failed (or an error was injected), fail the iocb
                all_done = FALSE;
                KV_TRC_FFDC(pAT,"IO_ERR: tid:%d ttag:%d blkno:%"PRIi64""
                                " errno:%d",
                            tid, iocbp->tag, iocbp->blist[blk].blkno, errno);
                if (!errno) {KV_TRC_FFDC(pAT, "IO: UNSET_ERRNO"); errno=EIO;}
                iocbp->io_error = errno;
                break;
            }
        }
        else
        {
            // read/write goes to the hardware store
            if (check_sched_error_injects(iocbp->op))
            {
                status = -1;
            }
            else if (iocbp->op == ARK_EA_READ)
            {
                status = cblk_aread(store->st_flash, buf,
                                    iocbp->blist[blk].blkno, 1,
                                    &(iocbp->blist[blk].a_tag), NULL, 0);
            }
            else
            {
                status = cblk_awrite(store->st_flash, buf,
                                     iocbp->blist[blk].blkno, 1,
                                     &(iocbp->blist[blk].a_tag), NULL, 0);
            }

            if (status < 0)
            {
                all_done = FALSE;

                if (errno != EAGAIN)
                {
                    // something went badly wrong, fail the iocb
                    KV_TRC_FFDC(pAT,"IO_ERR: tid:%d ttag:%d blkno:%"PRIi64""
                                    " errno:%d",
                                tid, iocbp->tag, iocbp->blist[blk].blkno,
                                errno);
                    if (!errno)
                    {
                        KV_TRC_FFDC(pAT, "IO: UNSET_ERRNO");
                        errno=EIO;
                    }
                    iocbp->io_error = errno;
                }
                else
                {
                    // stop here, an ark thread will re-schedule this iocb
                    KV_TRC_DBG(pAT,"IO: RW_EAGAIN: tid:%d ttag:%d "
                                   "blkno:%"PRIi64"",
                               tid, iocbp->tag, iocbp->blist[blk].blkno);
                }
                break;
            }

            if (status == 0)
            {
                // issued, the completion still has to be harvested
                ++iocbp->issT;
                all_done = FALSE;
            }
            else
            {
                // positive status: the command completed immediately
                KV_TRC_IO(pAT,"IO_CMP: IMMEDIATE: tid:%d ttag:%d a_tag:%d "
                              "blkno:%"PRIi64"",
                          tid, iocbp->tag, iocbp->blist[blk].a_tag,
                          iocbp->blist[blk].blkno);
                ++iocbp->issT;
                ++iocbp->cmpT;
                iocbp->blist[blk].a_tag = -1; // mark as harvested
            }
        }

        KV_TRC_IO(pAT, "%s: tid:%2d ttag:%4d a_tag:%4d blkno:%5"PRIi64"",
                  op_name, tid, iocbp->tag, iocbp->blist[blk].a_tag,
                  iocbp->blist[blk].blkno);
    }

    iotcbp->state = ARK_IO_HARVEST;
    iocbp->start  = blk;

    ARK_SYNC_EA_UNLOCK(iocbp->ea);

    return all_done;
}
/**
 *******************************************************************************
 * \brief
 *   Collect completions for the commands already issued for an iocb and pick
 *   the next task state.
 *
 *   Commands 0..issT-1 are visited in order. For a memory store every command
 *   is treated as complete; otherwise commands already marked harvested
 *   (a_tag == -1) are skipped and the rest are polled with cblk_aresult. The
 *   walk stops at the first command that has not completed yet. A failed
 *   command records errno in iocbp->io_error and is still counted as
 *   completed.
 *
 *   Afterwards:
 *     - io_error set, nothing outstanding : fail the request (ARK_CMD_DONE)
 *     - io_error set, IOs outstanding     : state unchanged, harvest again
 *     - cmpT == nblks                     : success (ARK_IO_DONE)
 *     - issT < nblks                      : more to issue (ARK_IO_SCHEDULE)
 *     - otherwise                         : state unchanged, harvest again
 *
 * \param _arkp   ark instance, source of the per-thread queues
 * \param tid     index into _arkp->poolthreads, also used for tracing
 * \param iotcbp  task control block whose state is advanced
 * \param iocbp   IO control block being harvested
 * \param iorcbp  request control block that receives res/rc on failure
 *
 * \return TRUE if the IOs for the iocb are successfully completed, else FALSE
 ******************************************************************************/
int ea_async_io_harvest(_ARK   *_arkp,
                        int32_t tid,
                        tcb_t  *iotcbp,
                        iocb_t *iocbp,
                        rcb_t  *iorcbp)
{
    EA       *store   = iocbp->ea;
    int32_t   cmd     = 0;
    int32_t   result  = 0;
    uint64_t  status  = 0;
    scb_t    *thread  = &(_arkp->poolthreads[tid]);
    queue_t  *reqq    = thread->rqueue;
    queue_t  *taskq   = thread->tqueue;
    queue_t  *ioq     = thread->ioqueue;

    for (cmd = 0; cmd < iocbp->issT; cmd++)
    {
        if (EA_STORE_TYPE_MEMORY != store->st_type)
        {
            // skip previously harvested cmd
            if (iocbp->blist[cmd].a_tag == -1) {continue;}

            result = cblk_aresult(store->st_flash,
                                  &(iocbp->blist[cmd].a_tag), &status, 0);
        }
        else
        {
            // the IO was already done by the schedule function,
            // so mark it completed
            result = 1;
        }

        if (check_harv_error_injects(iocbp->op)) {result = -1;}

        if (result == 0)
        {
            KV_TRC_DBG(pAT,"IO: WAIT_NOT_CMP: tid:%d ttag:%d a_tag:%d "
                           "blkno:%"PRIi64"",
                       tid, iocbp->tag, iocbp->blist[cmd].a_tag,
                       iocbp->blist[cmd].blkno);
            ++iocbp->hmissN;

            // if nothing to do and the first harvest missed, usleep
            if (queue_empty(reqq)       &&
                queue_empty(taskq)      &&
                queue_count(ioq) <= 8   &&
                iocbp->hmissN == 1      &&
                _arkp->ea->st_type != EA_STORE_TYPE_MEMORY)
            {
                usleep(50);
                KV_TRC_DBG(pAT,"IO: USLEEP");
            }
            break;
        }

        if (result >= 0)
        {
            KV_TRC_IO(pAT,"IO_CMP: tid:%2d ttag:%4d a_tag:%4d blkno:%5"PRIi64"",
                      tid, iocbp->tag, iocbp->blist[cmd].a_tag,
                      iocbp->blist[cmd].blkno);
        }
        else
        {
            KV_TRC_FFDC(pAT, "IO_ERR: tid:%d ttag:%d errno=%d",
                        tid, iocbp->tag, errno);
            if (!errno) {KV_TRC_FFDC(pAT, "UNSET_ERRNO"); errno=EIO;}
            iocbp->io_error = errno;
        }

        // success or failure, this command is finished
        ++iocbp->cmpT;
        iocbp->blist[cmd].a_tag = -1; // mark as harvested
    }

    if (iocbp->io_error)
    {
        if (iocbp->issT != iocbp->cmpT)
        {
            // IOs outstanding, harvest the remaining IOs for this iocb
            KV_TRC_FFDC(pAT,"IO: ERROR_RE_HARVEST: tid:%d ttag:%d "
                            "iocbp->issT:%d iocbp->cmpT:%d",
                        tid, iocbp->tag, iocbp->issT, iocbp->cmpT);
        }
        else
        {
            // every issued cmd (success or fail) has completed,
            // so fail this iocb
            iorcbp->res   = -1;
            iorcbp->rc    = iocbp->io_error;
            iotcbp->state = ARK_CMD_DONE;
            am_free(iocbp->blist);
            KV_TRC_FFDC(pAT, "IO: ERROR_DONE: tid:%d ttag:%d rc:%d",
                        tid, iocbp->tag, iorcbp->rc);
        }
        return FALSE;
    }

    // all IO has completed successfully for this iocb, done
    if (iocbp->cmpT == iocbp->nblks)
    {
        am_free(iocbp->blist);
        iotcbp->state = ARK_IO_DONE;
        KV_TRC_IO(pAT, "IO_END: SUCCESS tid:%d ttag:%d cmpT:%d",
                  tid, iocbp->tag, iocbp->cmpT);
        return TRUE;
    }

    // more blks need an IO, schedule
    if (iocbp->issT < iocbp->nblks)
    {
        iotcbp->state = ARK_IO_SCHEDULE;
        KV_TRC_IO(pAT,"IO: RE_SCHEDULE: tid:%d ttag:%d "
                      "iocbp->issT:%d iocbp->nblks:%"PRIi64" ",
                  tid, iocbp->tag, iocbp->issT, iocbp->nblks);
        return FALSE;
    }

    // all IOs have been issued but not all are completed, do harvest
    KV_TRC_IO(pAT,"IO: RE_HARVEST: tid:%d ttag:%d "
                  "iocbp->cmpT:%d iocbp->issT:%d",
              tid, iocbp->tag, iocbp->cmpT, iocbp->issT);
    return FALSE;
}