/** ******************************************************************************** ** \brief main ** \details ** process input parms \n ** open device \n ** alloc memory \n ** loop running IO until secs expire \n ** print IO stats \n ** cleanup *******************************************************************************/ int main(int argc, char **argv) { struct timeval start, delta; long int mil = 1000000; float esecs = 0; uint8_t *rbuf = NULL; uint8_t *wbuf = NULL; OP_t *op = NULL; char *dev = NULL; char FF = 0xFF; char c = '\0'; chunk_ext_arg_t ext = 0; int flags = 0; int i, rc = 0; int id = 0; char *_secs = NULL; char *_QD = NULL; char *_RD = NULL; char *_nblocks = NULL; uint32_t plun = 0; uint32_t nsecs = 4; uint32_t QD = 256; uint32_t nRD = 100; uint32_t RD = 0; uint32_t WR = 0; uint32_t intrp_thds = 0; int rtag = 0; int htag = 0; uint32_t lba = 0; size_t nblks = 0; uint32_t nblocks = 1; uint32_t cnt = 0; uint32_t tmiss = 0; uint64_t status = 0; uint32_t TI = TIME_INTERVAL; uint32_t N = 0; uint32_t TIME = 1; uint32_t COMP = 0; uint32_t miss = 0; uint64_t tlat = 0; double ns_per_tick = 0; /*-------------------------------------------------------------------------- * process and verify input parms *------------------------------------------------------------------------*/ while (FF != (c=getopt(argc, argv, "d:r:q:n:s:phi"))) { switch (c) { case 'd': dev = optarg; break; case 'r': _RD = optarg; break; case 'q': _QD = optarg; break; case 'n': _nblocks = optarg; break; case 's': _secs = optarg; break; case 'p': plun = 1; break; case 'i': intrp_thds = 1; break; case 'h': case '?': usage(); break; } } if (_secs) nsecs = atoi(_secs); if (_QD) QD = atoi(_QD); if (_nblocks) nblocks = atoi(_nblocks); if (_RD) nRD = atoi(_RD); if (QD > _8K) QD = _8K; if (nRD > 100) nRD = 100; if (!plun && nblocks > 1) { printf("error: <-n %d> can only be used with a physical lun\n",nblocks); usage(); } if (dev == NULL) usage(); srand48(time(0)); ns_per_tick = time_per_tick(1000, 
100); N = QD; COMP = QD < 8 ? 1 : QD/8; /*-------------------------------------------------------------------------- * open device and set lun size *------------------------------------------------------------------------*/ rc = cblk_init(NULL,0); if (rc) { fprintf(stderr,"cblk_init failed with rc = %d and errno = %d\n", rc,errno); exit(1); } if (!plun) flags = CBLK_OPN_VIRT_LUN; if (!intrp_thds) flags |= CBLK_OPN_NO_INTRP_THREADS; id = cblk_open(dev, QD, O_RDWR, ext, flags); if (id == NULL_CHUNK_ID) { if (ENOSPC == errno) fprintf(stderr,"cblk_open: ENOSPC\n"); else if (ENODEV == errno) fprintf(stderr,"cblk_open: ENODEV\n"); else fprintf(stderr,"cblk_open: errno:%d\n",errno); cblk_term(NULL,0); exit(errno); } rc = cblk_get_lun_size(id, &nblks, 0); if (rc) { fprintf(stderr, "cblk_get_lun_size failed: errno: %d\n", errno); exit(errno); } if (!plun) { nblks = nblks > SET_NBLKS ? SET_NBLKS : nblks; rc = cblk_set_size(id, nblks, 0); if (rc) { fprintf(stderr, "cblk_set_size failed, errno: %d\n", errno); exit(errno); } } lba = lrand48() % nblks; /*-------------------------------------------------------------------------- * alloc data for IO *------------------------------------------------------------------------*/ op = malloc(QD*sizeof(OP_t)); if ((rc=posix_memalign((void**)&rbuf, _4K, _4K*nblocks))) { fprintf(stderr,"posix_memalign failed, size=%d, rc=%d\n", _4K*nblocks, rc); cblk_close(id,0); cblk_term(NULL,0); exit(0); } if ((rc=posix_memalign((void**)&wbuf, _4K, _4K*nblocks))) { fprintf(stderr,"posix_memalign failed, size=%d, rc=%d\n", _4K*nblocks, rc); cblk_close(id,0); cblk_term(NULL,0); exit(0); } memset(wbuf,0x79,_4K*nblocks); memset(op, 0, QD*sizeof(OP_t)); /*-------------------------------------------------------------------------- * loop running IO until secs expire *------------------------------------------------------------------------*/ gettimeofday(&start, NULL); do { /* setup #read ops and #write ops to send before completing ops */ if (!RD && !WR) 
{RD=nRD; WR=100-RD;} /*---------------------------------------------------------------------- * send up to RD reads, as long as the queuedepth N is not max *--------------------------------------------------------------------*/ while (TIME && RD && N) { rc = cblk_aread(id, rbuf, lba, nblocks, &rtag, NULL, CBLK_ARW_WAIT_CMD_FLAGS); if (0 == rc) {OP_BEG(rtag); --RD; --N; BMP_LBA();} else if (EBUSY == errno) {break;} else {io_error(id,errno);} } /*---------------------------------------------------------------------- * send up to WR writes, as long as the queuedepth N is not max *--------------------------------------------------------------------*/ while (TIME && WR && N) { rc = cblk_awrite(id, wbuf, lba, nblocks, &rtag, NULL, CBLK_ARW_WAIT_CMD_FLAGS); if (0 == rc) {OP_BEG(rtag); --WR; --N; BMP_LBA();} else if (EBUSY == errno) {break;} else {io_error(id,errno);} } /*---------------------------------------------------------------------- * complete cmds *--------------------------------------------------------------------*/ for (i=0; i<QD && N<COMP; i++, htag++) { if (intrp_thds) { rc = cblk_aresult(id, &htag, &status, CBLK_ARESULT_BLOCKING | CBLK_ARESULT_NEXT_TAG); if (rc != nblocks) {io_error(id,errno);} OP_END(htag); ++cnt; ++N; continue; } if (htag>=QD) htag=0; if (!op[htag].iss) {continue;} rc = cblk_aresult(id, &htag, &status, 0); if (rc == 0) { if (QD==1 && ++miss==1) {usleep(80);} ++tmiss; continue; } else if (rc < 0) {io_error(id,errno);} OP_END(htag); ++cnt; ++N; miss=0; } /*---------------------------------------------------------------------- * at an interval which does not impact performance, check if secs * have expired, and randomize lba *--------------------------------------------------------------------*/ if (cnt > TI) { TI += TIME_INTERVAL; gettimeofday(&delta, NULL); if (delta.tv_sec - start.tv_sec >= nsecs) {TIME=0; COMP=QD;} lba = lrand48() % nblks; } } while (TIME || QD-N); 
/*-------------------------------------------------------------------------- * print IO stats *------------------------------------------------------------------------*/ gettimeofday(&delta, NULL); esecs = ((float)((delta.tv_sec*mil + delta.tv_usec) - (start.tv_sec*mil + start.tv_usec))) / (float)mil; printf("d:%s r:%d q:%d s:%d p:%d n:%d i:%d miss:%d lat:%d mbps:%d iops:%d", dev, nRD, QD, nsecs, plun, nblocks, intrp_thds, tmiss, (uint32_t)((tlat*ns_per_tick)/cnt/1000), (uint32_t)((float)((cnt*nblocks*4)/1024)/esecs), (uint32_t)((float)(cnt/esecs))); if (plun && nblocks > 1) printf(" 4k-iops:%d", (uint32_t)((float)(cnt*nblocks)/esecs)); printf("\n"); /*-------------------------------------------------------------------------- * cleanup *------------------------------------------------------------------------*/ free(op); free(rbuf); free(wbuf); cblk_close(id,0); cblk_term(NULL,0); return 0; }
/**
 *******************************************************************************
 * \brief
 *   issue the outstanding block IOs for an iocb, from iocbp->start up to
 *   iocbp->nblks
 * \details
 *   For a memory store each block is copied with memcpy and counted as
 *   issued. For a flash store each block is sent with cblk_aread/cblk_awrite;
 *   a cmd that completes immediately is counted as both issued and completed
 *   and its a_tag is set to -1. On EAGAIN the loop stops without marking the
 *   iocb failed; on any other failure iocbp->io_error is set (EIO if errno
 *   was not set). In every case the tcb state is set to ARK_IO_HARVEST and
 *   iocbp->start is updated to the index where the loop stopped.
 *
 * \param _arkp   ark handle (not referenced directly in this function)
 * \param tid     thread id, used for tracing only
 * \param iotcbp  task control block whose state is moved to ARK_IO_HARVEST
 * \param iocbp   io control block describing the op, buffer and block list
 *
 * \return
 *   return TRUE if all IOs for the iocb are successfully completed, else FALSE
 ******************************************************************************/
int ea_async_io_schedule(_ARK   *_arkp,
                         int32_t tid,
                         tcb_t  *iotcbp,
                         iocb_t *iocbp)
{
    EA      *ea     = iocbp->ea;
    int32_t  rc     = TRUE;
    int32_t  arc    = 0;
    int      err    = 0;
    void    *prc    = 0;
    int64_t  i      = 0;
    uint8_t *p_addr = NULL;
    uint8_t *m_addr = NULL;
    char    *ot     = NULL;

    KV_TRC_IO(pAT, "IO_BEG: SCHEDULE_START: tid:%d ttag:%d start:%"PRIu64" "
                   "nblks:%"PRIu64" issT:%d cmpT:%d",
                   tid, iocbp->tag, iocbp->start, iocbp->nblks,
                   iocbp->issT, iocbp->cmpT);

    ARK_SYNC_EA_READ(iocbp->ea);

    /* op type string, used only in the per-block trace below */
    if (iocbp->op == ARK_EA_READ) {ot="IO_RD";}
    else                          {ot="IO_WR";}

    for (i=iocbp->start; i<iocbp->nblks; i++)
    {
        if (ea->st_type == EA_STORE_TYPE_MEMORY)
        {
            p_addr = ((uint8_t *)(iocbp->addr)) + (i * ea->bsize);
            m_addr = ea->st_memory + (iocbp->blist[i].blkno * ea->bsize);

            if (ARK_EA_READ == iocbp->op) {prc = memcpy(p_addr,m_addr,ea->bsize);}
            else                          {prc = memcpy(m_addr,p_addr,ea->bsize);}

            /* an injected error is reported as if the copy had failed */
            if (check_sched_error_injects(iocbp->op)) {prc=NULL;}

            // if memcpy failed, fail the iocb
            if (prc == NULL)
            {
                /* capture errno now; the trace calls below may change it */
                err = errno;
                rc  = FALSE;
                KV_TRC_FFDC(pAT,"IO_ERR: tid:%d ttag:%d blkno:%"PRIi64""
                                " errno:%d", tid, iocbp->tag,
                                iocbp->blist[i].blkno, err);
                if (!err) {KV_TRC_FFDC(pAT, "IO: UNSET_ERRNO"); err=EIO;}
                errno           = err;
                iocbp->io_error = err;
                break;
            }
            ++iocbp->issT;
            iocbp->blist[i].a_tag = i;
        }
        else // r/w to hw
        {
            p_addr = ((uint8_t *)iocbp->addr) + (i * ea->bsize);

            if (check_sched_error_injects(iocbp->op))
            {
                arc=-1;
            }
            else if ( iocbp->op == ARK_EA_READ )
            {
                arc = cblk_aread(ea->st_flash, p_addr, iocbp->blist[i].blkno, 1,
                                 &(iocbp->blist[i].a_tag), NULL, 0);
            }
            else
            {
                arc = cblk_awrite(ea->st_flash, p_addr, iocbp->blist[i].blkno,1,
                                  &(iocbp->blist[i].a_tag), NULL, 0);
            }

            if (arc == 0)    // good status
            {
                // issued but not yet complete, so the iocb is not done
                ++iocbp->issT;
                rc=FALSE;
            }
            else if (arc < 0)
            {
                /* capture errno now; the trace calls below may change it */
                err = errno;
                rc  = FALSE;
                if (err == EAGAIN)
                {
                    // return, and an ark thread will re-schedule this iocb
                    KV_TRC_DBG(pAT,"IO:     RW_EAGAIN: tid:%d ttag:%d "
                                   "blkno:%"PRIi64"",
                                   tid, iocbp->tag, iocbp->blist[i].blkno);
                    errno = err;
                    break;
                }
                // Something bad went wrong, fail the iocb
                KV_TRC_FFDC(pAT,"IO_ERR: tid:%d ttag:%d blkno:%"PRIi64""
                                " errno:%d", tid, iocbp->tag,
                                iocbp->blist[i].blkno, err);
                if (!err) {KV_TRC_FFDC(pAT, "IO: UNSET_ERRNO"); err=EIO;}
                errno           = err;
                iocbp->io_error = err;
                break;
            }
            else if (arc > 0)
            {
                KV_TRC_IO(pAT,"IO_CMP: IMMEDIATE: tid:%d ttag:%d a_tag:%d "
                              "blkno:%"PRIi64"",
                              tid, iocbp->tag,
                              iocbp->blist[i].a_tag, iocbp->blist[i].blkno);
                ++iocbp->issT;
                ++iocbp->cmpT;
                iocbp->blist[i].a_tag = -1; // mark as harvested
            }
        }

        KV_TRC_IO(pAT, "%s:  tid:%2d ttag:%4d a_tag:%4d blkno:%5"PRIi64"",
                  ot,tid, iocbp->tag,
                  iocbp->blist[i].a_tag, iocbp->blist[i].blkno);
    }

    /* remember where to resume if this iocb is scheduled again */
    iotcbp->state = ARK_IO_HARVEST;
    iocbp->start  = i;

    ARK_SYNC_EA_UNLOCK(iocbp->ea);

    return rc;
}
/**
 *******************************************************************************
 * \brief
 *   read or write len blocks of the store and wait for all of them
 * \details
 *   Memory store: each block is copied with memcpy. \n
 *   Flash store: blocks are issued with cblk_aread/cblk_awrite in batches of
 *   at most max_ops cmds, and each batch is harvested with blocking
 *   cblk_aresult calls before the next batch is issued. Issuing stops at the
 *   first error, but every cmd already issued is still harvested.
 *
 * \param ea     store to operate on
 * \param op     ARK_EA_READ to read; any other value writes
 * \param addr   caller buffer, accessed as len consecutive ea->bsize blocks
 * \param blist  block list; blkno selects the store block, a_tag is
 *               overwritten with the cmd tag (-1 once the data is returned)
 * \param len    number of entries in blist
 * \param nthrs  number of threads sharing the cmd slots; values below 1 are
 *               treated as 1
 *
 * \return 0 on success, otherwise the errno of the first failure (EIO when
 *         a failure is reported without errno being set)
 ******************************************************************************/
int ea_async_io(EA *ea, int op, void *addr, ark_io_list_t *blist,
                int64_t len, int nthrs)
{
    int64_t  i       = 0;
    int64_t  j       = 0;
    int64_t  comps   = 0;
    int      num     = 0;
    int      max_ops = 0;
    int      err     = 0;
    void    *m_rc    = NULL;
    int      rc      = 0;
    int      a_rc    = 0;
    uint64_t status  = 0;
    uint8_t *p_addr  = NULL;
    uint8_t *m_addr  = NULL;
    char    *ot      = NULL;

    ARK_SYNC_EA_READ(ea);

    /* op type string, used only for tracing */
    if (op == ARK_EA_READ) {ot="IO_RD";}
    else                   {ot="IO_WR";}

    if ( ea->st_type == EA_STORE_TYPE_MEMORY)
    {
        // Loop through the block list to issue the IO
        for (i = 0; i < len; i++)
        {
            p_addr = ((uint8_t*)addr) + (i * ea->bsize);

            // For in-memory Store, we issue the memcpy
            // and wait for the return, no async here.
            // Read out the value from the in-memory block
            m_addr = ea->st_memory + (blist[i].blkno * ea->bsize);

            if (op == ARK_EA_READ) {m_rc = memcpy(p_addr, m_addr, ea->bsize);}
            else                   {m_rc = memcpy(m_addr, p_addr, ea->bsize);}

            // an injected error is reported as if the copy had failed
            if (check_sched_error_injects(op)) {m_rc=NULL;}
            if (check_harv_error_injects(op))  {m_rc=NULL;}

            if (m_rc == NULL)
            {
                // never report a failure as success if errno was left unset
                err = errno;
                rc  = err ? err : EIO;
                break;
            }
        }
    }
    else
    {
        // divide up the cmd slots among
        // the threads and go 3 less
        max_ops = (ARK_EA_BLK_ASYNC_CMDS / (nthrs > 0 ? nthrs : 1)) - 3;

        // a batch size below 1 would issue nothing, so comps would never
        // advance and the loop below would never terminate
        if (max_ops < 1) {max_ops = 1;}

        // Loop through the block list to issue the IO
        while ((comps < len) && (rc == 0))
        {
            for (i = comps, num = 0;
                 (i < len) && (num < max_ops);
                 i++, num++)
            {
                p_addr = ((uint8_t*)addr) + (i * ea->bsize);

                // Call out to the block layer and retrieve a block
                // Do an async op for a single block and tell the block
                // layer to wait if there are no available command
                // blocks. Upon return, we can either get an error
                // (rc == -1), the data will be available (rc == number
                // of blocks read), or IO has been scheduled (rc == 0).
                if (op == ARK_EA_READ)
                {
                    rc = cblk_aread(ea->st_flash, p_addr, blist[i].blkno, 1,
                                    &(blist[i].a_tag), NULL,
                                    CBLK_ARW_WAIT_CMD_FLAGS);
                }
                else
                {
                    rc = cblk_awrite(ea->st_flash, p_addr, blist[i].blkno, 1,
                                     &(blist[i].a_tag), NULL,
                                     CBLK_ARW_WAIT_CMD_FLAGS);
                }

                if (check_sched_error_injects(op)) {rc=-1;}

                // capture errno before tracing can change it; it is only
                // consulted below when rc is -1
                err = errno;

                KV_TRC_IO(pAT, "%s:  id:%d blkno:%"PRIi64" rc:%d",
                          ot, ea->st_flash, blist[i].blkno, rc);

                if ( rc == -1 )
                {
                    // Error was encountered. Don't issue any more IO.
                    // Fall back to EIO so the failure is never returned
                    // as 0 (which would also re-issue this same block)
                    rc = err ? err : EIO;
                    KV_TRC_FFDC(pAT, "IO_ERR: cblk_aread/awrite failed, "
                                     "blkno:%"PRIi64" tag:%d, errno = %d",
                                     blist[i].blkno, blist[i].a_tag, rc);
                    break;
                }

                // Data has already been returned so we don't need to
                // wait for the response below
                if ( rc > 0 )
                {
                    blist[i].a_tag = -1;
                    rc             = 0;
                }
            }

            // For as many IOs that were performed, we loop to
            // see if we need to wait for the response or the
            // data has already been returned.
            for (j = comps; j < i; j++)
            {
                // Data has already been read
                if (blist[j].a_tag == -1) {continue;}

                do
                {
                    a_rc = cblk_aresult(ea->st_flash, &(blist[j].a_tag),
                                        &status, CBLK_ARESULT_BLOCKING);

                    if (check_harv_error_injects(op)) {a_rc=-1;}

                    // There was an error, check to see if we haven't
                    // encountered an error previously and if not, then
                    // set rc. Continue processing so that we harvest
                    // all outstanding responses
                    if (a_rc == -1)
                    {
                        err = errno;
                        if (rc == 0) {rc = err ? err : EIO;}
                        KV_TRC_IO(pAT, "IO_ERR: id:%d blkno:%ld status:%ld a_rc:%d",
                                  ea->st_flash, blist[j].blkno, status, a_rc);
                    }
                    else
                    {
                        KV_TRC_IO(pAT, "IO_CMP: id:%d blkno:%ld status:%ld a_rc:%d",
                                  ea->st_flash, blist[j].blkno, status, a_rc);
                    }

                    // If a_rc is 0, that means we got interrupted somehow
                    // so we need to retry the operation.
                } while (a_rc == 0);
            }

            // If we start another loop, start off where we finished
            // in this loop.
            comps = i;
        }
    }

    ARK_SYNC_EA_UNLOCK(ea);

    return rc;
}
/** ******************************************************************************** ** \brief main ** \details ** process input parms \n ** open device \n ** alloc memory \n ** loop running IO until secs expire \n ** print IO stats \n ** cleanup *******************************************************************************/ int main(int argc, char **argv) { struct timeval start, delta; long int mil = 1000000; float esecs = 0; uint8_t **rbuf = NULL; uint8_t **wbuf = NULL; int *tags = NULL; char *dev = NULL; char FF = 0xFF; char c = '\0'; chunk_ext_arg_t ext = 0; int flags = 0; int rc = 0; int id = 0; char *_secs = NULL; char *_QD = NULL; char *_RD = NULL; char *_nblocks = NULL; uint32_t plun = 0; uint32_t nsecs = 4; uint32_t QD = 500; uint32_t nRD = 100; uint32_t RD = 0; uint32_t WR = 0; uint32_t intrp_thds = 0; int tag = 0; int rtag = 0; uint32_t lba = 0; size_t nblks = 0; uint32_t nblocks = 1; uint32_t cnt = 0; uint32_t pollN = 0; uint64_t status = 0; uint32_t TI = TIME_INTERVAL; uint32_t N = 0; uint32_t TIME = 1; uint32_t COMP = 0; /*-------------------------------------------------------------------------- * process and verify input parms *------------------------------------------------------------------------*/ while (FF != (c=getopt(argc, argv, "d:r:q:n:s:phi"))) { switch (c) { case 'd': dev = optarg; break; case 'r': _RD = optarg; break; case 'q': _QD = optarg; break; case 'n': _nblocks = optarg; break; case 's': _secs = optarg; break; case 'p': plun = 1; break; case 'i': intrp_thds = 1; break; case 'h': case '?': usage(); break; } } if (_secs) nsecs = atoi(_secs); if (_QD) QD = atoi(_QD); if (_nblocks) nblocks = atoi(_nblocks); if (_RD) nRD = atoi(_RD); if (QD > _8K) QD = _8K; if (nRD > 100) nRD = 100; if (!plun) nblocks = 1; if (dev == NULL) usage(); srand48(time(0)); N = QD; COMP = QD < 5 ? 
1 : QD/5; /*-------------------------------------------------------------------------- * open device and set lun size *------------------------------------------------------------------------*/ rc = cblk_init(NULL,0); if (rc) { fprintf(stderr,"cblk_init failed with rc = %d and errno = %d\n", rc,errno); exit(1); } if (!plun) flags = CBLK_OPN_VIRT_LUN; if (!intrp_thds) flags |= CBLK_OPN_NO_INTRP_THREADS; id = cblk_open(dev, QD, O_RDWR, ext, flags); if (id == NULL_CHUNK_ID) { if (ENOSPC == errno) fprintf(stderr,"cblk_open: ENOSPC\n"); else if (ENODEV == errno) fprintf(stderr,"cblk_open: ENODEV\n"); else fprintf(stderr,"cblk_open: errno:%d\n",errno); cblk_term(NULL,0); exit(errno); } rc = cblk_get_lun_size(id, &nblks, 0); if (rc) { fprintf(stderr, "cblk_get_lun_size failed: errno: %d\n", errno); exit(errno); } if (!plun) { nblks = nblks > SET_NBLKS ? SET_NBLKS : nblks; rc = cblk_set_size(id, nblks, 0); if (rc) { fprintf(stderr, "cblk_set_size failed, errno: %d\n", errno); exit(errno); } } /*-------------------------------------------------------------------------- * alloc data for IO *------------------------------------------------------------------------*/ tags = malloc(QD*sizeof(int)); rbuf = malloc(QD*sizeof(uint8_t*)); wbuf = malloc(QD*sizeof(uint8_t*)); for (tag=0; tag<QD; tag++) { if ((rc=posix_memalign((void**)&(rbuf[tag]), _4K, _4K*nblocks))) { fprintf(stderr,"posix_memalign failed, size=%d, rc=%d\n", _4K*nblocks, rc); cblk_close(id,0); cblk_term(NULL,0); exit(0); } if ((rc=posix_memalign((void**)&(wbuf[tag]), _4K, _4K*nblocks))) { fprintf(stderr,"posix_memalign failed, size=%d, rc=%d\n", _4K*nblocks, rc); cblk_close(id,0); cblk_term(NULL,0); exit(0); } memset(wbuf[tag],0x79,_4K*nblocks); tags[tag] = tag; } /*-------------------------------------------------------------------------- * loop running IO until secs expire *------------------------------------------------------------------------*/ gettimeofday(&start, NULL); do { /* setup #read ops and #write ops 
to send before completing ops */ if (!RD && !WR) {RD=nRD; WR=100-RD;} /*---------------------------------------------------------------------- * send up to RD reads, as long as the queuedepth N is not max *--------------------------------------------------------------------*/ while (TIME && RD && N) { tag = tags[--N]; rc = cblk_aread(id, rbuf[tag], lba, nblocks, &tag, NULL, CBLK_ARW_WAIT_CMD_FLAGS | CBLK_ARW_USER_TAG_FLAG); if (0 == rc) {--RD; lba+=2; if (lba >= nblks) lba=cnt%2;} else if (EBUSY == errno) {++N; usleep(USLEEP); continue;} else {io_error(id,errno);} } /*---------------------------------------------------------------------- * send up to WR writes, as long as the queuedepth N is not max *--------------------------------------------------------------------*/ while (TIME && WR && N) { tag = tags[--N]; rc = cblk_awrite(id, wbuf[tag], lba, nblocks, &tag, NULL, CBLK_ARW_WAIT_CMD_FLAGS | CBLK_ARW_USER_TAG_FLAG); if (0 == rc) {--WR; lba+=2; if (lba >= nblks) lba=cnt%2;} else if (EBUSY == errno) {++N; usleep(USLEEP); continue;} else {io_error(id,errno);} } /* if the queuedepth is 1, don't immediately pound aresult */ if (QD==1) usleep(USLEEP); /*---------------------------------------------------------------------- * complete cmds until queue depth is QD-COMP *--------------------------------------------------------------------*/ while (N < COMP) { rtag=0; rc = cblk_aresult(id, &rtag, &status, CBLK_ARESULT_BLOCKING| CBLK_ARESULT_NEXT_TAG); if (rc == 0) {++pollN; usleep(USLEEP); continue;} else if (rc < 0) {io_error(id,errno);} ++cnt; tags[N++] = rtag; } /*---------------------------------------------------------------------- * at an interval which does not impact performance, check if secs * have expired, and randomize lba *--------------------------------------------------------------------*/ if (cnt > TI) { TI += TIME_INTERVAL; gettimeofday(&delta, NULL); if (delta.tv_sec - start.tv_sec >= nsecs) {TIME=0; COMP = QD;} lba = lrand48() % TIME_INTERVAL; } } 
while (TIME || QD-N); /*-------------------------------------------------------------------------- * print IO stats *------------------------------------------------------------------------*/ gettimeofday(&delta, NULL); esecs = ((float)((delta.tv_sec*mil + delta.tv_usec) - (start.tv_sec*mil + start.tv_usec))) / (float)mil; printf("d:%s r:%d q:%d s:%d p:%d n:%d i:%d pollN:%d mbps:%d iops:%d", dev, nRD, QD, nsecs, plun, nblocks, intrp_thds, pollN, (uint32_t)((float)((cnt*nblocks*4)/1024)/esecs), (uint32_t)((float)(cnt/esecs))); if (plun && nblocks > 1) printf(" 4k-iops:%d", (uint32_t)((float)(cnt*nblocks)/esecs)); printf("\n"); /*-------------------------------------------------------------------------- * cleanup *------------------------------------------------------------------------*/ for (cnt=0; cnt<QD; cnt++) { free(rbuf[cnt]); free(wbuf[cnt]); } free(rbuf); free(wbuf); cblk_close(id,0); cblk_term(NULL,0); return 0; }