void add_trace(struct io *iop)
{
	if (iop->t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) {
		if (iop->t.action == BLK_TN_PROCESS) {
			if (iop->t.pid == 0)
				process_alloc(0, "kernel");
			else {
				char *slash = strchr(iop->pdu, '/');

				if (slash)
					*slash = '\0';

				process_alloc(iop->t.pid, iop->pdu);
			}
		} else if (iop->t.action == BLK_TN_MESSAGE)
			trace_message(iop);
		io_release(iop);
	} else if (iop->t.action & BLK_TC_ACT(BLK_TC_PC)) {
		io_release(iop);
	} else {
		if (time_bounded) {
			if (BIT_TIME(iop->t.time) < t_astart) {
				io_release(iop);
				return;
			} else if (BIT_TIME(iop->t.time) > t_aend) {
				io_release(iop);
				done = 1;
				return;
			}
		}
		__add_trace(iop);
	}
}
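Everything below keys off the same encoding: the low 16 bits of a blk_io_trace action word hold the event type (__BLK_TA_QUEUE and friends) and the high 16 bits hold category flags, with BLK_TC_ACT() shifting a category into position. A minimal standalone sketch of that split; the macro values are copied from include/uapi/linux/blktrace_api.h, while the sample action word itself is made up:

#include <stdio.h>
#include <stdint.h>

#define BLK_TC_SHIFT	16
#define BLK_TC_ACT(act)	((act) << BLK_TC_SHIFT)

#define BLK_TC_READ	(1 << 0)
#define BLK_TC_NOTIFY	(1 << 10)
#define __BLK_TA_QUEUE	1

int main(void)
{
	/* hypothetical action word: a queue event in the READ category */
	uint32_t action = BLK_TC_ACT(BLK_TC_READ) | __BLK_TA_QUEUE;

	printf("event type: %u\n", (unsigned)(action & 0xffff));	/* 1 */
	printf("is read:    %d\n", !!(action & BLK_TC_ACT(BLK_TC_READ)));
	printf("is notify:  %d\n", !!(action & BLK_TC_ACT(BLK_TC_NOTIFY)));
	return 0;
}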
/**
 * blk_add_trace_rq - Add a trace for a request oriented action
 * @q:		queue the io is for
 * @rq:		the source request
 * @what:	the action
 *
 * Description:
 *     Records an action against a request. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
			     u32 what)
{
	struct blk_trace *bt = q->blk_trace;
	int rw = rq->cmd_flags & 0x03;

	if (likely(!bt))
		return;

	if (rq->cmd_flags & REQ_SYNC)
		rw |= (1 << BIO_RW_SYNCIO);
	if (rq->cmd_flags & REQ_META)
		rw |= (1 << BIO_RW_META);
	if (rq->cmd_flags & REQ_DISCARD)
		rw |= BIO_DISCARD;
	if (rq->cmd_flags & REQ_FLUSH)
		rw |= BIO_FLUSH;
	if (rq->cmd_flags & REQ_FUA)
		rw |= BIO_FUA;

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		what |= BLK_TC_ACT(BLK_TC_PC);
		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
				what, rq->errors, rq->cmd_len, rq->cmd);
	} else {
		what |= BLK_TC_ACT(BLK_TC_FS);
		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
				what, rq->errors, 0, NULL);
	}
}
/*
 * We only care for queue traces, most of the others are side effects
 * due to internal workings of the block layer.
 */
static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
			 unsigned long *ios, unsigned int *bs)
{
	static unsigned long long last_ttime;
	unsigned long long delay;

	if ((t->action & 0xffff) != __BLK_TA_QUEUE)
		return;

	if (!(t->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
		if (!last_ttime || td->o.no_stall) {
			last_ttime = t->time;
			delay = 0;
		} else {
			delay = t->time - last_ttime;
			last_ttime = t->time;
		}
	}

	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		handle_trace_notify(t);
	else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
		handle_trace_discard(td, t, delay, ios, bs);
	else
		handle_trace_fs(td, t, delay, ios, bs);
}
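The delay computed above is just the inter-arrival gap between consecutive queue events; blktrace timestamps are in nanoseconds, and the first event (or no_stall mode) yields a zero stall. A standalone restatement of that bookkeeping, with hypothetical timestamps:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical queue-event timestamps, in nanoseconds */
	uint64_t times[] = { 1000, 1500, 4200 };
	uint64_t last = 0, delay;

	for (unsigned i = 0; i < 3; i++) {
		delay = last ? times[i] - last : 0;	/* stall vs. previous */
		last = times[i];
		printf("event %u: stall %llu ns\n", i,
		       (unsigned long long)delay);
	}
	return 0;
}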
static enum fio_ddir t_get_ddir(struct blk_io_trace *t)
{
	if (t->action & BLK_TC_ACT(BLK_TC_READ))
		return DDIR_READ;
	else if (t->action & BLK_TC_ACT(BLK_TC_WRITE))
		return DDIR_WRITE;
	else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
		return DDIR_TRIM;

	return DDIR_INVAL;
}
/*
 * We only care for queue traces, most of the others are side effects
 * due to internal workings of the block layer.
 */
static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
			 unsigned long long ttime, unsigned long *ios,
			 unsigned int *bs)
{
	if ((t->action & 0xffff) != __BLK_TA_QUEUE)
		return;
	if (t->action & BLK_TC_ACT(BLK_TC_PC))
		return;

	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		handle_trace_notify(t);
	else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
		handle_trace_discard(td, t, ttime, ios);
	else
		handle_trace_fs(td, t, ttime, ios, bs);
}
/**
 * blk_add_trace_rq - Add a trace for a request oriented action
 * @q:		queue the io is for
 * @rq:		the source request
 * @what:	the action
 *
 * Description:
 *     Records an action against a request. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
			     u32 what)
{
	struct blk_trace *bt = q->blk_trace;
	int rw = rq->cmd_flags & 0x03;

	if (likely(!bt))
		return;

	if (blk_discard_rq(rq))
		rw |= (1 << BIO_RW_DISCARD);

	if (blk_pc_request(rq)) {
		what |= BLK_TC_ACT(BLK_TC_PC);
		__blk_add_trace(bt, 0, rq->data_len, rw, what,
				rq->errors, sizeof(rq->cmd), rq->cmd);
	} else {
		what |= BLK_TC_ACT(BLK_TC_FS);
		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
				rw, what, rq->errors, 0, NULL);
	}
}
static struct trace *blkiomon_do_trace(struct trace *t)
{
	struct trace *t_stored, *t_old, *t_young;

	/* store trace if there is no match yet */
	t_stored = blkiomon_fetch_trace(&t->bit);
	if (!t_stored) {
		blkiomon_store_trace(t);
		return blkiomon_alloc_trace();
	}

	/* figure out older trace and younger trace */
	if (t_stored->bit.time < t->bit.time) {
		t_old = t_stored;
		t_young = t;
	} else {
		t_old = t;
		t_young = t_stored;
	}

	/* we need an older D trace and a younger C trace */
	if (t_old->bit.action & BLK_TC_ACT(BLK_TC_ISSUE) &&
	    t_young->bit.action & BLK_TC_ACT(BLK_TC_COMPLETE)) {
		/* matching D and C traces - update statistics */
		match++;
		blkiomon_account(&t_old->bit, &t_young->bit);
		blkiomon_free_trace(t_stored);
		return t;
	}

	/* no matching D and C traces - keep more recent trace */
	dump_bits(t_old, t_young, "mismatch");
	mismatch++;
	blkiomon_store_trace(t_young);
	return t_old;
}
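The matched pair feeds blkiomon_account(); the essential statistic is the issue-to-completion (D2C) latency, i.e. the younger C timestamp minus the older D timestamp, in nanoseconds. A hypothetical helper over just the field involved (not blkiomon's real structures):

#include <stdint.h>
#include <stdio.h>

/* stand-in for the one field used; struct blk_io_trace carries more */
struct stamp { uint64_t time; };

/* hypothetical helper: D2C latency in microseconds for a matched D/C pair */
static uint64_t d2c_usecs(const struct stamp *d, const struct stamp *c)
{
	return (c->time - d->time) / 1000;	/* ns -> us */
}

int main(void)
{
	struct stamp d = { 1000000 }, c = { 4500000 };	/* made-up times */

	printf("%llu us\n", (unsigned long long)d2c_usecs(&d, &c)); /* 3500 */
	return 0;
}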
/*
 * Send out a notify for this process, if we haven't done so since a trace
 * started
 */
static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
{
	struct blk_io_trace *t;

	t = relay_reserve(bt->rchan, sizeof(*t) + sizeof(tsk->comm));
	if (t) {
		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
		t->device = bt->dev;
		t->action = BLK_TC_ACT(BLK_TC_NOTIFY);
		t->pid = tsk->pid;
		t->cpu = smp_processor_id();
		t->pdu_len = sizeof(tsk->comm);
		memcpy((void *) t + sizeof(*t), tsk->comm, t->pdu_len);
		tsk->btrace_seq = blktrace_seq;
	}
}
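The magic word written here is what the userspace loaders below validate: the upper three bytes carry BLK_IO_TRACE_MAGIC and the low byte carries the format version. A standalone check, with the constants copied from the blktrace API header:

#include <stdint.h>
#include <stdio.h>

#define BLK_IO_TRACE_MAGIC	0x65617400
#define BLK_IO_TRACE_VERSION	0x07

/* returns 0 when the magic/version of a raw trace record looks valid */
static int check_magic(uint32_t magic)
{
	if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC)
		return -1;		/* not a blktrace record */
	if ((magic & 0xff) != BLK_IO_TRACE_VERSION)
		return -1;		/* unsupported format version */
	return 0;
}

int main(void)
{
	printf("%d\n", check_magic(BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION));
	return 0;
}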
static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
			    unsigned long long ttime, unsigned long *ios,
			    unsigned int *bs)
{
	int rw;

	trace_add_file(td, t->device);

	rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;

	if (t->bytes > bs[rw])
		bs[rw] = t->bytes;

	ios[rw]++;
	td->o.size += t->bytes;
	store_ipo(td, t->sector, t->bytes, rw, ttime);
}
static int blk_log_with_error(struct trace_seq *s,
			      const struct trace_entry *ent)
{
	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
		int ret;

		ret = blk_log_dump_pdu(s, ent);
		if (ret)
			return trace_seq_printf(s, "[%d]\n", t_error(ent));
		return 0;
	} else {
		if (t_sec(ent))
			return trace_seq_printf(s, "%llu + %u [%d]\n",
						t_sector(ent),
						t_sec(ent), t_error(ent));
		return trace_seq_printf(s, "%llu [%d]\n",
					t_sector(ent), t_error(ent));
	}
}
static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
{
	char cmd[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, cmd);

	if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
		int ret;

		ret = trace_seq_printf(s, "%u ", t_bytes(ent));
		if (!ret)
			return 0;
		ret = blk_log_dump_pdu(s, ent);
		if (!ret)
			return 0;
		return trace_seq_printf(s, "[%s]\n", cmd);
	} else {
		if (t_sec(ent))
			return trace_seq_printf(s, "%llu + %u [%s]\n",
						t_sector(ent), t_sec(ent), cmd);
		return trace_seq_printf(s, "[%s]\n", cmd);
	}
}
static int blkiomon_do_fifo(void)
{
	struct trace *t;
	struct blk_io_trace *bit;
	void *pdu_buf = NULL;

	t = blkiomon_alloc_trace();
	if (!t)
		return 1;
	bit = &t->bit;

	while (up) {
		if (fread(bit, sizeof(*bit), 1, ifp) != 1) {
			if (!feof(ifp))
				fprintf(stderr,
					"blkiomon: could not read trace\n");
			break;
		}
		if (ferror(ifp)) {
			clearerr(ifp);
			fprintf(stderr,
				"blkiomon: error while reading trace\n");
			break;
		}

		if (data_is_native == -1 &&
		    check_data_endianness(bit->magic)) {
			fprintf(stderr, "blkiomon: endianness problem\n");
			break;
		}

		/* endianness */
		trace_to_cpu(bit);

		if (verify_trace(bit)) {
			fprintf(stderr, "blkiomon: bad trace\n");
			break;
		}

		/* read additional trace payload */
		if (bit->pdu_len) {
			pdu_buf = realloc(pdu_buf, bit->pdu_len);
			if (fread(pdu_buf, bit->pdu_len, 1, ifp) != 1) {
				clearerr(ifp);
				fprintf(stderr,
					"blkiomon: could not read payload\n");
				break;
			}
		}

		t->sequence = sequence++;

		/* forward low-level device driver trace to other tool */
		if (bit->action & BLK_TC_ACT(BLK_TC_DRV_DATA)) {
			driverdata++;
			if (blkiomon_dump_drvdata(bit, pdu_buf)) {
				fprintf(stderr,
					"blkiomon: could not send trace\n");
				break;
			}
			continue;
		}

		if (!(bit->action & BLK_TC_ACT(BLK_TC_ISSUE | BLK_TC_COMPLETE)))
			continue;

		/* try to find matching trace and update statistics */
		t = blkiomon_do_trace(t);
		if (!t) {
			fprintf(stderr, "blkiomon: could not alloc trace\n");
			break;
		}
		bit = &t->bit;
		/* t and bit will be recycled for next incoming trace */
	}

	blkiomon_free_trace(t);
	free(pdu_buf);
	return 0;
}
/*
 * Load a blktrace file by reading all the blk_io_trace entries, and storing
 * them as io_pieces like the fio text version would do.
 */
int load_blktrace(struct thread_data *td, const char *filename, int need_swap)
{
	unsigned long long ttime, delay;
	struct blk_io_trace t;
	unsigned long ios[2], skipped_writes;
	unsigned int cpu;
	unsigned int rw_bs[2];
	struct fifo *fifo;
	int fd, i, old_state;
	struct fio_file *f;

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		td_verror(td, errno, "open blktrace file");
		return 1;
	}

	fifo = fifo_alloc(TRACE_FIFO_SIZE);

	old_state = td_bump_runstate(td, TD_SETTING_UP);

	td->o.size = 0;

	cpu = 0;
	ttime = 0;
	ios[0] = ios[1] = 0;
	rw_bs[0] = rw_bs[1] = 0;
	skipped_writes = 0;
	do {
		int ret = trace_fifo_get(td, fifo, fd, &t, sizeof(t));

		if (ret < 0)
			goto err;
		else if (!ret)
			break;
		else if (ret < (int) sizeof(t)) {
			log_err("fio: short fifo get\n");
			break;
		}

		if (need_swap)
			byteswap_trace(&t);

		if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
			log_err("fio: bad magic in blktrace data: %x\n",
								t.magic);
			goto err;
		}
		if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
			log_err("fio: bad blktrace version %d\n",
								t.magic & 0xff);
			goto err;
		}
		ret = discard_pdu(td, fifo, fd, &t);
		if (ret < 0) {
			td_verror(td, ret, "blktrace lseek");
			goto err;
		} else if (t.pdu_len != ret) {
			log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
			goto err;
		}
		if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
			if (!ttime) {
				ttime = t.time;
				cpu = t.cpu;
			}

			delay = 0;
			if (cpu == t.cpu)
				delay = t.time - ttime;
			if ((t.action & BLK_TC_ACT(BLK_TC_WRITE)) && read_only)
				skipped_writes++;
			else {
				/*
				 * set delay to zero if no_stall enabled for
				 * fast replay
				 */
				if (td->o.no_stall)
					delay = 0;

				handle_trace(td, &t, delay, ios, rw_bs);
			}

			ttime = t.time;
			cpu = t.cpu;
		} else {
			delay = 0;
			handle_trace(td, &t, delay, ios, rw_bs);
		}
	} while (1);

	for (i = 0; i < td->files_index; i++) {
		f = td->files[i];
		trace_add_open_close_event(td, f->fileno, FIO_LOG_CLOSE_FILE);
	}

	fifo_free(fifo);
	close(fd);

	td_restore_runstate(td, old_state);

	if (!td->files_index) {
		log_err("fio: did not find replay device(s)\n");
		return 1;
	}

	if (skipped_writes)
		log_err("fio: %s skips replay of %lu writes due to read-only\n",
						td->o.name, skipped_writes);

	if (!ios[DDIR_READ] && !ios[DDIR_WRITE]) {
		log_err("fio: found no ios in blktrace data\n");
		return 1;
	} else if (ios[DDIR_READ] && !ios[DDIR_WRITE]) {
		td->o.td_ddir = TD_DDIR_READ;
		td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
	} else if (!ios[DDIR_READ] && ios[DDIR_WRITE]) {
		td->o.td_ddir = TD_DDIR_WRITE;
		td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
	} else {
		td->o.td_ddir = TD_DDIR_RW;
		td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
		td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
	}

	/*
	 * We need to do direct/raw ios to the device, to avoid getting
	 * read-ahead in our way.
	 */
	td->o.odirect = 1;

	return 0;
err:
	close(fd);
	fifo_free(fifo);
	return 1;
}
static void do_thread_job(mb_btreplay_thread_arg_t *arg)
{
	int fd;
	int open_flags;
	mb_btreplay_ioreq_t *ioreq;
	int64_t endpos;
	int64_t ofst;
	size_t sz;
	void *buf;
	size_t bufsz;

	open_flags = O_RDWR;
	if (option.direct)
		open_flags |= O_DIRECT;
	if (-1 == (fd = open(option.target_path, open_flags))) {
		perror("Failed open(2)");
		exit(EXIT_FAILURE);
	}

	endpos = -1;
	bufsz = 64 * KIBI;
	buf = memalign(KIBI, bufsz);

	g_async_queue_ref(arg->ioreq_queue);
	for (;;) {
		ioreq = (mb_btreplay_ioreq_t *) g_async_queue_pop(arg->ioreq_queue);
		if (ioreq->stop == true) {
			if (option.verbose)
				fprintf(stderr, "[tid: %d] stopping...\n",
					arg->tid);
			free(ioreq);
			break;
		}

		int act = ioreq->trace.action & 0xffff;
		bool w = (ioreq->trace.action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;

		if (act == __BLK_TA_ISSUE) {
			/* blktrace sectors are always 512-byte units */
			ofst = ioreq->trace.sector * 512;
			sz = ioreq->trace.bytes;
			if (sz > bufsz) {
				free(buf);
				bufsz = sz;
				buf = memalign(KIBI, bufsz);
			}
			if (ofst != endpos) {
				if (-1 == lseek64(fd, ofst, SEEK_SET))
					fprintf(stderr,
						"lseek64 failed: errno=%d\n",
						errno);
				if (option.vverbose)
					printf("[tid: %d] lseek64 to %lld\n",
					       arg->tid, (long long) ofst);
			}
			if (option.vverbose)
				printf("[tid: %d] %s on fd:%d at %lld + %zu\n",
				       arg->tid,
				       (w == true ? "write" : "read"),
				       fd, (long long) ofst, sz);
			if (w == false) {
				// do read
				mb_readall(fd, buf, sz, true);
			} else {
				// do write
				mb_writeall(fd, buf, sz, true);
			}
			endpos = ofst + sz;
		}
		free(ioreq);
	}
	close(fd);
}
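One caveat with the O_DIRECT path above: direct I/O generally requires the buffer address, file offset, and transfer size to be aligned to the device's logical block size, so the 1 KiB memalign() alignment is only safe on devices with small logical blocks. A more defensive allocation, assuming a hypothetical 4 KiB worst case:

#include <stdio.h>
#include <stdlib.h>

/* allocate an O_DIRECT-safe buffer; 4096 covers common logical block sizes */
static void *alloc_dio_buf(size_t sz)
{
	void *buf = NULL;

	if (posix_memalign(&buf, 4096, sz))
		return NULL;
	return buf;
}

int main(void)
{
	void *buf = alloc_dio_buf(64 * 1024);

	printf("%p\n", buf);
	free(buf);
	return 0;
}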
		   pid_t pid)
{
	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
		return 1;
	if (sector && (sector < bt->start_lba || sector > bt->end_lba))
		return 1;
	if (bt->pid && pid != bt->pid)
		return 1;

	return 0;
}

/*
 * Data direction bit lookup
 */
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
				 BLK_TC_ACT(BLK_TC_WRITE) };

/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))

/*
 * The worker for the various blk_add_trace*() types. Fills out a
 * blk_io_trace structure and places it in a per-cpu subbuffer.
 */
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
			    int rw, u32 what, int error, int pdu_len,
			    void *pdu_data)
{
	struct task_struct *tsk = current;
	struct ring_buffer_event *event = NULL;
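MASK_TC_BIT() relocates a request-flag bit straight into its category position in the action word: it shifts 1 << BIO_RW_name left by ilog2(BLK_TC_name) + BLK_TC_SHIFT - BIO_RW_name, landing on bit ilog2(BLK_TC_name) + 16. A userspace re-derivation; BLK_TC_SYNC comes from blktrace_api.h, while the BIO_RW_SYNCIO position is a made-up stand-in (the real BIO_RW_* values vary by kernel version):

#include <stdio.h>
#include <stdint.h>

#define BLK_TC_SHIFT	16
#define BLK_TC_SYNC	(1 << 3)	/* category bit, from blktrace_api.h */
#define BIO_RW_SYNCIO	4		/* hypothetical flag position */
#define ILOG2_BLK_TC_SYNC 3		/* ilog2(BLK_TC_SYNC), a constant */

#define MASK_TC_BIT(rw, pos, tclog2) \
	(((rw) & (1u << (pos))) << ((tclog2) + BLK_TC_SHIFT - (pos)))

int main(void)
{
	uint32_t rw = 1u << BIO_RW_SYNCIO;	/* request marked sync */
	uint32_t what = MASK_TC_BIT(rw, BIO_RW_SYNCIO, ILOG2_BLK_TC_SYNC);

	/* expect the BLK_TC_SYNC bit in the high half: 1 << (3 + 16) */
	printf("what = %#x, expected %#x\n", what,
	       (uint32_t) BLK_TC_SYNC << BLK_TC_SHIFT);
	return 0;
}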
/*
 * Load a blktrace file by reading all the blk_io_trace entries, and storing
 * them as io_pieces like the fio text version would do.
 */
int load_blktrace(struct thread_data *td, const char *filename, int need_swap)
{
	struct blk_io_trace t;
	unsigned long ios[DDIR_RWDIR_CNT], skipped_writes;
	unsigned int rw_bs[DDIR_RWDIR_CNT];
	struct fifo *fifo;
	int fd, i, old_state;
	struct fio_file *f;
	int this_depth[DDIR_RWDIR_CNT], depth[DDIR_RWDIR_CNT], max_depth;

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		td_verror(td, errno, "open blktrace file");
		return 1;
	}

	fifo = fifo_alloc(TRACE_FIFO_SIZE);

	old_state = td_bump_runstate(td, TD_SETTING_UP);

	td->o.size = 0;

	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
		ios[i] = 0;
		rw_bs[i] = 0;
		this_depth[i] = 0;
		depth[i] = 0;
	}

	skipped_writes = 0;
	do {
		int ret = trace_fifo_get(td, fifo, fd, &t, sizeof(t));

		if (ret < 0)
			goto err;
		else if (!ret)
			break;
		else if (ret < (int) sizeof(t)) {
			log_err("fio: short fifo get\n");
			break;
		}

		if (need_swap)
			byteswap_trace(&t);

		if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
			log_err("fio: bad magic in blktrace data: %x\n",
								t.magic);
			goto err;
		}
		if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
			log_err("fio: bad blktrace version %d\n",
								t.magic & 0xff);
			goto err;
		}
		ret = discard_pdu(td, fifo, fd, &t);
		if (ret < 0) {
			td_verror(td, ret, "blktrace lseek");
			goto err;
		} else if (t.pdu_len != ret) {
			log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
			goto err;
		}
		if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
			if ((t.action & 0xffff) == __BLK_TA_QUEUE)
				depth_inc(&t, this_depth);
			else if (((t.action & 0xffff) == __BLK_TA_BACKMERGE) ||
				 ((t.action & 0xffff) == __BLK_TA_FRONTMERGE))
				depth_dec(&t, this_depth);
			else if ((t.action & 0xffff) == __BLK_TA_COMPLETE)
				depth_end(&t, this_depth, depth);

			if (t_is_write(&t) && read_only) {
				skipped_writes++;
				continue;
			}
		}

		handle_trace(td, &t, ios, rw_bs);
	} while (1);

	for (i = 0; i < td->files_index; i++) {
		f = td->files[i];
		trace_add_open_close_event(td, f->fileno, FIO_LOG_CLOSE_FILE);
	}

	fifo_free(fifo);
	close(fd);

	td_restore_runstate(td, old_state);

	if (!td->files_index) {
		log_err("fio: did not find replay device(s)\n");
		return 1;
	}

	/*
	 * For stacked devices, we don't always get a COMPLETE event so
	 * the depth grows to insane values. Limit it to something sane(r).
	 */
	max_depth = 0;
	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
		if (depth[i] > 1024)
			depth[i] = 1024;
		else if (!depth[i] && ios[i])
			depth[i] = 1;
		max_depth = max(depth[i], max_depth);
	}

	if (skipped_writes)
		log_err("fio: %s skips replay of %lu writes due to read-only\n",
						td->o.name, skipped_writes);

	if (!ios[DDIR_READ] && !ios[DDIR_WRITE]) {
		log_err("fio: found no ios in blktrace data\n");
		return 1;
	} else if (ios[DDIR_READ] && !ios[DDIR_WRITE]) {
		td->o.td_ddir = TD_DDIR_READ;
		td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
	} else if (!ios[DDIR_READ] && ios[DDIR_WRITE]) {
		td->o.td_ddir = TD_DDIR_WRITE;
		td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
	} else {
		td->o.td_ddir = TD_DDIR_RW;
		td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
		td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
		td->o.max_bs[DDIR_TRIM] = rw_bs[DDIR_TRIM];
	}

	/*
	 * We need to do direct/raw ios to the device, to avoid getting
	 * read-ahead in our way. But only do so if the minimum block size
	 * is a multiple of 4k, otherwise we don't know if it's safe to do so.
	 */
	if (!fio_option_is_set(&td->o, odirect) && !(td_min_bs(td) & 4095))
		td->o.odirect = 1;

	/*
	 * If depth wasn't manually set, use probed depth
	 */
	if (!fio_option_is_set(&td->o, iodepth))
		td->o.iodepth = td->o.iodepth_low = max_depth;

	return 0;
err:
	close(fd);
	fifo_free(fifo);
	return 1;
}
static int t_is_write(struct blk_io_trace *t)
{
	return (t->action & BLK_TC_ACT(BLK_TC_WRITE | BLK_TC_DISCARD)) != 0;
}
static int action(int a)
{
	int bits = BLK_TC_WRITE | BLK_TC_READ | BLK_TC_FS | BLK_TC_PC;

	return a & (BLK_TC_ACT(bits));
}
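This filter keeps only the read/write and fs/pc category bits; anything else (NOTIFY, DISCARD, ...) collapses to zero. A quick standalone check, with the category values copied from blktrace_api.h:

#include <stdio.h>

#define BLK_TC_SHIFT	16
#define BLK_TC_ACT(act)	((act) << BLK_TC_SHIFT)
#define BLK_TC_READ	(1 << 0)
#define BLK_TC_WRITE	(1 << 1)
#define BLK_TC_FS	(1 << 8)
#define BLK_TC_PC	(1 << 9)
#define BLK_TC_NOTIFY	(1 << 10)

static int action(int a)
{
	int bits = BLK_TC_WRITE | BLK_TC_READ | BLK_TC_FS | BLK_TC_PC;

	return a & (BLK_TC_ACT(bits));
}

int main(void)
{
	/* read+fs categories survive the mask */
	printf("%#x\n", (unsigned) action(BLK_TC_ACT(BLK_TC_READ | BLK_TC_FS)));
	/* notify does not */
	printf("%#x\n", (unsigned) action(BLK_TC_ACT(BLK_TC_NOTIFY)));	/* 0 */
	return 0;
}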
		   pid_t pid)
{
	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
		return 1;
	if (sector < bt->start_lba || sector > bt->end_lba)
		return 1;
	if (bt->pid && pid != bt->pid)
		return 1;

	return 0;
}

/*
 * Data direction bit lookup
 */
static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
					 BLK_TC_ACT(BLK_TC_WRITE) };

/*
 * Bio action bits of interest
 */
static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER),
					BLK_TC_ACT(BLK_TC_SYNC), 0,
					BLK_TC_ACT(BLK_TC_AHEAD) };

/*
 * More could be added as needed, taking care to increment the decrementer
 * to get correct indexing
 */
#define trace_barrier_bit(rw)	\
	(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
#define trace_sync_bit(rw)	\
	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
#define trace_ahead_bit(rw)	\
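The trace_*_bit() macros turn each flag into a distinct index into bio_act[]: barrier maps to 1, sync to 2, ahead to 4, and 0 when the flag is clear, which is why slots 0 and 3 of the table are zero. A userspace re-check with hypothetical BIO_RW_* positions (the real values differ across kernel versions):

#include <stdio.h>

/* hypothetical flag positions; real BIO_RW_* values vary by kernel */
#define BIO_RW_BARRIER	2
#define BIO_RW_SYNC	4

#define trace_barrier_bit(rw) \
	(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
#define trace_sync_bit(rw) \
	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))

int main(void)
{
	int rw = (1 << BIO_RW_BARRIER) | (1 << BIO_RW_SYNC);

	printf("barrier index: %d\n", trace_barrier_bit(rw));	/* 1 */
	printf("sync index:    %d\n", trace_sync_bit(rw));	/* 2 */
	return 0;
}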
/*
 * Load a blktrace file by reading all the blk_io_trace entries, and storing
 * them as io_pieces like the fio text version would do.
 */
int load_blktrace(struct thread_data *td, const char *filename, int need_swap)
{
	struct blk_io_trace t;
	unsigned long ios[DDIR_RWDIR_CNT], skipped_writes;
	unsigned int rw_bs[DDIR_RWDIR_CNT];
	struct fifo *fifo;
	int fd, i, old_state;
	struct fio_file *f;
	int this_depth, depth;

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		td_verror(td, errno, "open blktrace file");
		return 1;
	}

	fifo = fifo_alloc(TRACE_FIFO_SIZE);

	old_state = td_bump_runstate(td, TD_SETTING_UP);

	td->o.size = 0;

	/* initialize all DDIR_RWDIR_CNT slots; trim stats are read below */
	ios[0] = ios[1] = ios[2] = 0;
	rw_bs[0] = rw_bs[1] = rw_bs[2] = 0;
	skipped_writes = 0;
	this_depth = depth = 0;
	do {
		int ret = trace_fifo_get(td, fifo, fd, &t, sizeof(t));

		if (ret < 0)
			goto err;
		else if (!ret)
			break;
		else if (ret < (int) sizeof(t)) {
			log_err("fio: short fifo get\n");
			break;
		}

		if (need_swap)
			byteswap_trace(&t);

		if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
			log_err("fio: bad magic in blktrace data: %x\n",
								t.magic);
			goto err;
		}
		if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
			log_err("fio: bad blktrace version %d\n",
								t.magic & 0xff);
			goto err;
		}
		ret = discard_pdu(td, fifo, fd, &t);
		if (ret < 0) {
			td_verror(td, ret, "blktrace lseek");
			goto err;
		} else if (t.pdu_len != ret) {
			log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
			goto err;
		}
		if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
			if ((t.action & 0xffff) == __BLK_TA_QUEUE)
				this_depth++;
			else if ((t.action & 0xffff) == __BLK_TA_COMPLETE) {
				depth = max(depth, this_depth);
				this_depth = 0;
			}

			if (t_is_write(&t) && read_only) {
				skipped_writes++;
				continue;
			}
		}

		handle_trace(td, &t, ios, rw_bs);
	} while (1);

	for (i = 0; i < td->files_index; i++) {
		f = td->files[i];
		trace_add_open_close_event(td, f->fileno, FIO_LOG_CLOSE_FILE);
	}

	fifo_free(fifo);
	close(fd);

	td_restore_runstate(td, old_state);

	if (!td->files_index) {
		log_err("fio: did not find replay device(s)\n");
		return 1;
	}

	/*
	 * For stacked devices, we don't always get a COMPLETE event so
	 * the depth grows to insane values. Limit it to something sane(r).
	 */
	if (!depth || depth > 1024)
		depth = 1024;

	if (skipped_writes)
		log_err("fio: %s skips replay of %lu writes due to read-only\n",
						td->o.name, skipped_writes);

	if (!ios[DDIR_READ] && !ios[DDIR_WRITE]) {
		log_err("fio: found no ios in blktrace data\n");
		return 1;
	} else if (ios[DDIR_READ] && !ios[DDIR_WRITE]) {
		td->o.td_ddir = TD_DDIR_READ;
		td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
	} else if (!ios[DDIR_READ] && ios[DDIR_WRITE]) {
		td->o.td_ddir = TD_DDIR_WRITE;
		td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
	} else {
		td->o.td_ddir = TD_DDIR_RW;
		td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ];
		td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE];
		td->o.max_bs[DDIR_TRIM] = rw_bs[DDIR_TRIM];
	}

	/*
	 * We need to do direct/raw ios to the device, to avoid getting
	 * read-ahead in our way.
	 */
	td->o.odirect = 1;

	/*
	 * we don't know if this option was set or not. it defaults to 1,
	 * so we'll just guess that we should override it if it's still 1
	 */
	if (td->o.iodepth == 1)
		td->o.iodepth = depth;

	return 0;
err:
	close(fd);
	fifo_free(fifo);
	return 1;
}
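The depth probe above is just a high-water mark over outstanding queue events, flushed to zero whenever a completion shows up. A stripped-down model of the same idea over a hypothetical event stream (not fio's data structures):

#include <stdio.h>

enum ev { EV_QUEUE, EV_COMPLETE };

int main(void)
{
	/* hypothetical trace: three queues back to back, then completions */
	enum ev stream[] = { EV_QUEUE, EV_QUEUE, EV_QUEUE,
			     EV_COMPLETE, EV_COMPLETE, EV_COMPLETE };
	int pending = 0, depth = 0;

	for (unsigned i = 0; i < sizeof(stream) / sizeof(stream[0]); i++) {
		if (stream[i] == EV_QUEUE)
			pending++;
		else if (pending) {
			if (pending > depth)
				depth = pending;	/* high-water mark */
			pending = 0;	/* reset, as load_blktrace does */
		}
	}
	printf("probed depth: %d\n", depth);	/* 3 */
	return 0;
}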
		   pid_t pid)
{
	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
		return 1;
	if (sector < bt->start_lba || sector > bt->end_lba)
		return 1;
	if (bt->pid && pid != bt->pid)
		return 1;

	return 0;
}

/*
 * Data direction bit lookup
 */
static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
					 BLK_TC_ACT(BLK_TC_WRITE) };

/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))

/*
 * The worker for the various blk_add_trace*() types. Fills out a
 * blk_io_trace structure and places it in a per-cpu subbuffer.
 */
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
			    int rw, u32 what, int error, int pdu_len,
			    void *pdu_data)
{
	struct task_struct *tsk = current;
	struct blk_io_trace *t;
	unsigned long flags;