Worker(const Option &opt, const char *dataPtr, RangeMutex &mu, std::atomic<uint64_t> &ioIdGen,
        std::atomic<bool> &shouldStop)
     : opt_(opt)
     , file_(opt.targetPath, O_RDWR | O_DIRECT)
     , ioSpecGen_(opt.offsetB(), opt.sizeB(), opt_.minIoB(), opt_.maxIoB())
     , dataPtr_(dataPtr)
     , ioIdV_(opt.sizeB(), 0)
     , mu_(mu)
     , ioIdGen_(ioIdGen)
     , shouldStop_(shouldStop) {
 }
void verify(
    const Option &opt, uint64_t sizeB, const std::vector<size_t> &threadIdV,
    cybozu::util::File &file, const std::vector<AlignedArray> &blksV)
{
    const uint32_t bs = opt.bs;
    AlignedArray blk(bs, false);
    uint64_t nVerified = 0, nWritten = 0;
    if (opt.isVerbose) {
        ::printf("thId written read\n");
    }
    for (uint64_t i = 0; i < sizeB; i++) {
        const size_t thId = threadIdV[i];
        if (thId == size_t(-1)) continue;
        nWritten++;
        file.pread(blk.data(), bs, (opt.offsetB() + i) * bs);
        const char *data = blksV[thId].data() + (i * bs);
        if (::memcmp(blk.data(), data, bs) == 0) {
            nVerified++;
        } else {
            //::printf("block %" PRIu64 " invalid.\n", i);
        }
        if (opt.isVerbose) {
            const uint32_t csum0 = cybozu::util::calcChecksum(data, bs, opt.salt);
            const uint32_t csum1 = cybozu::util::calcChecksum(blk.data(), blk.size(), opt.salt);
            ::printf("%zu %08x %08x\n", thId, csum0, csum1);
        }
    }
    ::printf("total/written/verified %" PRIu64 "/%" PRIu64 "/%" PRIu64 ".\n"
             , sizeB, nWritten, nVerified);

    if (nVerified < nWritten) {
#if 1
        throw cybozu::Exception(__func__)
            << "invalid blocks found" << (nWritten - nVerified);
#else
        ::printf("!!!invalid blocks found %" PRIu64 "!!!\n", nWritten - nVerified);
#endif
    }
}
void writeConcurrentlyAndVerify(const Option &opt)
{
    /* Prepare */
    const uint32_t bs = opt.bs;
    cybozu::util::File file(opt.targetPath, O_RDWR | O_DIRECT);

    /* Decide target area size. */
    const uint64_t sizeB = opt.sizeB();
    const uint64_t devSizeB = cybozu::util::getBlockDeviceSize(file.fd()) / bs;
    if (devSizeB < opt.offsetB() + sizeB) {
        throw cybozu::Exception(__func__)
            << "specified area is out of range" << devSizeB << opt.offsetB() << sizeB;
    }

    /* Write zero to the target range. */
    std::cout << "zero-clear" << std::endl;
    AlignedArray blk(bs, true);
    file.lseek(opt.offsetB() * bs);
    for (uint64_t i = 0; i < sizeB; i++) {
        file.write(blk.data(), blk.size());
        if (i % 16 == 0) {
            ::printf(".");
            ::fflush(::stdout);
            if (i % 1024 == 0) {
                ::printf("%" PRIu64 "\n", i);
            }
        }
    }

    /* Prepare resources shared by all threads. */
    std::cout << "prepare resources" << std::endl;
    std::atomic<uint64_t> ioIdGen(1); // 0 means invalid.
    RangeMutex mu(sizeB);
    std::atomic<bool> shouldStop(false);

    /* Prepare writer threads and run concurrently. */
    /* Prepare blocks and fill randomly. */
    cybozu::util::Random<uint64_t> rand;
    std::vector<AlignedArray> blksV;
    std::vector<std::shared_ptr<Worker> > thV;
    for (size_t i = 0; i < opt.numThreads; i++) {
        blksV.emplace_back(opt.size, false);
#if 0 // fill randomly.
        rand.fill(blksV.back().data(), opt.size);
#else // fill fixed value per thread.
        const uint32_t x = i + 1;
        assert(sizeof(x) <= opt.size);
        const size_t n = opt.size / sizeof(x);
        for (size_t j = 0; j < n; j++) {
            ::memcpy(blksV.back().data() + (sizeof(x) * j), &x, sizeof(x));
        }
#endif
        thV.push_back(std::make_shared<Worker>(
                          opt, blksV.back().data(), mu, ioIdGen, shouldStop));
    }
    if (opt.isVerbose) {
        for (size_t thId = 0; thId < opt.numThreads; thId++) {
            ::printf("%zu", thId);
            for (size_t i = 0; i < sizeB; i++) {
                const char *data = blksV[thId].data() + (i * bs);
                ::printf(" %08x", cybozu::util::calcChecksum(data, bs, opt.salt));
            }
            ::printf("\n");
        }
    }
    cybozu::thread::ThreadRunnerSet thSet;
    for (std::shared_ptr<Worker> &w : thV) {
        thSet.add(w);
    }
    std::cout << "start" << std::endl;
    thSet.start();
    util::sleepMs(opt.periodMs);
    shouldStop = true;
    std::cout << "stop" << std::endl;
    thSet.join();
    std::cout << "done" << std::endl;

    /* Determine who writes each block finally. */
    std::vector<size_t> threadIdV(sizeB, size_t(-1)); // -1 means no one writes.
    for (size_t i = 0; i < sizeB; i++) {
        uint64_t maxIoId = 0;
        size_t thId = 0;
        for (size_t j = 0; j < opt.numThreads; j++) {
            const uint64_t ioId = thV[j]->getIoIdV()[i];
            if (maxIoId < ioId) {
                maxIoId = ioId;
                thId = j;
            }
        }
        if (maxIoId > 0) {
            threadIdV[i] = thId;
        }
    }

    for (size_t i = 0; i < opt.numVerify; i++) {
        std::cout << "verify " << i << std::endl;
        verify(opt, sizeB, threadIdV, file, blksV);
        if (i < opt.numVerify - 1 && opt.verifyIntervalMs > 0) {
            util::sleepMs(opt.verifyIntervalMs);
        }
    }
};