STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); if (!get_ldev(mdev)) { if (DRBD_ratelimit(5*HZ, 5)) dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n"); kfree(udw); return 1; } drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr)); put_ldev(mdev); kfree(udw); if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) { switch (mdev->state.conn) { case C_SYNC_SOURCE: case C_SYNC_TARGET: case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T: drbd_resync_finished(mdev); default: /* nothing to do */ break; } } drbd_bcast_sync_progress(mdev); return 1; }
static void atodb_endio(struct bio *bio, int error) { struct drbd_atodb_wait *wc = bio->bi_private; struct drbd_conf *mdev = wc->mdev; struct page *page; int uptodate = bio_flagged(bio, BIO_UPTODATE); /* strange behavior of some lower level drivers... * fail the request by clearing the uptodate flag, * but do not return any error?! */ if (!error && !uptodate) error = -EIO; drbd_chk_io_error(mdev, error, TRUE); if (error && wc->error == 0) wc->error = error; if (atomic_dec_and_test(&wc->count)) complete(&wc->io_done); page = bio->bi_io_vec[0].bv_page; put_page(page); bio_put(bio); mdev->bm_writ_cnt++; put_ldev(mdev); }
static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) { const unsigned long s = req->rq_state; /* remove it from the transfer log. * well, only if it had been there in the first * place... if it had not (local only or conflicting * and never sent), it should still be "empty" as * initialized in drbd_req_new(), so we can list_del() it * here unconditionally */ list_del(&req->tl_requests); /* if it was a write, we may have to set the corresponding * bit(s) out-of-sync first. If it had a local part, we need to * release the reference to the activity log. */ if (rw == WRITE) { /* Set out-of-sync unless both OK flags are set * (local only or remote failed). * Other places where we set out-of-sync: * READ with local io-error */ if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) drbd_set_out_of_sync(mdev, req->sector, req->size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) drbd_set_in_sync(mdev, req->sector, req->size); /* one might be tempted to move the drbd_al_complete_io * to the local io completion callback drbd_endio_pri. * but, if this was a mirror write, we may only * drbd_al_complete_io after this is RQ_NET_DONE, * otherwise the extent could be dropped from the al * before it has actually been written on the peer. * if we crash before our peer knows about the request, * but after the extent has been dropped from the al, * we would forget to resync the corresponding extent. */ if (s & RQ_LOCAL_MASK) { if (get_ldev_if_state(mdev, D_FAILED)) { if (s & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, req->sector); put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " "but my Disk seems to have failed :(\n", (unsigned long long) req->sector); } } } drbd_req_free(req); }
static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) { const unsigned long s = req->rq_state; list_del(&req->tl_requests); if (rw == WRITE) { if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) drbd_set_out_of_sync(mdev, req->sector, req->size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) drbd_set_in_sync(mdev, req->sector, req->size); /* one might be tempted to move the drbd_al_complete_io * to the local io completion callback drbd_endio_pri. * but, if this was a mirror write, we may only * drbd_al_complete_io after this is RQ_NET_DONE, * otherwise the extent could be dropped from the al * before it has actually been written on the peer. * if we crash before our peer knows about the request, * but after the extent has been dropped from the al, * we would forget to resync the corresponding extent. */ if (s & RQ_LOCAL_MASK) { if (get_ldev_if_state(mdev, D_FAILED)) { if (s & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, req->sector); put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " "but my Disk seems to have failed :(\n", (unsigned long long) req->sector); } } } drbd_req_free(req); }
int w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); struct lc_element *updated = aw->al_ext; const unsigned int new_enr = aw->enr; const unsigned int evicted = aw->old_enr; struct al_transaction *buffer; sector_t sector; int i, n, mx; unsigned int extent_nr; u32 xor_sum = 0; if (!get_ldev(mdev)) { dev_err(DEV, "disk is %s, cannot start al transaction (-%d +%d)\n", drbd_disk_str(mdev->state.disk), evicted, new_enr); complete(&((struct update_al_work *)w)->event); return 1; } /* do we have to do a bitmap write, first? * TODO reduce maximum latency: * submit both bios, then wait for both, * instead of doing two synchronous sector writes. * For now, we must not write the transaction, * if we cannot write out the bitmap of the evicted extent. */ if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); /* The bitmap write may have failed, causing a state change. */ if (mdev->state.disk < D_INCONSISTENT) { dev_err(DEV, "disk is %s, cannot write al transaction (-%d +%d)\n", drbd_disk_str(mdev->state.disk), evicted, new_enr); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ buffer = (struct al_transaction *)page_address(mdev->md_io_page); buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); buffer->tr_number = cpu_to_be32(mdev->al_tr_number); n = lc_index_of(mdev->act_log, updated); buffer->updates[0].pos = cpu_to_be32(n); buffer->updates[0].extent = cpu_to_be32(new_enr); xor_sum ^= new_enr; mx = min_t(int, AL_EXTENTS_PT, mdev->act_log->nr_elements - mdev->al_tr_cycle); for (i = 0; i < mx; i++) { unsigned idx = mdev->al_tr_cycle + i; extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; buffer->updates[i+1].pos = cpu_to_be32(idx); buffer->updates[i+1].extent = cpu_to_be32(extent_nr); xor_sum ^= extent_nr; } for (; i < AL_EXTENTS_PT; i++) { buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); xor_sum ^= LC_FREE; } mdev->al_tr_cycle += AL_EXTENTS_PT; if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; buffer->xor_sum = cpu_to_be32(xor_sum); sector = mdev->ldev->md.md_offset + mdev->ldev->md.al_offset + mdev->al_tr_pos; if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) drbd_chk_io_error(mdev, 1, true); if (++mdev->al_tr_pos > div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) mdev->al_tr_pos = 0; D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); mdev->al_tr_number++; mutex_unlock(&mdev->md_io_mutex); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; }
/** * drbd_al_to_on_disk_bm() - * Writes bitmap parts covered by active AL extents * @mdev: DRBD device. * * Called when we detach (unconfigure) local storage, * or when we go from R_PRIMARY to R_SECONDARY role. */ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) { int i, nr_elements; unsigned int enr; struct bio **bios; struct drbd_atodb_wait wc; ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) return; /* sorry, I don't have any act_log etc... */ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); nr_elements = mdev->act_log->nr_elements; /* GFP_KERNEL, we are not in anyone's write-out path */ bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); if (!bios) goto submit_one_by_one; atomic_set(&wc.count, 0); init_completion(&wc.io_done); wc.mdev = mdev; wc.error = 0; for (i = 0; i < nr_elements; i++) { enr = lc_element_by_index(mdev->act_log, i)->lc_number; if (enr == LC_FREE) continue; /* next statement also does atomic_inc wc.count and local_cnt */ if (atodb_prepare_unless_covered(mdev, bios, enr/AL_EXT_PER_BM_SECT, &wc)) goto free_bios_submit_one_by_one; } /* unnecessary optimization? */ lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); /* all prepared, submit them */ for (i = 0; i < nr_elements; i++) { if (bios[i] == NULL) break; if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) { bios[i]->bi_rw = WRITE; bio_endio(bios[i], -EIO); } else { submit_bio(WRITE, bios[i]); } } drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); /* always (try to) flush bitmap to stable storage */ drbd_md_flush(mdev); /* In case we did not submit a single IO do not wait for * them to complete. ( Because we would wait forever here. ) * * In case we had IOs and they are already complete, there * is not point in waiting anyways. * Therefore this if () ... */ if (atomic_read(&wc.count)) wait_for_completion(&wc.io_done); put_ldev(mdev); kfree(bios); return; free_bios_submit_one_by_one: /* free everything by calling the endio callback directly. */ for (i = 0; i < nr_elements && bios[i]; i++) bio_endio(bios[i], 0); kfree(bios); submit_one_by_one: dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); for (i = 0; i < mdev->act_log->nr_elements; i++) { enr = lc_element_by_index(mdev->act_log, i)->lc_number; if (enr == LC_FREE) continue; /* Really slow: if we have al-extents 16..19 active, * sector 4 will be written four times! Synchronous! */ drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); } lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); put_ldev(mdev); }
void drbd_req_destroy(struct kref *kref) { struct drbd_request *req = container_of(kref, struct drbd_request, kref); struct drbd_conf *mdev = req->w.mdev; const unsigned s = req->rq_state; if ((req->master_bio && !(s & RQ_POSTPONED)) || atomic_read(&req->completion_ref) || (s & RQ_LOCAL_PENDING) || ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) { dev_err(DEV, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n", s, atomic_read(&req->completion_ref)); return; } /* remove it from the transfer log. * well, only if it had been there in the first * place... if it had not (local only or conflicting * and never sent), it should still be "empty" as * initialized in drbd_req_new(), so we can list_del() it * here unconditionally */ list_del_init(&req->tl_requests); /* if it was a write, we may have to set the corresponding * bit(s) out-of-sync first. If it had a local part, we need to * release the reference to the activity log. */ if (s & RQ_WRITE) { /* Set out-of-sync unless both OK flags are set * (local only or remote failed). * Other places where we set out-of-sync: * READ with local io-error */ /* There is a special case: * we may notice late that IO was suspended, * and postpone, or schedule for retry, a write, * before it even was submitted or sent. * In that case we do not want to touch the bitmap at all. */ if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) { if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) drbd_set_in_sync(mdev, req->i.sector, req->i.size); } /* one might be tempted to move the drbd_al_complete_io * to the local io completion callback drbd_request_endio. * but, if this was a mirror write, we may only * drbd_al_complete_io after this is RQ_NET_DONE, * otherwise the extent could be dropped from the al * before it has actually been written on the peer. * if we crash before our peer knows about the request, * but after the extent has been dropped from the al, * we would forget to resync the corresponding extent. */ if (s & RQ_IN_ACT_LOG) { if (get_ldev_if_state(mdev, D_FAILED)) { drbd_al_complete_io(mdev, &req->i); put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu, %u), " "but my Disk seems to have failed :(\n", (unsigned long long) req->i.sector, req->i.size); } } } mempool_free(req, drbd_request_mempool); }