/**
 * lc_set - bind a label to the element stored at a given index
 * @lc: the lru cache to operate on
 * @enr: the label to assign
 * @index: index of the element that shall carry @enr
 *
 * Meant for restoring the active set from a previously recorded state.
 * An @index outside of [0, nr_elements) is silently ignored.
 */
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
{
	struct lc_element *elem;

	if (!(index >= 0 && index < lc->nr_elements))
		return;

	elem = lc_element_by_index(lc, index);
	elem->lc_number = enr;

	/* re-hash: drop the element from its old slot, add it to the new one */
	hlist_del_init(&elem->colision);
	hlist_add_head(&elem->colision, lc_hash_slot(lc, enr));

	/* referenced elements go onto in_use, all others onto the lru list */
	if (elem->refcnt)
		list_move(&elem->list, &lc->in_use);
	else
		list_move(&elem->list, &lc->lru);
}
/** * drbd_al_shrink() - Removes all active extents form the activity log * @mdev: DRBD device. * * Removes all active extents form the activity log, waiting until * the reference count of each entry dropped to 0 first, of course. * * You need to lock mdev->act_log with lc_try_lock() / lc_unlock() */ void drbd_al_shrink(struct drbd_conf *mdev) { struct lc_element *al_ext; int i; D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); for (i = 0; i < mdev->act_log->nr_elements; i++) { al_ext = lc_element_by_index(mdev->act_log, i); if (al_ext->lc_number == LC_FREE) continue; wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext)); } wake_up(&mdev->al_wait); }
/** * lc_dump - Dump a complete LRU cache to seq in textual form. * @lc: the lru cache to operate on * @seq: the &struct seq_file pointer to seq_printf into * @utext: user supplied "heading" or other info * @detail: function pointer the user may provide to dump further details * of the object the lc_element is embedded in. */ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, void (*detail) (struct seq_file *, struct lc_element *)) { unsigned int nr_elements = lc->nr_elements; struct lc_element *e; int i; seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); for (i = 0; i < nr_elements; i++) { e = lc_element_by_index(lc, i); if (e->lc_number == LC_FREE) { seq_printf(seq, "\t%2d: FREE\n", i); } else { seq_printf(seq, "\t%2d: %4u %4u ", i, e->lc_number, e->refcnt); detail(seq, e); } } }
/** * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents * @mdev: DRBD device. */ void drbd_al_apply_to_bm(struct drbd_conf *mdev) { unsigned int enr; unsigned long add = 0; char ppb[10]; int i; wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); for (i = 0; i < mdev->act_log->nr_elements; i++) { enr = lc_element_by_index(mdev->act_log, i)->lc_number; if (enr == LC_FREE) continue; add += drbd_bm_ALe_set_all(mdev, enr); } lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n", ppsize(ppb, Bit2KB(add))); }
int w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); struct lc_element *updated = aw->al_ext; const unsigned int new_enr = aw->enr; const unsigned int evicted = aw->old_enr; struct al_transaction *buffer; sector_t sector; int i, n, mx; unsigned int extent_nr; u32 xor_sum = 0; if (!get_ldev(mdev)) { dev_err(DEV, "disk is %s, cannot start al transaction (-%d +%d)\n", drbd_disk_str(mdev->state.disk), evicted, new_enr); complete(&((struct update_al_work *)w)->event); return 1; } /* do we have to do a bitmap write, first? * TODO reduce maximum latency: * submit both bios, then wait for both, * instead of doing two synchronous sector writes. * For now, we must not write the transaction, * if we cannot write out the bitmap of the evicted extent. */ if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); /* The bitmap write may have failed, causing a state change. */ if (mdev->state.disk < D_INCONSISTENT) { dev_err(DEV, "disk is %s, cannot write al transaction (-%d +%d)\n", drbd_disk_str(mdev->state.disk), evicted, new_enr); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... 
*/ buffer = (struct al_transaction *)page_address(mdev->md_io_page); buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); buffer->tr_number = cpu_to_be32(mdev->al_tr_number); n = lc_index_of(mdev->act_log, updated); buffer->updates[0].pos = cpu_to_be32(n); buffer->updates[0].extent = cpu_to_be32(new_enr); xor_sum ^= new_enr; mx = min_t(int, AL_EXTENTS_PT, mdev->act_log->nr_elements - mdev->al_tr_cycle); for (i = 0; i < mx; i++) { unsigned idx = mdev->al_tr_cycle + i; extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; buffer->updates[i+1].pos = cpu_to_be32(idx); buffer->updates[i+1].extent = cpu_to_be32(extent_nr); xor_sum ^= extent_nr; } for (; i < AL_EXTENTS_PT; i++) { buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); xor_sum ^= LC_FREE; } mdev->al_tr_cycle += AL_EXTENTS_PT; if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; buffer->xor_sum = cpu_to_be32(xor_sum); sector = mdev->ldev->md.md_offset + mdev->ldev->md.al_offset + mdev->al_tr_pos; if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) drbd_chk_io_error(mdev, 1, true); if (++mdev->al_tr_pos > div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) mdev->al_tr_pos = 0; D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); mdev->al_tr_number++; mutex_unlock(&mdev->md_io_mutex); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; }
/** * drbd_al_to_on_disk_bm() - * Writes bitmap parts covered by active AL extents * @mdev: DRBD device. * * Called when we detach (unconfigure) local storage, * or when we go from R_PRIMARY to R_SECONDARY role. */ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) { int i, nr_elements; unsigned int enr; struct bio **bios; struct drbd_atodb_wait wc; ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) return; /* sorry, I don't have any act_log etc... */ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); nr_elements = mdev->act_log->nr_elements; /* GFP_KERNEL, we are not in anyone's write-out path */ bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); if (!bios) goto submit_one_by_one; atomic_set(&wc.count, 0); init_completion(&wc.io_done); wc.mdev = mdev; wc.error = 0; for (i = 0; i < nr_elements; i++) { enr = lc_element_by_index(mdev->act_log, i)->lc_number; if (enr == LC_FREE) continue; /* next statement also does atomic_inc wc.count and local_cnt */ if (atodb_prepare_unless_covered(mdev, bios, enr/AL_EXT_PER_BM_SECT, &wc)) goto free_bios_submit_one_by_one; } /* unnecessary optimization? */ lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); /* all prepared, submit them */ for (i = 0; i < nr_elements; i++) { if (bios[i] == NULL) break; if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) { bios[i]->bi_rw = WRITE; bio_endio(bios[i], -EIO); } else { submit_bio(WRITE, bios[i]); } } drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); /* always (try to) flush bitmap to stable storage */ drbd_md_flush(mdev); /* In case we did not submit a single IO do not wait for * them to complete. ( Because we would wait forever here. ) * * In case we had IOs and they are already complete, there * is not point in waiting anyways. * Therefore this if () ... */ if (atomic_read(&wc.count)) wait_for_completion(&wc.io_done); put_ldev(mdev); kfree(bios); return; free_bios_submit_one_by_one: /* free everything by calling the endio callback directly. 
*/ for (i = 0; i < nr_elements && bios[i]; i++) bio_endio(bios[i], 0); kfree(bios); submit_one_by_one: dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); for (i = 0; i < mdev->act_log->nr_elements; i++) { enr = lc_element_by_index(mdev->act_log, i)->lc_number; if (enr == LC_FREE) continue; /* Really slow: if we have al-extents 16..19 active, * sector 4 will be written four times! Synchronous! */ drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); } lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); put_ldev(mdev); }