static int osd_object_destroy(const struct lu_env *env, struct dt_object *dt, struct thandle *th) { char *buf = osd_oti_get(env)->oti_str; struct osd_object *obj = osd_dt_obj(dt); struct osd_device *osd = osd_obj2dev(obj); const struct lu_fid *fid = lu_object_fid(&dt->do_lu); struct osd_thandle *oh; int rc; uint64_t oid, zapid; ENTRY; LASSERT(obj->oo_db != NULL); LASSERT(dt_object_exists(dt)); LASSERT(!lu_object_is_dying(dt->do_lu.lo_header)); oh = container_of0(th, struct osd_thandle, ot_super); LASSERT(oh != NULL); LASSERT(oh->ot_tx != NULL); /* remove obj ref from index dir (it depends) */ zapid = osd_get_name_n_idx(env, osd, fid, buf); rc = -zap_remove(osd->od_os, zapid, buf, oh->ot_tx); if (rc) { CERROR("%s: zap_remove(%s) failed: rc = %d\n", osd->od_svname, buf, rc); GOTO(out, rc); } rc = osd_xattrs_destroy(env, obj, oh); if (rc) { CERROR("%s: cannot destroy xattrs for %s: rc = %d\n", osd->od_svname, buf, rc); GOTO(out, rc); } /* Remove object from inode accounting. It is not fatal for the destroy * operation if something goes wrong while updating accounting, but we * still log an error message to notify the administrator */ rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid, obj->oo_attr.la_uid, -1, oh->ot_tx); if (rc) CERROR("%s: failed to remove "DFID" from accounting ZAP for usr" " %d: rc = %d\n", osd->od_svname, PFID(fid), obj->oo_attr.la_uid, rc); rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid, obj->oo_attr.la_gid, -1, oh->ot_tx); if (rc) CERROR("%s: failed to remove "DFID" from accounting ZAP for grp" " %d: rc = %d\n", osd->od_svname, PFID(fid), obj->oo_attr.la_gid, rc); oid = obj->oo_db->db_object; if (obj->oo_destroy == OSD_DESTROY_SYNC) { rc = -dmu_object_free(osd->od_os, oid, oh->ot_tx); if (rc) CERROR("%s: failed to free %s "LPU64": rc = %d\n", osd->od_svname, buf, oid, rc); } else { /* asynchronous destroy */ rc = osd_object_unlinked_add(obj, oh); if (rc) GOTO(out, rc); rc = -zap_add_int(osd->od_os, osd->od_unlinkedid, oid, oh->ot_tx); if 
(rc) CERROR("%s: zap_add_int() failed %s "LPU64": rc = %d\n", osd->od_svname, buf, oid, rc); } out: /* not needed in the cache anymore */ set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags); if (rc == 0) obj->oo_destroyed = 1; RETURN (0); }
static int osd_declare_object_create(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr, struct dt_allocation_hint *hint, struct dt_object_format *dof, struct thandle *handle) { char *buf = osd_oti_get(env)->oti_str; const struct lu_fid *fid = lu_object_fid(&dt->do_lu); struct osd_object *obj = osd_dt_obj(dt); struct osd_device *osd = osd_obj2dev(obj); struct osd_thandle *oh; uint64_t zapid; int rc; ENTRY; LASSERT(dof); switch (dof->dof_type) { case DFT_REGULAR: case DFT_SYM: case DFT_NODE: if (obj->oo_dt.do_body_ops == NULL) obj->oo_dt.do_body_ops = &osd_body_ops; break; default: break; } LASSERT(handle != NULL); oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_tx != NULL); switch (dof->dof_type) { case DFT_DIR: dt->do_index_ops = &osd_dir_ops; case DFT_INDEX: /* for zap create */ dmu_tx_hold_zap(oh->ot_tx, DMU_NEW_OBJECT, 1, NULL); break; case DFT_REGULAR: case DFT_SYM: case DFT_NODE: /* first, we'll create new object */ dmu_tx_hold_bonus(oh->ot_tx, DMU_NEW_OBJECT); break; default: LBUG(); break; } /* and we'll add it to some mapping */ zapid = osd_get_name_n_idx(env, osd, fid, buf); dmu_tx_hold_bonus(oh->ot_tx, zapid); dmu_tx_hold_zap(oh->ot_tx, zapid, TRUE, buf); /* we will also update inode accounting ZAPs */ dmu_tx_hold_bonus(oh->ot_tx, osd->od_iusr_oid); dmu_tx_hold_zap(oh->ot_tx, osd->od_iusr_oid, TRUE, buf); dmu_tx_hold_bonus(oh->ot_tx, osd->od_igrp_oid); dmu_tx_hold_zap(oh->ot_tx, osd->od_igrp_oid, TRUE, buf); dmu_tx_hold_sa_create(oh->ot_tx, ZFS_SA_BASE_ATTR_SIZE); __osd_xattr_declare_set(env, obj, sizeof(struct lustre_mdt_attrs), XATTR_NAME_LMA, oh); rc = osd_declare_quota(env, osd, attr->la_uid, attr->la_gid, 1, oh, false, NULL, false); RETURN(rc); }
/* * Concurrency: @dt is write locked. */ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr, struct dt_allocation_hint *hint, struct dt_object_format *dof, struct thandle *th) { struct zpl_direntry *zde = &osd_oti_get(env)->oti_zde.lzd_reg; const struct lu_fid *fid = lu_object_fid(&dt->do_lu); struct osd_object *obj = osd_dt_obj(dt); struct osd_device *osd = osd_obj2dev(obj); char *buf = osd_oti_get(env)->oti_str; struct osd_thandle *oh; dmu_buf_t *db; uint64_t zapid; int rc; ENTRY; /* concurrent create declarations should not see * the object inconsistent (db, attr, etc). * in regular cases acquisition should be cheap */ down(&obj->oo_guard); LASSERT(osd_invariant(obj)); LASSERT(!dt_object_exists(dt)); LASSERT(dof != NULL); LASSERT(th != NULL); oh = container_of0(th, struct osd_thandle, ot_super); /* * XXX missing: Quote handling. */ LASSERT(obj->oo_db == NULL); /* to follow ZFS on-disk format we need * to initialize parent dnode properly */ zapid = 0; if (hint != NULL && hint->dah_parent != NULL && !dt_object_remote(hint->dah_parent)) zapid = osd_dt_obj(hint->dah_parent)->oo_db->db_object; db = osd_create_type_f(dof->dof_type)(env, obj, attr, zapid, oh); if (IS_ERR(db)) GOTO(out, rc = PTR_ERR(db)); zde->zde_pad = 0; zde->zde_dnode = db->db_object; zde->zde_type = IFTODT(attr->la_mode & S_IFMT); zapid = osd_get_name_n_idx(env, osd, fid, buf); rc = -zap_add(osd->od_os, zapid, buf, 8, 1, zde, oh->ot_tx); if (rc) GOTO(out, rc); /* Add new object to inode accounting. * Errors are not considered as fatal */ rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid, (attr->la_valid & LA_UID) ? attr->la_uid : 0, 1, oh->ot_tx); if (rc) CERROR("%s: failed to add "DFID" to accounting ZAP for usr %d " "(%d)\n", osd->od_svname, PFID(fid), attr->la_uid, rc); rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid, (attr->la_valid & LA_GID) ? 
attr->la_gid : 0, 1, oh->ot_tx); if (rc) CERROR("%s: failed to add "DFID" to accounting ZAP for grp %d " "(%d)\n", osd->od_svname, PFID(fid), attr->la_gid, rc); /* configure new osd object */ obj->oo_db = db; rc = osd_object_init0(env, obj); LASSERT(ergo(rc == 0, dt_object_exists(dt))); LASSERT(osd_invariant(obj)); rc = osd_init_lma(env, obj, fid, oh); if (rc) { CERROR("%s: can not set LMA on "DFID": rc = %d\n", osd->od_svname, PFID(fid), rc); /* ignore errors during LMA initialization */ rc = 0; } out: up(&obj->oo_guard); RETURN(rc); }
static int osp_object_create(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr, struct dt_allocation_hint *hint, struct dt_object_format *dof, struct thandle *th) { struct osp_thread_info *osi = osp_env_info(env); struct osp_device *d = lu2osp_dev(dt->do_lu.lo_dev); struct osp_object *o = dt2osp_obj(dt); int rc = 0; struct lu_fid *fid = &osi->osi_fid; ENTRY; if (o->opo_reserved) { /* regular case, fid is assigned holding trunsaction open */ osp_object_assign_fid(env, d, o); } memcpy(fid, lu_object_fid(&dt->do_lu), sizeof(*fid)); LASSERTF(fid_is_sane(fid), "fid for osp_obj %p is insane"DFID"!\n", osp_obj, PFID(fid)); if (!o->opo_reserved) { /* special case, id was assigned outside of transaction * see comments in osp_declare_attr_set */ spin_lock(&d->opd_pre_lock); osp_update_last_fid(d, fid); spin_unlock(&d->opd_pre_lock); } CDEBUG(D_INODE, "fid for osp_obj %p is "DFID"!\n", osp_obj, PFID(fid)); /* If the precreate ends, it means it will be ready to rollover to * the new sequence soon, all the creation should be synchronized, * otherwise during replay, the replay fid will be inconsistent with * last_used/create fid */ if (osp_precreate_end_seq(env, d) && osp_is_fid_client(d)) th->th_sync = 1; /* * it's OK if the import is inactive by this moment - id was created * by OST earlier, we just need to maintain it consistently on the disk * once import is reconnected, OSP will claim this and other objects * used and OST either keep them, if they exist or recreate */ /* we might have lost precreated objects */ if (unlikely(d->opd_gap_count) > 0) { spin_lock(&d->opd_pre_lock); if (d->opd_gap_count > 0) { int count = d->opd_gap_count; ostid_set_id(&osi->osi_oi, fid_oid(&d->opd_gap_start_fid)); d->opd_gap_count = 0; spin_unlock(&d->opd_pre_lock); CDEBUG(D_HA, "Writting gap "DFID"+%d in llog\n", PFID(&d->opd_gap_start_fid), count); /* real gap handling is disabled intil ORI-692 will be * fixed, now we only report gaps */ } else { 
spin_unlock(&d->opd_pre_lock); } } /* new object, the very first ->attr_set() * initializing attributes needs no logging */ o->opo_new = 1; /* Only need update last_used oid file, seq file will only be update * during seq rollover */ osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &d->opd_last_used_fid.f_oid, d->opd_index); rc = dt_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb, &osi->osi_off, th); CDEBUG(D_HA, "%s: Wrote last used FID: "DFID", index %d: %d\n", d->opd_obd->obd_name, PFID(fid), d->opd_index, rc); RETURN(rc); }
int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, struct inode *inode, struct cl_object *clob, int agl) { struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr; struct cl_inode_info *lli = cl_i2info(inode); const struct lu_fid *fid = lu_object_fid(&clob->co_lu); struct ccc_io *cio = ccc_env_io(env); struct cl_lock *lock; int result; result = 0; if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) { CDEBUG(D_DLMTRACE, "Glimpsing inode "DFID"\n", PFID(fid)); if (lli->lli_has_smd) { /* NOTE: this looks like DLM lock request, but it may * not be one. Due to CEF_ASYNC flag (translated * to LDLM_FL_HAS_INTENT by osc), this is * glimpse request, that won't revoke any * conflicting DLM locks held. Instead, * ll_glimpse_callback() will be called on each * client holding a DLM lock against this file, * and resulting size will be returned for each * stripe. DLM lock on [0, EOF] is acquired only * if there were no conflicting locks. If there * were conflicting locks, enqueuing or waiting * fails with -ENAVAIL, but valid inode * attributes are returned anyway. */ *descr = whole_file; descr->cld_obj = clob; descr->cld_mode = CLM_PHANTOM; descr->cld_enq_flags = CEF_ASYNC | CEF_MUST; if (agl) descr->cld_enq_flags |= CEF_AGL; cio->cui_glimpse = 1; /* * CEF_ASYNC is used because glimpse sub-locks cannot * deadlock (because they never conflict with other * locks) and, hence, can be enqueued out-of-order. * * CEF_MUST protects glimpse lock from conversion into * a lockless mode. */ lock = cl_lock_request(env, io, descr, "glimpse", current); cio->cui_glimpse = 0; if (lock == NULL) return 0; if (IS_ERR(lock)) return PTR_ERR(lock); LASSERT(agl == 0); result = cl_wait(env, lock); if (result == 0) { cl_merge_lvb(env, inode); if (cl_isize_read(inode) > 0 && inode->i_blocks == 0) { /* * LU-417: Add dirty pages block count * lest i_blocks reports 0, some "cp" or * "tar" may think it's a completely * sparse file and skip it. 
*/ inode->i_blocks = dirty_cnt(inode); } cl_unuse(env, lock); } cl_lock_release(env, lock, "glimpse", current); } else { CDEBUG(D_DLMTRACE, "No objects for inode\n"); cl_merge_lvb(env, inode); } } return result; }
static int llog_osd_prev_block(const struct lu_env *env, struct llog_handle *loghandle, int prev_idx, void *buf, int len) { struct llog_thread_info *lgi = llog_info(env); struct dt_object *o; struct dt_device *dt; loff_t cur_offset; int rc; if (len == 0 || len & (LLOG_CHUNK_SIZE - 1)) return -EINVAL; CDEBUG(D_OTHER, "looking for log index %u\n", prev_idx); LASSERT(loghandle); LASSERT(loghandle->lgh_ctxt); o = loghandle->lgh_obj; LASSERT(o); LASSERT(dt_object_exists(o)); dt = lu2dt_dev(o->do_lu.lo_dev); LASSERT(dt); cur_offset = LLOG_CHUNK_SIZE; llog_skip_over(&cur_offset, 0, prev_idx); rc = dt_attr_get(env, o, &lgi->lgi_attr, BYPASS_CAPA); if (rc) GOTO(out, rc); while (cur_offset < lgi->lgi_attr.la_size) { struct llog_rec_hdr *rec, *last_rec; struct llog_rec_tail *tail; lgi->lgi_buf.lb_len = len; lgi->lgi_buf.lb_buf = buf; /* It is OK to have locking around dt_read() only, see * comment in llog_osd_next_block for details */ dt_read_lock(env, o, 0); rc = dt_read(env, o, &lgi->lgi_buf, &cur_offset); dt_read_unlock(env, o); if (rc < 0) { CERROR("%s: can't read llog block from log "DFID " offset "LPU64": rc = %d\n", o->do_lu.lo_dev->ld_obd->obd_name, PFID(lu_object_fid(&o->do_lu)), cur_offset, rc); GOTO(out, rc); } if (rc == 0) /* end of file, nothing to do */ GOTO(out, rc); if (rc < sizeof(*tail)) { CERROR("%s: invalid llog block at log id "DOSTID"/%u " "offset "LPU64"\n", o->do_lu.lo_dev->ld_obd->obd_name, POSTID(&loghandle->lgh_id.lgl_oi), loghandle->lgh_id.lgl_ogen, cur_offset); GOTO(out, rc = -EINVAL); } rec = buf; if (LLOG_REC_HDR_NEEDS_SWABBING(rec)) lustre_swab_llog_rec(rec); tail = (struct llog_rec_tail *)((char *)buf + rc - sizeof(struct llog_rec_tail)); /* get the last record in block */ last_rec = (struct llog_rec_hdr *)((char *)buf + rc - le32_to_cpu(tail->lrt_len)); if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec)) lustre_swab_llog_rec(last_rec); LASSERT(last_rec->lrh_index == tail->lrt_index); /* this shouldn't happen */ if (tail->lrt_index == 0) { CERROR("%s: 
invalid llog tail at log id "DOSTID"/%u " "offset "LPU64"\n", o->do_lu.lo_dev->ld_obd->obd_name, POSTID(&loghandle->lgh_id.lgl_oi), loghandle->lgh_id.lgl_ogen, cur_offset); GOTO(out, rc = -EINVAL); } if (tail->lrt_index < prev_idx) continue; /* sanity check that the start of the new buffer is no farther * than the record that we wanted. This shouldn't happen. */ if (rec->lrh_index > prev_idx) { CERROR("%s: missed desired record? %u > %u\n", o->do_lu.lo_dev->ld_obd->obd_name, rec->lrh_index, prev_idx); GOTO(out, rc = -ENOENT); } GOTO(out, rc = 0); } GOTO(out, rc = -EIO); out: return rc; }
static int osp_declare_object_create(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr, struct dt_allocation_hint *hint, struct dt_object_format *dof, struct thandle *th) { struct osp_thread_info *osi = osp_env_info(env); struct osp_device *d = lu2osp_dev(dt->do_lu.lo_dev); struct osp_object *o = dt2osp_obj(dt); const struct lu_fid *fid; int rc = 0; ENTRY; /* should happen to non-0 OSP only so that at least one object * has been already declared in the scenario and LOD should * cleanup that */ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_CREATE_FAIL) && d->opd_index == 1) RETURN(-ENOSPC); LASSERT(d->opd_last_used_oid_file); fid = lu_object_fid(&dt->do_lu); /* * There can be gaps in precreated ids and record to unlink llog * XXX: we do not handle gaps yet, implemented before solution * was found to be racy, so we disabled that. there is no * point in making useless but expensive llog declaration. */ /* rc = osp_sync_declare_add(env, o, MDS_UNLINK64_REC, th); */ if (unlikely(!fid_is_zero(fid))) { /* replay case: caller knows fid */ osi->osi_off = sizeof(osi->osi_id) * d->opd_index; rc = dt_declare_record_write(env, d->opd_last_used_oid_file, sizeof(osi->osi_id), osi->osi_off, th); RETURN(rc); } /* * in declaration we need to reserve object so that we don't block * awaiting precreation RPC to complete */ rc = osp_precreate_reserve(env, d); /* * we also need to declare update to local "last used id" file for * recovery if object isn't used for a reason, we need to release * reservation, this can be made in osd_object_release() */ if (rc == 0) { /* mark id is reserved: in create we don't want to talk * to OST */ LASSERT(o->opo_reserved == 0); o->opo_reserved = 1; /* common for all OSPs file hystorically */ osi->osi_off = sizeof(osi->osi_id) * d->opd_index; rc = dt_declare_record_write(env, d->opd_last_used_oid_file, sizeof(osi->osi_id), osi->osi_off, th); } else { /* not needed in the cache anymore */ set_bit(LU_OBJECT_HEARD_BANSHEE, 
&dt->do_lu.lo_header->loh_flags); } RETURN(rc); }
/*
 * Write record @rec (optionally followed by payload @buf) to the llog
 * behind @loghandle, inside transaction @th.
 *
 * Appends if idx == -1, otherwise overwrites record idx (idx == 0 rewrites
 * the log header only).
 *
 * In append mode the record is assigned the next index, padded so that it
 * does not cross a LLOG_CHUNK_SIZE boundary, its bit is set in the header
 * bitmap and the header is rewritten before the record itself is written.
 * If either write fails the bitmap, count, last index and tail index are
 * restored and the header is written again.
 *
 * \param[in]  env		execution environment
 * \param[in]  loghandle	log to write to
 * \param[in]  rec		record header; lrh_index is set in append mode
 * \param[out] reccookie	optional cookie describing the written record
 * \param[in]  cookiecount	not used by this function
 * \param[in]  buf		optional record payload
 * \param[in]  idx		-1 to append, otherwise the index to overwrite
 * \param[in]  th		transaction handle
 *
 * \retval 1		record written and @reccookie filled in
 * \retval 0		record (or header only) written, no cookie requested
 * \retval -E2BIG	record does not fit into a chunk
 * \retval -EINVAL	overwrite with a length different from llh_size
 * \retval -EFAULT	overwrite index does not match lgh_cur_idx
 * \retval -ENOSPC	no free index left in the log bitmap
 * \retval negative errno from the dt/llog helpers otherwise
 */
static int llog_osd_write_rec(const struct lu_env *env,
			      struct llog_handle *loghandle,
			      struct llog_rec_hdr *rec,
			      struct llog_cookie *reccookie, int cookiecount,
			      void *buf, int idx, struct thandle *th)
{
	struct llog_thread_info	*lgi = llog_info(env);
	struct llog_log_hdr	*llh;
	int			 reclen = rec->lrh_len;
	int			 index, rc, old_tail_idx;
	struct llog_rec_tail	*lrt;
	struct dt_object	*o;
	size_t			 left;

	LASSERT(env);
	llh = loghandle->lgh_hdr;
	LASSERT(llh);
	o = loghandle->lgh_obj;
	LASSERT(o);
	LASSERT(th);

	CDEBUG(D_OTHER, "new record %x to "DFID"\n",
	       rec->lrh_type, PFID(lu_object_fid(&o->do_lu)));

	/* record length should not be bigger than LLOG_CHUNK_SIZE; with a
	 * separate payload, room for the header and tail is needed too */
	if (buf)
		rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
		      sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
	else
		rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
	if (rc)
		return rc;

	/* fetch the current attributes; la_size is used below */
	rc = dt_attr_get(env, o, &lgi->lgi_attr, NULL);
	if (rc)
		return rc;

	if (buf)
		/* write_blob adds header and tail to lrh_len. */
		reclen = sizeof(*rec) + rec->lrh_len +
			 sizeof(struct llog_rec_tail);

	/* overwrite mode: rewrite the header, then the record in place */
	if (idx != -1) {
		/* no header: only allowed to insert record 1 */
		if (idx != 1 && lgi->lgi_attr.la_size == 0)
			LBUG();

		/* fixed-size logs only accept records of llh_size */
		if (idx && llh->llh_size && llh->llh_size != rec->lrh_len)
			return -EINVAL;

		/* the two checks below only log, they do not fail the write */
		if (!ext2_test_bit(idx, llh->llh_bitmap))
			CERROR("%s: modify unset record %u\n",
			       o->do_lu.lo_dev->ld_obd->obd_name, idx);
		if (idx != rec->lrh_index)
			CERROR("%s: index mismatch %d %u\n",
			       o->do_lu.lo_dev->ld_obd->obd_name, idx,
			       rec->lrh_index);

		lgi->lgi_off = 0;
		rc = llog_osd_write_blob(env, o, &llh->llh_hdr, NULL,
					 &lgi->lgi_off, th);
		/* we are done if we only write the header or on error */
		if (rc || idx == 0)
			return rc;

		if (buf) {
			/* We assume that caller has set lgh_cur_* */
			lgi->lgi_off = loghandle->lgh_cur_offset;
			CDEBUG(D_OTHER,
			       "modify record "DOSTID": idx:%d/%u/%d, len:%u "
			       "offset %llu\n",
			       POSTID(&loghandle->lgh_id.lgl_oi), idx,
			       rec->lrh_index,
			       loghandle->lgh_cur_idx, rec->lrh_len,
			       (long long)(lgi->lgi_off - sizeof(*llh)));
			if (rec->lrh_index != loghandle->lgh_cur_idx) {
				CERROR("%s: modify idx mismatch %u/%d\n",
				       o->do_lu.lo_dev->ld_obd->obd_name, idx,
				       loghandle->lgh_cur_idx);
				return -EFAULT;
			}
		} else {
			/* Assumes constant lrh_len */
			lgi->lgi_off = sizeof(*llh) + (idx - 1) * reclen;
		}

		rc = llog_osd_write_blob(env, o, rec, buf, &lgi->lgi_off, th);
		if (rc == 0 && reccookie) {
			reccookie->lgc_lgl = loghandle->lgh_id;
			reccookie->lgc_index = idx;
			rc = 1;
		}
		return rc;
	}

	/* Make sure that records don't cross a chunk boundary, so we can
	 * process them page-at-a-time if needed.  If it will cross a chunk
	 * boundary, write in a fake (but referenced) entry to pad the chunk.
	 *
	 * We know that llog_current_log() will return a loghandle that is
	 * big enough to hold reclen, so all we care about is padding here.
	 */
	LASSERT(lgi->lgi_attr.la_valid & LA_SIZE);
	lgi->lgi_off = lgi->lgi_attr.la_size;
	/* bytes remaining in the chunk that contains the current end of file */
	left = LLOG_CHUNK_SIZE - (lgi->lgi_off & (LLOG_CHUNK_SIZE - 1));
	/* NOTE: padding is a record, but no bit is set */
	if (left != 0 && left != reclen &&
	    left < (reclen + LLOG_MIN_REC_SIZE)) {
		index = loghandle->lgh_last_idx + 1;
		rc = llog_osd_pad(env, o, &lgi->lgi_off, left, index, th);
		if (rc)
			return rc;
		loghandle->lgh_last_idx++; /*for pad rec*/
	}
	/* if it's the last idx in log file, then return -ENOSPC */
	if (loghandle->lgh_last_idx >= LLOG_BITMAP_SIZE(llh) - 1)
		return -ENOSPC;

	/* take the next index for the new record */
	loghandle->lgh_last_idx++;
	index = loghandle->lgh_last_idx;
	LASSERT(index < LLOG_BITMAP_SIZE(llh));
	rec->lrh_index = index;
	if (buf == NULL) {
		/* the record is self-contained: fill in its tail in place */
		lrt = (struct llog_rec_tail *)((char *)rec + rec->lrh_len -
					       sizeof(*lrt));
		lrt->lrt_len = rec->lrh_len;
		lrt->lrt_index = rec->lrh_index;
	}
	/* The caller should make sure only 1 process access the lgh_last_idx,
	 * Otherwise it might hit the assert.*/
	LASSERT(index < LLOG_BITMAP_SIZE(llh));
	spin_lock(&loghandle->lgh_hdr_lock);
	if (ext2_set_bit(index, llh->llh_bitmap)) {
		CERROR("%s: index %u already set in log bitmap\n",
		       o->do_lu.lo_dev->ld_obd->obd_name, index);
		spin_unlock(&loghandle->lgh_hdr_lock);
		LBUG(); /* should never happen */
	}
	llh->llh_count++;
	spin_unlock(&loghandle->lgh_hdr_lock);
	/* remember the previous tail index so the error path can restore it */
	old_tail_idx = llh->llh_tail.lrt_index;
	llh->llh_tail.lrt_index = index;

	/* the updated header is written first, at offset 0 */
	lgi->lgi_off = 0;
	rc = llog_osd_write_blob(env, o, &llh->llh_hdr, NULL, &lgi->lgi_off,
				 th);
	if (rc)
		GOTO(out, rc);

	/* re-read the size: the record goes at the current end of file */
	rc = dt_attr_get(env, o, &lgi->lgi_attr, NULL);
	if (rc)
		GOTO(out, rc);

	LASSERT(lgi->lgi_attr.la_valid & LA_SIZE);
	lgi->lgi_off = lgi->lgi_attr.la_size;

	rc = llog_osd_write_blob(env, o, rec, buf, &lgi->lgi_off, th);

out:
	/* cleanup llog for error case */
	if (rc) {
		spin_lock(&loghandle->lgh_hdr_lock);
		ext2_clear_bit(index, llh->llh_bitmap);
		llh->llh_count--;
		spin_unlock(&loghandle->lgh_hdr_lock);

		/* restore the header */
		/* NOTE(review): only one lgh_last_idx increment is undone
		 * here, even when a pad record was written above and bumped
		 * lgh_last_idx as well; the result of this header rewrite is
		 * not checked - confirm both are intended */
		loghandle->lgh_last_idx--;
		llh->llh_tail.lrt_index = old_tail_idx;
		lgi->lgi_off = 0;
		llog_osd_write_blob(env, o, &llh->llh_hdr, NULL,
				    &lgi->lgi_off, th);
	}

	CDEBUG(D_RPCTRACE, "added record "DOSTID": idx: %u, %u\n",
	       POSTID(&loghandle->lgh_id.lgl_oi), index, rec->lrh_len);
	if (rc == 0 && reccookie) {
		reccookie->lgc_lgl = loghandle->lgh_id;
		reccookie->lgc_index = index;
		/* the cookie subsystem is derived from the record type */
		if ((rec->lrh_type == MDS_UNLINK_REC) ||
		    (rec->lrh_type == MDS_SETATTR64_REC))
			reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
		else if (rec->lrh_type == OST_SZ_REC)
			reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
		else
			reccookie->lgc_subsys = -1;
		rc = 1;
	}
	return rc;
}
/*
 * Read into @buf the next llog block expected to contain record @next_idx.
 *
 * sets:
 *  - cur_offset to the furthest point read in the log file
 *  - cur_idx to the log index preceding cur_offset
 *
 * The unread remainder of @buf (up to @len) is zeroed so that the caller can
 * detect the end of the valid data.
 *
 * \retval 0 on success or when dt_read() hit end of file
 * \retval -EIO/-EINVAL/-ENOENT or another negative errno on error
 */
static int llog_osd_next_block(const struct lu_env *env,
			       struct llog_handle *loghandle, int *cur_idx,
			       int next_idx, __u64 *cur_offset, void *buf,
			       int len)
{
	struct llog_thread_info	*lgi = llog_info(env);
	struct dt_object	*o;
	struct dt_device	*dt;
	int			 rc;

	LASSERT(env);
	LASSERT(lgi);

	if (len == 0 || (len & (LLOG_CHUNK_SIZE - 1)) != 0)
		return -EINVAL;

	CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
	       next_idx, *cur_idx, *cur_offset);

	LASSERT(loghandle);
	LASSERT(loghandle->lgh_ctxt);

	o = loghandle->lgh_obj;
	LASSERT(o);
	LASSERT(dt_object_exists(o));
	dt = lu2dt_dev(o->do_lu.lo_dev);
	LASSERT(dt);

	rc = dt_attr_get(env, o, &lgi->lgi_attr, BYPASS_CAPA);
	if (rc != 0)
		GOTO(out, rc);

	while (*cur_offset < lgi->lgi_attr.la_size) {
		struct llog_rec_hdr	*first;
		struct llog_rec_hdr	*last;
		struct llog_rec_tail	*tail;
		char			*block_end;

		llog_skip_over(cur_offset, *cur_idx, next_idx);

		/* read up to next LLOG_CHUNK_SIZE block */
		lgi->lgi_buf.lb_buf = buf;
		lgi->lgi_buf.lb_len = LLOG_CHUNK_SIZE -
				      (*cur_offset & (LLOG_CHUNK_SIZE - 1));

		/* Note: read lock is not needed around la_size get above at
		 * the time of dt_attr_get(). There are only two cases that
		 * matter. Either la_size == cur_offset, in which case the
		 * entire read is skipped, or la_size > cur_offset and the loop
		 * is entered and this thread is blocked at dt_read_lock()
		 * until the write is completed. When the write completes, then
		 * the dt_read() will be done with the full length, and will
		 * get the full data.
		 */
		dt_read_lock(env, o, 0);
		rc = dt_read(env, o, &lgi->lgi_buf, cur_offset);
		dt_read_unlock(env, o);

		if (rc < 0) {
			CERROR("%s: can't read llog block from log "DFID
			       " offset "LPU64": rc = %d\n",
			       o->do_lu.lo_dev->ld_obd->obd_name,
			       PFID(lu_object_fid(&o->do_lu)),
			       *cur_offset, rc);
			GOTO(out, rc);
		}

		/* signal the end of the valid buffer to llog_process */
		if (rc < len)
			memset((char *)buf + rc, 0, len - rc);

		/* end of file, nothing to do */
		if (rc == 0)
			GOTO(out, rc);

		if (rc < sizeof(*tail)) {
			CERROR("%s: invalid llog block at log id "DOSTID"/%u "
			       "offset "LPU64"\n",
			       o->do_lu.lo_dev->ld_obd->obd_name,
			       POSTID(&loghandle->lgh_id.lgl_oi),
			       loghandle->lgh_id.lgl_ogen, *cur_offset);
			GOTO(out, rc = -EINVAL);
		}

		/* first record of the block */
		first = buf;
		if (LLOG_REC_HDR_NEEDS_SWABBING(first))
			lustre_swab_llog_rec(first);

		/* the tail of the last record sits at the end of the data */
		block_end = (char *)buf + rc;
		tail = (struct llog_rec_tail *)(block_end -
						sizeof(struct llog_rec_tail));

		/* get the last record in block */
		last = (struct llog_rec_hdr *)(block_end -
					       le32_to_cpu(tail->lrt_len));
		if (LLOG_REC_HDR_NEEDS_SWABBING(last))
			lustre_swab_llog_rec(last);
		LASSERT(last->lrh_index == tail->lrt_index);

		*cur_idx = tail->lrt_index;

		/* this shouldn't happen */
		if (tail->lrt_index == 0) {
			CERROR("%s: invalid llog tail at log id "DOSTID"/%u "
			       "offset "LPU64"\n",
			       o->do_lu.lo_dev->ld_obd->obd_name,
			       POSTID(&loghandle->lgh_id.lgl_oi),
			       loghandle->lgh_id.lgl_ogen, *cur_offset);
			GOTO(out, rc = -EINVAL);
		}

		/* wanted record lies further on: read the following block */
		if (tail->lrt_index < next_idx)
			continue;

		/* sanity check that the start of the new buffer is no farther
		 * than the record that we wanted. This shouldn't happen. */
		if (first->lrh_index > next_idx) {
			CERROR("%s: missed desired record? %u > %u\n",
			       o->do_lu.lo_dev->ld_obd->obd_name,
			       first->lrh_index, next_idx);
			GOTO(out, rc = -ENOENT);
		}

		GOTO(out, rc = 0);
	}

	GOTO(out, rc = -EIO);
out:
	return rc;
}
static int llog_osd_read_header(const struct lu_env *env, struct llog_handle *handle) { struct llog_rec_hdr *llh_hdr; struct dt_object *o; struct llog_thread_info *lgi; int rc; LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE); o = handle->lgh_obj; LASSERT(o); lgi = llog_info(env); rc = dt_attr_get(env, o, &lgi->lgi_attr, NULL); if (rc) return rc; LASSERT(lgi->lgi_attr.la_valid & LA_SIZE); if (lgi->lgi_attr.la_size == 0) { CDEBUG(D_HA, "not reading header from 0-byte log\n"); return LLOG_EEMPTY; } lgi->lgi_off = 0; lgi->lgi_buf.lb_buf = handle->lgh_hdr; lgi->lgi_buf.lb_len = LLOG_CHUNK_SIZE; rc = dt_record_read(env, o, &lgi->lgi_buf, &lgi->lgi_off); if (rc) { CERROR("%s: error reading log header from "DFID": rc = %d\n", o->do_lu.lo_dev->ld_obd->obd_name, PFID(lu_object_fid(&o->do_lu)), rc); return rc; } llh_hdr = &handle->lgh_hdr->llh_hdr; if (LLOG_REC_HDR_NEEDS_SWABBING(llh_hdr)) lustre_swab_llog_hdr(handle->lgh_hdr); if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) { CERROR("%s: bad log %s "DFID" header magic: %#x " "(expected %#x)\n", o->do_lu.lo_dev->ld_obd->obd_name, handle->lgh_name ? handle->lgh_name : "", PFID(lu_object_fid(&o->do_lu)), llh_hdr->lrh_type, LLOG_HDR_MAGIC); return -EIO; } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) { CERROR("%s: incorrectly sized log %s "DFID" header: " "%#x (expected %#x)\n" "you may need to re-run lconf --write_conf.\n", o->do_lu.lo_dev->ld_obd->obd_name, handle->lgh_name ? handle->lgh_name : "", PFID(lu_object_fid(&o->do_lu)), llh_hdr->lrh_len, LLOG_CHUNK_SIZE); return -EIO; } handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index; return 0; }
/** * Implementation of dt_index_operations::dio_lookup * * Look up record by key under a remote index object. It packs lookup update * into RPC, sends to the remote OUT and waits for the lookup result. * * \param[in] env execution environment * \param[in] dt index object to lookup * \param[out] rec record in which to return lookup result * \param[in] key key of index which will be looked up * * \retval 1 if the lookup succeeds. * \retval negative errno if the lookup fails. */ static int osp_md_index_lookup(const struct lu_env *env, struct dt_object *dt, struct dt_rec *rec, const struct dt_key *key) { struct lu_buf *lbuf = &osp_env_info(env)->osi_lb2; struct osp_device *osp = lu2osp_dev(dt->do_lu.lo_dev); struct dt_device *dt_dev = &osp->opd_dt_dev; struct osp_update_request *update; struct object_update_reply *reply; struct ptlrpc_request *req = NULL; struct lu_fid *fid; int rc; ENTRY; /* Because it needs send the update buffer right away, * just create an update buffer, instead of attaching the * update_remote list of the thandle. 
*/ update = osp_update_request_create(dt_dev); if (IS_ERR(update)) RETURN(PTR_ERR(update)); rc = osp_update_rpc_pack(env, index_lookup, update, OUT_INDEX_LOOKUP, lu_object_fid(&dt->do_lu), rec, key); if (rc != 0) { CERROR("%s: Insert update error: rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name, rc); GOTO(out, rc); } rc = osp_remote_sync(env, osp, update, &req); if (rc < 0) GOTO(out, rc); reply = req_capsule_server_sized_get(&req->rq_pill, &RMF_OUT_UPDATE_REPLY, OUT_UPDATE_REPLY_SIZE); if (reply->ourp_magic != UPDATE_REPLY_MAGIC) { CERROR("%s: Wrong version %x expected %x: rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name, reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO); GOTO(out, rc = -EPROTO); } rc = object_update_result_data_get(reply, lbuf, 0); if (rc < 0) GOTO(out, rc); if (lbuf->lb_len != sizeof(*fid)) { CERROR("%s: lookup "DFID" %s wrong size %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name, PFID(lu_object_fid(&dt->do_lu)), (char *)key, (int)lbuf->lb_len); GOTO(out, rc = -EINVAL); } fid = lbuf->lb_buf; if (ptlrpc_rep_need_swab(req)) lustre_swab_lu_fid(fid); if (!fid_is_sane(fid)) { CERROR("%s: lookup "DFID" %s invalid fid "DFID"\n", dt_dev->dd_lu_dev.ld_obd->obd_name, PFID(lu_object_fid(&dt->do_lu)), (char *)key, PFID(fid)); GOTO(out, rc = -EINVAL); } memcpy(rec, fid, sizeof(*fid)); GOTO(out, rc = 1); out: if (req != NULL) ptlrpc_req_finished(req); osp_update_request_destroy(update); return rc; }
/*
 * Read from a remote object through an OUT_READ update RPC with a bulk
 * transfer into @rbuf.
 *
 * The destination buffer is registered with the bulk descriptor in fragments
 * of at most OUT_BULK_BUFFER_SIZE bytes. On success *pos is set to the
 * offset reported by the reply.
 *
 * \param[in]     env	execution environment
 * \param[in]     dt	object to read from
 * \param[in,out] rbuf	destination buffer; lb_len is the requested size
 * \param[in,out] pos	offset to read from, updated from the reply
 *
 * \retval size reported by the reply (orr_size) on success
 * \retval -ENOMEM if the bulk descriptor cannot be allocated
 * \retval -EPROTO on a missing, malformed or too short reply
 * \retval other negative errno on failure
 */
static ssize_t osp_md_read(const struct lu_env *env, struct dt_object *dt,
			   struct lu_buf *rbuf, loff_t *pos)
{
	struct osp_device *osp = lu2osp_dev(dt->do_lu.lo_dev);
	struct dt_device *dt_dev = &osp->opd_dt_dev;
	struct lu_buf *lbuf = &osp_env_info(env)->osi_lb2;
	char *ptr = rbuf->lb_buf;
	struct osp_update_request *update = NULL;
	struct ptlrpc_request *req = NULL;
	struct out_read_reply *orr;
	struct ptlrpc_bulk_desc *desc;
	struct object_update_reply *reply;
	__u32 left_size;
	int nbufs;
	int i;
	int rc;
	ENTRY;

	/* Because it needs send the update buffer right away,
	 * just create an update buffer, instead of attaching the
	 * update_remote list of the thandle. */
	update = osp_update_request_create(dt_dev);
	if (IS_ERR(update))
		/* nothing to clean up yet; in particular the error pointer
		 * must never reach osp_update_request_destroy() */
		RETURN(PTR_ERR(update));

	rc = osp_update_rpc_pack(env, read, update, OUT_READ,
				 lu_object_fid(&dt->do_lu),
				 rbuf->lb_len, *pos);
	if (rc != 0) {
		CERROR("%s: cannot insert update: rc = %d\n",
		       dt_dev->dd_lu_dev.ld_obd->obd_name, rc);
		GOTO(out, rc);
	}

	rc = osp_prep_update_req(env, osp->opd_obd->u.cli.cl_import, update,
				 &req);
	if (rc != 0)
		GOTO(out, rc);

	/* number of fragments, rounding up */
	nbufs = (rbuf->lb_len + OUT_BULK_BUFFER_SIZE - 1) /
					OUT_BULK_BUFFER_SIZE;
	/* allocate bulk descriptor */
	desc = ptlrpc_prep_bulk_imp(req, nbufs, 1,
				    PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KVEC,
				    MDS_BULK_PORTAL, &ptlrpc_bulk_kvec_ops);
	if (desc == NULL)
		GOTO(out, rc = -ENOMEM);

	/* split the buffer into small chunk size */
	left_size = rbuf->lb_len;
	for (i = 0; i < nbufs; i++) {
		int read_size;

		read_size = left_size > OUT_BULK_BUFFER_SIZE ?
				OUT_BULK_BUFFER_SIZE : left_size;
		desc->bd_frag_ops->add_iov_frag(desc, ptr, read_size);

		ptr += read_size;
		/* account for the bytes just registered so that the last
		 * fragment does not extend past the end of rbuf */
		left_size -= read_size;
	}

	/* This will only be called with read-only update, and these updates
	 * might be used to retrieve update log during recovery process, so
	 * it will be allowed to send during recovery process */
	req->rq_allow_replay = 1;
	req->rq_bulk_read = 1;
	/* send request to master and wait for RPC to complete */
	rc = ptlrpc_queue_wait(req);
	if (rc != 0)
		GOTO(out, rc);

	rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
					  req->rq_bulk->bd_nob_transferred);
	if (rc < 0)
		GOTO(out, rc);

	reply = req_capsule_server_sized_get(&req->rq_pill,
					     &RMF_OUT_UPDATE_REPLY,
					     OUT_UPDATE_REPLY_SIZE);
	/* a malformed reply may not carry the buffer at all */
	if (reply == NULL)
		GOTO(out, rc = -EPROTO);

	if (reply->ourp_magic != UPDATE_REPLY_MAGIC) {
		CERROR("%s: invalid update reply magic %x expected %x:"
		       " rc = %d\n", dt_dev->dd_lu_dev.ld_obd->obd_name,
		       reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO);
		GOTO(out, rc = -EPROTO);
	}

	rc = object_update_result_data_get(reply, lbuf, 0);
	if (rc < 0)
		GOTO(out, rc);

	if (lbuf->lb_len < sizeof(*orr))
		GOTO(out, rc = -EPROTO);

	/* convert the read reply in place and report size and offset */
	orr = lbuf->lb_buf;
	orr_le_to_cpu(orr, orr);
	rc = orr->orr_size;
	*pos = orr->orr_offset;
out:
	if (req != NULL)
		ptlrpc_req_finished(req);

	if (update != NULL)
		osp_update_request_destroy(update);

	RETURN(rc);
}
/*
 * Update a record in a quota index file.
 *
 * An existing record for the ID is deleted first and, when \a rec is not
 * NULL, the new record is inserted. If that insertion fails, the previous
 * record (when there was one) is re-inserted and the error of the failed
 * insertion is returned to the caller.
 *
 * \param env   - is the environment passed by the caller
 * \param th    - is the transaction to use for disk writes
 * \param obj   - is the on-disk index to be updated.
 * \param id    - is the key to be updated
 * \param rec   - is the input record containing the new quota settings.
 *                If NULL, the existing record is only deleted.
 * \param flags - can be LQUOTA_BUMP_VER or LQUOTA_SET_VER.
 * \param ver   - is the new version of the index if LQUOTA_SET_VER is set or is
 *                used to return the new version of the index when
 *                LQUOTA_BUMP_VER is set.
 *
 * \retval      - 0 on success, appropriate error on failure
 */
int lquota_disk_write(const struct lu_env *env, struct thandle *th,
		      struct dt_object *obj, union lquota_id *id,
		      struct dt_rec *rec, __u32 flags, __u64 *ver)
{
	struct lquota_thread_info	*qti = lquota_info(env);
	struct dt_key			*key = (struct dt_key *)&id->qid_uid;
	bool				 had_old = false;
	int				 rc;
	ENTRY;

	LASSERT(dt_object_exists(obj));
	LASSERT(obj->do_index_ops != NULL);

	/* lock index */
	dt_write_lock(env, obj, 0);

	/* check whether there is already an existing record for this ID */
	rc = dt_lookup(env, obj, (struct dt_rec *)&qti->qti_rec, key);
	if (rc == 0) {
		/* qti_rec now holds the old record, usable for rollback */
		had_old = true;
		/* delete existing record in order to replace it */
		rc = dt_delete(env, obj, key, th);
		if (rc)
			GOTO(out, rc);
	} else if (rc == -ENOENT) {
		/* probably first insert */
		rc = 0;
	} else {
		GOTO(out, rc);
	}

	if (rec != NULL) {
		/* insert record with updated quota settings */
		rc = dt_insert(env, obj, rec, key, th, 1);
		if (rc) {
			/* try to put the old record back, but only if one
			 * was actually read above; keep rc so that the
			 * caller sees the failure of the update itself */
			if (had_old) {
				int rc2;

				rc2 = dt_insert(env, obj,
						(struct dt_rec *)&qti->qti_rec,
						key, th, 1);
				LASSERTF(rc2 == 0, "failed to insert record in quota "
					 "index "DFID"\n",
					 PFID(lu_object_fid(&obj->do_lu)));
			}
			GOTO(out, rc);
		}
	}

	if (flags != 0) {
		LASSERT(ver);
		if (flags & LQUOTA_BUMP_VER) {
			/* caller wants to bump the version, let's first read
			 * it */
			*ver = dt_version_get(env, obj);
			(*ver)++;
		} else {
			LASSERT(flags & LQUOTA_SET_VER);
		}
		dt_version_set(env, obj, *ver, th);
	}

	EXIT;
out:
	dt_write_unlock(env, obj);
	return rc;
}
/*
 * Look-up a slave index file. If the slave index isn't found:
 * - if local is set to false, we allocate a FID from FID_SEQ_QUOTA sequence and
 *   create the index.
 * - otherwise, we create the index file with a local reserved FID (see
 *   lquota_local_oid)
 *
 * Index operations are installed on the returned object when they are not
 * set up yet.
 *
 * \param env     - is the environment passed by the caller
 * \param dev     - is the backend dt_device where to look-up/create the slave
 *                  index
 * \param parent  - is the parent directory where to create the slave index if
 *                  it does not exist already
 * \param glb_fid - is the fid of the global index file associated with this
 *                  slave index.
 * \param uuid    - is the uuid of slave which is (re)connecting to the master
 *                  target
 * \param local   - indicate whether to use local reserved FID (LQUOTA_USR_OID
 *                  & LQUOTA_GRP_OID) for the slave index creation or to
 *                  allocate a new fid from sequence FID_SEQ_QUOTA
 *
 * \retval     - pointer to the dt_object of the slave index on success,
 *               appropriate error on failure
 */
struct dt_object *lquota_disk_slv_find_create(const struct lu_env *env,
					      struct dt_device *dev,
					      struct dt_object *parent,
					      struct lu_fid *glb_fid,
					      struct obd_uuid *uuid,
					      bool local)
{
	struct lquota_thread_info	*qti = lquota_info(env);
	struct dt_object		*slv_idx;
	int				 rc;
	ENTRY;

	LASSERT(uuid != NULL);

	CDEBUG(D_QUOTA, "lookup/create slave index file for %s\n",
	       obd_uuid2str(uuid));

	/* generate filename associated with the slave */
	rc = lquota_disk_slv_filename(glb_fid, uuid, qti->qti_buf);
	if (rc != 0)
		RETURN(ERR_PTR(rc));

	/* Slave indexes uses the FID_SEQ_QUOTA sequence since they can be read
	 * through the network */
	qti->qti_fid.f_seq = FID_SEQ_QUOTA;
	qti->qti_fid.f_ver = 0;

	if (!local) {
		/* allocate fid dynamically if index does not exist already */
		qti->qti_fid.f_oid = LQUOTA_GENERATED_OID;

		/* lookup/create slave index file */
		slv_idx = lquota_disk_find_create(env, dev, parent,
						  &qti->qti_fid,
						  &dt_quota_slv_features,
						  qti->qti_buf);
	} else {
		int type;

		rc = lquota_extract_fid(glb_fid, NULL, NULL, &type);
		if (rc != 0)
			RETURN(ERR_PTR(rc));

		/* use predefined fid in the reserved oid list */
		qti->qti_fid.f_oid = qtype2slv_oid(type);

		slv_idx = local_index_find_or_create_with_fid(env, dev,
							&qti->qti_fid, parent,
							qti->qti_buf,
							LQUOTA_MODE,
							&dt_quota_slv_features);
	}

	/* lookup/creation failed, or index operations are already set up */
	if (IS_ERR(slv_idx) || slv_idx->do_index_ops != NULL)
		RETURN(slv_idx);

	/* install index operation vector */
	rc = slv_idx->do_ops->do_index_try(env, slv_idx,
					   &dt_quota_slv_features);
	if (rc == 0)
		RETURN(slv_idx);

	CERROR("%s: failed to setup index operations for "DFID
	       " rc:%d\n", dev->dd_lu_dev.ld_obd->obd_name,
	       PFID(lu_object_fid(&slv_idx->do_lu)), rc);
	dt_object_put(env, slv_idx);

	RETURN(ERR_PTR(rc));
}