/* Add log records for each OSC that this object is striped over, and return * cookies for each one. We _would_ have nice abstraction here, except that * we need to keep cookies in stripe order, even if some are NULL, so that * the right cookies are passed back to the right OSTs at the client side. * Unset cookies should be all-zero (which will never occur naturally). */ static int lov_llog_origin_add(const struct lu_env *env, struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, struct llog_cookie *logcookies, int numcookies) { struct obd_device *obd = ctxt->loc_obd; struct lov_obd *lov = &obd->u.lov; int i, rc = 0, cookies = 0; ENTRY; LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count, "logcookies %p, numcookies %d lsm->lsm_stripe_count %d \n", logcookies, numcookies, lsm->lsm_stripe_count); for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_oinfo *loi = lsm->lsm_oinfo[i]; struct obd_device *child = lov->lov_tgts[loi->loi_ost_idx]->ltd_exp->exp_obd; struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx); /* fill mds unlink/setattr log record */ switch (rec->lrh_type) { case MDS_UNLINK_REC: { struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec; lur->lur_oid = ostid_id(&loi->loi_oi); lur->lur_oseq = (__u32)ostid_seq(&loi->loi_oi); break; } case MDS_SETATTR64_REC: { struct llog_setattr64_rec *lsr = (struct llog_setattr64_rec *)rec; lsr->lsr_oi = loi->loi_oi; break; } default: break; } /* inject error in llog_obd_add() below */ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FAIL_LOV_LOG_ADD)) { llog_ctxt_put(cctxt); cctxt = NULL; } rc = llog_obd_add(env, cctxt, rec, NULL, logcookies + cookies, numcookies - cookies); llog_ctxt_put(cctxt); if (rc < 0) { CERROR("Can't add llog (rc = %d) for stripe %d\n", rc, cookies); memset(logcookies + cookies, 0, sizeof(struct llog_cookie)); rc = 1; /* skip this cookie */ } /* Note that rc is always 1 if llog_obd_add was successful */ cookies += rc; } RETURN(cookies); }
/** * Prepare bulk IO requests for processing. * * This function does initial checks of IO and calls corresponding * functions for read/write processing. * * \param[in] env execution environment * \param[in] cmd IO type (read/write) * \param[in] exp OBD export of client * \param[in] oa OBDO structure from request * \param[in] objcount always 1 * \param[in] obj object data * \param[in] rnb remote buffers * \param[in] nr_local number of local buffers * \param[in] lnb local buffers * * \retval 0 on successful prepare * \retval negative value on error */ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int *nr_local, struct niobuf_local *lnb) { struct tgt_session_info *tsi = tgt_ses_info(env); struct ofd_device *ofd = ofd_exp(exp); struct ofd_thread_info *info; char *jobid; const struct lu_fid *fid = &oa->o_oi.oi_fid; int rc = 0; if (*nr_local > PTLRPC_MAX_BRW_PAGES) { CERROR("%s: bulk has too many pages %d, which exceeds the" "maximum pages per RPC of %d\n", exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES); RETURN(-EPROTO); } if (tgt_ses_req(tsi) == NULL) { /* echo client case */ info = ofd_info_init(env, exp); jobid = NULL; } else { info = tsi2ofd_info(tsi); jobid = tsi->tsi_jobid; } LASSERT(oa != NULL); if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) { struct ofd_seq *oseq; oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi)); if (IS_ERR(oseq)) { CERROR("%s: Can not find seq for "DOSTID ": rc = %ld\n", ofd_name(ofd), POSTID(&oa->o_oi), PTR_ERR(oseq)); RETURN(-EINVAL); } if (oseq->os_destroys_in_progress == 0) { /* don't fail lookups for orphan recovery, it causes * later LBUGs when objects still exist during * precreate */ ofd_seq_put(env, oseq); RETURN(-ENOENT); } ofd_seq_put(env, oseq); } LASSERT(objcount == 1); LASSERT(obj->ioo_bufcnt > 0); if (cmd == OBD_BRW_WRITE) { la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR); rc = ofd_preprw_write(env, exp, ofd, 
fid, &info->fti_attr, oa, objcount, obj, rnb, nr_local, lnb, jobid); } else if (cmd == OBD_BRW_READ) { ofd_grant_prepare_read(env, exp, oa); rc = ofd_preprw_read(env, exp, ofd, fid, &info->fti_attr, oa, obj->ioo_bufcnt, rnb, nr_local, lnb, jobid); obdo_from_la(oa, &info->fti_attr, LA_ATIME); } else { CERROR("%s: wrong cmd %d received!\n", exp->exp_obd->obd_name, cmd); rc = -EPROTO; } RETURN(rc); }
/** * Implementation of struct cl_object_operations::coo_req_attr_set() for osc * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq * fields. */ static void osc_req_attr_set(const struct lu_env *env, struct cl_object *obj, struct cl_req_attr *attr) { struct lov_oinfo *oinfo; struct obdo *oa; struct ost_lvb *lvb; u64 flags = attr->cra_flags; oinfo = cl2osc(obj)->oo_oinfo; lvb = &oinfo->loi_lvb; oa = attr->cra_oa; if ((flags & OBD_MD_FLMTIME) != 0) { oa->o_mtime = lvb->lvb_mtime; oa->o_valid |= OBD_MD_FLMTIME; } if ((flags & OBD_MD_FLATIME) != 0) { oa->o_atime = lvb->lvb_atime; oa->o_valid |= OBD_MD_FLATIME; } if ((flags & OBD_MD_FLCTIME) != 0) { oa->o_ctime = lvb->lvb_ctime; oa->o_valid |= OBD_MD_FLCTIME; } if (flags & OBD_MD_FLGROUP) { ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi)); oa->o_valid |= OBD_MD_FLGROUP; } if (flags & OBD_MD_FLID) { ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi)); oa->o_valid |= OBD_MD_FLID; } if (flags & OBD_MD_FLHANDLE) { struct ldlm_lock *lock; struct osc_page *opg; opg = osc_cl_page_osc(attr->cra_page, cl2osc(obj)); lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg), OSC_DAP_FL_TEST_LOCK | OSC_DAP_FL_CANCELING); if (lock == NULL && !opg->ops_srvlock) { struct ldlm_resource *res; struct ldlm_res_id *resname; CL_PAGE_DEBUG(D_ERROR, env, attr->cra_page, "uncovered page!\n"); resname = &osc_env_info(env)->oti_resname; ostid_build_res_name(&oinfo->loi_oi, resname); res = ldlm_resource_get( osc_export(cl2osc(obj))->exp_obd->obd_namespace, NULL, resname, LDLM_EXTENT, 0); ldlm_resource_dump(D_ERROR, res); libcfs_debug_dumpstack(NULL); LBUG(); } /* check for lockless io. */ if (lock != NULL) { oa->o_handle = lock->l_remote_handle; oa->o_valid |= OBD_MD_FLHANDLE; LDLM_LOCK_PUT(lock); } } }
/**
 * Implementation of struct cl_req_operations::cro_attr_set() for osc
 * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq
 * fields.
 *
 * For every bit set in \a flags the matching field of attr->cra_oa is filled
 * in and the bit is added to obdo::o_valid.  Timestamps come from the
 * object's cached lvb, sequence and id from its lov_oinfo.  For
 * OBD_MD_FLHANDLE the first page queued on the request is used to look up a
 * covering cl_lock; its DLM remote handle is stored unless the io is
 * lockless (the osc lock has no ols_lock).
 */
static void osc_req_attr_set(const struct lu_env *env,
			     const struct cl_req_slice *slice,
			     const struct cl_object *obj,
			     struct cl_req_attr *attr, u64 flags)
{
	struct lov_oinfo *oinfo;
	struct cl_req    *clerq;
	struct cl_page   *apage; /* _some_ page in @clerq */
	struct cl_lock   *lock;  /* _some_ lock protecting @apage */
	struct osc_lock  *olck;
	struct osc_page  *opg;
	struct obdo      *oa;
	struct ost_lvb   *lvb;

	oinfo = cl2osc(obj)->oo_oinfo;
	lvb   = &oinfo->loi_lvb;
	oa    = attr->cra_oa;

	if ((flags & OBD_MD_FLMTIME) != 0) {
		oa->o_mtime = lvb->lvb_mtime;
		oa->o_valid |= OBD_MD_FLMTIME;
	}
	if ((flags & OBD_MD_FLATIME) != 0) {
		oa->o_atime = lvb->lvb_atime;
		oa->o_valid |= OBD_MD_FLATIME;
	}
	if ((flags & OBD_MD_FLCTIME) != 0) {
		oa->o_ctime = lvb->lvb_ctime;
		oa->o_valid |= OBD_MD_FLCTIME;
	}
	if (flags & OBD_MD_FLGROUP) {
		ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi));
		oa->o_valid |= OBD_MD_FLGROUP;
	}
	if (flags & OBD_MD_FLID) {
		ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi));
		oa->o_valid |= OBD_MD_FLID;
	}
	if (flags & OBD_MD_FLHANDLE) {
		clerq = slice->crs_req;
		LASSERT(!list_empty(&clerq->crq_pages));
		apage = container_of(clerq->crq_pages.next,
				     struct cl_page, cp_flight);
		opg = osc_cl_page_osc(apage);
		apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */
		lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1);
		if (lock == NULL) {
			struct cl_object_header *head;
			struct cl_lock          *scan;

			/* no lock covers the page: dump every lock on the
			 * object and the page itself, then assert */
			head = cl_object_header(apage->cp_obj);
			list_for_each_entry(scan, &head->coh_locks,
					    cll_linkage)
				CL_LOCK_DEBUG(D_ERROR, env, scan,
					      "no cover page!\n");
			CL_PAGE_DEBUG(D_ERROR, env, apage,
				      "dump uncover page!\n");
			dump_stack();
			LBUG();
		}

		olck = osc_lock_at(lock);
		LASSERT(olck != NULL);
		/* a server-locked page must not carry a client DLM lock */
		LASSERT(ergo(opg->ops_srvlock, olck->ols_lock == NULL));
		/* check for lockless io. */
		if (olck->ols_lock != NULL) {
			oa->o_handle = olck->ols_lock->l_remote_handle;
			oa->o_valid |= OBD_MD_FLHANDLE;
		}
		cl_lock_put(env, lock);
	}
}
/**
 * Precreate a batch of objects in one sequence.
 *
 * Up to \a nr objects with ids \a id .. \a id + \a nr - 1 are looked up,
 * write-locked and created inside a single local transaction.  The on-disk
 * LAST_ID record of \a oseq is written before the objects are created and is
 * rewritten with the real last id if fewer objects than planned were
 * created.  Objects that already exist are not created again but still
 * advance the in-memory last id.
 *
 * \param[in] env	execution environment
 * \param[in] ofd	OFD device
 * \param[in] id	id of the first object to create, must not be 0
 * \param[in] oseq	sequence the objects belong to
 * \param[in] nr	number of objects requested
 * \param[in] sync	OR-ed into the transaction's th_sync flag
 *
 * \retval		number of objects processed, if at least one
 * \retval		-ENOSPC if \a id + \a nr exceeds the id range of the
 *			sequence
 * \retval		-ENOMEM if the batch array cannot be allocated
 * \retval		negative value on other errors
 */
int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
			  obd_id id, struct ofd_seq *oseq, int nr, int sync)
{
	struct ofd_thread_info	*info = ofd_info(env);
	struct ofd_object	*fo = NULL;
	struct dt_object	*next;
	struct thandle		*th;
	struct ofd_object	**batch;
	struct lu_fid		*fid = &info->fti_fid;
	obd_id			 tmp;
	int			 rc;
	int			 i;
	int			 objects = 0;
	int			 nr_saved = nr;

	ENTRY;

	/* Don't create objects beyond the valid range for this SEQ */
	if (unlikely(fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
		     (id + nr) >= IDIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the IDIF_MAX_OID (1<<48)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	} else if (unlikely(!fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
			    (id + nr) >= OBIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the OBIF_MAX_OID (1<<32)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	}

	/* the cleanup loop at "out" treats NULL slots as "not prepared" */
	OBD_ALLOC(batch, nr_saved * sizeof(struct ofd_object *));
	if (batch == NULL)
		RETURN(-ENOMEM);

	info->fti_attr.la_valid = LA_TYPE | LA_MODE;
	/*
	 * We mark object SUID+SGID to flag it for accepting UID+GID from
	 * client on first write.  Currently the permission bits on the OST are
	 * never used, so this is OK.
	 */
	info->fti_attr.la_mode = S_IFREG | S_ISUID | S_ISGID | 0666;
	info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);

	/* Initialize a/c/m time so any client timestamp will always
	 * be newer and update the inode. ctime = 0 is also handled
	 * specially in osd_inode_setattr(). See LU-221, LU-1042 */
	info->fti_attr.la_valid |= LA_ATIME | LA_MTIME | LA_CTIME;
	info->fti_attr.la_atime = 0;
	info->fti_attr.la_mtime = 0;
	info->fti_attr.la_ctime = 0;

	LASSERT(id != 0);

	/* prepare objects */
	*fid = *lu_object_fid(&oseq->os_lastid_obj->do_lu);
	for (i = 0; i < nr; i++) {
		rc = fid_set_id(fid, id + i);
		if (rc != 0) {
			if (i == 0)
				GOTO(out, rc);

			/* continue with the objects prepared so far */
			nr = i;
			break;
		}

		fo = ofd_object_find(env, ofd, fid);
		if (IS_ERR(fo)) {
			if (i == 0)
				GOTO(out, rc = PTR_ERR(fo));

			nr = i;
			break;
		}

		ofd_write_lock(env, fo);
		batch[i] = fo;
	}

	/* the LAST_ID record is written from the local variable "tmp" */
	info->fti_buf.lb_buf = &tmp;
	info->fti_buf.lb_len = sizeof(tmp);
	info->fti_off = 0;

	th = ofd_trans_create(env, ofd);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	th->th_sync |= sync;

	rc = dt_declare_record_write(env, oseq->os_lastid_obj, &info->fti_buf,
				     info->fti_off, th);
	if (rc)
		GOTO(trans_stop, rc);

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		if (unlikely(ofd_object_exists(fo))) {
			/* object may exist being re-created by write replay */
			CDEBUG(D_INODE, "object "LPX64"/"LPX64" exists: "
			       DFID"\n", ostid_seq(&oseq->os_oi), id,
			       PFID(lu_object_fid(&fo->ofo_obj.do_lu)));
			continue;
		}

		next = ofd_object_child(fo);
		LASSERT(next != NULL);

		rc = dt_declare_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
		if (rc) {
			/* Nothing could be declared at all: report the error
			 * instead of going on with nr == 0, which would lose
			 * rc and trip the assertions below. */
			if (i == 0)
				GOTO(trans_stop, rc);

			nr = i;
			break;
		}
	}

	rc = dt_trans_start_local(env, ofd->ofd_osd, th);
	if (rc)
		GOTO(trans_stop, rc);

	CDEBUG(D_OTHER, "%s: create new object "DFID" nr %d\n",
	       ofd_name(ofd), PFID(fid), nr);

	LASSERT(nr > 0);

	 /* When the LFSCK scanning the whole device to verify the LAST_ID file
	  * consistency, it will load the last_id into RAM firstly, and compare
	  * the last_id with each OST-object's ID. If the later one is larger,
	  * then it will regard the LAST_ID file crashed. But during the LFSCK
	  * scanning, the OFD may continue to create new OST-objects. Those new
	  * created OST-objects will have larger IDs than the LFSCK known ones.
	  * So from the LFSCK view, it needs to re-load the last_id from disk
	  * file, and if the latest last_id is still smaller than the object's
	  * ID, then the LAST_ID file is real crashed.
	  *
	  * To make above mechanism to work, before OFD pre-create OST-objects,
	  * it needs to update the LAST_ID file firstly, otherwise, the LFSCK
	  * may cannot get latest last_id although new OST-object created. */
	if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_SKIP_LASTID)) {
		tmp = cpu_to_le64(id + nr - 1);
		dt_write_lock(env, oseq->os_lastid_obj, 0);
		rc = dt_record_write(env, oseq->os_lastid_obj,
				     &info->fti_buf, &info->fti_off, th);
		dt_write_unlock(env, oseq->os_lastid_obj);
		if (rc != 0)
			GOTO(trans_stop, rc);
	}

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		/* Only the new created objects need to be recorded. */
		if (ofd->ofd_osd->dd_record_fid_accessed) {
			lfsck_pack_rfa(&ofd_info(env)->fti_lr,
				       lu_object_fid(&fo->ofo_obj.do_lu));
			lfsck_in_notify(env, ofd->ofd_osd,
					&ofd_info(env)->fti_lr);
		}

		if (likely(!ofd_object_exists(fo) &&
			   !OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DANGLING))) {
			next = ofd_object_child(fo);
			LASSERT(next != NULL);

			rc = dt_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
			if (rc)
				break;
			LASSERT(ofd_object_exists(fo));
		}
		ofd_seq_last_oid_set(oseq, id + i);
	}

	objects = i;
	/* NOT all the wanted objects have been created,
	 * set the LAST_ID as the real created. */
	if (unlikely(objects < nr)) {
		int rc1;

		/* rewind: the earlier LAST_ID write advanced fti_off */
		info->fti_off = 0;
		tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
		dt_write_lock(env, oseq->os_lastid_obj, 0);
		rc1 = dt_record_write(env, oseq->os_lastid_obj,
				      &info->fti_buf, &info->fti_off, th);
		dt_write_unlock(env, oseq->os_lastid_obj);
		if (rc1 != 0)
			CERROR("%s: fail to reset the LAST_ID for seq ("LPX64
			       ") from "LPU64" to "LPU64"\n", ofd_name(ofd),
			       ostid_seq(&oseq->os_oi), id + nr - 1,
			       ofd_seq_last_oid(oseq));
	}

trans_stop:
	ofd_trans_stop(env, ofd, th, rc);
out:
	/* release every object that was locked during preparation */
	for (i = 0; i < nr_saved; i++) {
		fo = batch[i];
		if (fo) {
			ofd_write_unlock(env, fo);
			ofd_object_put(env, fo);
		}
	}
	OBD_FREE(batch, nr_saved * sizeof(struct ofd_object *));

	CDEBUG((objects == 0 && rc == 0) ? D_ERROR : D_OTHER,
	       "created %d/%d objects: %d\n", objects, nr_saved, rc);

	LASSERT(ergo(objects == 0, rc < 0));
	RETURN(objects > 0 ? objects : rc);
}
/**
 * Precreate a batch of objects in one sequence.
 *
 * Up to \a nr objects with ids \a id .. \a id + \a nr - 1 are looked up,
 * write-locked and created inside a single local transaction.  After the
 * creation loop the LAST_ID record of \a oseq is written with the last id
 * that was set.  Objects that already exist are not created again but still
 * advance the in-memory last id.
 *
 * \param[in] env	execution environment
 * \param[in] ofd	OFD device
 * \param[in] id	id of the first object to create
 * \param[in] oseq	sequence the objects belong to
 * \param[in] nr	number of objects requested
 * \param[in] sync	OR-ed into the transaction's th_sync flag
 *
 * \retval		number of objects processed, if at least one
 * \retval		-ENOSPC if \a id + \a nr exceeds the id range of the
 *			sequence
 * \retval		-ENOMEM if the batch array cannot be allocated
 * \retval		negative value on other errors
 */
int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
			  obd_id id, struct ofd_seq *oseq, int nr, int sync)
{
	struct ofd_thread_info	*info = ofd_info(env);
	struct ofd_object	*fo = NULL;
	struct dt_object	*next;
	struct thandle		*th;
	struct ofd_object	**batch;
	obd_id			 tmp;
	int			 rc;
	int			 i;
	int			 objects = 0;
	int			 nr_saved = nr;

	ENTRY;

	/* Don't create objects beyond the valid range for this SEQ */
	if (unlikely(fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
		     (id + nr) >= IDIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the IDIF_MAX_OID (1<<48)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	} else if (unlikely(!fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
			    (id + nr) >= OBIF_MAX_OID)) {
		CERROR("%s:"DOSTID" hit the OBIF_MAX_OID (1<<32)!\n",
		       ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
		RETURN(rc = -ENOSPC);
	}

	/* the cleanup loop at "out" treats NULL slots as "not prepared" */
	OBD_ALLOC(batch, nr_saved * sizeof(struct ofd_object *));
	if (batch == NULL)
		RETURN(-ENOMEM);

	info->fti_attr.la_valid = LA_TYPE | LA_MODE;
	/*
	 * We mark object SUID+SGID to flag it for accepting UID+GID from
	 * client on first write.  Currently the permission bits on the OST are
	 * never used, so this is OK.
	 */
	info->fti_attr.la_mode = S_IFREG | S_ISUID | S_ISGID | 0666;
	info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);

	/* Initialize a/c/m time so any client timestamp will always
	 * be newer and update the inode. ctime = 0 is also handled
	 * specially in osd_inode_setattr(). See LU-221, LU-1042 */
	info->fti_attr.la_valid |= LA_ATIME | LA_MTIME | LA_CTIME;
	info->fti_attr.la_atime = 0;
	info->fti_attr.la_mtime = 0;
	info->fti_attr.la_ctime = 0;

	/* prepare objects */
	ostid_set_seq(&info->fti_ostid, ostid_seq(&oseq->os_oi));
	for (i = 0; i < nr; i++) {
		ostid_set_id(&info->fti_ostid, id + i);
		rc = ostid_to_fid(&info->fti_fid, &info->fti_ostid, 0);
		if (rc) {
			if (i == 0)
				GOTO(out, rc);

			/* continue with the objects prepared so far */
			nr = i;
			break;
		}

		fo = ofd_object_find(env, ofd, &info->fti_fid);
		if (IS_ERR(fo)) {
			if (i == 0)
				GOTO(out, rc = PTR_ERR(fo));

			nr = i;
			break;
		}

		ofd_write_lock(env, fo);
		batch[i] = fo;
	}

	/* the LAST_ID record is written from the local variable "tmp" */
	info->fti_buf.lb_buf = &tmp;
	info->fti_buf.lb_len = sizeof(tmp);
	info->fti_off = 0;

	th = ofd_trans_create(env, ofd);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	th->th_sync |= sync;

	rc = dt_declare_record_write(env, oseq->os_lastid_obj, sizeof(tmp),
				     info->fti_off, th);
	if (rc)
		GOTO(trans_stop, rc);

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		if (unlikely(ofd_object_exists(fo))) {
			/* object may exist being re-created by write replay */
			CDEBUG(D_INODE, "object "LPX64"/"LPX64" exists: "
			       DFID"\n", ostid_seq(&oseq->os_oi), id,
			       PFID(&info->fti_fid));
			continue;
		}

		next = ofd_object_child(fo);
		LASSERT(next != NULL);

		rc = dt_declare_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
		if (rc) {
			/* Nothing could be declared at all: report the error
			 * instead of going on with nr == 0, which would lose
			 * rc and trip the final assertion. */
			if (i == 0)
				GOTO(trans_stop, rc);

			nr = i;
			break;
		}
	}

	rc = dt_trans_start_local(env, ofd->ofd_osd, th);
	if (rc)
		GOTO(trans_stop, rc);

	CDEBUG(D_OTHER, "%s: create new object "DFID" nr %d\n",
	       ofd_name(ofd), PFID(&info->fti_fid), nr);

	for (i = 0; i < nr; i++) {
		fo = batch[i];
		LASSERT(fo);

		if (likely(!ofd_object_exists(fo))) {
			next = ofd_object_child(fo);
			LASSERT(next != NULL);

			rc = dt_create(env, next, &info->fti_attr, NULL,
				       &info->fti_dof, th);
			if (rc)
				break;
			LASSERT(ofd_object_exists(fo));
		}
		ofd_seq_last_oid_set(oseq, id + i);
	}

	objects = i;
	/* record the new last id only if at least one object was processed */
	if (objects > 0) {
		tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
		rc = dt_record_write(env, oseq->os_lastid_obj,
				     &info->fti_buf, &info->fti_off, th);
	}
trans_stop:
	ofd_trans_stop(env, ofd, th, rc);
out:
	/* release every object that was locked during preparation */
	for (i = 0; i < nr_saved; i++) {
		fo = batch[i];
		if (fo) {
			ofd_write_unlock(env, fo);
			ofd_object_put(env, fo);
		}
	}
	OBD_FREE(batch, nr_saved * sizeof(struct ofd_object *));

	CDEBUG((objects == 0 && rc == 0) ? D_ERROR : D_OTHER,
	       "created %d/%d objects: %d\n", objects, nr_saved, rc);

	LASSERT(ergo(objects == 0, rc < 0));
	RETURN(objects > 0 ? objects : rc);
}