static inline int osp_objs_precreated(const struct lu_env *env, struct osp_device *osp) { struct lu_fid *fid1 = &osp->opd_pre_last_created_fid; struct lu_fid *fid2 = &osp->opd_pre_used_fid; LASSERTF(fid_seq(fid1) == fid_seq(fid2), "Created fid"DFID" Next fid "DFID"\n", PFID(fid1), PFID(fid2)); if (fid_is_idif(fid1)) { struct ost_id *oi1 = &osp_env_info(env)->osi_oi; struct ost_id *oi2 = &osp_env_info(env)->osi_oi2; LASSERT(fid_is_idif(fid1) && fid_is_idif(fid2)); fid_to_ostid(fid1, oi1); fid_to_ostid(fid2, oi2); LASSERT(ostid_id(oi1) >= ostid_id(oi2)); return ostid_id(oi1) - ostid_id(oi2); } return fid_oid(fid1) - fid_oid(fid2); }
/** * alloc fids for precreation. * rc = 0 Success, @grow is the count of real allocation. * rc = 1 Current seq is used up. * rc < 0 Other error. **/ static int osp_precreate_fids(const struct lu_env *env, struct osp_device *osp, struct lu_fid *fid, int *grow) { struct osp_thread_info *osi = osp_env_info(env); __u64 end; int i = 0; if (fid_is_idif(fid)) { struct lu_fid *last_fid; struct ost_id *oi = &osi->osi_oi; spin_lock(&osp->opd_pre_lock); last_fid = &osp->opd_pre_last_created_fid; fid_to_ostid(last_fid, oi); end = min(ostid_id(oi) + *grow, IDIF_MAX_OID); *grow = end - ostid_id(oi); ostid_set_id(oi, ostid_id(oi) + *grow); spin_unlock(&osp->opd_pre_lock); if (*grow == 0) return 1; ostid_to_fid(fid, oi, osp->opd_index); return 0; } spin_lock(&osp->opd_pre_lock); *fid = osp->opd_pre_last_created_fid; end = fid->f_oid; end = min((end + *grow), (__u64)LUSTRE_DATA_SEQ_MAX_WIDTH); *grow = end - fid->f_oid; fid->f_oid += end - fid->f_oid; spin_unlock(&osp->opd_pre_lock); CDEBUG(D_INFO, "Expect %d, actual %d ["DFID" -- "DFID"]\n", *grow, i, PFID(fid), PFID(&osp->opd_pre_last_created_fid)); return *grow > 0 ? 0 : 1; }
/** * asks OST to clean precreate orphans * and gets next id for new objects */ static int osp_precreate_cleanup_orphans(struct lu_env *env, struct osp_device *d) { struct osp_thread_info *osi = osp_env_info(env); struct lu_fid *last_fid = &osi->osi_fid; struct ptlrpc_request *req = NULL; struct obd_import *imp; struct ost_body *body; struct l_wait_info lwi = { 0 }; int update_status = 0; int rc; int diff; ENTRY; /* * wait for local recovery to finish, so we can cleanup orphans * orphans are all objects since "last used" (assigned), but * there might be objects reserved and in some cases they won't * be used. we can't cleanup them till we're sure they won't be * used. also can't we allow new reservations because they may * end up getting orphans being cleaned up below. so we block * new reservations and wait till all reserved objects either * user or released. */ spin_lock(&d->opd_pre_lock); d->opd_pre_recovering = 1; spin_unlock(&d->opd_pre_lock); /* * The locking above makes sure the opd_pre_reserved check below will * catch all osp_precreate_reserve() calls who find * "!opd_pre_recovering". */ l_wait_event(d->opd_pre_waitq, (!d->opd_pre_reserved && d->opd_recovery_completed) || !osp_precreate_running(d) || d->opd_got_disconnected, &lwi); if (!osp_precreate_running(d) || d->opd_got_disconnected) GOTO(out, rc = -EAGAIN); CDEBUG(D_HA, "%s: going to cleanup orphans since "DFID"\n", d->opd_obd->obd_name, PFID(&d->opd_last_used_fid)); *last_fid = d->opd_last_used_fid; /* The OSP should already get the valid seq now */ LASSERT(!fid_is_zero(last_fid)); if (fid_oid(&d->opd_last_used_fid) < 2) { /* lastfid looks strange... ask OST */ rc = osp_get_lastfid_from_ost(env, d); if (rc) GOTO(out, rc); } imp = d->opd_obd->u.cli.cl_import; LASSERT(imp); req = ptlrpc_request_alloc(imp, &RQF_OST_CREATE); if (req == NULL) GOTO(out, rc = -ENOMEM); rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE); if (rc) { ptlrpc_request_free(req); req = NULL; GOTO(out, rc); } body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); if (body == NULL) GOTO(out, rc = -EPROTO); body->oa.o_flags = OBD_FL_DELORPHAN; body->oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP; fid_to_ostid(&d->opd_last_used_fid, &body->oa.o_oi); ptlrpc_request_set_replen(req); /* Don't resend the delorphan req */ req->rq_no_resend = req->rq_no_delay = 1; rc = ptlrpc_queue_wait(req); if (rc) { update_status = 1; GOTO(out, rc); } body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); if (body == NULL) GOTO(out, rc = -EPROTO); /* * OST provides us with id new pool starts from in body->oa.o_id */ ostid_to_fid(last_fid, &body->oa.o_oi, d->opd_index); spin_lock(&d->opd_pre_lock); diff = lu_fid_diff(&d->opd_last_used_fid, last_fid); if (diff > 0) { d->opd_pre_grow_count = OST_MIN_PRECREATE + diff; d->opd_pre_last_created_fid = d->opd_last_used_fid; } else { d->opd_pre_grow_count = OST_MIN_PRECREATE; d->opd_pre_last_created_fid = *last_fid; } /* * This empties the pre-creation pool and effectively blocks any new * reservations. */ LASSERT(fid_oid(&d->opd_pre_last_created_fid) <= LUSTRE_DATA_SEQ_MAX_WIDTH); d->opd_pre_used_fid = d->opd_pre_last_created_fid; d->opd_pre_grow_slow = 0; spin_unlock(&d->opd_pre_lock); CDEBUG(D_HA, "%s: Got last_id "DFID" from OST, last_created "DFID "last_used is "DFID"\n", d->opd_obd->obd_name, PFID(last_fid), PFID(&d->opd_pre_last_created_fid), PFID(&d->opd_last_used_fid)); out: if (req) ptlrpc_req_finished(req); spin_lock(&d->opd_pre_lock); d->opd_pre_recovering = 0; spin_unlock(&d->opd_pre_lock); /* * If rc is zero, the pre-creation window should have been emptied. * Since waking up the herd would be useless without pre-created * objects, we defer the signal to osp_precreate_send() in that case. */ if (rc != 0) { if (update_status) { CERROR("%s: cannot cleanup orphans: rc = %d\n", d->opd_obd->obd_name, rc); /* we can't proceed from here, OST seem to * be in a bad shape, better to wait for * a new instance of the server and repeat * from the beginning. notify possible waiters * this OSP isn't quite functional yet */ osp_pre_update_status(d, rc); } else { wake_up(&d->opd_pre_user_waitq); } } RETURN(rc); }
static int osp_precreate_send(const struct lu_env *env, struct osp_device *d) { struct osp_thread_info *oti = osp_env_info(env); struct ptlrpc_request *req; struct obd_import *imp; struct ost_body *body; int rc, grow, diff; struct lu_fid *fid = &oti->osi_fid; ENTRY; /* don't precreate new objects till OST healthy and has free space */ if (unlikely(d->opd_pre_status)) { CDEBUG(D_INFO, "%s: don't send new precreate: rc = %d\n", d->opd_obd->obd_name, d->opd_pre_status); RETURN(0); } /* * if not connection/initialization is compeleted, ignore */ imp = d->opd_obd->u.cli.cl_import; LASSERT(imp); req = ptlrpc_request_alloc(imp, &RQF_OST_CREATE); if (req == NULL) RETURN(-ENOMEM); req->rq_request_portal = OST_CREATE_PORTAL; /* we should not resend create request - anyway we will have delorphan * and kill these objects */ req->rq_no_delay = req->rq_no_resend = 1; rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE); if (rc) { ptlrpc_request_free(req); RETURN(rc); } spin_lock(&d->opd_pre_lock); if (d->opd_pre_grow_count > d->opd_pre_max_grow_count / 2) d->opd_pre_grow_count = d->opd_pre_max_grow_count / 2; grow = d->opd_pre_grow_count; spin_unlock(&d->opd_pre_lock); body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); LASSERT(body); *fid = d->opd_pre_last_created_fid; rc = osp_precreate_fids(env, d, fid, &grow); if (rc == 1) { /* Current seq has been used up*/ if (!osp_is_fid_client(d)) { osp_pre_update_status(d, -ENOSPC); rc = -ENOSPC; } wake_up(&d->opd_pre_waitq); GOTO(out_req, rc); } if (!osp_is_fid_client(d)) { /* Non-FID client will always send seq 0 because of * compatiblity */ LASSERTF(fid_is_idif(fid), "Invalid fid "DFID"\n", PFID(fid)); fid->f_seq = 0; } fid_to_ostid(fid, &body->oa.o_oi); body->oa.o_valid = OBD_MD_FLGROUP; ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); if (rc) { CERROR("%s: can't precreate: rc = %d\n", d->opd_obd->obd_name, rc); GOTO(out_req, rc); } LASSERT(req->rq_transno == 0); body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); if (body == NULL) GOTO(out_req, rc = -EPROTO); ostid_to_fid(fid, &body->oa.o_oi, d->opd_index); LASSERTF(lu_fid_diff(fid, &d->opd_pre_used_fid) > 0, "reply fid "DFID" pre used fid "DFID"\n", PFID(fid), PFID(&d->opd_pre_used_fid)); diff = lu_fid_diff(fid, &d->opd_pre_last_created_fid); spin_lock(&d->opd_pre_lock); if (diff < grow) { /* the OST has not managed to create all the * objects we asked for */ d->opd_pre_grow_count = max(diff, OST_MIN_PRECREATE); d->opd_pre_grow_slow = 1; } else { /* the OST is able to keep up with the work, * we could consider increasing grow_count * next time if needed */ d->opd_pre_grow_slow = 0; } d->opd_pre_last_created_fid = *fid; spin_unlock(&d->opd_pre_lock); CDEBUG(D_HA, "%s: current precreated pool: "DFID"-"DFID"\n", d->opd_obd->obd_name, PFID(&d->opd_pre_used_fid), PFID(&d->opd_pre_last_created_fid)); out_req: /* now we can wakeup all users awaiting for objects */ osp_pre_update_status(d, rc); wake_up(&d->opd_pre_user_waitq); ptlrpc_req_finished(req); RETURN(rc); }
int osp_object_truncate(const struct lu_env *env, struct dt_object *dt, __u64 size) { struct osp_device *d = lu2osp_dev(dt->do_lu.lo_dev); struct ptlrpc_request *req = NULL; struct obd_import *imp; struct ost_body *body; struct obdo *oa = NULL; int rc; ENTRY; imp = d->opd_obd->u.cli.cl_import; LASSERT(imp); req = ptlrpc_request_alloc(imp, &RQF_OST_PUNCH); if (req == NULL) RETURN(-ENOMEM); /* XXX: capa support? */ /* osc_set_capa_size(req, &RMF_CAPA1, capa); */ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH); if (rc) { ptlrpc_request_free(req); RETURN(rc); } /* * XXX: decide how do we do here with resend * if we don't resend, then client may see wrong file size * if we do resend, then MDS thread can get stuck for quite long */ req->rq_no_resend = req->rq_no_delay = 1; req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ ptlrpc_at_set_req_timeout(req); OBD_ALLOC_PTR(oa); if (oa == NULL) GOTO(out, rc = -ENOMEM); rc = fid_to_ostid(lu_object_fid(&dt->do_lu), &oa->o_oi); LASSERT(rc == 0); oa->o_size = size; oa->o_blocks = OBD_OBJECT_EOF; oa->o_valid = OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLID | OBD_MD_FLGROUP; body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); LASSERT(body); lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); /* XXX: capa support? */ /* osc_pack_capa(req, body, capa); */ ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); if (rc) CERROR("can't punch object: %d\n", rc); out: ptlrpc_req_finished(req); if (oa) OBD_FREE_PTR(oa); RETURN(rc); }
static int lfsck_master_oit_engine(const struct lu_env *env, struct lfsck_instance *lfsck) { struct lfsck_thread_info *info = lfsck_env_info(env); const struct dt_it_ops *iops = &lfsck->li_obj_oit->do_index_ops->dio_it; struct dt_it *di = lfsck->li_di_oit; struct lu_fid *fid = &info->lti_fid; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct ptlrpc_thread *thread = &lfsck->li_thread; __u32 idx = lfsck_dev_idx(lfsck->li_bottom); int rc; ENTRY; do { struct dt_object *target; bool update_lma = false; if (lfsck->li_di_dir != NULL) { rc = lfsck_master_dir_engine(env, lfsck); if (rc <= 0) RETURN(rc); } if (unlikely(lfsck->li_oit_over)) RETURN(1); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY1) && cfs_fail_val > 0) { struct l_wait_info lwi; lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL); l_wait_event(thread->t_ctl_waitq, !thread_is_running(thread), &lwi); } if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH)) RETURN(0); lfsck->li_current_oit_processed = 1; lfsck->li_new_scanned++; rc = iops->rec(env, di, (struct dt_rec *)fid, 0); if (rc != 0) { lfsck_fail(env, lfsck, true); if (rc < 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); else goto checkpoint; } if (fid_is_idif(fid)) { __u32 idx1 = fid_idif_ost_idx(fid); LASSERT(!lfsck->li_master); /* It is an old format device, update the LMA. */ if (idx != idx1) { struct ost_id *oi = &info->lti_oi; fid_to_ostid(fid, oi); ostid_to_fid(fid, oi, idx); update_lma = true; } } else if (!fid_is_norm(fid) && !fid_is_igif(fid) && !fid_is_last_id(fid) && !fid_is_root(fid) && !fid_seq_is_dot(fid_seq(fid))) { /* If the FID/object is only used locally and invisible * to external nodes, then LFSCK will not handle it. */ goto checkpoint; } target = lfsck_object_find(env, lfsck, fid); if (target == NULL) { goto checkpoint; } else if (IS_ERR(target)) { lfsck_fail(env, lfsck, true); if (bk->lb_param & LPF_FAILOUT) RETURN(PTR_ERR(target)); else goto checkpoint; } /* XXX: Currently, skip remote object, the consistency for * remote object will be processed in LFSCK phase III. */ if (dt_object_exists(target) && !dt_object_remote(target)) { if (update_lma) rc = lfsck_update_lma(env, lfsck, target); if (rc == 0) rc = lfsck_exec_oit(env, lfsck, target); } lfsck_object_put(env, target); if (rc != 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); checkpoint: rc = lfsck_checkpoint(env, lfsck); if (rc != 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); /* Rate control. */ lfsck_control_speed(lfsck); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL1)) { spin_lock(&lfsck->li_lock); thread_set_flags(thread, SVC_STOPPING); spin_unlock(&lfsck->li_lock); RETURN(-EINVAL); } rc = iops->next(env, di); if (unlikely(rc > 0)) lfsck->li_oit_over = 1; else if (likely(rc == 0)) lfsck->li_current_oit_processed = 0; if (unlikely(!thread_is_running(thread))) RETURN(0); } while (rc == 0 || lfsck->li_di_dir != NULL); RETURN(rc); }