static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
			       int errcode)
{
	struct osc_object	*osc = cookie;
	struct ldlm_lock	*dlmlock;
	struct lu_env		*env;
	struct cl_env_nest	 nest;
	ENTRY;

	env = cl_env_nested_get(&nest);
	LASSERT(!IS_ERR(env));

	if (errcode == ELDLM_LOCK_MATCHED)
		GOTO(out, errcode = ELDLM_OK);

	if (errcode != ELDLM_OK)
		GOTO(out, errcode);

	dlmlock = ldlm_handle2lock(lockh);
	LASSERT(dlmlock != NULL);

	lock_res_and_lock(dlmlock);
	LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);

	/* there is no osc_lock associated with AGL lock */
	osc_lock_lvb_update(env, osc, dlmlock, NULL);

	unlock_res_and_lock(dlmlock);
	LDLM_LOCK_PUT(dlmlock);

out:
	cl_object_put(env, osc2cl(osc));
	cl_env_nested_put(&nest, env);
	RETURN(ldlm_error2errno(errcode));
}
/**
 * Helper for mdc_dlm_blocking_ast() handling discrepancies between cl_lock
 * and ldlm_lock caches.
 */
static int mdc_dlm_blocking_ast0(const struct lu_env *env,
				 struct ldlm_lock *dlmlock,
				 void *data, int flag)
{
	struct cl_object *obj = NULL;
	int result = 0;
	bool discard;
	enum cl_lock_mode mode = CLM_READ;
	ENTRY;

	LASSERT(flag == LDLM_CB_CANCELING);
	LASSERT(dlmlock != NULL);

	lock_res_and_lock(dlmlock);
	if (dlmlock->l_granted_mode != dlmlock->l_req_mode) {
		dlmlock->l_ast_data = NULL;
		unlock_res_and_lock(dlmlock);
		RETURN(0);
	}

	discard = ldlm_is_discard_data(dlmlock);
	if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))
		mode = CLM_WRITE;

	if (dlmlock->l_ast_data != NULL) {
		obj = osc2cl(dlmlock->l_ast_data);
		dlmlock->l_ast_data = NULL;
		cl_object_get(obj);
	}
	ldlm_set_kms_ignore(dlmlock);
	unlock_res_and_lock(dlmlock);

	/* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
	 * the object has been destroyed. */
	if (obj != NULL) {
		struct cl_attr *attr = &osc_env_info(env)->oti_attr;

		/* Destroy pages covered by the extent of the DLM lock */
		result = mdc_lock_flush(env, cl2osc(obj), cl_index(obj, 0),
					CL_PAGE_EOF, mode, discard);

		/* Losing a lock, set KMS to 0.
		 * NB: assumed that DOM lock covers whole data on MDT. */
		lock_res_and_lock(dlmlock);
		cl_object_attr_lock(obj);
		attr->cat_kms = 0;
		cl_object_attr_update(env, obj, attr, CAT_KMS);
		cl_object_attr_unlock(obj);
		unlock_res_and_lock(dlmlock);

		cl_object_put(env, obj);
	}
	RETURN(result);
}
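/*
 * A hedged sketch (not part of the source above) of how a *_dlm_blocking_ast0()
 * helper is typically driven: the registered blocking AST is invoked once with
 * LDLM_CB_BLOCKING when a conflict appears (the client initiates cancellation)
 * and once with LDLM_CB_CANCELING when the lock is actually torn down.
 * example_dlm_blocking_ast() is a hypothetical name, and the exact
 * ldlm_cli_cancel() signature varies across Lustre releases.
 */
static int example_dlm_blocking_ast(struct ldlm_lock *dlmlock,
				    struct ldlm_lock_desc *new, void *data,
				    int flag)
{
	int rc = 0;

	switch (flag) {
	case LDLM_CB_BLOCKING: {
		struct lustre_handle lockh;

		/* A conflicting request arrived: ask for cancellation. */
		ldlm_lock2handle(dlmlock, &lockh);
		rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
		break;
	}
	case LDLM_CB_CANCELING: {
		struct lu_env *env;
		u16 refcheck;

		/* The lock is going away: flush client-side state under a
		 * cl_env, as the helper above expects. */
		env = cl_env_get(&refcheck);
		if (IS_ERR(env))
			return PTR_ERR(env);
		rc = mdc_dlm_blocking_ast0(env, dlmlock, data, flag);
		cl_env_put(env, &refcheck);
		break;
	}
	default:
		LBUG();	/* no other flags are expected here */
	}
	return rc;
}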
static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
			       struct lovsub_object *los, int idx)
{
	struct cl_object	*sub;
	struct lov_layout_raid0	*r0;
	struct lu_site		*site;
	struct lu_site_bkt_data	*bkt;
	wait_queue_t		*waiter;

	r0 = &lov->u.raid0;
	LASSERT(r0->lo_sub[idx] == los);

	sub = lovsub2cl(los);
	site = sub->co_lu.lo_dev->ld_site;
	bkt = lu_site_bkt_from_fid(site, &sub->co_lu.lo_header->loh_fid);

	cl_object_kill(env, sub);
	/* release a reference to the sub-object and ... */
	lu_object_ref_del(&sub->co_lu, "lov-parent", lov);
	cl_object_put(env, sub);

	/* ... wait until it is actually destroyed---sub-object clears its
	 * ->lo_sub[] slot in lovsub_object_fini() */
	if (r0->lo_sub[idx] == los) {
		waiter = &lov_env_info(env)->lti_waiter;
		init_waitqueue_entry(waiter, current);
		add_wait_queue(&bkt->lsb_marche_funebre, waiter);
		set_current_state(TASK_UNINTERRUPTIBLE);
		while (1) {
			/* this wait-queue is signaled at the end of
			 * lu_object_free(). */
			set_current_state(TASK_UNINTERRUPTIBLE);
			spin_lock(&r0->lo_sub_lock);
			if (r0->lo_sub[idx] == los) {
				spin_unlock(&r0->lo_sub_lock);
				schedule();
			} else {
				spin_unlock(&r0->lo_sub_lock);
				set_current_state(TASK_RUNNING);
				break;
			}
		}
		remove_wait_queue(&bkt->lsb_marche_funebre, waiter);
	}
	LASSERT(!r0->lo_sub[idx]);
}
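/*
 * A minimal, self-contained userspace analogue (an illustration, not from
 * the source above) of the wait discipline in lov_subobject_kill(): re-check
 * the condition under the lock on every wakeup and only then go back to
 * sleep, so a wakeup racing with the final teardown is never lost. The
 * pthread condition variable stands in for lsb_marche_funebre, and the
 * static "slot" for r0->lo_sub[idx].
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  slot_freed = PTHREAD_COND_INITIALIZER;
static void *slot = (void *)1;		/* non-NULL: object still alive */

static void *destroyer(void *arg)
{
	sleep(1);			/* simulate delayed lu_object_free() */
	pthread_mutex_lock(&slot_lock);
	slot = NULL;			/* clear the slot ... */
	pthread_cond_broadcast(&slot_freed); /* ... then signal waiters */
	pthread_mutex_unlock(&slot_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, destroyer, NULL);

	pthread_mutex_lock(&slot_lock);
	while (slot != NULL)		/* re-check on every wakeup */
		pthread_cond_wait(&slot_freed, &slot_lock);
	pthread_mutex_unlock(&slot_lock);

	printf("slot cleared; safe to proceed\n");
	pthread_join(t, NULL);
	return 0;
}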
static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
			       int errcode)
{
	struct osc_object *osc = cookie;
	struct ldlm_lock *dlmlock;
	struct lu_env *env;
	u16 refcheck;

	env = cl_env_get(&refcheck);
	LASSERT(!IS_ERR(env));

	if (errcode == ELDLM_LOCK_MATCHED) {
		errcode = ELDLM_OK;
		goto out;
	}

	if (errcode != ELDLM_OK)
		goto out;

	dlmlock = ldlm_handle2lock(lockh);
	LASSERT(dlmlock);

	lock_res_and_lock(dlmlock);
	LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);

	/* there is no osc_lock associated with AGL lock */
	osc_lock_lvb_update(env, osc, dlmlock, NULL);

	unlock_res_and_lock(dlmlock);
	LDLM_LOCK_PUT(dlmlock);

out:
	cl_object_put(env, osc2cl(osc));
	cl_env_put(env, &refcheck);
	return ldlm_error2errno(errcode);
}
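/*
 * The two osc_lock_upcall_agl() variants above differ only in how they
 * obtain the client environment: the older one uses the nested
 * cl_env_nested_get()/cl_env_nested_put() API, the newer one the
 * refcount-checked cl_env_get()/cl_env_put() pair. Both LASSERT() that the
 * allocation succeeded; a caller that must fail gracefully would check
 * IS_ERR() instead, as in this minimal sketch (example_with_cl_env() is a
 * hypothetical name).
 */
static int example_with_cl_env(void)
{
	struct lu_env *env;
	u16 refcheck;
	int rc = 0;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		return PTR_ERR(env);	/* propagate instead of asserting */

	/* ... use env for cl_object_*() calls ... */

	cl_env_put(env, &refcheck);	/* always paired with cl_env_get() */
	return rc;
}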
static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
			struct cl_object *stripe, struct lov_layout_raid0 *r0,
			int idx)
{
	struct cl_object_header *hdr;
	struct cl_object_header *subhdr;
	struct cl_object_header *parent;
	struct lov_oinfo	*oinfo;
	int result;

	if (OBD_FAIL_CHECK(OBD_FAIL_LOV_INIT)) {
		/* For sanity:test_206.
		 * Do not leave the object in cache to avoid accessing
		 * freed memory. This is because osc_object is referring to
		 * lov_oinfo of lsm_stripe_data which will be freed due to
		 * this failure. */
		cl_object_kill(env, stripe);
		cl_object_put(env, stripe);
		return -EIO;
	}

	hdr = cl_object_header(lov2cl(lov));
	subhdr = cl_object_header(stripe);

	oinfo = lov->lo_lsm->lsm_oinfo[idx];
	CDEBUG(D_INODE, DFID"@%p[%d] -> "DFID"@%p: ostid: "DOSTID
	       " idx: %d gen: %d\n",
	       PFID(&subhdr->coh_lu.loh_fid), subhdr, idx,
	       PFID(&hdr->coh_lu.loh_fid), hdr, POSTID(&oinfo->loi_oi),
	       oinfo->loi_ost_idx, oinfo->loi_ost_gen);

	/* reuse ->coh_attr_guard to protect coh_parent change */
	spin_lock(&subhdr->coh_attr_guard);
	parent = subhdr->coh_parent;
	if (parent == NULL) {
		subhdr->coh_parent = hdr;
		spin_unlock(&subhdr->coh_attr_guard);
		subhdr->coh_nesting = hdr->coh_nesting + 1;
		lu_object_ref_add(&stripe->co_lu, "lov-parent", lov);
		r0->lo_sub[idx] = cl2lovsub(stripe);
		r0->lo_sub[idx]->lso_super = lov;
		r0->lo_sub[idx]->lso_index = idx;
		result = 0;
	} else {
		struct lu_object  *old_obj;
		struct lov_object *old_lov;
		unsigned int mask = D_INODE;

		spin_unlock(&subhdr->coh_attr_guard);
		old_obj = lu_object_locate(&parent->coh_lu, &lov_device_type);
		LASSERT(old_obj != NULL);
		old_lov = cl2lov(lu2cl(old_obj));
		if (old_lov->lo_layout_invalid) {
			/* the object's layout has already changed but isn't
			 * refreshed */
			lu_object_unhash(env, &stripe->co_lu);
			result = -EAGAIN;
		} else {
			mask = D_ERROR;
			result = -EIO;
		}

		LU_OBJECT_DEBUG(mask, env, &stripe->co_lu,
				"stripe %d is already owned.\n", idx);
		LU_OBJECT_DEBUG(mask, env, old_obj, "owned.\n");
		LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n");
		cl_object_put(env, stripe);
	}
	return result;
}
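/*
 * A minimal sketch (hypothetical types and names, not from the source above)
 * of the ownership discipline lov_init_sub() applies to ->coh_parent: the
 * first claimant installs itself under the guard lock; a later claimant
 * backs off, distinguishing a stale layout (retryable, -EAGAIN) from a
 * genuine double-owner conflict (-EIO).
 */
struct example_child {
	spinlock_t		 ec_guard;
	struct example_parent	*ec_parent;
};

static int example_claim_parent(struct example_child *child,
				struct example_parent *parent,
				bool old_owner_stale)
{
	spin_lock(&child->ec_guard);
	if (child->ec_parent == NULL) {
		child->ec_parent = parent;	/* first caller wins */
		spin_unlock(&child->ec_guard);
		return 0;
	}
	spin_unlock(&child->ec_guard);

	/* Someone else owns the child: retry if their layout is stale,
	 * otherwise report a real conflict. */
	return old_owner_stale ? -EAGAIN : -EIO;
}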
/**
 * Break down the FIEMAP request and send appropriate calls to individual OSTs.
 * This also handles the restarting of FIEMAP calls in case mapping overflows
 * the available number of extents in a single call.
 *
 * \param env	[in]		lustre environment
 * \param obj	[in]		file object
 * \param fmkey	[in]		fiemap request header and other info
 * \param fiemap [out]		fiemap buffer holding retrieved map extents
 * \param buflen [in/out]	max buffer length of @fiemap; while iterating
 *				each OST, it limits the size of the map
 *				requested
 *
 * \retval 0	success
 * \retval < 0	error
 */
static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
			     struct ll_fiemap_info_key *fmkey,
			     struct fiemap *fiemap, size_t *buflen)
{
	struct lov_stripe_md	*lsm;
	struct cl_object	*subobj = NULL;
	struct lov_obd		*lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov;
	struct fiemap		*fm_local = NULL;
	struct fiemap_extent	*lcl_fm_ext;
	loff_t fm_start;
	loff_t fm_end;
	loff_t fm_length;
	loff_t fm_end_offset;
	int count_local;
	int ost_index = 0;
	int start_stripe;
	int current_extent = 0;
	int rc = 0;
	int last_stripe;
	int cur_stripe = 0;
	int cur_stripe_wrap = 0;
	int stripe_count;
	unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
	/* Have we collected enough extents? */
	bool enough = false;
	/* EOF for object */
	bool ost_eof = false;
	/* done with required mapping for this OST? */
	bool ost_done = false;
	ENTRY;

	lsm = lov_lsm_addref(cl2lov(obj));
	if (lsm == NULL)
		RETURN(-ENODATA);

	/**
	 * If the stripe_count > 1 and the application does not understand
	 * DEVICE_ORDER flag, it cannot interpret the extents correctly.
	 */
	if (lsm->lsm_stripe_count > 1 &&
	    !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
		GOTO(out_lsm, rc = -ENOTSUPP);

	if (lsm_is_released(lsm)) {
		if (fiemap->fm_start < fmkey->lfik_oa.o_size) {
			/**
			 * released file, return a minimal FIEMAP if
			 * request fits in file-size.
			 */
			fiemap->fm_mapped_extents = 1;
			fiemap->fm_extents[0].fe_logical = fiemap->fm_start;
			if (fiemap->fm_start + fiemap->fm_length <
			    fmkey->lfik_oa.o_size)
				fiemap->fm_extents[0].fe_length =
					fiemap->fm_length;
			else
				fiemap->fm_extents[0].fe_length =
					fmkey->lfik_oa.o_size -
					fiemap->fm_start;
			fiemap->fm_extents[0].fe_flags |=
				FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST;
		}
		GOTO(out_lsm, rc = 0);
	}

	if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size)
		buffer_size = fiemap_count_to_size(fiemap->fm_extent_count);

	OBD_ALLOC_LARGE(fm_local, buffer_size);
	if (fm_local == NULL)
		GOTO(out_lsm, rc = -ENOMEM);
	lcl_fm_ext = &fm_local->fm_extents[0];
	count_local = fiemap_size_to_count(buffer_size);

	fm_start = fiemap->fm_start;
	fm_length = fiemap->fm_length;
	/* Calculate start stripe, last stripe and length of mapping */
	start_stripe = lov_stripe_number(lsm, fm_start);
	fm_end = (fm_length == ~0ULL) ? fmkey->lfik_oa.o_size :
					fm_start + fm_length - 1;
	/* If fm_length != ~0ULL but fm_start + fm_length - 1 exceeds file
	 * size */
	if (fm_end > fmkey->lfik_oa.o_size)
		fm_end = fmkey->lfik_oa.o_size;

	last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end,
					      start_stripe, &stripe_count);
	fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start,
						  fm_end, &start_stripe);
	if (fm_end_offset == -EINVAL)
		GOTO(out_fm_local, rc = -EINVAL);

	/**
	 * Requested extent count exceeds the fiemap buffer size, shrink our
	 * ambition.
	 */
	if (fiemap_count_to_size(fiemap->fm_extent_count) > *buflen)
		fiemap->fm_extent_count = fiemap_size_to_count(*buflen);
	if (fiemap->fm_extent_count == 0)
		count_local = 0;

	/* Check each stripe */
	for (cur_stripe = start_stripe; stripe_count > 0;
	     --stripe_count,
	     cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
		loff_t req_fm_len; /* Stores length of required mapping */
		loff_t len_mapped_single_call;
		loff_t lun_start;
		loff_t lun_end;
		loff_t obd_object_end;
		unsigned int ext_count;

		cur_stripe_wrap = cur_stripe;

		/* Find out range of mapping on this stripe */
		if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end,
					   &lun_start, &obd_object_end)) == 0)
			continue;

		if (lov_oinfo_is_dummy(lsm->lsm_oinfo[cur_stripe]))
			GOTO(out_fm_local, rc = -EIO);

		/* If this is a continuation FIEMAP call and we are on
		 * starting stripe then lun_start needs to be set to
		 * fm_end_offset */
		if (fm_end_offset != 0 && cur_stripe == start_stripe)
			lun_start = fm_end_offset;

		if (fm_length != ~0ULL) {
			/* Handle fm_start + fm_length overflow */
			if (fm_start + fm_length < fm_start)
				fm_length = ~0ULL - fm_start;
			lun_end = lov_size_to_stripe(lsm, fm_start + fm_length,
						     cur_stripe);
		} else {
			lun_end = ~0ULL;
		}

		if (lun_start == lun_end)
			continue;

		req_fm_len = obd_object_end - lun_start;
		fm_local->fm_length = 0;
		len_mapped_single_call = 0;

		/* find the lovsub object */
		subobj = lov_find_subobj(env, cl2lov(obj), lsm, cur_stripe);
		if (IS_ERR(subobj))
			GOTO(out_fm_local, rc = PTR_ERR(subobj));
		/* If the output buffer is very large and the objects have
		 * many extents we may need to loop on a single OST
		 * repeatedly */
		ost_eof = false;
		ost_done = false;
		do {
			if (fiemap->fm_extent_count > 0) {
				/* Don't get too many extents. */
				if (current_extent + count_local >
				    fiemap->fm_extent_count)
					count_local = fiemap->fm_extent_count -
						      current_extent;
			}

			lun_start += len_mapped_single_call;
			fm_local->fm_length = req_fm_len -
					      len_mapped_single_call;
			req_fm_len = fm_local->fm_length;
			fm_local->fm_extent_count = enough ? 1 : count_local;
			fm_local->fm_mapped_extents = 0;
			fm_local->fm_flags = fiemap->fm_flags;

			ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx;

			if (ost_index < 0 ||
			    ost_index >= lov->desc.ld_tgt_count)
				GOTO(obj_put, rc = -EINVAL);
			/* If OST is inactive, return extent with UNKNOWN
			 * flag. */
			if (!lov->lov_tgts[ost_index]->ltd_active) {
				fm_local->fm_flags |= FIEMAP_EXTENT_LAST;
				fm_local->fm_mapped_extents = 1;

				lcl_fm_ext[0].fe_logical = lun_start;
				lcl_fm_ext[0].fe_length = obd_object_end -
							  lun_start;
				lcl_fm_ext[0].fe_flags |=
					FIEMAP_EXTENT_UNKNOWN;

				goto inactive_tgt;
			}

			fm_local->fm_start = lun_start;
			fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
			memcpy(&fmkey->lfik_fiemap, fm_local,
			       sizeof(*fm_local));
			*buflen = fiemap_count_to_size(
						fm_local->fm_extent_count);

			rc = cl_object_fiemap(env, subobj, fmkey, fm_local,
					      buflen);
			if (rc != 0)
				GOTO(obj_put, rc);
inactive_tgt:
			ext_count = fm_local->fm_mapped_extents;
			if (ext_count == 0) {
				ost_done = true;
				/* If last stripe has a hole at the end,
				 * we need to return */
				if (cur_stripe_wrap == last_stripe) {
					fiemap->fm_mapped_extents = 0;
					goto finish;
				}
				break;
			} else if (enough) {
				/*
				 * We've collected enough extents and there
				 * are more extents after it.
				 */
				goto finish;
			}

			/* If we just need the number of extents, go to the
			 * next device */
			if (fiemap->fm_extent_count == 0) {
				current_extent += ext_count;
				break;
			}

			/* prepare to copy retrieved map extents */
			len_mapped_single_call =
				lcl_fm_ext[ext_count - 1].fe_logical -
				lun_start +
				lcl_fm_ext[ext_count - 1].fe_length;

			/* Have we finished mapping on this device? */
			if (req_fm_len <= len_mapped_single_call)
				ost_done = true;

			/* Clear the EXTENT_LAST flag which can be present
			 * on the last extent */
			if (lcl_fm_ext[ext_count - 1].fe_flags &
			    FIEMAP_EXTENT_LAST)
				lcl_fm_ext[ext_count - 1].fe_flags &=
							~FIEMAP_EXTENT_LAST;
			if (lov_stripe_size(lsm,
					lcl_fm_ext[ext_count - 1].fe_logical +
					lcl_fm_ext[ext_count - 1].fe_length,
					cur_stripe) >= fmkey->lfik_oa.o_size)
				ost_eof = true;

			fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext,
						     ost_index, ext_count,
						     current_extent);
			current_extent += ext_count;

			/* Ran out of available extents? */
			if (current_extent >= fiemap->fm_extent_count)
				enough = true;
		} while (!ost_done && !ost_eof);

		cl_object_put(env, subobj);
		subobj = NULL;

		if (cur_stripe_wrap == last_stripe)
			goto finish;
	} /* for each stripe */
finish:
	/* Indicate that we are returning device offsets unless the file has
	 * just a single stripe */
	if (lsm->lsm_stripe_count > 1)
		fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;

	if (fiemap->fm_extent_count == 0)
		goto skip_last_device_calc;

	/* Check if we have reached the last stripe and whether mapping for
	 * that stripe is done. */
	if ((cur_stripe_wrap == last_stripe) && (ost_done || ost_eof))
		fiemap->fm_extents[current_extent - 1].fe_flags |=
							FIEMAP_EXTENT_LAST;
skip_last_device_calc:
	fiemap->fm_mapped_extents = current_extent;
obj_put:
	if (subobj != NULL)
		cl_object_put(env, subobj);
out_fm_local:
	OBD_FREE_LARGE(fm_local, buffer_size);
out_lsm:
	lov_lsm_put(lsm);
	return rc;
}
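/*
 * A self-contained userspace sketch showing what a caller of the stripe-aware
 * FIEMAP above ultimately issues: the standard FS_IOC_FIEMAP ioctl with a
 * caller-sized extent array. This uses only the generic Linux fiemap ABI;
 * note that for files striped over multiple OSTs the code above insists on
 * FIEMAP_FLAG_DEVICE_ORDER, a Lustre-specific flag not set here.
 */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	unsigned int count = 32;	/* extents we have room for */
	struct fiemap *fm;
	unsigned int i;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s FILE\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	fm = calloc(1, sizeof(*fm) + count * sizeof(struct fiemap_extent));
	if (fm == NULL) {
		close(fd);
		return 1;
	}
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* map the whole file */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush dirty data first */
	fm->fm_extent_count = count;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		free(fm);
		close(fd);
		return 1;
	}

	for (i = 0; i < fm->fm_mapped_extents; i++)
		printf("extent %2u: logical %llu physical %llu len %llu "
		       "flags %#x\n", i,
		       (unsigned long long)fm->fm_extents[i].fe_logical,
		       (unsigned long long)fm->fm_extents[i].fe_physical,
		       (unsigned long long)fm->fm_extents[i].fe_length,
		       fm->fm_extents[i].fe_flags);

	free(fm);
	close(fd);
	return 0;
}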
/**
 * Helper for osc_dlm_blocking_ast() handling discrepancies between cl_lock
 * and ldlm_lock caches.
 */
static int osc_dlm_blocking_ast0(const struct lu_env *env,
				 struct ldlm_lock *dlmlock,
				 void *data, int flag)
{
	struct cl_object *obj = NULL;
	int result = 0;
	int discard;
	enum cl_lock_mode mode = CLM_READ;

	LASSERT(flag == LDLM_CB_CANCELING);

	lock_res_and_lock(dlmlock);
	if (dlmlock->l_granted_mode != dlmlock->l_req_mode) {
		dlmlock->l_ast_data = NULL;
		unlock_res_and_lock(dlmlock);
		return 0;
	}

	discard = ldlm_is_discard_data(dlmlock);
	if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))
		mode = CLM_WRITE;

	if (dlmlock->l_ast_data) {
		obj = osc2cl(dlmlock->l_ast_data);
		dlmlock->l_ast_data = NULL;

		cl_object_get(obj);
	}

	unlock_res_and_lock(dlmlock);

	/* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
	 * the object has been destroyed. */
	if (obj) {
		struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent;
		struct cl_attr *attr = &osc_env_info(env)->oti_attr;
		__u64 old_kms;

		/* Destroy pages covered by the extent of the DLM lock */
		result = osc_lock_flush(cl2osc(obj),
					cl_index(obj, extent->start),
					cl_index(obj, extent->end),
					mode, discard);

		/* losing a lock, update kms */
		lock_res_and_lock(dlmlock);
		cl_object_attr_lock(obj);
		/* Must get the value under the lock to avoid race. */
		old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
		/* Update the kms. Need to loop all granted locks.
		 * Not a problem for the client */
		attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);

		cl_object_attr_set(env, obj, attr, CAT_KMS);
		cl_object_attr_unlock(obj);
		unlock_res_and_lock(dlmlock);

		cl_object_put(env, obj);
	}
	return result;
}
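/*
 * A conceptual, self-contained sketch (hypothetical types and names; not the
 * real ldlm_extent_shift_kms()) of the KMS recomputation above: after
 * dropping one lock, the known minimal size becomes the largest end + 1
 * among the remaining granted extents, and it is never allowed to grow past
 * the old value. The real function walks the resource's granted-lock list
 * rather than an array.
 */
struct example_extent {
	unsigned long long start;
	unsigned long long end;		/* inclusive last byte covered */
};

static unsigned long long example_shift_kms(const struct example_extent *held,
					    int nheld, int dying,
					    unsigned long long old_kms)
{
	unsigned long long kms = 0;
	int i;

	for (i = 0; i < nheld; i++) {
		if (i == dying)
			continue;	/* skip the lock being cancelled */
		if (held[i].end + 1 > kms)
			kms = held[i].end + 1;
	}
	/* cancelling a lock can only shrink what we know is valid */
	return kms < old_kms ? kms : old_kms;
}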