/**
 * Write \a buf to object \a dt at offset \a *pos.
 *
 * The data is copied into the DMU transaction attached to \a th; the
 * actual disk write happens later at txg sync.  If the write extends
 * the object, the cached size in oo_attr is grown under oo_attr_lock
 * and pushed to the SA (system attribute) area of the dnode.
 *
 * \param[in]     env          execution environment
 * \param[in]     dt           object to write to
 * \param[in]     buf          buffer with the data and its length
 * \param[in,out] pos          offset to write at; advanced by the
 *                             number of bytes written on success
 * \param[in]     th           transaction handle (must be started)
 * \param[in]     capa         capability (unused by this backend)
 * \param[in]     ignore_quota ignored by this backend
 *
 * \retval bytes written on success
 * \retval negative errno on failure
 */
static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
			 const struct lu_buf *buf, loff_t *pos,
			 struct thandle *th, struct lustre_capa *capa,
			 int ignore_quota)
{
	struct osd_object  *obj = osd_dt_obj(dt);
	struct osd_device  *osd = osd_obj2dev(obj);
	struct osd_thandle *oh;
	uint64_t            offset = *pos;
	int                 rc;
	ENTRY;

	LASSERT(dt_object_exists(dt));
	LASSERT(obj->oo_db);

	LASSERT(th != NULL);
	oh = container_of0(th, struct osd_thandle, ot_super);

	/* NOTE(review): call matches the 3-arg record_start_io() /
	 * 5-arg record_end_io() convention used by osd_bufs_get_read()
	 * in this file; the original 4-arg calls could not coexist
	 * with those in one compilation unit. */
	record_start_io(osd, WRITE, 0);

	dmu_write(osd->od_os, obj->oo_db->db_object, offset,
		  (uint64_t)buf->lb_len, buf->lb_buf, oh->ot_tx);
	write_lock(&obj->oo_attr_lock);
	if (obj->oo_attr.la_size < offset + buf->lb_len) {
		obj->oo_attr.la_size = offset + buf->lb_len;
		write_unlock(&obj->oo_attr_lock);
		/* osd_object_sa_update() will be copying directly from
		 * oo_attr into dbuf, so the lock can be dropped before
		 * the call: any update within a single txg will copy
		 * the most recent value */
		rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd),
					  &obj->oo_attr.la_size, 8, oh);
		if (unlikely(rc))
			GOTO(out, rc);
	} else {
		write_unlock(&obj->oo_attr_lock);
	}

	*pos += buf->lb_len;
	rc = buf->lb_len;

out:
	/* statistics are recorded even on the error path; elapsed time
	 * is reported as 0 since the real I/O is deferred to txg sync */
	record_end_io(osd, WRITE, 0, buf->lb_len,
		      buf->lb_len >> PAGE_CACHE_SHIFT);
	RETURN(rc);
}
/**
 * Read data from object \a dt at offset \a *pos into \a buf.
 *
 * The requested range is clamped against the cached object size
 * (read under oo_attr_lock), then read synchronously via the DMU
 * with prefetch enabled.
 *
 * \param[in]     env  execution environment
 * \param[in]     dt   object to read from
 * \param[in]     buf  buffer to read into (lb_len bytes requested)
 * \param[in,out] pos  offset to read at; advanced by the number of
 *                     bytes read on success
 * \param[in]     capa capability (unused by this backend)
 *
 * \retval bytes read on success (0 if \a *pos is at/past EOF)
 * \retval negative errno on failure
 */
static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
			struct lu_buf *buf, loff_t *pos,
			struct lustre_capa *capa)
{
	struct osd_object *obj = osd_dt_obj(dt);
	struct osd_device *osd = osd_obj2dev(obj);
	uint64_t           old_size;
	int                size = buf->lb_len;
	int                rc;
	unsigned long      start;

	LASSERT(dt_object_exists(dt));
	LASSERT(obj->oo_db);

	start = cfs_time_current();

	read_lock(&obj->oo_attr_lock);
	old_size = obj->oo_attr.la_size;
	read_unlock(&obj->oo_attr_lock);

	/* clamp the request to the current object size; a read that
	 * starts past EOF returns 0 bytes */
	if (*pos + size > old_size) {
		if (old_size < *pos)
			return 0;
		else
			size = old_size - *pos;
	}

	/* NOTE(review): call matches the 3-arg record_start_io() /
	 * 5-arg record_end_io() convention used by osd_bufs_get_read()
	 * in this file; the original 4-arg calls could not coexist
	 * with those in one compilation unit. */
	record_start_io(osd, READ, 0);

	rc = -dmu_read(osd->od_os, obj->oo_db->db_object, *pos, size,
		       buf->lb_buf, DMU_READ_PREFETCH);

	record_end_io(osd, READ, cfs_time_current() - start, size,
		      size >> PAGE_CACHE_SHIFT);
	if (rc == 0) {
		rc = size;
		*pos += size;
	}
	return rc;
}
/**
 * Prepare buffers for read.
 *
 * The function maps the range described by \a off and \a len to \a lnb array.
 * dmu_buf_hold_array_by_bonus() finds/creates appropriate ARC buffers, then
 * we fill \a lnb array with the pages storing ARC buffers. Notice the current
 * implementation passes TRUE to dmu_buf_hold_array_by_bonus() to fill ARC
 * buffers with actual data, I/O is done in the context of osd_bufs_get_read().
 * A better implementation would just return the buffers (potentially unfilled)
 * and subsequent osd_read_prep() would do I/O for many ranges concurrently.
 *
 * \param[in] env	environment
 * \param[in] obj	object
 * \param[in] off	offset in bytes
 * \param[in] len	the number of bytes to access
 * \param[out] lnb	array of local niobufs pointing to the buffers with data
 *
 * \retval		0 for success
 * \retval		negative error number of failure
 */
static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj,
			     loff_t off, ssize_t len, struct niobuf_local *lnb)
{
	struct osd_device *osd = osd_obj2dev(obj);
	unsigned long start = cfs_time_current();
	int rc, i, numbufs, npages = 0;
	dmu_buf_t **dbp;
	ENTRY;

	record_start_io(osd, READ, 0);

	/* grab buffers for read:
	 * OSD API let us to grab buffers first, then initiate IO(s)
	 * so that all required IOs will be done in parallel, but at the
	 * moment DMU doesn't provide us with a method to grab buffers.
	 * If we discover this is a vital for good performance we
	 * can get own replacement for dmu_buf_hold_array_by_bonus(). */
	while (len > 0) {
		/* TRUE -> the dbufs come back already filled with data */
		rc = -dmu_buf_hold_array_by_bonus(obj->oo_db, off, len, TRUE,
						  osd_zerocopy_tag, &numbufs,
						  &dbp);
		if (unlikely(rc))
			GOTO(err, rc);

		for (i = 0; i < numbufs; i++) {
			int bufoff, tocpy, thispage;
			void *dbf = dbp[i];

			LASSERT(len > 0);

			/* account the held dbuf; released again when the
			 * niobufs are put back (see osd_bufs_put) */
			atomic_inc(&osd->od_zerocopy_pin);

			bufoff = off - dbp[i]->db_offset;
			tocpy = min_t(int, dbp[i]->db_size - bufoff, len);

			/* kind of trick to differentiate dbuf vs. arcbuf:
			 * the low pointer bit is set to tag this lnb_data
			 * as a held dbuf (pointers are at least 2-aligned,
			 * so bit 0 is known to be free) */
			LASSERT(((unsigned long)dbp[i] & 1) == 0);
			dbf = (void *) ((unsigned long)dbp[i] | 1);

			/* one dbuf may span several pages; emit one niobuf
			 * per page-sized (or smaller) piece */
			while (tocpy > 0) {
				thispage = PAGE_CACHE_SIZE;
				thispage -= bufoff & (PAGE_CACHE_SIZE - 1);
				thispage = min(tocpy, thispage);

				lnb->lnb_rc = 0;
				lnb->lnb_file_offset = off;
				lnb->lnb_page_offset = bufoff & ~PAGE_MASK;
				lnb->lnb_len = thispage;
				lnb->lnb_page = kmem_to_page(dbp[i]->db_data +
							     bufoff);
				/* mark just a single slot: we need this
				 * reference to dbuf to be released once */
				lnb->lnb_data = dbf;
				dbf = NULL;

				tocpy -= thispage;
				len -= thispage;
				bufoff += thispage;
				off += thispage;

				npages++;
				lnb++;
			}

			/* steal dbuf so dmu_buf_rele_array() can't release
			 * it */
			dbp[i] = NULL;
		}

		dmu_buf_rele_array(dbp, numbufs, osd_zerocopy_tag);
	}

	record_end_io(osd, READ, cfs_time_current() - start,
		      npages * PAGE_SIZE, npages);

	RETURN(npages);

err:
	LASSERT(rc < 0);
	/* release everything mapped so far; lnb was advanced once per
	 * page, so lnb - npages is the start of the caller's array */
	osd_bufs_put(env, &obj->oo_dt, lnb - npages, npages);
	RETURN(rc);
}