/*
 * This reserves disk blocks and inodes against a dquot.
 * Flags indicate if the dquot is to be locked here and also
 * if the blk reservation is for RT or regular blocks.
 * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
 */
STATIC int
xfs_trans_dqresv(
	xfs_trans_t	*tp,
	xfs_mount_t	*mp,
	xfs_dquot_t	*dqp,
	long		nblks,
	long		ninos,
	uint		flags)
{
	xfs_qcnt_t	hardlimit;
	xfs_qcnt_t	softlimit;
	time_t		timer;
	xfs_qwarncnt_t	warns;
	xfs_qwarncnt_t	warnlimit;
	xfs_qcnt_t	total_count;
	xfs_qcnt_t	*resbcountp;
	xfs_quotainfo_t	*q = mp->m_quotainfo;

	xfs_dqlock(dqp);

	if (flags & XFS_TRANS_DQ_RES_BLKS) {
		hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
		if (!hardlimit)
			hardlimit = q->qi_bhardlimit;
		softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
		if (!softlimit)
			softlimit = q->qi_bsoftlimit;
		timer = be32_to_cpu(dqp->q_core.d_btimer);
		warns = be16_to_cpu(dqp->q_core.d_bwarns);
		warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
		resbcountp = &dqp->q_res_bcount;
	} else {
		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
		hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
		if (!hardlimit)
			hardlimit = q->qi_rtbhardlimit;
		softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
		if (!softlimit)
			softlimit = q->qi_rtbsoftlimit;
		timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
		warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
		warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
		resbcountp = &dqp->q_res_rtbcount;
	}

	if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
	    dqp->q_core.d_id &&
	    ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
	     (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
	      (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
		if (nblks > 0) {
			/*
			 * dquot is locked already. See if we'd go over the
			 * hardlimit or exceed the timelimit if we allocate
			 * nblks.
			 */
			total_count = *resbcountp + nblks;
			if (hardlimit && total_count > hardlimit) {
				xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
				goto error_return;
			}
			if (softlimit && total_count > softlimit) {
				if ((timer != 0 && get_seconds() > timer) ||
				    (warns != 0 && warns >= warnlimit)) {
					xfs_quota_warn(mp, dqp,
						       QUOTA_NL_BSOFTLONGWARN);
					goto error_return;
				}

				xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
			}
		}
		if (ninos > 0) {
			total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
			timer = be32_to_cpu(dqp->q_core.d_itimer);
			warns = be16_to_cpu(dqp->q_core.d_iwarns);
			warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
			hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
			if (!hardlimit)
				hardlimit = q->qi_ihardlimit;
			softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
			if (!softlimit)
				softlimit = q->qi_isoftlimit;

			if (hardlimit && total_count > hardlimit) {
				xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
				goto error_return;
			}
			if (softlimit && total_count > softlimit) {
				if ((timer != 0 && get_seconds() > timer) ||
				     (warns != 0 && warns >= warnlimit)) {
					xfs_quota_warn(mp, dqp,
						       QUOTA_NL_ISOFTLONGWARN);
					goto error_return;
				}
				xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
			}
		}
	}

	/*
	 * Change the reservation, but not the actual usage.
	 * Note that q_res_bcount = q_core.d_bcount + resv
	 */
	(*resbcountp) += (xfs_qcnt_t)nblks;
	if (ninos != 0)
		dqp->q_res_icount += (xfs_qcnt_t)ninos;

	/*
	 * note the reservation amt in the trans struct too,
	 * so that the transaction knows how much was reserved by
	 * it against this particular dquot.
	 * We don't do this when we are reserving for a delayed allocation,
	 * because we don't have the luxury of a transaction envelope then.
	 */
	if (tp) {
		ASSERT(tp->t_dqinfo);
		ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
		if (nblks != 0)
			xfs_trans_mod_dquot(tp, dqp,
					    flags & XFS_QMOPT_RESBLK_MASK,
					    nblks);
		if (ninos != 0)
			xfs_trans_mod_dquot(tp, dqp,
					    XFS_TRANS_DQ_RES_INOS,
					    ninos);
	}
	ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
	ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
	ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));

	xfs_dqunlock(dqp);
	return 0;

error_return:
	xfs_dqunlock(dqp);
	if (flags & XFS_QMOPT_ENOSPC)
		return ENOSPC;
	return EDQUOT;
}
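
A minimal usage sketch, not taken from the XFS tree: the caller picks the
reservation type through the flags argument and treats a non-zero return as
an over-quota condition. The helper name and the surrounding transaction
context (tp, mp, dqp) are assumptions for illustration only.

STATIC int
example_dqresv_usage(
	xfs_trans_t	*tp,
	xfs_mount_t	*mp,
	xfs_dquot_t	*dqp)
{
	int	error;

	/* reserve 16 regular blocks and 1 inode, honouring quota limits */
	error = xfs_trans_dqresv(tp, mp, dqp, 16, 1, XFS_TRANS_DQ_RES_BLKS);
	if (error)
		return error;	/* positive EDQUOT or ENOSPC in this era of XFS */
	return 0;
}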
Example #2
/*
 * Readdir for block directories.
 */
STATIC int
xfs_dir2_block_getdents(
	struct xfs_da_args	*args,
	struct dir_context	*ctx)
{
	struct xfs_inode	*dp = args->dp;	/* incore directory inode */
	xfs_dir2_data_hdr_t	*hdr;		/* block header */
	struct xfs_buf		*bp;		/* buffer for block */
	xfs_dir2_block_tail_t	*btp;		/* block tail */
	xfs_dir2_data_entry_t	*dep;		/* block data entry */
	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
	char			*endptr;	/* end of the data entries */
	int			error;		/* error return value */
	char			*ptr;		/* current data entry */
	int			wantoff;	/* starting block offset */
	xfs_off_t		cook;
	struct xfs_da_geometry	*geo = args->geo;
	int			lock_mode;

	/*
	 * If the block number in the offset is out of range, we're done.
	 */
	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
		return 0;

	lock_mode = xfs_ilock_data_map_shared(dp);
	error = xfs_dir3_block_read(NULL, dp, &bp);
	xfs_iunlock(dp, lock_mode);
	if (error)
		return error;

	/*
	 * Extract the byte offset we start at from the seek pointer.
	 * We'll skip entries before this.
	 */
	wantoff = xfs_dir2_dataptr_to_off(geo, ctx->pos);
	hdr = bp->b_addr;
	xfs_dir3_data_check(dp, bp);
	/*
	 * Set up values for the loop.
	 */
	btp = xfs_dir2_block_tail_p(geo, hdr);
	ptr = (char *)dp->d_ops->data_entry_p(hdr);
	endptr = (char *)xfs_dir2_block_leaf_p(btp);

	/*
	 * Loop over the data portion of the block.
	 * Each object is a real entry (dep) or an unused one (dup).
	 */
	while (ptr < endptr) {
		__uint8_t filetype;

		dup = (xfs_dir2_data_unused_t *)ptr;
		/*
		 * Unused, skip it.
		 */
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			ptr += be16_to_cpu(dup->length);
			continue;
		}

		dep = (xfs_dir2_data_entry_t *)ptr;

		/*
		 * Bump pointer for the next iteration.
		 */
		ptr += dp->d_ops->data_entsize(dep->namelen);
		/*
		 * The entry is before the desired starting point, skip it.
		 */
		if ((char *)dep - (char *)hdr < wantoff)
			continue;

		cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
					    (char *)dep - (char *)hdr);

		ctx->pos = cook & 0x7fffffff;
		filetype = dp->d_ops->data_get_ftype(dep);
		/*
		 * If it didn't fit, set the final offset to here & return.
		 */
		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
			    be64_to_cpu(dep->inumber),
			    xfs_dir3_get_dtype(dp->i_mount, filetype))) {
			xfs_trans_brelse(NULL, bp);
			return 0;
		}
	}

	/*
	 * Reached the end of the block.
	 * Set the offset to a non-existent block 1 and return.
	 */
	ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
								0x7fffffff;
	xfs_trans_brelse(NULL, bp);
	return 0;
}
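
The ctx->pos cookie produced above packs a directory data block number and a
byte offset into a single value; masking with 0x7fffffff keeps the offset
positive for the VFS. A hedged sketch of taking such a cookie apart again,
using the same helpers the function itself uses (geo and ctx are assumed to
come from the surrounding code):

	xfs_dir2_db_t	db;	/* directory data block number */
	unsigned int	off;	/* byte offset within that block */

	db  = xfs_dir2_dataptr_to_db(geo, ctx->pos);
	off = xfs_dir2_dataptr_to_off(geo, ctx->pos);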
Example #3
/**
 * ubi_scan_add_used - add information about a physical eraseblock to the
 * scanning information.
 * @ubi: UBI device description object
 * @si: scanning information
 * @pnum: the physical eraseblock number
 * @ec: erase counter
 * @vid_hdr: the volume identifier header
 * @bitflips: if bit-flips were detected when this physical eraseblock was read
 *
 * This function adds information about a used physical eraseblock to the
 * 'used' tree of the corresponding volume. The function is rather complex
 * because it has to handle cases when this is not the first physical
 * eraseblock belonging to the same logical eraseblock, and the newer one has
 * to be picked, while the older one has to be dropped. This function returns
 * zero in case of success and a negative error code in case of failure.
 */
int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
		      int pnum, int ec, const struct ubi_vid_hdr *vid_hdr,
		      int bitflips)
{
	int err, vol_id, lnum;
	uint32_t leb_ver;
	unsigned long long sqnum;
	struct ubi_scan_volume *sv;
	struct ubi_scan_leb *seb;
	struct rb_node **p, *parent = NULL;

	vol_id = be32_to_cpu(vid_hdr->vol_id);
	lnum = be32_to_cpu(vid_hdr->lnum);
	sqnum = be64_to_cpu(vid_hdr->sqnum);
	leb_ver = be32_to_cpu(vid_hdr->leb_ver);

	dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d",
		pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips);

	sv = add_volume(si, vol_id, pnum, vid_hdr);
	if (IS_ERR(sv))
		return PTR_ERR(sv);

	if (si->max_sqnum < sqnum)
		si->max_sqnum = sqnum;

	/*
	 * Walk the RB-tree of logical eraseblocks of volume @vol_id to look
	 * if this is the first instance of this logical eraseblock or not.
	 */
	p = &sv->root.rb_node;
	while (*p) {
		int cmp_res;

		parent = *p;
		seb = rb_entry(parent, struct ubi_scan_leb, u.rb);
		if (lnum != seb->lnum) {
			if (lnum < seb->lnum)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
			continue;
		}

		/*
		 * There is already a physical eraseblock describing the same
		 * logical eraseblock present.
		 */

		dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "
			"LEB ver %u, EC %d", seb->pnum, seb->sqnum,
			seb->leb_ver, seb->ec);

		/*
		 * Make sure that the logical eraseblocks have different
		 * versions. Otherwise the image is bad.
		 */
		if (seb->leb_ver == leb_ver && leb_ver != 0) {
			ubi_err("two LEBs with same version %u", leb_ver);
			ubi_dbg_dump_seb(seb, 0);
			ubi_dbg_dump_vid_hdr(vid_hdr);
			return -EINVAL;
		}

		/*
		 * Make sure that the logical eraseblocks have different
		 * sequence numbers. Otherwise the image is bad.
		 *
		 * FIXME: remove 'sqnum != 0' check when leb_ver is removed.
		 */
		if (seb->sqnum == sqnum && sqnum != 0) {
			ubi_err("two LEBs with same sequence number %llu",
				sqnum);
			ubi_dbg_dump_seb(seb, 0);
			ubi_dbg_dump_vid_hdr(vid_hdr);
			return -EINVAL;
		}

		/*
		 * Now we have to drop the older one and preserve the newer
		 * one.
		 */
		cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr);
		if (cmp_res < 0)
			return cmp_res;

		if (cmp_res & 1) {
			/*
			 * This logical eraseblock is newer than the one
			 * found earlier.
			 */
			err = validate_vid_hdr(vid_hdr, sv, pnum);
			if (err)
				return err;

			if (cmp_res & 4)
				err = add_to_list(si, seb->pnum, seb->ec,
						  &si->corr);
			else
				err = add_to_list(si, seb->pnum, seb->ec,
						  &si->erase);
			if (err)
				return err;

			seb->ec = ec;
			seb->pnum = pnum;
			seb->scrub = ((cmp_res & 2) || bitflips);
			seb->sqnum = sqnum;
			seb->leb_ver = leb_ver;

			if (sv->highest_lnum == lnum)
				sv->last_data_size =
					be32_to_cpu(vid_hdr->data_size);

			return 0;
		} else {
			/*
			 * This logical eraseblock is older than the one found
			 * previously.
			 */
			if (cmp_res & 4)
				return add_to_list(si, pnum, ec, &si->corr);
			else
				return add_to_list(si, pnum, ec, &si->erase);
		}
	}

	/*
	 * We've met this logical eraseblock for the first time, add it to the
	 * scanning information.
	 */

	err = validate_vid_hdr(vid_hdr, sv, pnum);
	if (err)
		return err;

	seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
	if (!seb)
		return -ENOMEM;

	seb->ec = ec;
	seb->pnum = pnum;
	seb->lnum = lnum;
	seb->sqnum = sqnum;
	seb->scrub = bitflips;
	seb->leb_ver = leb_ver;

	if (sv->highest_lnum <= lnum) {
		sv->highest_lnum = lnum;
		sv->last_data_size = be32_to_cpu(vid_hdr->data_size);
	}

	sv->leb_count += 1;
	rb_link_node(&seb->u.rb, parent, p);
	rb_insert_color(&seb->u.rb, &sv->root);
	return 0;
}
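
The cmp_res bits tested above follow the convention documented at
ubi_compare_lebs() (the last example on this page): bit 0 set means the
second copy is newer, bit 1 means bit-flips were seen in the newer copy, and
bit 2 means the older copy is corrupted. Hedged helper sketches, not part of
UBI, that just give the three tests names:

static inline int leb_second_is_newer(int cmp_res)    { return cmp_res & 1; }
static inline int leb_newer_has_bitflips(int cmp_res) { return cmp_res & 2; }
static inline int leb_older_is_corrupted(int cmp_res) { return cmp_res & 4; }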
Example #4
/* copy the snapshot 'snapshot_id' into the current disk image */
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
    BDRVQcowState *s = bs->opaque;
    QCowSnapshot *sn;
    int i, snapshot_index;
    int cur_l1_bytes, sn_l1_bytes;
    int ret;
    uint64_t *sn_l1_table = NULL;

    /* Search the snapshot */
    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
    if (snapshot_index < 0) {
        return -ENOENT;
    }
    sn = &s->snapshots[snapshot_index];

    if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
        error_report("qcow2: Loading snapshots with different disk "
            "size is not implemented");
        ret = -ENOTSUP;
        goto fail;
    }

    /*
     * Make sure that the current L1 table is big enough to contain the whole
     * L1 table of the snapshot. If the snapshot L1 table is smaller, the
     * current one must be padded with zeros.
     */
    ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
    if (ret < 0) {
        goto fail;
    }

    cur_l1_bytes = s->l1_size * sizeof(uint64_t);
    sn_l1_bytes = sn->l1_size * sizeof(uint64_t);

    /*
     * Copy the snapshot L1 table to the current L1 table.
     *
     * Before overwriting the old current L1 table on disk, make sure to
     * increase all refcounts for the clusters referenced by the new one.
     * Decrease the refcount referenced by the old one only when the L1
     * table is overwritten.
     */
    sn_l1_table = g_try_malloc0(cur_l1_bytes);
    if (cur_l1_bytes && sn_l1_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
                                         sn->l1_size, 1);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
                                        s->l1_table_offset, cur_l1_bytes);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
                           cur_l1_bytes);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Decrease refcount of clusters of current L1 table.
     *
     * At this point, the in-memory s->l1_table points to the old L1 table,
     * whereas on disk we already have the new one.
     *
     * qcow2_update_snapshot_refcount special cases the current L1 table to use
     * the in-memory data instead of really using the offset to load a new one,
     * which is why this works.
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
                                         s->l1_size, -1);

    /*
     * Now update the in-memory L1 table to be in sync with the on-disk one. We
     * need to do this even if updating refcounts failed.
     */
    for (i = 0; i < s->l1_size; i++) {
        s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
    }

    if (ret < 0) {
        goto fail;
    }

    g_free(sn_l1_table);
    sn_l1_table = NULL;

    /*
     * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
     * when we decreased the refcount of the old snapshot).
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    if (ret < 0) {
        goto fail;
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return 0;

fail:
    g_free(sn_l1_table);
    return ret;
}
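
qcow2 stores L1 tables big-endian on disk, which is why the loop near the
end byte-swaps every entry of the snapshot table after bdrv_pread(). Writes
go the other way. A hedged sketch, not from the QEMU tree, of preparing a
host-order table for bdrv_pwrite() (i, l1_size and the two arrays are
assumptions):

    for (i = 0; i < l1_size; i++) {
        disk_l1_table[i] = cpu_to_be64(l1_table[i]);
    }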
Example #5
static inline atom_t entry_atom(tux_dirent *entry)
{
	return be64_to_cpu(entry->inum);
}
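
entry_atom() is the smallest statement of this page's theme: an on-disk
field declared __be64 must pass through be64_to_cpu() before the CPU can use
it as a number. A hedged two-line illustration:

	__be64 raw  = cpu_to_be64(0x1122334455667788ULL);	/* on-disk byte order */
	u64    host = be64_to_cpu(raw);	/* 0x1122334455667788 on any CPU */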
Example #6
/**
 * ubi_add_to_av - add used physical eraseblock to the attaching information.
 * @ubi: UBI device description object
 * @ai: attaching information
 * @pnum: the physical eraseblock number
 * @ec: erase counter
 * @vid_hdr: the volume identifier header
 * @bitflips: if bit-flips were detected when this physical eraseblock was read
 *
 * This function adds information about a used physical eraseblock to the
 * 'used' tree of the corresponding volume. The function is rather complex
 * because it has to handle cases when this is not the first physical
 * eraseblock belonging to the same logical eraseblock, and the newer one has
 * to be picked, while the older one has to be dropped. This function returns
 * zero in case of success and a negative error code in case of failure.
 */
int ubi_add_to_av(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum,
		  int ec, const struct ubi_vid_hdr *vid_hdr, int bitflips)
{
	int err, vol_id, lnum;
	unsigned long long sqnum;
	struct ubi_ainf_volume *av;
	struct ubi_ainf_peb *aeb;
	struct rb_node **p, *parent = NULL;

	vol_id = be32_to_cpu(vid_hdr->vol_id);
	lnum = be32_to_cpu(vid_hdr->lnum);
	sqnum = be64_to_cpu(vid_hdr->sqnum);

	dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d",
		pnum, vol_id, lnum, ec, sqnum, bitflips);

	av = add_volume(ai, vol_id, pnum, vid_hdr);
	if (IS_ERR(av))
		return PTR_ERR(av);

	if (ai->max_sqnum < sqnum)
		ai->max_sqnum = sqnum;

	/*
	 * Walk the RB-tree of logical eraseblocks of volume @vol_id to look
	 * if this is the first instance of this logical eraseblock or not.
	 */
	p = &av->root.rb_node;
	while (*p) {
		int cmp_res;

		parent = *p;
		aeb = rb_entry(parent, struct ubi_ainf_peb, u.rb);
		if (lnum != aeb->lnum) {
			if (lnum < aeb->lnum)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
			continue;
		}

		/*
		 * There is already a physical eraseblock describing the same
		 * logical eraseblock present.
		 */

		dbg_bld("this LEB already exists: PEB %d, sqnum %llu, EC %d",
			aeb->pnum, aeb->sqnum, aeb->ec);

		/*
		 * Make sure that the logical eraseblocks have different
		 * sequence numbers. Otherwise the image is bad.
		 *
		 * However, if the sequence number is zero, we assume it must
		 * be an ancient UBI image from the era when UBI did not have
		 * sequence numbers. We still can attach these images, unless
		 * there is a need to distinguish between old and new
		 * eraseblocks, in which case we'll refuse the image in
		 * 'ubi_compare_lebs()'. In other words, we attach old clean
		 * images, but refuse attaching old images with duplicated
		 * logical eraseblocks because there was an unclean reboot.
		 */
		if (aeb->sqnum == sqnum && sqnum != 0) {
			ubi_err("two LEBs with same sequence number %llu",
				sqnum);
			ubi_dump_aeb(aeb, 0);
			ubi_dump_vid_hdr(vid_hdr);
			return -EINVAL;
		}

		/*
		 * Now we have to drop the older one and preserve the newer
		 * one.
		 */
		cmp_res = ubi_compare_lebs(ubi, aeb, pnum, vid_hdr);
		if (cmp_res < 0)
			return cmp_res;

		if (cmp_res & 1) {
			/*
			 * This logical eraseblock is newer than the one
			 * found earlier.
			 */
			err = validate_vid_hdr(vid_hdr, av, pnum);
			if (err)
				return err;

			err = add_to_list(ai, aeb->pnum, aeb->vol_id,
					  aeb->lnum, aeb->ec, cmp_res & 4,
					  &ai->erase);
			if (err)
				return err;

			aeb->ec = ec;
			aeb->pnum = pnum;
			aeb->vol_id = vol_id;
			aeb->lnum = lnum;
			aeb->scrub = ((cmp_res & 2) || bitflips);
			aeb->copy_flag = vid_hdr->copy_flag;
			aeb->sqnum = sqnum;

			if (av->highest_lnum == lnum)
				av->last_data_size =
					be32_to_cpu(vid_hdr->data_size);

			return 0;
		} else {
			/*
			 * This logical eraseblock is older than the one found
			 * previously.
			 */
			return add_to_list(ai, pnum, vol_id, lnum, ec,
					   cmp_res & 4, &ai->erase);
		}
	}

	/*
	 * We've met this logical eraseblock for the first time, add it to the
	 * attaching information.
	 */

	err = validate_vid_hdr(vid_hdr, av, pnum);
	if (err)
		return err;

	aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL);
	if (!aeb)
		return -ENOMEM;

	aeb->ec = ec;
	aeb->pnum = pnum;
	aeb->vol_id = vol_id;
	aeb->lnum = lnum;
	aeb->scrub = bitflips;
	aeb->copy_flag = vid_hdr->copy_flag;
	aeb->sqnum = sqnum;

	if (av->highest_lnum <= lnum) {
		av->highest_lnum = lnum;
		av->last_data_size = be32_to_cpu(vid_hdr->data_size);
	}

	av->leb_count += 1;
	rb_link_node(&aeb->u.rb, parent, p);
	rb_insert_color(&aeb->u.rb, &av->root);
	return 0;
}
Example #7
static inline void inc(be128 *iv)
{
	if (!(iv->b = cpu_to_be64(be64_to_cpu(iv->b) + 1)))
		iv->a = cpu_to_be64(be64_to_cpu(iv->a) + 1);
}
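
inc() treats the be128 as a single 128-bit big-endian counter: the low word
iv->b is incremented first, and only when it wraps to zero does the carry
propagate into iv->a. For example, {a = 0, b = 0xffffffffffffffff}
increments to {a = 1, b = 0}.

Example #8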
int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
    int written = 0;
    __be64 olen;
    s64 len, rc;
    unsigned long flags;
    __be64 evt;

    if (!opal.entry)
        return -ENODEV;

    /* We want put_chars to be atomic to avoid mangling of hvsi
     * packets. To do that, we first test for room and return
     * -EAGAIN if there isn't enough.
     *
     * Unfortunately, opal_console_write_buffer_space() doesn't
     * appear to work on opal v1, so we just assume there is
     * enough room and be done with it
     */
    spin_lock_irqsave(&opal_write_lock, flags);
    if (firmware_has_feature(FW_FEATURE_OPALv2)) {
        rc = opal_console_write_buffer_space(vtermno, &olen);
        len = be64_to_cpu(olen);
        if (rc || len < total_len) {
            spin_unlock_irqrestore(&opal_write_lock, flags);
            /* Closed -> drop characters */
            if (rc)
                return total_len;
            opal_poll_events(NULL);
            return -EAGAIN;
        }
    }

    /* We still try to handle partial completions, though they
     * should no longer happen.
     */
    rc = OPAL_BUSY;
    while (total_len > 0 && (rc == OPAL_BUSY ||
                            rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
        olen = cpu_to_be64(total_len);
        rc = opal_console_write(vtermno, &olen, data);
        len = be64_to_cpu(olen);

        /* Closed or other error drop */
        if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
                rc != OPAL_BUSY_EVENT) {
            written = total_len;
            break;
        }
        if (rc == OPAL_SUCCESS) {
            total_len -= len;
            data += len;
            written += len;
        }
        /* This is a bit nasty but we need that for the console to
         * flush when there aren't any interrupts. We will clean
         * things a bit later to limit that to synchronous path
         * such as the kernel console and xmon/udbg
         */
        do
            opal_poll_events(&evt);
        while (rc == OPAL_SUCCESS &&
                (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
    }
    spin_unlock_irqrestore(&opal_write_lock, flags);
    return written;
}
Example #9
static int get_filter_count(struct adapter *adapter, unsigned int fidx,
			    u64 *pkts, u64 *bytes, bool hash)
{
	unsigned int tcb_base, tcbaddr;
	unsigned int word_offset;
	struct filter_entry *f;
	__be64 be64_byte_count;
	int ret;

	tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A);
	if (is_hashfilter(adapter) && hash) {
		if (fidx < adapter->tids.ntids) {
			f = adapter->tids.tid_tab[fidx];
			if (!f)
				return -EINVAL;
		} else {
			return -E2BIG;
		}
	} else {
		if ((fidx != (adapter->tids.nftids +
			      adapter->tids.nsftids - 1)) &&
		    fidx >= adapter->tids.nftids)
			return -E2BIG;

		f = &adapter->tids.ftid_tab[fidx];
		if (!f->valid)
			return -EINVAL;
	}
	tcbaddr = tcb_base + f->tid * TCB_SIZE;

	spin_lock(&adapter->win0_lock);
	if (is_t4(adapter->params.chip)) {
		__be64 be64_count;

		/* T4 doesn't maintain byte counts in hw */
		*bytes = 0;

		/* Get pkts */
		word_offset = 4;
		ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
				   tcbaddr + (word_offset * sizeof(__be32)),
				   sizeof(be64_count),
				   (__be32 *)&be64_count,
				   T4_MEMORY_READ);
		if (ret < 0)
			goto out;
		*pkts = be64_to_cpu(be64_count);
	} else {
		__be32 be32_count;

		/* Get bytes */
		word_offset = 4;
		ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
				   tcbaddr + (word_offset * sizeof(__be32)),
				   sizeof(be64_byte_count),
				   &be64_byte_count,
				   T4_MEMORY_READ);
		if (ret < 0)
			goto out;
		*bytes = be64_to_cpu(be64_byte_count);

		/* Get pkts */
		word_offset = 6;
		ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
				   tcbaddr + (word_offset * sizeof(__be32)),
				   sizeof(be32_count),
				   &be32_count,
				   T4_MEMORY_READ);
		if (ret < 0)
			goto out;
		*pkts = (u64)be32_to_cpu(be32_count);
	}

out:
	spin_unlock(&adapter->win0_lock);
	return ret;
}
Example #10
int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int bsize = sdp->sd_sb.sb_bsize;
	const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
	const u64 *arr = sdp->sd_heightsize;
	__be64 *ptr;
	u64 size;
	struct metapath mp;
	int ret;
	int eob;
	unsigned int len;
	struct buffer_head *bh;
	u8 height;

	BUG_ON(maxlen == 0);

	memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
	bmap_lock(ip, create);
	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
	if (gfs2_is_dir(ip)) {
		bsize = sdp->sd_jbsize;
		arr = sdp->sd_jheightsize;
	}

	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
	if (ret)
		goto out;

	height = ip->i_height;
	size = (lblock + 1) * bsize;
	while (size > arr[height])
		height++;
	find_metapath(sdp, lblock, &mp, height);
	ret = 1;
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;
	ret = lookup_metapath(ip, &mp);
	if (ret < 0)
		goto out;
	if (ret != ip->i_height)
		goto do_alloc;
	ptr = metapointer(ip->i_height - 1, &mp);
	if (*ptr == 0)
		goto do_alloc;
	map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
	bh = mp.mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
	bh_map->b_size = (len << inode->i_blkbits);
	if (eob)
		set_buffer_boundary(bh_map);
	ret = 0;
out:
	release_metapath(&mp);
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	bmap_unlock(ip, create);
	return ret;

do_alloc:
	/* All allocations are done here, firstly check create flag */
	if (!create) {
		BUG_ON(gfs2_is_stuffed(ip));
		ret = 0;
		goto out;
	}

	/* At this point ret is the tree depth of already allocated blocks */
	ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen);
	goto out;
}
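
In gfs2_block_map() the indirect pointers reached through metapointer() are
raw on-disk __be64 values, so the physical block number is converted with
be64_to_cpu() only at the moment it is handed to map_bh().

Example #11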
int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len,
			struct kexec_info *info)
{
	struct mem_ehdr ehdr;
	char *cmdline, *modified_cmdline = NULL;
	const char *devicetreeblob;
	uint64_t max_addr, hole_addr;
	char *seg_buf = NULL;
	off_t seg_size = 0;
	struct mem_phdr *phdr;
	size_t size;
#ifdef NEED_RESERVE_DTB
	uint64_t *rsvmap_ptr;
	struct bootblock *bb_ptr;
#endif
	int result, opt;
	uint64_t my_kernel, my_dt_offset;
	uint64_t my_opal_base = 0, my_opal_entry = 0;
	unsigned int my_panic_kernel;
	uint64_t my_stack, my_backup_start;
	uint64_t toc_addr;
	uint32_t my_run_at_load;
	unsigned int slave_code[256/sizeof (unsigned int)], master_entry;

	/* See options.h -- add any more there, too. */
	static const struct option options[] = {
		KEXEC_ARCH_OPTIONS
		{ "command-line",       1, NULL, OPT_APPEND },
		{ "append",             1, NULL, OPT_APPEND },
		{ "ramdisk",            1, NULL, OPT_RAMDISK },
		{ "initrd",             1, NULL, OPT_RAMDISK },
		{ "devicetreeblob",     1, NULL, OPT_DEVICETREEBLOB },
		{ "dtb",                1, NULL, OPT_DEVICETREEBLOB },
		{ "args-linux",		0, NULL, OPT_ARGS_IGNORE },
		{ 0,                    0, NULL, 0 },
	};

	static const char short_options[] = KEXEC_OPT_STR "";

	/* Parse command line arguments */
	initrd_base = 0;
	initrd_size = 0;
	cmdline = 0;
	ramdisk = 0;
	devicetreeblob = 0;
	max_addr = 0xFFFFFFFFFFFFFFFFULL;
	hole_addr = 0;

	while ((opt = getopt_long(argc, argv, short_options,
					options, 0)) != -1) {
		switch (opt) {
		default:
			/* Ignore core options */
			if (opt < OPT_ARCH_MAX)
				break;
		case OPT_APPEND:
			cmdline = optarg;
			break;
		case OPT_RAMDISK:
			ramdisk = optarg;
			break;
		case OPT_DEVICETREEBLOB:
			devicetreeblob = optarg;
			break;
		case OPT_ARGS_IGNORE:
			break;
		}
	}

	if (!cmdline)
		fprintf(stdout, "Warning: append= option is not passed. Using the first kernel root partition\n");

	if (ramdisk && reuse_initrd)
		die("Can't specify --ramdisk or --initrd with --reuseinitrd\n");

	/* Need to append some command line parameters internally in case of
	 * taking crash dumps.
	 */
	if (info->kexec_flags & KEXEC_ON_CRASH) {
		modified_cmdline = xmalloc(COMMAND_LINE_SIZE);
		memset((void *)modified_cmdline, 0, COMMAND_LINE_SIZE);
		if (cmdline) {
			strncpy(modified_cmdline, cmdline, COMMAND_LINE_SIZE);
			modified_cmdline[COMMAND_LINE_SIZE - 1] = '\0';
		}
	}

	/* Parse the Elf file */
	result = build_elf_exec_info(buf, len, &ehdr, 0);
	if (result < 0) {
		free_elf_info(&ehdr);
		return result;
	}

	/* Load the Elf data. Physical load addresses in elf64 header do not
	 * show up correctly. Use user supplied address for now to patch the
	 * elf header
	 */

	phdr = &ehdr.e_phdr[0];
	size = phdr->p_filesz;
	if (size > phdr->p_memsz)
		size = phdr->p_memsz;

	my_kernel = hole_addr = locate_hole(info, size, 0, 0, max_addr, 1);
	ehdr.e_phdr[0].p_paddr = hole_addr;
	result = elf_exec_load(&ehdr, info);
	if (result < 0) {
		free_elf_info(&ehdr);
		return result;
	}

	/* If panic kernel is being loaded, additional segments need
	 * to be created.
	 */
	if (info->kexec_flags & KEXEC_ON_CRASH) {
		result = load_crashdump_segments(info, modified_cmdline,
						max_addr, 0);
		if (result < 0)
			return -1;
		/* Use new command line. */
		cmdline = modified_cmdline;
	}

	/* Add v2wrap to the current image */
	elf_rel_build_load(info, &info->rhdr, purgatory,
				purgatory_size, 0, max_addr, 1, 0);

	/* Add a ram-disk to the current image
	 * Note: Add the ramdisk after elf_rel_build_load
	 */
	if (ramdisk) {
		if (devicetreeblob) {
			fprintf(stderr,
			"Can't use ramdisk with device tree blob input\n");
			return -1;
		}
		seg_buf = slurp_file(ramdisk, &seg_size);
		hole_addr = add_buffer(info, seg_buf, seg_size, seg_size,
			0, 0, max_addr, 1);
		initrd_base = hole_addr;
		initrd_size = seg_size;
	} /* ramdisk */

	if (devicetreeblob) {
		/* Grab device tree from buffer */
		seg_buf = slurp_file(devicetreeblob, &seg_size);
	} else {
		/* create from fs2dt */
		create_flatten_tree(&seg_buf, &seg_size, cmdline);
	}

	result = fixup_dt(&seg_buf, &seg_size);
	if (result < 0)
		return result;

	my_dt_offset = add_buffer(info, seg_buf, seg_size, seg_size,
				0, 0, max_addr, -1);

#ifdef NEED_RESERVE_DTB
	/* patch reserve map address for flattened device-tree
	 * find last entry (both 0) in the reserve mem list.  Assume DT
	 * entry is before this one
	 */
	bb_ptr = (struct bootblock *)(seg_buf);
	rsvmap_ptr = (uint64_t *)(seg_buf + be32_to_cpu(bb_ptr->off_mem_rsvmap));
	while (*rsvmap_ptr || *(rsvmap_ptr+1))
		rsvmap_ptr += 2;
	rsvmap_ptr -= 2;
	*rsvmap_ptr = cpu_to_be64(my_dt_offset);
	rsvmap_ptr++;
	*rsvmap_ptr = cpu_to_be64((uint64_t)be32_to_cpu(bb_ptr->totalsize));
#endif

	if (read_prop("/proc/device-tree/ibm,opal/opal-base-address",
		      &my_opal_base, sizeof(my_opal_base)) == 0) {
		my_opal_base = be64_to_cpu(my_opal_base);
		elf_rel_set_symbol(&info->rhdr, "opal_base",
				   &my_opal_base, sizeof(my_opal_base));
	}

	if (read_prop("/proc/device-tree/ibm,opal/opal-entry-address",
		      &my_opal_entry, sizeof(my_opal_entry)) == 0) {
		my_opal_entry = be64_to_cpu(my_opal_entry);
		elf_rel_set_symbol(&info->rhdr, "opal_entry",
				   &my_opal_entry, sizeof(my_opal_entry));
	}

	/* Set kernel */
	elf_rel_set_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel));

	/* Set dt_offset */
	elf_rel_set_symbol(&info->rhdr, "dt_offset", &my_dt_offset,
				sizeof(my_dt_offset));

	/* get slave code from new kernel, put in purgatory */
	elf_rel_get_symbol(&info->rhdr, "purgatory_start", slave_code,
			sizeof(slave_code));
	master_entry = slave_code[0];
	memcpy(slave_code, phdr->p_data, sizeof(slave_code));
	slave_code[0] = master_entry;
	elf_rel_set_symbol(&info->rhdr, "purgatory_start", slave_code,
				sizeof(slave_code));

	if (info->kexec_flags & KEXEC_ON_CRASH) {
		my_panic_kernel = 1;
		/* Set panic flag */
		elf_rel_set_symbol(&info->rhdr, "panic_kernel",
				&my_panic_kernel, sizeof(my_panic_kernel));

		/* Set backup address */
		my_backup_start = info->backup_start;
		elf_rel_set_symbol(&info->rhdr, "backup_start",
				&my_backup_start, sizeof(my_backup_start));

		/* Tell relocatable kernel to run at load address
		 * via word before slave code in purgatory
		 */

		elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load,
				sizeof(my_run_at_load));
		if (my_run_at_load == KERNEL_RUN_AT_ZERO_MAGIC)
			my_run_at_load = 1;
			/* else it should be a fixed offset image */
		elf_rel_set_symbol(&info->rhdr, "run_at_load", &my_run_at_load,
				sizeof(my_run_at_load));
	}

	/* Set stack address */
	my_stack = locate_hole(info, 16*1024, 0, 0, max_addr, 1);
	my_stack += 16*1024;
	elf_rel_set_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack));

	/* Set toc */
	toc_addr = my_r2(&info->rhdr);
	elf_rel_set_symbol(&info->rhdr, "my_toc", &toc_addr, sizeof(toc_addr));

	/* Set debug */
	elf_rel_set_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug));

	my_kernel = 0;
	my_dt_offset = 0;
	my_panic_kernel = 0;
	my_backup_start = 0;
	my_stack = 0;
	toc_addr = 0;
	my_run_at_load = 0;
	my_debug = 0;
	my_opal_base = 0;
	my_opal_entry = 0;

	elf_rel_get_symbol(&info->rhdr, "opal_base", &my_opal_base,
			   sizeof(my_opal_base));
	elf_rel_get_symbol(&info->rhdr, "opal_entry", &my_opal_entry,
			   sizeof(my_opal_entry));
	elf_rel_get_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel));
	elf_rel_get_symbol(&info->rhdr, "dt_offset", &my_dt_offset,
				sizeof(my_dt_offset));
	elf_rel_get_symbol(&info->rhdr, "run_at_load", &my_run_at_load,
				sizeof(my_run_at_load));
	elf_rel_get_symbol(&info->rhdr, "panic_kernel", &my_panic_kernel,
				sizeof(my_panic_kernel));
	elf_rel_get_symbol(&info->rhdr, "backup_start", &my_backup_start,
				sizeof(my_backup_start));
	elf_rel_get_symbol(&info->rhdr, "stack", &my_stack, sizeof(my_stack));
	elf_rel_get_symbol(&info->rhdr, "my_toc", &toc_addr,
				sizeof(toc_addr));
	elf_rel_get_symbol(&info->rhdr, "debug", &my_debug, sizeof(my_debug));

	dbgprintf("info->entry is %p\n", info->entry);
	dbgprintf("kernel is %llx\n", (unsigned long long)my_kernel);
	dbgprintf("dt_offset is %llx\n",
		(unsigned long long)my_dt_offset);
	dbgprintf("run_at_load flag is %x\n", my_run_at_load);
	dbgprintf("panic_kernel is %x\n", my_panic_kernel);
	dbgprintf("backup_start is %llx\n",
		(unsigned long long)my_backup_start);
	dbgprintf("stack is %llx\n", (unsigned long long)my_stack);
	dbgprintf("toc_addr is %llx\n", (unsigned long long)toc_addr);
	dbgprintf("purgatory size is %zu\n", purgatory_size);
	dbgprintf("debug is %d\n", my_debug);
	dbgprintf("opal_base is %llx\n", (unsigned long long) my_opal_base);
	dbgprintf("opal_entry is %llx\n", (unsigned long long) my_opal_entry);

	return 0;
}
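
The NEED_RESERVE_DTB block above walks the flattened device-tree memory
reserve map: a list of (address, size) pairs of big-endian 64-bit values,
terminated by an all-zero pair. A hedged sketch, not from kexec-tools, of
scanning such a map into host order (rsvmap is assumed to point at the map):

	uint64_t *p = rsvmap;
	while (p[0] || p[1]) {
		printf("reserved: base 0x%llx size 0x%llx\n",
		       (unsigned long long)be64_to_cpu(p[0]),
		       (unsigned long long)be64_to_cpu(p[1]));
		p += 2;
	}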
Example #12
static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
{
    BDRVCloopState *s = bs->opaque;
    uint32_t offsets_size, max_compressed_block_size = 1, i;
    int ret;

    bs->read_only = 1;

    /* read header */
    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
    if (ret < 0) {
        return ret;
    }
    s->block_size = be32_to_cpu(s->block_size);
    if (s->block_size % 512) {
        error_setg(errp, "block_size %" PRIu32 " must be a multiple of 512",
                   s->block_size);
        return -EINVAL;
    }
    if (s->block_size == 0) {
        error_setg(errp, "block_size cannot be zero");
        return -EINVAL;
    }

    /* cloop's create_compressed_fs.c warns about block sizes beyond 256 KB but
     * we can accept more.  Prevent ridiculous values like 4 GB - 1 since we
     * need a buffer this big.
     */
    if (s->block_size > MAX_BLOCK_SIZE) {
        error_setg(errp, "block_size %" PRIu32 " must be %u MB or less",
                   s->block_size,
                   MAX_BLOCK_SIZE / (1024 * 1024));
        return -EINVAL;
    }

    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
    if (ret < 0) {
        return ret;
    }
    s->n_blocks = be32_to_cpu(s->n_blocks);

    /* read offsets */
    if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
        /* Prevent integer overflow */
        error_setg(errp, "n_blocks %" PRIu32 " must be %zu or less",
                   s->n_blocks,
                   (UINT32_MAX - 1) / sizeof(uint64_t));
        return -EINVAL;
    }
    offsets_size = (s->n_blocks + 1) * sizeof(uint64_t);
    if (offsets_size > 512 * 1024 * 1024) {
        /* Prevent ridiculous offsets_size which causes memory allocation to
         * fail or overflows bdrv_pread() size.  In practice the 512 MB
         * offsets[] limit supports 16 TB images at 256 KB block size.
         */
        error_setg(errp, "image requires too many offsets, "
                   "try increasing block size");
        return -EINVAL;
    }

    s->offsets = g_try_malloc(offsets_size);
    if (s->offsets == NULL) {
        error_setg(errp, "Could not allocate offsets table");
        return -ENOMEM;
    }

    ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
    if (ret < 0) {
        goto fail;
    }

    for (i = 0; i < s->n_blocks + 1; i++) {
        uint64_t size;

        s->offsets[i] = be64_to_cpu(s->offsets[i]);
        if (i == 0) {
            continue;
        }

        if (s->offsets[i] < s->offsets[i - 1]) {
            error_setg(errp, "offsets not monotonically increasing at "
                       "index %" PRIu32 ", image file is corrupt", i);
            ret = -EINVAL;
            goto fail;
        }

        size = s->offsets[i] - s->offsets[i - 1];

        /* Compressed blocks should be smaller than the uncompressed block size
         * but maybe compression performed poorly so the compressed block is
         * actually bigger.  Clamp down on unrealistic values to prevent
         * ridiculous s->compressed_block allocation.
         */
        if (size > 2 * MAX_BLOCK_SIZE) {
            error_setg(errp, "invalid compressed block size at index %" PRIu32
                       ", image file is corrupt", i);
            ret = -EINVAL;
            goto fail;
        }

        if (size > max_compressed_block_size) {
            max_compressed_block_size = size;
        }
    }

    /* initialize zlib engine */
    s->compressed_block = g_try_malloc(max_compressed_block_size + 1);
    if (s->compressed_block == NULL) {
        error_setg(errp, "Could not allocate compressed_block");
        ret = -ENOMEM;
        goto fail;
    }

    s->uncompressed_block = g_try_malloc(s->block_size);
    if (s->uncompressed_block == NULL) {
        error_setg(errp, "Could not allocate uncompressed_block");
        ret = -ENOMEM;
        goto fail;
    }

    if (inflateInit(&s->zstream) != Z_OK) {
        ret = -EINVAL;
        goto fail;
    }
    s->current_block = s->n_blocks;

    s->sectors_per_block = s->block_size/512;
    bs->total_sectors = s->n_blocks * s->sectors_per_block;
    qemu_co_mutex_init(&s->lock);
    return 0;

fail:
    g_free(s->offsets);
    g_free(s->compressed_block);
    g_free(s->uncompressed_block);
    return ret;
}
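
cloop_open() shows the bulk variant of the conversion: the whole offsets[]
array is read raw from the image, then byte-swapped in place with one
be64_to_cpu() per entry before any of the offset arithmetic is trusted.

Example #13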
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
{
	struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault;
	int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK;
	struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn);
	struct mlx5_core_qp *qp =
		container_of(common, struct mlx5_core_qp, common);
	struct mlx5_pagefault pfault;

	if (!qp) {
		mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n",
			       qpn);
		return;
	}

	pfault.event_subtype = eqe->sub_type;
	pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) &
		(MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA);
	pfault.bytes_committed = be32_to_cpu(
		pf_eqe->bytes_committed);

	mlx5_core_dbg(dev,
		      "PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n",
		      eqe->sub_type, pfault.flags);

	switch (eqe->sub_type) {
	case MLX5_PFAULT_SUBTYPE_RDMA:
		/* RDMA based event */
		pfault.rdma.r_key =
			be32_to_cpu(pf_eqe->rdma.r_key);
		pfault.rdma.packet_size =
			be16_to_cpu(pf_eqe->rdma.packet_length);
		pfault.rdma.rdma_op_len =
			be32_to_cpu(pf_eqe->rdma.rdma_op_len);
		pfault.rdma.rdma_va =
			be64_to_cpu(pf_eqe->rdma.rdma_va);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n",
			      qpn, pfault.rdma.r_key);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: rdma_op_len: 0x%08x,\n",
			      pfault.rdma.rdma_op_len);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: rdma_va: 0x%016llx,\n",
			      pfault.rdma.rdma_va);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: bytes_committed: 0x%06x\n",
			      pfault.bytes_committed);
		break;

	case MLX5_PFAULT_SUBTYPE_WQE:
		/* WQE based event */
		pfault.wqe.wqe_index =
			be16_to_cpu(pf_eqe->wqe.wqe_index);
		pfault.wqe.packet_size =
			be16_to_cpu(pf_eqe->wqe.packet_length);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n",
			      qpn, pfault.wqe.wqe_index);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: bytes_committed: 0x%06x\n",
			      pfault.bytes_committed);
		break;

	default:
		mlx5_core_warn(dev,
			       "Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n",
			       eqe->sub_type, qpn);
		/* Unsupported page faults should still be resolved by the
		 * page fault handler
		 */
	}

	if (qp->pfault_handler) {
		qp->pfault_handler(qp, &pfault);
	} else {
		mlx5_core_err(dev,
			      "ODP event for QP %08x, without a fault handler in QP\n",
			      qpn);
		/* Page fault will remain unresolved. QP will hang until it is
		 * destroyed
		 */
	}

	mlx5_core_put_rsc(common);
}
Example #14
int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
                          QCryptoTLSCreds *tlscreds, const char *hostname,
                          QIOChannel **outioc, NBDExportInfo *info,
                          Error **errp)
{
    char buf[256];
    uint64_t magic;
    int rc;
    bool zeroes = true;
    bool structured_reply = info->structured_reply;

    trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>");

    info->structured_reply = false;
    rc = -EINVAL;

    if (outioc) {
        *outioc = NULL;
    }
    if (tlscreds && !outioc) {
        error_setg(errp, "Output I/O channel required for TLS");
        goto fail;
    }

    if (nbd_read(ioc, buf, 8, errp) < 0) {
        error_prepend(errp, "Failed to read data: ");
        goto fail;
    }

    buf[8] = '\0';
    if (strlen(buf) == 0) {
        error_setg(errp, "Server connection closed unexpectedly");
        goto fail;
    }

    magic = ldq_be_p(buf);
    trace_nbd_receive_negotiate_magic(magic);

    if (memcmp(buf, "NBDMAGIC", 8) != 0) {
        error_setg(errp, "Invalid magic received");
        goto fail;
    }

    if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) {
        error_prepend(errp, "Failed to read magic: ");
        goto fail;
    }
    magic = be64_to_cpu(magic);
    trace_nbd_receive_negotiate_magic(magic);

    if (magic == NBD_OPTS_MAGIC) {
        uint32_t clientflags = 0;
        uint16_t globalflags;
        bool fixedNewStyle = false;

        if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
            error_prepend(errp, "Failed to read server flags: ");
            goto fail;
        }
        globalflags = be16_to_cpu(globalflags);
        trace_nbd_receive_negotiate_server_flags(globalflags);
        if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
            fixedNewStyle = true;
            clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
        }
        if (globalflags & NBD_FLAG_NO_ZEROES) {
            zeroes = false;
            clientflags |= NBD_FLAG_C_NO_ZEROES;
        }
        /* client requested flags */
        clientflags = cpu_to_be32(clientflags);
        if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
            error_prepend(errp, "Failed to send clientflags field: ");
            goto fail;
        }
        if (tlscreds) {
            if (fixedNewStyle) {
                *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
                if (!*outioc) {
                    goto fail;
                }
                ioc = *outioc;
            } else {
                error_setg(errp, "Server does not support STARTTLS");
                goto fail;
            }
        }
        if (!name) {
            trace_nbd_receive_negotiate_default_name();
            name = "";
        }
        if (fixedNewStyle) {
            int result;

            if (structured_reply) {
                result = nbd_request_simple_option(ioc,
                                                   NBD_OPT_STRUCTURED_REPLY,
                                                   errp);
                if (result < 0) {
                    goto fail;
                }
                info->structured_reply = result == 1;
            }

            /* Try NBD_OPT_GO first - if it works, we are done (it
             * also gives us a good message if the server requires
             * TLS).  If it is not available, fall back to
             * NBD_OPT_LIST for nicer error messages about a missing
             * export, then use NBD_OPT_EXPORT_NAME.  */
            result = nbd_opt_go(ioc, name, info, errp);
            if (result < 0) {
                goto fail;
            }
            if (result > 0) {
                return 0;
            }
            /* Check our desired export is present in the
             * server export list. Since NBD_OPT_EXPORT_NAME
             * cannot return an error message, running this
             * query gives us better error reporting if the
             * export name is not available.
             */
            if (nbd_receive_query_exports(ioc, name, errp) < 0) {
                goto fail;
            }
        }
        /* write the export name request */
        if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name,
                                    errp) < 0) {
            goto fail;
        }

        /* Read the response */
        if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
            error_prepend(errp, "Failed to read export length: ");
            goto fail;
        }
        be64_to_cpus(&info->size);

        if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
            error_prepend(errp, "Failed to read export flags: ");
            goto fail;
        }
        be16_to_cpus(&info->flags);
    } else if (magic == NBD_CLIENT_MAGIC) {
        uint32_t oldflags;

        if (name) {
            error_setg(errp, "Server does not support export names");
            goto fail;
        }
        if (tlscreds) {
            error_setg(errp, "Server does not support STARTTLS");
            goto fail;
        }

        if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
            error_prepend(errp, "Failed to read export length: ");
            goto fail;
        }
        be64_to_cpus(&info->size);

        if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
            error_prepend(errp, "Failed to read export flags: ");
            goto fail;
        }
        be32_to_cpus(&oldflags);
        if (oldflags & ~0xffff) {
            error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
            goto fail;
        }
        info->flags = oldflags;
    } else {
        error_setg(errp, "Bad magic received");
        goto fail;
    }

    trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
    if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
        error_prepend(errp, "Failed to read reserved block: ");
        goto fail;
    }
    rc = 0;

fail:
    return rc;
}
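
This client mixes the two conversion idioms: be64_to_cpu() returns a
converted value, while be64_to_cpus() (note the trailing s) swaps a variable
in place, which is convenient right after nbd_read() fills it. A hedged
illustration of the difference:

    uint64_t raw;                     /* filled with big-endian wire data */
    uint64_t size = be64_to_cpu(raw); /* value form: raw itself stays BE */
    be64_to_cpus(&raw);               /* in-place form: raw is now host order */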
Example #15
static void aliasguid_query_handler(int status,
				    struct ib_sa_guidinfo_rec *guid_rec,
				    void *context)
{
	struct mlx4_ib_dev *dev;
	struct mlx4_alias_guid_work_context *cb_ctx = context;
	u8 port_index;
	int i;
	struct mlx4_sriov_alias_guid_info_rec_det *rec;
	unsigned long flags, flags1;

	if (!context)
		return;

	dev = cb_ctx->dev;
	port_index = cb_ctx->port - 1;
	rec = &dev->sriov.alias_guid.ports_guid[port_index].
		all_rec_per_port[cb_ctx->block_num];

	if (status) {
		rec->status = MLX4_GUID_INFO_STATUS_IDLE;
		pr_debug("(port: %d) failed: status = %d\n",
			 cb_ctx->port, status);
		goto out;
	}

	if (guid_rec->block_num != cb_ctx->block_num) {
		pr_err("block num mismatch: %d != %d\n",
		       cb_ctx->block_num, guid_rec->block_num);
		goto out;
	}

	pr_debug("lid/port: %d/%d, block_num: %d\n",
		 be16_to_cpu(guid_rec->lid), cb_ctx->port,
		 guid_rec->block_num);

	rec = &dev->sriov.alias_guid.ports_guid[port_index].
		all_rec_per_port[guid_rec->block_num];

	rec->status = MLX4_GUID_INFO_STATUS_SET;
	rec->method = MLX4_GUID_INFO_RECORD_SET;

	for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
		__be64 tmp_cur_ag;
		tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
		/* check if the SM didn't assign one of the records.
		 * if it didn't, if it was not sysadmin request:
		 * ask the SM to give a new GUID, (instead of the driver request).
		 */
		if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
			mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
				     "block_num: %d was declined by SM, "
				     "ownership by %d (0 = driver, 1=sysAdmin,"
				     " 2=None)\n", __func__, i,
				     guid_rec->block_num, rec->ownership);
			if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
				/* if it is driver assign, asks for new GUID from SM*/
				*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
					MLX4_NOT_SET_GUID;

				/* Mark the record as not assigned, and let it
				 * be sent again in the next work sched.*/
				rec->status = MLX4_GUID_INFO_STATUS_IDLE;
				rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
			}
		} else {
		       /* properly assigned record. */
		       /* We save the GUID we just got from the SM in the
			* admin_guid in order to be persistent, and in the
			* request from the sm the process will ask for the same GUID */
			if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
			    tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
				/* the sysadmin assignment failed.*/
				mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
					     " admin guid after SysAdmin "
					     "configuration. "
					     "Record num %d in block_num:%d "
					     "was declined by SM, "
					     "new val(0x%llx) was kept\n",
					      __func__, i,
					     guid_rec->block_num,
					     (long long)be64_to_cpu(*(__be64 *) &
							 rec->all_recs[i * GUID_REC_SIZE]));
			} else {
				memcpy(&rec->all_recs[i * GUID_REC_SIZE],
				       &guid_rec->guid_info_list[i * GUID_REC_SIZE],
				       GUID_REC_SIZE);
			}
		}
	}
	/*
	 * This function is called here to cover the cases where the SM
	 * does not send an SMP; the driver then notifies the slave from
	 * the SA response instead.
	 */
	mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
					     cb_ctx->port,
					     guid_rec->guid_info_list);
out:
	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
	spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
	if (!dev->sriov.is_going_down)
		queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
				   &dev->sriov.alias_guid.ports_guid[port_index].
				   alias_guid_work, 0);
	if (cb_ctx->sa_query) {
		list_del(&cb_ctx->list);
		kfree(cb_ctx);
	} else
		complete(&cb_ctx->done);
	spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
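
Note that aliasguid_query_handler() compares GUIDs as raw __be64 values
(tmp_cur_ag against the stored record) and only applies be64_to_cpu() when
formatting the warning message: equality is byte-order independent, printing
is not.

Example #16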
/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
 * create Elf headers. Keeping it separate from get_memory_ranges() as
 * requirements are different in the case of normal kexec and crashdumps.
 *
 * Normal kexec needs to look at all of available physical memory irrespective
 * of the fact how much of it is being used by currently running kernel.
 * Crashdumps need to have access to memory regions actually being used by
 * running  kernel. Expecting a different file/data structure than /proc/iomem
 * to look into down the line. May be something like /proc/kernelmem or may
 * be zone data structures exported from kernel.
 */
static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
{

	char device_tree[256] = "/proc/device-tree/";
	char fname[256];
	char buf[MAXBYTES];
	DIR *dir, *dmem;
	FILE *file;
	struct dirent *dentry, *mentry;
	int i, n, crash_rng_len = 0;
	unsigned long long start, end;
	int page_size;

	crash_max_memory_ranges = max_memory_ranges + 6;
	crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges;

	crash_memory_range = (struct memory_range *) malloc(crash_rng_len);
	if (!crash_memory_range) {
		fprintf(stderr, "Allocation for crash memory range failed\n");
		return -1;
	}
	memset(crash_memory_range, 0, crash_rng_len);

	/* create a separate program header for the backup region */
	crash_memory_range[0].start = BACKUP_SRC_START;
	crash_memory_range[0].end = BACKUP_SRC_END + 1;
	crash_memory_range[0].type = RANGE_RAM;
	memory_ranges++;

	if ((dir = opendir(device_tree)) == NULL) {
		perror(device_tree);
		goto err;
	}

	cstart = crash_base;
	cend = crash_base + crash_size;

	while ((dentry = readdir(dir)) != NULL) {
		if (!strncmp(dentry->d_name,
				"ibm,dynamic-reconfiguration-memory", 35)){
			get_dyn_reconf_crash_memory_ranges();
			continue;
		}
		if (strncmp(dentry->d_name, "memory@", 7) &&
			strcmp(dentry->d_name, "memory"))
			continue;
		strcpy(fname, device_tree);
		strcat(fname, dentry->d_name);
		if ((dmem = opendir(fname)) == NULL) {
			perror(fname);
			closedir(dir);
			goto err;
		}
		while ((mentry = readdir(dmem)) != NULL) {
			if (strcmp(mentry->d_name, "reg"))
				continue;
			strcat(fname, "/reg");
			if ((file = fopen(fname, "r")) == NULL) {
				perror(fname);
				closedir(dmem);
				closedir(dir);
				goto err;
			}
			if ((n = fread(buf, 1, MAXBYTES, file)) < 0) {
				perror(fname);
				fclose(file);
				closedir(dmem);
				closedir(dir);
				goto err;
			}
			if (memory_ranges >= (max_memory_ranges + 1)) {
				/* No space to insert another element. */
				fprintf(stderr,
					"Error: Number of crash memory ranges"
					" excedeed the max limit\n");
				goto err;
			}

			start = be64_to_cpu(((unsigned long long *)buf)[0]);
			end = start +
				be64_to_cpu(((unsigned long long *)buf)[1]);
			if (start == 0 && end >= (BACKUP_SRC_END + 1))
				start = BACKUP_SRC_END + 1;

			exclude_crash_region(start, end);
			fclose(file);
		}
		closedir(dmem);
	}
	closedir(dir);

	/*
	 * If RTAS region is overlapped with crashkernel, need to create ELF
	 * Program header for the overlapped memory.
	 */
	if (crash_base < rtas_base + rtas_size &&
		rtas_base < crash_base + crash_size) {
		page_size = getpagesize();
		cstart = rtas_base;
		cend = rtas_base + rtas_size;
		if (cstart < crash_base)
			cstart = crash_base;
		if (cend > crash_base + crash_size)
			cend = crash_base + crash_size;
		/*
		 * The rtas section created here is formed by reading rtas-base
		 * and rtas-size from /proc/device-tree/rtas.  Unfortunately
		 * rtas-size is not required to be a multiple of PAGE_SIZE.
		 * The remainder of the page it ends on is just garbage, and is
		 * safe to read; it's just not accounted for in rtas-size.  Since
		 * we're creating an elf section here, let's round it up
		 * to the next page size boundary so makedumpfile can
		 * read it safely without going south on us.
		 */
		cend = _ALIGN(cend, page_size);

		crash_memory_range[memory_ranges].start = cstart;
		crash_memory_range[memory_ranges++].end = cend;
	}

	/*
	 * If OPAL region is overlapped with crashkernel, need to create ELF
	 * Program header for the overlapped memory.
	 */
	if (crash_base < opal_base + opal_size &&
		opal_base < crash_base + crash_size) {
		page_size = getpagesize();
		cstart = opal_base;
		cend = opal_base + opal_size;
		if (cstart < crash_base)
			cstart = crash_base;
		if (cend > crash_base + crash_size)
			cend = crash_base + crash_size;
		/*
		 * The opal section created here is formed by reading opal-base
		 * and opal-size from /proc/device-tree/ibm,opal.  Unfortunately
		 * opal-size is not required to be a multiple of PAGE_SIZE.
		 * The remainder of the page it ends on is just garbage, and is
		 * safe to read; it's just not accounted for in opal-size.  Since
		 * we're creating an elf section here, let's round it up
		 * to the next page size boundary so makedumpfile can
		 * read it safely without going south on us.
		 */
		cend = _ALIGN(cend, page_size);

		crash_memory_range[memory_ranges].start = cstart;
		crash_memory_range[memory_ranges++].end = cend;
	}
	*range = crash_memory_range;
	*ranges = memory_ranges;

	int j;
	dbgprintf("CRASH MEMORY RANGES\n");
	for(j = 0; j < *ranges; j++) {
		start = crash_memory_range[j].start;
		end = crash_memory_range[j].end;
		dbgprintf("%016Lx-%016Lx\n", start, end);
	}

	return 0;

err:
	if (crash_memory_range)
		free(crash_memory_range);
	return -1;
}
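
The rtas/opal handling above rounds each range's end up to a page boundary before recording it. Below is a minimal stand-alone sketch of that round-up step, assuming _ALIGN is the usual power-of-two align-up macro; align_up and the sample values are illustrative, not the kexec-tools API.

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Round x up to the next multiple of a; a must be a power of two. */
static uint64_t align_up(uint64_t x, uint64_t a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	uint64_t page = (uint64_t)getpagesize();
	uint64_t cend = 0x12345;	/* stand-in for rtas_base + rtas_size */

	printf("%#llx -> %#llx\n", (unsigned long long)cend,
	       (unsigned long long)align_up(cend, page));
	return 0;
}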
Example #17
/**
 * ubi_compare_lebs - find out which logical eraseblock is newer.
 * @ubi: UBI device description object
 * @aeb: first logical eraseblock to compare
 * @pnum: physical eraseblock number of the second logical eraseblock to
 * compare
 * @vid_hdr: volume identifier header of the second logical eraseblock
 *
 * This function compares 2 copies of a LEB and informs which one is newer. In
 * case of success this function returns a positive value, in case of failure, a
 * negative error code is returned. The success return codes use the following
 * bits:
 *     o bit 0 is cleared: the first PEB (described by @aeb) is newer than the
 *       second PEB (described by @pnum and @vid_hdr);
 *     o bit 0 is set: the second PEB is newer;
 *     o bit 1 is cleared: no bit-flips were detected in the newer LEB;
 *     o bit 1 is set: bit-flips were detected in the newer LEB;
 *     o bit 2 is cleared: the older LEB is not corrupted;
 *     o bit 2 is set: the older LEB is corrupted.
 */
int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb,
			int pnum, const struct ubi_vid_hdr *vid_hdr)
{
	int len, err, second_is_newer, bitflips = 0, corrupted = 0;
	uint32_t data_crc, crc;
	struct ubi_vid_hdr *vh = NULL;
	unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);

	if (sqnum2 == aeb->sqnum) {
		/*
		 * This must be a really ancient UBI image which has been
		 * created before sequence numbers support has been added. At
		 * that times we used 32-bit LEB versions stored in logical
		 * eraseblocks. That was before UBI got into mainline. We do not
		 * support these images anymore. Well, those images still work,
		 * but only if no unclean reboots happened.
		 */
		ubi_err("unsupported on-flash UBI format");
		return -EINVAL;
	}

	/* Obviously the LEB with lower sequence counter is older */
	second_is_newer = (sqnum2 > aeb->sqnum);

	/*
	 * Now we know which copy is newer. If the copy flag of the PEB with
	 * newer version is not set, then we just return, otherwise we have to
	 * check data CRC. For the second PEB we already have the VID header,
	 * for the first one - we'll need to re-read it from flash.
	 *
	 * Note: this may be optimized so that we wouldn't read twice.
	 */

	if (second_is_newer) {
		if (!vid_hdr->copy_flag) {
			/* It is not a copy, so it is newer */
			dbg_bld("second PEB %d is newer, copy_flag is unset",
				pnum);
			return 1;
		}
	} else {
		if (!aeb->copy_flag) {
			/* It is not a copy, so it is newer */
			dbg_bld("first PEB %d is newer, copy_flag is unset",
				pnum);
			return bitflips << 1;
		}

		vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
		if (!vh)
			return -ENOMEM;

		pnum = aeb->pnum;
		err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0);
		if (err) {
			if (err == UBI_IO_BITFLIPS)
				bitflips = 1;
			else {
				ubi_err("VID of PEB %d header is bad, but it was OK earlier, err %d",
					pnum, err);
				if (err > 0)
					err = -EIO;

				goto out_free_vidh;
			}
		}

		vid_hdr = vh;
	}

	/* Read the data of the copy and check the CRC */

	len = be32_to_cpu(vid_hdr->data_size);

	mutex_lock(&ubi->buf_mutex);
	err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, len);
	if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err))
		goto out_unlock;

	data_crc = be32_to_cpu(vid_hdr->data_crc);
	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, len);
	if (crc != data_crc) {
		dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x",
			pnum, crc, data_crc);
		corrupted = 1;
		bitflips = 0;
		second_is_newer = !second_is_newer;
	} else {
		dbg_bld("PEB %d CRC is OK", pnum);
		bitflips = !!err;
	}
	mutex_unlock(&ubi->buf_mutex);

	ubi_free_vid_hdr(ubi, vh);

	if (second_is_newer)
		dbg_bld("second PEB %d is newer, copy_flag is set", pnum);
	else
		dbg_bld("first PEB %d is newer, copy_flag is set", pnum);

	return second_is_newer | (bitflips << 1) | (corrupted << 2);

out_unlock:
	mutex_unlock(&ubi->buf_mutex);
out_free_vidh:
	ubi_free_vid_hdr(ubi, vh);
	return err;
}
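
The kernel-doc above encodes three independent facts in the low bits of the return value. A small hedged sketch of how a caller might unpack them; the helper name and printout are illustrative, only the bit layout comes from the comment.

#include <stdio.h>

/* Decode the bit-encoded success value documented above. */
static void decode_compare_result(int ret)
{
	if (ret < 0) {
		printf("error: %d\n", ret);	/* negative errno */
		return;
	}
	printf("newer copy:    %s\n", (ret & 1) ? "second" : "first");
	printf("bit-flips:     %s\n", (ret & 2) ? "yes" : "no");
	printf("older corrupt: %s\n", (ret & 4) ? "yes" : "no");
}

int main(void)
{
	decode_compare_result(1 | (1 << 1));	/* second newer, with bit-flips */
	return 0;
}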
Example #18
/*
 * Adjust quota limits, and start/stop timers accordingly.
 */
int
xfs_qm_scall_setqlim(
	xfs_mount_t		*mp,
	xfs_dqid_t		id,
	uint			type,
	fs_disk_quota_t		*newlim)
{
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	xfs_disk_dquot_t	*ddq;
	xfs_dquot_t		*dqp;
	xfs_trans_t		*tp;
	int			error;
	xfs_qcnt_t		hard, soft;

	if (newlim->d_fieldmask & ~XFS_DQ_MASK)
		return EINVAL;
	if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
		return 0;

	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
				      0, 0, XFS_DEFAULT_LOG_COUNT))) {
		xfs_trans_cancel(tp, 0);
		return (error);
	}

	/*
	 * We don't want to race with a quotaoff so take the quotaoff lock.
	 * (We don't hold an inode lock, so there's nothing else to stop
	 * a quotaoff from happening). (XXXThis doesn't currently happen
	 * because we take the vfslock before calling xfs_qm_sysent).
	 */
	mutex_lock(&q->qi_quotaofflock);

	/*
	 * Get the dquot (locked), and join it to the transaction.
	 * Allocate the dquot if this doesn't exist.
	 */
	if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
		xfs_trans_cancel(tp, XFS_TRANS_ABORT);
		ASSERT(error != ENOENT);
		goto out_unlock;
	}
	xfs_trans_dqjoin(tp, dqp);
	ddq = &dqp->q_core;

	/*
	 * Make sure that hardlimits are >= soft limits before changing.
	 */
	hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
			be64_to_cpu(ddq->d_blk_hardlimit);
	soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
			be64_to_cpu(ddq->d_blk_softlimit);
	if (hard == 0 || hard >= soft) {
		ddq->d_blk_hardlimit = cpu_to_be64(hard);
		ddq->d_blk_softlimit = cpu_to_be64(soft);
		if (id == 0) {
			q->qi_bhardlimit = hard;
			q->qi_bsoftlimit = soft;
		}
	} else {
		xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
	}
	hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
			be64_to_cpu(ddq->d_rtb_hardlimit);
	soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
			be64_to_cpu(ddq->d_rtb_softlimit);
	if (hard == 0 || hard >= soft) {
		ddq->d_rtb_hardlimit = cpu_to_be64(hard);
		ddq->d_rtb_softlimit = cpu_to_be64(soft);
		if (id == 0) {
			q->qi_rtbhardlimit = hard;
			q->qi_rtbsoftlimit = soft;
		}
	} else {
		xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
	}

	hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
		(xfs_qcnt_t) newlim->d_ino_hardlimit :
			be64_to_cpu(ddq->d_ino_hardlimit);
	soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
		(xfs_qcnt_t) newlim->d_ino_softlimit :
			be64_to_cpu(ddq->d_ino_softlimit);
	if (hard == 0 || hard >= soft) {
		ddq->d_ino_hardlimit = cpu_to_be64(hard);
		ddq->d_ino_softlimit = cpu_to_be64(soft);
		if (id == 0) {
			q->qi_ihardlimit = hard;
			q->qi_isoftlimit = soft;
		}
	} else {
		xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
	}

	/*
	 * Update warnings counter(s) if requested
	 */
	if (newlim->d_fieldmask & FS_DQ_BWARNS)
		ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
	if (newlim->d_fieldmask & FS_DQ_IWARNS)
		ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
	if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
		ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);

	if (id == 0) {
		/*
		 * Timelimits for the super user set the relative time
		 * the other users can be over quota for this file system.
		 * If it is zero a default is used.  Ditto for the default
		 * soft and hard limit values (already done, above), and
		 * for warnings.
		 */
		if (newlim->d_fieldmask & FS_DQ_BTIMER) {
			q->qi_btimelimit = newlim->d_btimer;
			ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
		}
		if (newlim->d_fieldmask & FS_DQ_ITIMER) {
			q->qi_itimelimit = newlim->d_itimer;
			ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
		}
		if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
			q->qi_rtbtimelimit = newlim->d_rtbtimer;
			ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
		}
		if (newlim->d_fieldmask & FS_DQ_BWARNS)
			q->qi_bwarnlimit = newlim->d_bwarns;
		if (newlim->d_fieldmask & FS_DQ_IWARNS)
			q->qi_iwarnlimit = newlim->d_iwarns;
		if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
			q->qi_rtbwarnlimit = newlim->d_rtbwarns;
	} else {
		/*
		 * If the user is now over quota, start the timelimit.
		 * The user will not be 'warned'.
		 * Note that we keep the timers ticking, whether enforcement
		 * is on or off. We don't really want to bother with iterating
		 * over all ondisk dquots and turning the timers on/off.
		 */
		xfs_qm_adjust_dqtimers(mp, ddq);
	}
	dqp->dq_flags |= XFS_DQ_DIRTY;
	xfs_trans_log_dquot(tp, dqp);

	error = xfs_trans_commit(tp, 0);
	xfs_qm_dqrele(dqp);

 out_unlock:
	mutex_unlock(&q->qi_quotaofflock);
	return error;
}
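
Each limit pair above is accepted only when the hard limit is zero (meaning unlimited) or at least the soft limit; otherwise the pair is skipped and a debug message is logged. A minimal sketch of that validation rule, with illustrative names:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* hard == 0 means "no hard limit", which never conflicts with soft. */
static bool limits_ok(uint64_t hard, uint64_t soft)
{
	return hard == 0 || hard >= soft;
}

int main(void)
{
	assert(limits_ok(0, 100));	/* unlimited hard is always fine */
	assert(limits_ok(200, 100));
	assert(!limits_ok(50, 100));	/* rejected, limits left unchanged */
	return 0;
}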
Example #19
static int tux3_symlink(struct inode *dir, struct dentry *dentry,
			const char *symname)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct tux_iattr iattr = {
		.uid	= current_fsuid(),
		.gid	= current_fsgid(),
		.mode	= S_IFLNK | S_IRWXUGO,
	};

	return __tux3_symlink(dir, dentry, &iattr, symname);
}
#endif /* !__KERNEL__ */

static int tux_del_dirent(struct inode *dir, struct dentry *dentry)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct buffer_head *buffer;
	tux_dirent *entry;

	entry = tux_find_dirent(dir, &dentry->d_name, &buffer);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	return tux_delete_dirent(dir, buffer, entry);
}

static int tux3_unlink(struct inode *dir, struct dentry *dentry)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct inode *inode = dentry->d_inode;
	struct sb *sb = tux_sb(inode->i_sb);

	change_begin(sb);
	int err = tux_del_dirent(dir, dentry);
	if (!err) {
		tux3_iattrdirty(inode);
		inode->i_ctime = dir->i_ctime;
		/* FIXME: we shouldn't write inode for i_nlink = 0? */
		inode_dec_link_count(inode);
	}
	change_end(sb);

	return err;
}

static int tux3_rmdir(struct inode *dir, struct dentry *dentry)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = tux_sb(dir->i_sb);
	struct inode *inode = dentry->d_inode;
	int err = tux_dir_is_empty(inode);

	if (!err) {
		change_begin(sb);
		err = tux_del_dirent(dir, dentry);
		if (!err) {
			tux3_iattrdirty(inode);
			inode->i_ctime = dir->i_ctime;
			/* FIXME: we need to do this for POSIX? */
			/* inode->i_size = 0; */
			clear_nlink(inode);
			tux3_mark_inode_dirty_sync(inode);

			inode_dec_link_count(dir);
		}
		change_end(sb);
	}
	return err;
}

static int tux3_rename(struct inode *old_dir, struct dentry *old_dentry,
		       struct inode *new_dir, struct dentry *new_dentry)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct inode *old_inode = old_dentry->d_inode;
	struct inode *new_inode = new_dentry->d_inode;
	struct sb *sb = tux_sb(old_inode->i_sb);
	struct buffer_head *old_buffer, *new_buffer, *clone;
	tux_dirent *old_entry, *new_entry;
	void *olddata;
	int err, new_subdir = 0;
	unsigned delta;

	old_entry = tux_find_dirent(old_dir, &old_dentry->d_name, &old_buffer);
	if (IS_ERR(old_entry))
		return PTR_ERR(old_entry);

	/* FIXME: is this needed? */
	assert(be64_to_cpu(old_entry->inum) == tux_inode(old_inode)->inum);

	change_begin(sb);
	delta = tux3_get_current_delta();

	if (new_inode) {
		int old_is_dir = S_ISDIR(old_inode->i_mode);
		if (old_is_dir) {
			err = tux_dir_is_empty(new_inode);
			if (err)
				goto error;
		}

		new_entry = tux_find_dirent(new_dir, &new_dentry->d_name,
					    &new_buffer);
		if (IS_ERR(new_entry)) {
			assert(PTR_ERR(new_entry) != -ENOENT);
			err = PTR_ERR(new_entry);
			goto error;
		}

		/*
		 * The directory is protected by i_mutex.
		 * blockdirty() should never return -EAGAIN.
		 */
		olddata = bufdata(new_buffer);
		clone = blockdirty(new_buffer, delta);
		if (IS_ERR(clone)) {
			assert(PTR_ERR(clone) != -EAGAIN);
			blockput(new_buffer);
			err = PTR_ERR(clone);
			goto error;
		}
		new_entry = ptr_redirect(new_entry, olddata, bufdata(clone));

		/* this releases new_buffer */
		tux_update_dirent(new_dir, clone, new_entry, old_inode);

		tux3_iattrdirty(new_inode);
		new_inode->i_ctime = new_dir->i_ctime;
		if (old_is_dir)
			drop_nlink(new_inode);
		inode_dec_link_count(new_inode);
	} else {
		new_subdir = S_ISDIR(old_inode->i_mode) && new_dir != old_dir;
		if (new_subdir) {
			if (new_dir->i_nlink >= TUX_LINK_MAX) {
				err = -EMLINK;
				goto error;
			}
		}
		err = __tux_add_dirent(new_dir, new_dentry, old_inode);
		if (err)
			goto error;
		if (new_subdir)
			inode_inc_link_count(new_dir);
	}
	tux3_iattrdirty(old_inode);
	old_inode->i_ctime = new_dir->i_ctime;
	tux3_mark_inode_dirty(old_inode);

	/*
	 * The new entry can be in the same buffer as old_buffer, and
	 * the path above may have forked that buffer. So if old_buffer
	 * was forked, redirect old_entry into the forked buffer here.
	 */
	if (buffer_forked(old_buffer)) {
		clone = blockget(mapping(old_dir), bufindex(old_buffer));
		assert(clone);
		old_entry = ptr_redirect(old_entry, bufdata(old_buffer),
					 bufdata(clone));
		blockput(old_buffer);
		old_buffer = clone;
	}
	err = tux_delete_dirent(old_dir, old_buffer, old_entry);
	if (err) {
		tux3_fs_error(sb, "couldn't delete old entry (%Lu)",
			      tux_inode(old_inode)->inum);
		/* FIXME: now, we have hardlink even if it's dir. */
		inode_inc_link_count(old_inode);
	}
	if (!err && new_subdir)
		inode_dec_link_count(old_dir);

	change_end(sb);
	return err;

error:
	change_end(sb);
	blockput(old_buffer);
	return err;
}

#ifdef __KERNEL__
const struct file_operations tux_dir_fops = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= tux_readdir,
	.fsync		= tux3_sync_file,
};

const struct inode_operations tux_dir_iops = {
	.create		= tux3_create,
	.lookup		= tux3_lookup,
	.link		= tux3_link,
	.unlink		= tux3_unlink,
	.symlink	= tux3_symlink,
	.mkdir		= tux3_mkdir,
	.rmdir		= tux3_rmdir,
	.mknod		= tux3_mknod,
	.rename		= tux3_rename,
	.setattr	= tux3_setattr,
	.getattr	= tux3_getattr
//	.setxattr	= generic_setxattr,
//	.getxattr	= generic_getxattr,
//	.listxattr	= ext3_listxattr,
//	.removexattr	= generic_removexattr,
//	.permission	= ext3_permission,
	/* FIXME: why doesn't ext4 support this for directory? */
//	.fallocate	= ext4_fallocate,
//	.fiemap		= ext4_fiemap,
};
Example #20
/*
 * Read and convert on-disk LUKS2 header to in-memory representation.
 * Try to do recovery if on-disk state is not consistent.
 */
int LUKS2_disk_hdr_read(struct crypt_device *cd, struct luks2_hdr *hdr,
			struct device *device, int do_recovery)
{
	enum { HDR_OK, HDR_OBSOLETE, HDR_FAIL, HDR_FAIL_IO } state_hdr1, state_hdr2;
	struct luks2_hdr_disk hdr_disk1, hdr_disk2;
	char *json_area1 = NULL, *json_area2 = NULL;
	json_object *jobj_hdr1 = NULL, *jobj_hdr2 = NULL;
	int i, r;
	uint64_t hdr_size;

	if (do_recovery && !crypt_metadata_locking_enabled()) {
		do_recovery = 0;
		log_dbg("Disabling header auto-recovery due to locking being disabled.");
	}

	/*
	 * Read primary LUKS2 header (offset 0).
	 */
	state_hdr1 = HDR_FAIL;
	r = hdr_read_disk(device, &hdr_disk1, &json_area1, 0, 0);
	if (r == 0) {
		jobj_hdr1 = parse_and_validate_json(json_area1, be64_to_cpu(hdr_disk1.hdr_size) - LUKS2_HDR_BIN_LEN);
		state_hdr1 = jobj_hdr1 ? HDR_OK : HDR_OBSOLETE;
	} else if (r == -EIO)
		state_hdr1 = HDR_FAIL_IO;

	/*
	 * Read secondary LUKS2 header (follows primary).
	 */
	state_hdr2 = HDR_FAIL;
	if (state_hdr1 != HDR_FAIL && state_hdr1 != HDR_FAIL_IO) {
		r = hdr_read_disk(device, &hdr_disk2, &json_area2, be64_to_cpu(hdr_disk1.hdr_size), 1);
		if (r == 0) {
			jobj_hdr2 = parse_and_validate_json(json_area2, be64_to_cpu(hdr_disk2.hdr_size) - LUKS2_HDR_BIN_LEN);
			state_hdr2 = jobj_hdr2 ? HDR_OK : HDR_OBSOLETE;
		} else if (r == -EIO)
			state_hdr2 = HDR_FAIL_IO;
	} else {
		/*
		 * No header size, check all known offsets.
		 */
		for (r = -EINVAL, i = 2; r < 0 && i <= 1024; i <<= 1)
			r = hdr_read_disk(device, &hdr_disk2, &json_area2, i * 4096, 1);

		if (r == 0) {
			jobj_hdr2 = parse_and_validate_json(json_area2, be64_to_cpu(hdr_disk2.hdr_size) - LUKS2_HDR_BIN_LEN);
			state_hdr2 = jobj_hdr2 ? HDR_OK : HDR_OBSOLETE;
		} else if (r == -EIO)
			state_hdr2 = HDR_FAIL_IO;
	}

	/*
	 * Check sequence id if both headers are read correctly.
	 */
	if (state_hdr1 == HDR_OK && state_hdr2 == HDR_OK) {
		if (be64_to_cpu(hdr_disk1.seqid) > be64_to_cpu(hdr_disk2.seqid))
			state_hdr2 = HDR_OBSOLETE;
		else if (be64_to_cpu(hdr_disk1.seqid) < be64_to_cpu(hdr_disk2.seqid))
			state_hdr1 = HDR_OBSOLETE;
	}

	/* check header with keyslots to fit the device */
	if (state_hdr1 == HDR_OK)
		hdr_size = LUKS2_hdr_and_areas_size(jobj_hdr1);
	else if (state_hdr2 == HDR_OK)
		hdr_size = LUKS2_hdr_and_areas_size(jobj_hdr2);
	else {
		r = (state_hdr1 == HDR_FAIL_IO && state_hdr2 == HDR_FAIL_IO) ? -EIO : -EINVAL;
		goto err;
	}

	r = LUKS2_check_device_size(cd, device, hdr_size, 0);
	if (r)
		goto err;

	/*
	 * Try to rewrite (recover) bad header. Always regenerate salt for bad header.
	 */
	if (state_hdr1 == HDR_OK && state_hdr2 != HDR_OK) {
		log_dbg("Secondary LUKS2 header requires recovery.");

		if (do_recovery) {
			memcpy(&hdr_disk2, &hdr_disk1, LUKS2_HDR_BIN_LEN);
			r = crypt_random_get(NULL, (char*)hdr_disk2.salt, sizeof(hdr_disk2.salt), CRYPT_RND_SALT);
			if (r)
				log_dbg("Cannot generate master salt.");
			else {
				hdr_from_disk(&hdr_disk1, &hdr_disk2, hdr, 0);
				r = hdr_write_disk(device, hdr, json_area1, 1);
			}
			if (r)
				log_dbg("Secondary LUKS2 header recovery failed.");
		}
	} else if (state_hdr1 != HDR_OK && state_hdr2 == HDR_OK) {
		log_dbg("Primary LUKS2 header requires recovery.");

		if (do_recovery) {
			memcpy(&hdr_disk1, &hdr_disk2, LUKS2_HDR_BIN_LEN);
			r = crypt_random_get(NULL, (char*)hdr_disk1.salt, sizeof(hdr_disk1.salt), CRYPT_RND_SALT);
			if (r)
				log_dbg("Cannot generate master salt.");
			else {
				hdr_from_disk(&hdr_disk2, &hdr_disk1, hdr, 1);
				r = hdr_write_disk(device, hdr, json_area2, 0);
			}
			if (r)
				log_dbg("Primary LUKS2 header recovery failed.");
		}
	}

	free(json_area1);
	json_area1 = NULL;
	free(json_area2);
	json_area2 = NULL;

	/* wrong lock for write mode during recovery attempt */
	if (r == -EAGAIN)
		goto err;

	/*
	 * Even if status is failed, the second header includes salt.
	 */
	if (state_hdr1 == HDR_OK) {
		hdr_from_disk(&hdr_disk1, &hdr_disk2, hdr, 0);
		hdr->jobj = jobj_hdr1;
		json_object_put(jobj_hdr2);
	} else if (state_hdr2 == HDR_OK) {
		hdr_from_disk(&hdr_disk2, &hdr_disk1, hdr, 1);
		hdr->jobj = jobj_hdr2;
		json_object_put(jobj_hdr1);
	}

	/*
	 * FIXME: should this fail? At least one header was read correctly.
	 * r = (state_hdr1 == HDR_FAIL_IO || state_hdr2 == HDR_FAIL_IO) ? -EIO : -EINVAL;
	 */
	return 0;
err:
	log_dbg("LUKS2 header read failed (%d).", r);

	free(json_area1);
	free(json_area2);
	json_object_put(jobj_hdr1);
	json_object_put(jobj_hdr2);
	hdr->jobj = NULL;
	return r;
}
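
The header arbitration above boils down to: when both copies parse, the higher sequence id wins and the other is marked obsolete (becoming a recovery candidate). A small sketch of that rule, assuming nothing beyond the comparison itself; the enum mirrors the states used above.

#include <stdint.h>

enum hdr_state { HDR_OK, HDR_OBSOLETE };

/* When both headers parse, the higher seqid wins; the loser is obsolete. */
static void arbitrate_seqid(uint64_t seq1, uint64_t seq2,
			    enum hdr_state *s1, enum hdr_state *s2)
{
	if (seq1 > seq2)
		*s2 = HDR_OBSOLETE;
	else if (seq1 < seq2)
		*s1 = HDR_OBSOLETE;
	/* equal: both stay HDR_OK and no recovery is attempted */
}

int main(void)
{
	enum hdr_state s1 = HDR_OK, s2 = HDR_OK;

	arbitrate_seqid(7, 5, &s1, &s2);	/* primary is newer */
	return s2 == HDR_OBSOLETE ? 0 : 1;
}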
Example #21
int qcow2_read_snapshots(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    QCowSnapshot *sn;
    int i, id_str_size, name_size;
    int64_t offset;
    uint32_t extra_data_size;
    int ret;

    if (!s->nb_snapshots) {
        s->snapshots = NULL;
        s->snapshots_size = 0;
        return 0;
    }

    offset = s->snapshots_offset;
    s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);

    for(i = 0; i < s->nb_snapshots; i++) {
        /* Read statically sized part of the snapshot header */
        offset = align_offset(offset, 8);
        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }

        offset += sizeof(h);
        sn = s->snapshots + i;
        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
        sn->l1_size = be32_to_cpu(h.l1_size);
        sn->vm_state_size = be32_to_cpu(h.vm_state_size);
        sn->date_sec = be32_to_cpu(h.date_sec);
        sn->date_nsec = be32_to_cpu(h.date_nsec);
        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
        extra_data_size = be32_to_cpu(h.extra_data_size);

        id_str_size = be16_to_cpu(h.id_str_size);
        name_size = be16_to_cpu(h.name_size);

        /* Read extra data */
        ret = bdrv_pread(bs->file, offset, &extra,
                         MIN(sizeof(extra), extra_data_size));
        if (ret < 0) {
            goto fail;
        }
        offset += extra_data_size;

        if (extra_data_size >= 8) {
            sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
        }

        if (extra_data_size >= 16) {
            sn->disk_size = be64_to_cpu(extra.disk_size);
        } else {
            sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
        }

        /* Read snapshot ID */
        sn->id_str = g_malloc(id_str_size + 1);
        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
        offset += id_str_size;
        sn->id_str[id_str_size] = '\0';

        /* Read snapshot name */
        sn->name = g_malloc(name_size + 1);
        ret = bdrv_pread(bs->file, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
        offset += name_size;
        sn->name[name_size] = '\0';

        if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
            ret = -EFBIG;
            goto fail;
        }
    }

    assert(offset - s->snapshots_offset <= INT_MAX);
    s->snapshots_size = offset - s->snapshots_offset;
    return 0;

fail:
    qcow2_free_snapshots(bs);
    return ret;
}
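
The extra-data parse above is the classic forward-compatible pattern: read at most sizeof(extra) bytes, advance the cursor by the full on-disk size, and only trust fields the snapshot actually recorded. A hedged userspace sketch of that pattern; the struct layout and names are illustrative, and a real reader would still byte-swap the big-endian fields.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct extra_data {
	uint64_t vm_state_size_large;	/* present if extra_data_size >= 8 */
	uint64_t disk_size;		/* present if extra_data_size >= 16 */
};

static void parse_extra(const uint8_t *buf, uint32_t extra_data_size,
			struct extra_data *out)
{
	size_t n = extra_data_size < sizeof(*out) ? extra_data_size
						  : sizeof(*out);

	memset(out, 0, sizeof(*out));	/* unknown fields default to 0 */
	memcpy(out, buf, n);		/* caller still byte-swaps fields */
}

int main(void)
{
	uint8_t disk[8] = { 0 };	/* an old image: only 8 extra bytes */
	struct extra_data e;

	parse_extra(disk, sizeof(disk), &e);
	printf("disk_size recorded: %s\n",
	       e.disk_size ? "yes" : "no (fall back to total_sectors)");
	return 0;
}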
Example #22
static void do_io_interrupt(CPUS390XState *env)
{
    S390CPU *cpu = s390_env_get_cpu(env);
    LowCore *lowcore;
    IOIntQueue *q;
    uint8_t isc;
    int disable = 1;
    int found = 0;

    if (!(env->psw.mask & PSW_MASK_IO)) {
        cpu_abort(CPU(cpu), "I/O int w/o I/O mask\n");
    }

    for (isc = 0; isc < ARRAY_SIZE(env->io_index); isc++) {
        uint64_t isc_bits;

        if (env->io_index[isc] < 0) {
            continue;
        }
        if (env->io_index[isc] >= MAX_IO_QUEUE) {
            cpu_abort(CPU(cpu), "I/O queue overrun for isc %d: %d\n",
                      isc, env->io_index[isc]);
        }

        q = &env->io_queue[env->io_index[isc]][isc];
        isc_bits = ISC_TO_ISC_BITS(IO_INT_WORD_ISC(q->word));
        if (!(env->cregs[6] & isc_bits)) {
            disable = 0;
            continue;
        }
        if (!found) {
            uint64_t mask, addr;

            found = 1;
            lowcore = cpu_map_lowcore(env);

            lowcore->subchannel_id = cpu_to_be16(q->id);
            lowcore->subchannel_nr = cpu_to_be16(q->nr);
            lowcore->io_int_parm = cpu_to_be32(q->parm);
            lowcore->io_int_word = cpu_to_be32(q->word);
            lowcore->io_old_psw.mask = cpu_to_be64(get_psw_mask(env));
            lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr);
            mask = be64_to_cpu(lowcore->io_new_psw.mask);
            addr = be64_to_cpu(lowcore->io_new_psw.addr);

            cpu_unmap_lowcore(lowcore);

            env->io_index[isc]--;

            DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
                    env->psw.mask, env->psw.addr);
            load_psw(env, mask, addr);
        }
        if (env->io_index[isc] >= 0) {
            disable = 0;
        }
        continue;
    }

    if (disable) {
        env->pending_int &= ~INTERRUPT_IO;
    }

}
Example #23
/*
 * Connection established.
 * We get here for both outgoing and incoming connection.
 */
void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
{
	const struct rds_ib_connect_private *dp = NULL;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_qp_attr qp_attr;
	int err;

	if (event->param.conn.private_data_len >= sizeof(*dp)) {
		dp = event->param.conn.private_data;

		/* make sure it isn't empty data */
		if (dp->dp_protocol_major) {
			rds_ib_set_protocol(conn,
				RDS_PROTOCOL(dp->dp_protocol_major,
				dp->dp_protocol_minor));
			rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
		}
	}

	if (conn->c_version < RDS_PROTOCOL(3,1)) {
		printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
		       " no longer supported\n",
		       &conn->c_faddr,
		       RDS_PROTOCOL_MAJOR(conn->c_version),
		       RDS_PROTOCOL_MINOR(conn->c_version));
		rds_conn_destroy(conn);
		return;
	} else {
		printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
		       &conn->c_faddr,
		       RDS_PROTOCOL_MAJOR(conn->c_version),
		       RDS_PROTOCOL_MINOR(conn->c_version),
		       ic->i_flowctl ? ", flow control" : "");
	}

	/*
	 * Init rings and fill recv. This needs to wait until protocol
	 * negotiation is complete, since the ring layout is different from
	 * 3.0 to 3.1.
	 */
	rds_ib_send_init_ring(ic);
	rds_ib_recv_init_ring(ic);
	/* Post receive buffers - as a side effect, this will update
	 * the posted credit count. */
	rds_ib_recv_refill(conn, 1);

	/* Tune RNR behavior */
	rds_ib_tune_rnr(ic, &qp_attr);

	qp_attr.qp_state = IB_QPS_RTS;
	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
	if (err)
		printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);

	/* update ib_device with this local ipaddr */
	err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
	if (err)
		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
			err);

	/* If the peer gave us the last packet it saw, process this as if
	 * we had received a regular ACK. */
	if (dp && dp->dp_ack_seq)
		rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);

	rds_connect_complete(conn);
}
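
The version gate above rejects peers older than protocol 3.1. Assuming the usual encoding with the major version in the high byte and the minor in the low byte (the real macros live in net/rds/rds.h; this layout is an assumption for illustration), the comparison works on the packed value directly. A toy sketch:

#include <stdio.h>

/* Assumed encoding: major in the high byte, minor in the low byte. */
#define PROTO(major, minor)	(((major) << 8) | (minor))
#define PROTO_MAJOR(v)		((v) >> 8)
#define PROTO_MINOR(v)		((v) & 255)

int main(void)
{
	unsigned v = PROTO(3, 0);

	/* 3.0 compares below 3.1, so a "< RDS_PROTOCOL(3,1)" gate trips */
	printf("%u.%u rejected: %s\n", PROTO_MAJOR(v), PROTO_MINOR(v),
	       v < PROTO(3, 1) ? "yes" : "no");
	return 0;
}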
Example #24
static void do_mchk_interrupt(CPUS390XState *env)
{
    S390CPU *cpu = s390_env_get_cpu(env);
    uint64_t mask, addr;
    LowCore *lowcore;
    MchkQueue *q;
    int i;

    if (!(env->psw.mask & PSW_MASK_MCHECK)) {
        cpu_abort(CPU(cpu), "Machine check w/o mchk mask\n");
    }

    if (env->mchk_index < 0 || env->mchk_index >= MAX_MCHK_QUEUE) {
        cpu_abort(CPU(cpu), "Mchk queue overrun: %d\n", env->mchk_index);
    }

    q = &env->mchk_queue[env->mchk_index];

    if (q->type != 1) {
        /* Don't know how to handle this... */
        cpu_abort(CPU(cpu), "Unknown machine check type %d\n", q->type);
    }
    if (!(env->cregs[14] & (1 << 28))) {
        /* CRW machine checks disabled */
        return;
    }

    lowcore = cpu_map_lowcore(env);

    for (i = 0; i < 16; i++) {
        lowcore->floating_pt_save_area[i] = cpu_to_be64(get_freg(env, i)->ll);
        lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]);
        lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]);
        lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]);
    }
    lowcore->prefixreg_save_area = cpu_to_be32(env->psa);
    lowcore->fpt_creg_save_area = cpu_to_be32(env->fpc);
    lowcore->tod_progreg_save_area = cpu_to_be32(env->todpr);
    lowcore->cpu_timer_save_area[0] = cpu_to_be32(env->cputm >> 32);
    lowcore->cpu_timer_save_area[1] = cpu_to_be32((uint32_t)env->cputm);
    lowcore->clock_comp_save_area[0] = cpu_to_be32(env->ckc >> 32);
    lowcore->clock_comp_save_area[1] = cpu_to_be32((uint32_t)env->ckc);

    lowcore->mcck_interruption_code[0] = cpu_to_be32(0x00400f1d);
    lowcore->mcck_interruption_code[1] = cpu_to_be32(0x40330000);
    lowcore->mcck_old_psw.mask = cpu_to_be64(get_psw_mask(env));
    lowcore->mcck_old_psw.addr = cpu_to_be64(env->psw.addr);
    mask = be64_to_cpu(lowcore->mcck_new_psw.mask);
    addr = be64_to_cpu(lowcore->mcck_new_psw.addr);

    cpu_unmap_lowcore(lowcore);

    env->mchk_index--;
    if (env->mchk_index == -1) {
        env->pending_int &= ~INTERRUPT_MCHK;
    }

    DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
            env->psw.mask, env->psw.addr);

    load_psw(env, mask, addr);
}
Example #25
static uint32_t mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring,
				int index, uint8_t owner, uint64_t timestamp)
{
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
	void *end = ring->buf + ring->buf_size;
	struct block *block = tx_info->block;
	int nr_maps = tx_info->nr_maps;
	int i;

#if 0 // AKAROS_PORT
	/* We do not touch skb here, so prefetch skb->users location
	 * to speedup consume_skb()
	 */
	prefetchw(&skb->users);

	if (unlikely(timestamp)) {
		struct skb_shared_hwtstamps hwts;

		mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
		skb_tstamp_tx(skb, &hwts);
	}
#endif

	/* Optimize the common case when there are no wraparounds */
	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
		if (!tx_info->inl) {
			if (tx_info->linear)
				dma_unmap_single(priv->ddev,
						tx_info->map0_dma,
						tx_info->map0_byte_count,
						PCI_DMA_TODEVICE);
			else
				dma_unmap_page(priv->ddev,
					       tx_info->map0_dma,
					       tx_info->map0_byte_count,
					       PCI_DMA_TODEVICE);
			for (i = 1; i < nr_maps; i++) {
				data++;
				dma_unmap_page(priv->ddev,
					(dma_addr_t)be64_to_cpu(data->addr),
					be32_to_cpu(data->byte_count),
					PCI_DMA_TODEVICE);
			}
		}
	} else {
		if (!tx_info->inl) {
			if ((void *) data >= end) {
				data = ring->buf + ((void *)data - end);
			}

			if (tx_info->linear)
				dma_unmap_single(priv->ddev,
						tx_info->map0_dma,
						tx_info->map0_byte_count,
						PCI_DMA_TODEVICE);
			else
				dma_unmap_page(priv->ddev,
					       tx_info->map0_dma,
					       tx_info->map0_byte_count,
					       PCI_DMA_TODEVICE);
			for (i = 1; i < nr_maps; i++) {
				data++;
				/* Check for wraparound before unmapping */
				if ((void *) data >= end)
					data = ring->buf;
				dma_unmap_page(priv->ddev,
					(dma_addr_t)be64_to_cpu(data->addr),
					be32_to_cpu(data->byte_count),
					PCI_DMA_TODEVICE);
			}
		}
	}
	freeb(block);
	return tx_info->nr_txbb;
}
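
Both branches above differ only in whether the descriptor walk can run past the end of the ring; the wraparound case rebases the pointer to the ring start. A minimal sketch of that rebase using offsets instead of raw pointers, with illustrative names:

#include <stdio.h>
#include <stddef.h>

/* Advance an offset in a circular buffer, wrapping past ring_size. */
static size_t ring_advance(size_t off, size_t step, size_t ring_size)
{
	off += step;
	if (off >= ring_size)	/* wrapped: continue from the ring base */
		off -= ring_size;
	return off;
}

int main(void)
{
	size_t off = 48;

	off = ring_advance(off, 32, 64);
	printf("offset after wrap: %zu\n", off);	/* 16 */
	return 0;
}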
Example #26
HPT_U64 BE64_TO_CPU(HPT_U64 x) { return be64_to_cpu(x); }
Example #27
/**
 * compare_lebs - find out which logical eraseblock is newer.
 * @ubi: UBI device description object
 * @seb: first logical eraseblock to compare
 * @pnum: physical eraseblock number of the second logical eraseblock to
 * compare
 * @vid_hdr: volume identifier header of the second logical eraseblock
 *
 * This function compares 2 copies of a LEB and informs which one is newer. In
 * case of success this function returns a positive value, in case of failure, a
 * negative error code is returned. The success return codes use the following
 * bits:
 *     o bit 0 is cleared: the first PEB (described by @seb) is newer than the
 *       second PEB (described by @pnum and @vid_hdr);
 *     o bit 0 is set: the second PEB is newer;
 *     o bit 1 is cleared: no bit-flips were detected in the newer LEB;
 *     o bit 1 is set: bit-flips were detected in the newer LEB;
 *     o bit 2 is cleared: the older LEB is not corrupted;
 *     o bit 2 is set: the older LEB is corrupted.
 */
static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
			int pnum, const struct ubi_vid_hdr *vid_hdr)
{
	void *buf;
	int len, err, second_is_newer, bitflips = 0, corrupted = 0;
	uint32_t data_crc, crc;
	struct ubi_vid_hdr *vh = NULL;
	unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);

	if (seb->sqnum == 0 && sqnum2 == 0) {
		long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);

		/*
		 * UBI constantly increases the logical eraseblock version
		 * number and it can overflow. Thus, we have to bear in mind
		 * that versions that are close to %0xFFFFFFFF are less than
		 * versions that are close to %0.
		 *
		 * The UBI WL unit guarantees that the number of pending tasks
		 * is not greater than %0x7FFFFFFF. So, if the difference
		 * between any two versions is greater than or equal to
		 * %0x7FFFFFFF, there was an overflow and the logical
		 * eraseblock with the lower version is actually newer than
		 * the one with the higher version.
		 *
		 * FIXME: but this is anyway obsolete and will be removed at
		 * some point.
		 */
		dbg_bld("using old crappy leb_ver stuff");

		if (v1 == v2) {
			ubi_err("PEB %d and PEB %d have the same version %lld",
				seb->pnum, pnum, v1);
			return -EINVAL;
		}

		abs = v1 - v2;
		if (abs < 0)
			abs = -abs;

		if (abs < 0x7FFFFFFF)
			/* Non-overflow situation */
			second_is_newer = (v2 > v1);
		else
			second_is_newer = (v2 < v1);
	} else
		/* Obviously the LEB with lower sequence counter is older */
		second_is_newer = sqnum2 > seb->sqnum;

	/*
	 * Now we know which copy is newer. If the copy flag of the PEB with
	 * newer version is not set, then we just return, otherwise we have to
	 * check data CRC. For the second PEB we already have the VID header,
	 * for the first one - we'll need to re-read it from flash.
	 *
	 * FIXME: this may be optimized so that we wouldn't read twice.
	 */

	if (second_is_newer) {
		if (!vid_hdr->copy_flag) {
			/* It is not a copy, so it is newer */
			dbg_bld("second PEB %d is newer, copy_flag is unset",
				pnum);
			return 1;
		}
	} else {
		pnum = seb->pnum;

		vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
		if (!vh)
			return -ENOMEM;

		err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0);
		if (err) {
			if (err == UBI_IO_BITFLIPS)
				bitflips = 1;
			else {
				dbg_err("VID of PEB %d header is bad, but it "
					"was OK earlier", pnum);
				if (err > 0)
					err = -EIO;

				goto out_free_vidh;
			}
		}

		if (!vh->copy_flag) {
			/* It is not a copy, so it is newer */
			dbg_bld("first PEB %d is newer, copy_flag is unset",
				pnum);
			err = bitflips << 1;
			goto out_free_vidh;
		}

		vid_hdr = vh;
	}

	/* Read the data of the copy and check the CRC */

	len = be32_to_cpu(vid_hdr->data_size);
	buf = vmalloc(len);
	if (!buf) {
		err = -ENOMEM;
		goto out_free_vidh;
	}

	err = ubi_io_read_data(ubi, buf, pnum, 0, len);
	if (err && err != UBI_IO_BITFLIPS)
		goto out_free_buf;

	data_crc = be32_to_cpu(vid_hdr->data_crc);
	crc = crc32(UBI_CRC32_INIT, buf, len);
	if (crc != data_crc) {
		dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x",
			pnum, crc, data_crc);
		corrupted = 1;
		bitflips = 0;
		second_is_newer = !second_is_newer;
	} else {
		dbg_bld("PEB %d CRC is OK", pnum);
		bitflips = !!err;
	}

	vfree(buf);
	ubi_free_vid_hdr(ubi, vh);

	if (second_is_newer)
		dbg_bld("second PEB %d is newer, copy_flag is set", pnum);
	else
		dbg_bld("first PEB %d is newer, copy_flag is set", pnum);

	return second_is_newer | (bitflips << 1) | (corrupted << 2);

out_free_buf:
	vfree(buf);
out_free_vidh:
	ubi_free_vid_hdr(ubi, vh);
	return err;
}
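
A worked sketch of the overflow-aware leb_ver comparison described in the comment above: since any two live versions differ by less than 0x7FFFFFFF, a larger apparent gap means the 32-bit counter wrapped and the numerically smaller version is in fact newer. The function name is illustrative.

#include <stdint.h>
#include <stdio.h>

static int second_is_newer(uint32_t v1, uint32_t v2)
{
	int64_t diff = (int64_t)v1 - (int64_t)v2;

	if (diff < 0)
		diff = -diff;
	if (diff < 0x7FFFFFFF)
		return v2 > v1;		/* no wraparound */
	return v2 < v1;			/* wrapped: smaller value is newer */
}

int main(void)
{
	printf("%d\n", second_is_newer(10, 11));		/* 1: plain case */
	printf("%d\n", second_is_newer(0xFFFFFFF0u, 5));	/* 1: 5 is post-wrap */
	return 0;
}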
Example #28
/*
 * Whenever new GUID is set/unset (guid table change) create event and
 * notify the relevant slave (master also should be notified).
 * If the GUID value is not as we have in the cache the slave will not be
 * updated; in this case it waits for the smp_snoop or the port management
 * event to call the function and to update the slave.
 * block_number - the index of the block (16 blocks available)
 * port_number - 1 or 2
 */
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
					  int block_num, u8 port_num,
					  u8 *p_data)
{
	int i;
	u64 guid_indexes;
	int slave_id;
	enum slave_port_state new_state;
	enum slave_port_state prev_state;
	__be64 tmp_cur_ag, form_cache_ag;
	enum slave_port_gen_event gen_event;

	if (!mlx4_is_master(dev->dev))
		return;

	guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
				   ports_guid[port_num - 1].
				   all_rec_per_port[block_num].guid_indexes);
	pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);

	/*calculate the slaves and notify them*/
	for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
		/* the location of the specific index runs from bits 4..11 */
		if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
			continue;

		slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
		if (slave_id >= dev->dev->num_slaves)
			return;
		tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
		form_cache_ag = get_cached_alias_guid(dev, port_num,
					(NUM_ALIAS_GUID_IN_REC * block_num) + i);
		/*
		 * Check if guid is not the same as in the cache,
		 * If it is different, wait for the snoop_smp or the port mgmt
		 * change event to update the slave on its port state change
		 */
		if (tmp_cur_ag != form_cache_ag)
			continue;
		mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);

		/*2 cases: Valid GUID, and Invalid Guid*/

		if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
			prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
			new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
								  MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
								  &gen_event);
			pr_debug("slave: %d, port: %d prev_port_state: %d,"
				 " new_port_state: %d, gen_event: %d\n",
				 slave_id, port_num, prev_state, new_state, gen_event);
			if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
				pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
					 slave_id, port_num);
				mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
							       port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
			}
		} else { /* request to invalidate GUID */
			set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
						      MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
						      &gen_event);
			pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
				 slave_id, port_num);
			mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
						       MLX4_PORT_CHANGE_SUBTYPE_DOWN);
		}
	}
}
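
Per the comment in the loop above, record i's flag sits at bit (i + 4) of guid_indexes, and the slave id is the record index offset by the block number. A hedged sketch of that decode, assuming NUM_ALIAS_GUID_IN_REC is 8 (matching the bits 4..11 window); names are illustrative.

#include <stdint.h>
#include <stdio.h>

#define RECS_PER_BLOCK 8	/* assumption: NUM_ALIAS_GUID_IN_REC */

static int record_to_slave(uint64_t guid_indexes, int block_num, int i)
{
	if (!(guid_indexes & (1ULL << (i + 4))))
		return -1;	/* this GUID index was not touched */
	return block_num * RECS_PER_BLOCK + i;
}

int main(void)
{
	uint64_t idx = 1ULL << 6;	/* record 2 of some block is set */

	printf("block 3, rec 2 -> slave %d\n", record_to_slave(idx, 3, 2));
	printf("block 3, rec 0 -> slave %d\n", record_to_slave(idx, 3, 0));
	return 0;
}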
Example #29
/**
 *	t4vf_wr_mbox_core - send a command to FW through the mailbox
 *	@adapter: the adapter
 *	@cmd: the command to write
 *	@size: command length in bytes
 *	@rpl: where to optionally store the reply
 *	@sleep_ok: if true we may sleep while awaiting command completion
 *
 *	Sends the given command to FW through the mailbox and waits for the
 *	FW to execute the command.  If @rpl is not %NULL it is used to store
 *	the FW's reply to the command.  The command and its optional reply
 *	are of the same length.  FW can take up to 500 ms to respond.
 *	@sleep_ok determines whether we may sleep while awaiting the response.
 *	If sleeping is allowed we use progressive backoff otherwise we spin.
 *
 *	The return value is 0 on success or a negative errno on failure.  A
 *	failure can happen either because we are not able to execute the
 *	command or FW executes it but signals an error.  In the latter case
 *	the return value is the error code indicated by FW (negated).
 */
int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
		      void *rpl, bool sleep_ok)
{
	static const int delay[] = {
		1, 1, 3, 5, 10, 10, 20, 50, 100
	};

	u32 v;
	int i, ms, delay_idx;
	const __be64 *p;
	u32 mbox_data = T4VF_MBDATA_BASE_ADDR;
	u32 mbox_ctl = T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL;

	/*
	 * Commands must be multiples of 16 bytes in length and may not be
	 * larger than the size of the Mailbox Data register array.
	 */
	if ((size % 16) != 0 ||
	    size > NUM_CIM_VF_MAILBOX_DATA_INSTANCES * 4)
		return -EINVAL;

	/*
	 * Loop trying to get ownership of the mailbox.  Return an error
	 * if we can't gain ownership.
	 */
	v = MBOWNER_GET(t4_read_reg(adapter, mbox_ctl));
	for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++)
		v = MBOWNER_GET(t4_read_reg(adapter, mbox_ctl));
	if (v != MBOX_OWNER_DRV)
		return v == MBOX_OWNER_FW ? -EBUSY : -ETIMEDOUT;

	/*
	 * Write the command array into the Mailbox Data register array and
	 * transfer ownership of the mailbox to the firmware.
	 *
	 * For the VFs, the Mailbox Data "registers" are actually backed by
	 * T4's "MA" interface rather than PL Registers (as is the case for
	 * the PFs).  Because these are in different coherency domains, the
	 * write to the VF's PL-register-backed Mailbox Control can race in
	 * front of the writes to the MA-backed VF Mailbox Data "registers".
	 * So we need to do a read-back on at least one byte of the VF Mailbox
	 * Data registers before doing the write to the VF Mailbox Control
	 * register.
	 */
	for (i = 0, p = cmd; i < size; i += 8)
		t4_write_reg64(adapter, mbox_data + i, be64_to_cpu(*p++));
	t4_read_reg(adapter, mbox_data);         /* flush write */

	t4_write_reg(adapter, mbox_ctl,
		     MBMSGVALID | MBOWNER(MBOX_OWNER_FW));
	t4_read_reg(adapter, mbox_ctl);          /* flush write */

	/*
	 * Spin waiting for firmware to acknowledge processing our command.
	 */
	delay_idx = 0;
	ms = delay[0];

	for (i = 0; i < FW_CMD_MAX_TIMEOUT; i += ms) {
		if (sleep_ok) {
			ms = delay[delay_idx];
			if (delay_idx < ARRAY_SIZE(delay) - 1)
				delay_idx++;
			msleep(ms);
		} else
			mdelay(ms);

		/*
		 * If we're the owner, see if this is the reply we wanted.
		 */
		v = t4_read_reg(adapter, mbox_ctl);
		if (MBOWNER_GET(v) == MBOX_OWNER_DRV) {
			/*
			 * If the Message Valid bit isn't on, revoke ownership
			 * of the mailbox and continue waiting for our reply.
			 */
			if ((v & MBMSGVALID) == 0) {
				t4_write_reg(adapter, mbox_ctl,
					     MBOWNER(MBOX_OWNER_NONE));
				continue;
			}

			/*
			 * We now have our reply.  Extract the command return
			 * value, copy the reply back to our caller's buffer
			 * (if specified) and revoke ownership of the mailbox.
			 * We return the (negated) firmware command return
			 * code (this depends on FW_SUCCESS == 0).
			 */

			/* return value in low-order little-endian word */
			v = t4_read_reg(adapter, mbox_data);
			if (FW_CMD_RETVAL_GET(v))
				dump_mbox(adapter, "FW Error", mbox_data);

			if (rpl) {
				/* request bit in high-order BE word */
				WARN_ON((be32_to_cpu(*(const u32 *)cmd)
					 & FW_CMD_REQUEST) == 0);
				get_mbox_rpl(adapter, rpl, size, mbox_data);
				WARN_ON((be32_to_cpu(*(u32 *)rpl)
					 & FW_CMD_REQUEST) != 0);
			}
			t4_write_reg(adapter, mbox_ctl,
				     MBOWNER(MBOX_OWNER_NONE));
			return -FW_CMD_RETVAL_GET(v);
		}
	}

	/*
	 * We timed out.  Return the error ...
	 */
	dump_mbox(adapter, "FW Timeout", mbox_data);
	return -ETIMEDOUT;
}
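
The wait loop above uses a progressive backoff table clamped at its last entry, with a hard overall timeout. A stand-alone sketch of the same polling pattern; the predicate and timings are illustrative, not the cxgb4vf API.

#include <stdio.h>
#include <unistd.h>

static const int delay_ms[] = { 1, 1, 3, 5, 10, 10, 20, 50, 100 };

/* Poll with progressive backoff: walk the delay table, clamp at the
 * last entry, give up after timeout_ms in total. */
static int poll_with_backoff(int (*done)(void *), void *arg, int timeout_ms)
{
	unsigned int idx = 0;
	int waited = 0;

	while (waited < timeout_ms) {
		int ms = delay_ms[idx];

		if (idx < sizeof(delay_ms) / sizeof(delay_ms[0]) - 1)
			idx++;
		usleep((useconds_t)ms * 1000);
		waited += ms;
		if (done(arg))
			return 0;
	}
	return -1;	/* timed out */
}

static int ready_after_4_polls(void *arg)
{
	int *polls = arg;	/* toy readiness predicate for the demo */
	return ++*polls >= 4;
}

int main(void)
{
	int polls = 0;

	printf("rc=%d\n", poll_with_backoff(ready_after_4_polls, &polls, 500));
	return 0;
}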
Example #30
/*
 * Called by xfs_trans_commit() and similar in spirit to
 * xfs_trans_apply_sb_deltas().
 * Go thru all the dquots belonging to this transaction and modify the
 * INCORE dquot to reflect the actual usages.
 * Unreserve just the reservations done by this transaction.
 * dquot is still left locked at exit.
 */
void
xfs_trans_apply_dquot_deltas(
	xfs_trans_t		*tp)
{
	int			i, j;
	xfs_dquot_t		*dqp;
	xfs_dqtrx_t		*qtrx, *qa;
	xfs_disk_dquot_t	*d;
	long			totalbdelta;
	long			totalrtbdelta;

	if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
		return;

	ASSERT(tp->t_dqinfo);
	qa = tp->t_dqinfo->dqa_usrdquots;
	for (j = 0; j < 2; j++) {
		if (qa[0].qt_dquot == NULL) {
			qa = tp->t_dqinfo->dqa_grpdquots;
			continue;
		}

		/*
		 * Lock all of the dquots and join them to the transaction.
		 */
		xfs_trans_dqlockedjoin(tp, qa);

		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
			qtrx = &qa[i];
			/*
			 * The array of dquots is filled
			 * sequentially, not sparsely.
			 */
			if ((dqp = qtrx->qt_dquot) == NULL)
				break;

			ASSERT(XFS_DQ_IS_LOCKED(dqp));
			ASSERT(dqp->q_transp == tp);

			/*
			 * adjust the actual number of blocks used
			 */
			d = &dqp->q_core;

			/*
			 * The issue here is that sometimes we intentionally
			 * skip making a blkquota reservation to be fair to
			 * users (when the amount is small). On the other hand,
			 * delayed allocs do make reservations, but that's
			 * outside of a transaction, so we have no idea how
			 * much was really reserved.
			 * So, here we've accumulated delayed allocation blks
			 * and non-delay blks. The assumption is that the
			 * delayed ones are always reserved (outside of a
			 * transaction), and the others may or may not have
			 * quota reservations.
			 */
			totalbdelta = qtrx->qt_bcount_delta +
				qtrx->qt_delbcnt_delta;
			totalrtbdelta = qtrx->qt_rtbcount_delta +
				qtrx->qt_delrtb_delta;
#ifdef DEBUG
			if (totalbdelta < 0)
				ASSERT(be64_to_cpu(d->d_bcount) >=
				       -totalbdelta);

			if (totalrtbdelta < 0)
				ASSERT(be64_to_cpu(d->d_rtbcount) >=
				       -totalrtbdelta);

			if (qtrx->qt_icount_delta < 0)
				ASSERT(be64_to_cpu(d->d_icount) >=
				       -qtrx->qt_icount_delta);
#endif
			if (totalbdelta)
				be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);

			if (qtrx->qt_icount_delta)
				be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);

			if (totalrtbdelta)
				be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);

			/*
			 * Get any default limits in use.
			 * Start/reset the timer(s) if needed.
			 */
			if (d->d_id) {
				xfs_qm_adjust_dqlimits(tp->t_mountp, d);
				xfs_qm_adjust_dqtimers(tp->t_mountp, d);
			}

			dqp->dq_flags |= XFS_DQ_DIRTY;
			/*
			 * add this to the list of items to get logged
			 */
			xfs_trans_log_dquot(tp, dqp);
			/*
			 * Take off what's left of the original reservation.
			 * In case of delayed allocations, there's no
			 * reservation that a transaction structure knows of.
			 */
			if (qtrx->qt_blk_res != 0) {
				if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
					if (qtrx->qt_blk_res >
					    qtrx->qt_blk_res_used)
						dqp->q_res_bcount -= (xfs_qcnt_t)
							(qtrx->qt_blk_res -
							 qtrx->qt_blk_res_used);
					else
						dqp->q_res_bcount -= (xfs_qcnt_t)
							(qtrx->qt_blk_res_used -
							 qtrx->qt_blk_res);
				}
			} else {
				/*
				 * These blks were never reserved, either inside
				 * a transaction or outside one (in a delayed
				 * allocation). Also, this isn't always a
				 * negative number since we sometimes
				 * deliberately skip quota reservations.
				 */
				if (qtrx->qt_bcount_delta) {
					dqp->q_res_bcount +=
					      (xfs_qcnt_t)qtrx->qt_bcount_delta;
				}
			}
			/*
			 * Adjust the RT reservation.
			 */
			if (qtrx->qt_rtblk_res != 0) {
				if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
					if (qtrx->qt_rtblk_res >
					    qtrx->qt_rtblk_res_used)
					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
						       (qtrx->qt_rtblk_res -
							qtrx->qt_rtblk_res_used);
					else
					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
						       (qtrx->qt_rtblk_res_used -
							qtrx->qt_rtblk_res);
				}
			} else {
				if (qtrx->qt_rtbcount_delta)
					dqp->q_res_rtbcount +=
					    (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
			}

			/*
			 * Adjust the inode reservation.
			 */
			if (qtrx->qt_ino_res != 0) {
				ASSERT(qtrx->qt_ino_res >=
				       qtrx->qt_ino_res_used);
				if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
					dqp->q_res_icount -= (xfs_qcnt_t)
						(qtrx->qt_ino_res -
						 qtrx->qt_ino_res_used);
			} else {
				if (qtrx->qt_icount_delta)
					dqp->q_res_icount +=
					    (xfs_qcnt_t)qtrx->qt_icount_delta;
			}

			ASSERT(dqp->q_res_bcount >=
				be64_to_cpu(dqp->q_core.d_bcount));
			ASSERT(dqp->q_res_icount >=
				be64_to_cpu(dqp->q_core.d_icount));
			ASSERT(dqp->q_res_rtbcount >=
				be64_to_cpu(dqp->q_core.d_rtbcount));
		}
		/*
		 * Do the group quotas next
		 */
		qa = tp->t_dqinfo->dqa_grpdquots;
	}
}
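
The unreserve logic above returns only the part of a transaction's reservation that went unused, adjusting in the other direction when more was consumed than reserved. A minimal sketch of that arithmetic, with stand-in names for the xfs_dqtrx_t counters:

#include <assert.h>
#include <stdint.h>

/* Give back the unused part of a transaction's block reservation,
 * mirroring the symmetric adjustment in the code above. */
static void unreserve_blocks(uint64_t *res_bcount, uint64_t blk_res,
			     uint64_t blk_res_used)
{
	if (blk_res > blk_res_used)
		*res_bcount -= blk_res - blk_res_used;
	else if (blk_res < blk_res_used)
		*res_bcount -= blk_res_used - blk_res;
}

int main(void)
{
	uint64_t res_bcount = 100;

	/* reserved 10, used 6: the 4 surplus blocks come back */
	unreserve_blocks(&res_bcount, 10, 6);
	assert(res_bcount == 96);
	return 0;
}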