Esempio n. 1
0
/* Following functions- ikgt_malloc() and ikgt_free() have been extended to
* use mon_page_alloc() and mon_page_free() as a temporary solution for
* allocation of more than 2040 bytes using page alignment of buffer for
* differentiating between mon_malloc allocation() and mon_page_alloc() */
API uint64_t *ikgt_malloc(IN uint32_t size)
{
	uint64_t *buf = NULL;
	uint32_t num_pages = 0;

	if (0 == size) {
		MON_LOG(mask_plugin, level_trace, "%s: size must be greater than 0.\n",
			__FUNCTION__);
		return NULL;
	}

	/* Internal function requirement. Max block is 2048 bytes,
	* including 8 byte mem_allocation_info_t structure. */
	if (size <= IKGT_MALLOC_MAX_SIZE) {
		buf = mon_malloc(size);
	} else {
		/* If size is more than 2040 bytes, then find number of pages
		* needed and use mon_page_alloc */
		MON_LOG(mask_plugin, level_info, "%s: Using mon_page_alloc\n",
			__FUNCTION__);
		num_pages = PAGE_ROUNDUP(size);
		buf = mon_page_alloc(num_pages);
	}

	return buf;
}
Esempio n. 2
0
int itb_gc_append(int gen, struct itb *itb, struct itb_info *ii)
{
    int err = 0;

    if (unlikely(hmo.conf.option & HVFS_MDSL_WDROP))
        return 0;

    if (ii) {
        struct fdhash_entry *fde;
        struct iovec itb_iov = {
            .iov_base = itb,
            .iov_len = atomic_read(&itb->h.len),
        };
        struct mdsl_storage_access msa = {
            .iov = &itb_iov,
            .arg = ii,
            .iov_nr = 1,
        };

        /* prepare write to the file: "[target dir]/itb-*" */
        fde = mdsl_storage_fd_lookup_create(itb->h.puuid, MDSL_STORAGE_ITB,
                                            gen);
        if (IS_ERR(fde)) {
            hvfs_err(mdsl, "lookup create failed w/ %ld\n", PTR_ERR(fde));
            goto out;
        }
        err = mdsl_storage_fd_write(fde, &msa);
        if (err) {
            hvfs_err(mdsl, "storage_fd_write failed w/ %d\n", err);
            mdsl_storage_fd_put(fde);
            goto out;
        }
        if (unlikely(ii->location == 0)) {
            hvfs_err(mdsl, "Write ITB %ld[%lx] len %d to storage file off "
                   "%ld fde ST %x.\n",
                   itb->h.itbid, itb->h.puuid, 
                   atomic_read(&itb->h.len), ii->location, 
                   fde->state);
            HVFS_BUGON("zero location!");
        }
        mdsl_storage_fd_put(fde);
        /* accumulate to hmi */
        atomic64_add(itb_iov.iov_len, &hmi.mi_bused);
    }

out:
    return err;
}

/* do GC-TX on itb file change
 *
 * write the tx_begin in hmo.storage.gc_fd
 * write the tx_end in hmo.storage.gc_fd
 *
 * Rational:
 * 1. for md file, we copy the md-1 file to md-0 (replace the ranges);
 * 2. for range files, we rename Grange-* to range-*;
 * 3. for itb file, we adjust mdisk.master to the new itb file;
 */
int mdsl_gc_tx_itb(u64 duuid, int gen, struct fdhash_entry *omd)
{
    struct fdhash_entry *fde;
    int err = 0, i, j, abort_flag;

    hvfs_info(mdsl, "BEGIN GC-MD transaction on directoy %ld gen %d\n",
              duuid, gen);
    fde = mdsl_storage_fd_lookup_create(duuid, MDSL_STORAGE_MD, gen);
    if (IS_ERR(fde)) {
        hvfs_err(mdsl, "lookup create gc MD file failed w/ %ld\n",
                 PTR_ERR(fde));
        err = PTR_ERR(fde);
        goto out;
    }
    
    /* Save the old and new md file */
    err = __mdisk_write(omd, NULL);
    if (err) {
        hvfs_err(mdsl, "sync md file failed w/ %d\n", err);
        /* if old md file is failed to be saved, we abort transaction */
        goto out;
    }
    err = __mdisk_write(fde, NULL);
    if (err) {
        hvfs_err(mdsl, "sync gc md file failed w/ %d\n", err);
    }
    
    /* we know the omd file has already been LOCKED, thus we just copy the new
     * md file's range to old md file */
    xfree(omd->mdisk.ranges);
    xfree(omd->mdisk.new_range);
    omd->mdisk.range_nr[0] = fde->mdisk.range_nr[0];
    omd->mdisk.size = fde->mdisk.size;
    omd->mdisk.new_size = fde->mdisk.new_size;
    omd->mdisk.new_range = fde->mdisk.new_range;
    omd->mdisk.ranges = fde->mdisk.ranges;
    omd->mdisk.range_aid = fde->mdisk.range_aid;

    /* for range files, we do rename */
    if (omd->mdisk.ranges) {
        for (i = 0; i < fde->mdisk.size; i++) {
            char opath[256], npath[256];

            abort_flag = 0;
            sprintf(opath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME, 
                    hmo.site_id, duuid, (fde->mdisk.ranges + i)->range_id);
            sprintf(npath, "%s/%lx/%lx/Brange-%ld", HVFS_MDSL_HOME,
                    hmo.site_id, duuid, (fde->mdisk.ranges + i)->range_id);
            err = rename(opath, npath);
            if (err) {
                if (errno == ENOENT) {
                    /* I think it is ok that there is a new range created in
                     * the new metadata file, thus, we just ignore this error */
                    hvfs_warning(mdsl, "Rename '%s' but it doesn't exist!\n",
                                 opath);
                } else {
                    hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d\n",
                             opath, npath, errno);
                    err = -errno;
                    goto rollback_ranges;
                }
            }
            abort_flag = 1;
            
            sprintf(opath, "%s/%lx/%lx/Grange-%ld", HVFS_MDSL_HOME, 
                    hmo.site_id, duuid, (fde->mdisk.ranges + i)->range_id);
            sprintf(npath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME,
                    hmo.site_id, duuid, (fde->mdisk.ranges + i)->range_id);
            err = rename(opath, npath);
            if (err) {
                hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d\n",
                         opath, npath, errno);
                err = -errno;
                goto rollback_ranges;
            }
        }
    }
    if (omd->mdisk.new_range) {
        for (i = 0; i < fde->mdisk.new_size; i++) {
            char opath[256], npath[256];

            abort_flag = 0;
            sprintf(opath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME, 
                    hmo.site_id, duuid, (fde->mdisk.new_range + i)->range_id);
            sprintf(npath, "%s/%lx/%lx/Brange-%ld", HVFS_MDSL_HOME,
                    hmo.site_id, duuid, (fde->mdisk.new_range + i)->range_id);
            err = rename(opath, npath);
            if (err) {
                hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d\n",
                         opath, npath, errno);
                err = -errno;
                goto rollback_new_range;
            }
            abort_flag = 1;
            
            sprintf(opath, "%s/%lx/%lx/Grange-%ld", HVFS_MDSL_HOME, 
                    hmo.site_id, duuid, (fde->mdisk.new_range + i)->range_id);
            sprintf(npath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME,
                    hmo.site_id, duuid, (fde->mdisk.new_range + i)->range_id);
            err = rename(opath, npath);
            if (err) {
                hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d\n",
                         opath, npath, errno);
                err = -errno;
                goto rollback_new_range;
            }
        }
    }
    /* final step, atomic change to the new itb file and do cleanups */
    omd->mdisk.itb_master++;
    /* FIXME: cleanup range files(Brange-* and old itb file) */
    err = __mdisk_write(omd, NULL);
    if (err) {
        hvfs_err(mdsl, "sync md file failed w/ %d\n", err);

        i = fde->mdisk.new_size;
        abort_flag = 0;
        goto rollback_new_range;
    }
    /* remove the new md file */
    mdsl_storage_fd_remove(fde);
    close(fde->fd);
    {
        char path[256];

        sprintf(path, "%s/%lx/%lx/md-%d", HVFS_MDSL_HOME,
                hmo.site_id, duuid, gen);
        err = unlink(path);
        if (err) {
            hvfs_err(mdsl, "Unlink '%s' failed w/ %d, need human involving\n", 
                     path, err);
        }
    }
    /* remove the Brange files and old itb file */
    {
        char path[256];

        sprintf(path, "%s/%lx/%lx/itb-%d", HVFS_MDSL_HOME,
                hmo.site_id, duuid, gen - 1);
        err = unlink(path);
        if (err) {
            hvfs_err(mdsl, "Unlink '%s' failed w/ %d, need human involving\n",
                     path, err);
        }
    }
    {
        if (omd->mdisk.ranges) {
            for (i = 0; i < omd->mdisk.size; i++) {
                char path[256];

                sprintf(path, "%s/%lx/%lx/Brange-%ld", HVFS_MDSL_HOME,
                        hmo.site_id, duuid, (omd->mdisk.ranges + i)->range_id);
                err = unlink(path);
                if (err) {
                    hvfs_err(mdsl, "Unlink '%s' failed w/ %d, need human "
                             "involving\n",
                             path, err);
                }
            }
        }
        if (omd->mdisk.new_range) {
            for (i = 0; i < omd->mdisk.new_size; i++) {
                char path[256];

                sprintf(path, "%s/%lx/%lx/Brange-%ld", HVFS_MDSL_HOME,
                        hmo.site_id, duuid, 
                        (omd->mdisk.new_range + i)->range_id);
                err = unlink(path);
                if (err) {
                    hvfs_err(mdsl, "Unlink '%s' failed w/ %d, need human "
                             "involving\n",
                             path, err);
                }
            }
        }
    }

    hvfs_info(mdsl, "END GC-MD transaction on directory %ld gen %d\n",
              duuid ,gen);
    goto out;
    
out_put:
    mdsl_storage_fd_put(fde);
out:
    return err;
rollback_new_range:
    for (j = 0; j < i; j++) {
        char opath[256], npath[256];
        
        sprintf(opath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME, 
                hmo.site_id, duuid, (fde->mdisk.new_range + j)->range_id);
        sprintf(npath, "%s/%lx/%lx/Grange-%ld", HVFS_MDSL_HOME,
                hmo.site_id, duuid, (fde->mdisk.new_range + j)->range_id);
        err = rename(opath, npath);
        if (err) {
            hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d, request "
                     "for human involving\n",
                     opath, npath, errno);
        }

        sprintf(opath, "%s/%lx/%lx/Brange-%ld", HVFS_MDSL_HOME, 
                hmo.site_id, duuid, (fde->mdisk.new_range + j)->range_id);
        sprintf(npath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME,
                hmo.site_id, duuid, (fde->mdisk.new_range + j)->range_id);
        err = rename(opath, npath);
        if (err) {
            hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d, request "
                     "for human involving\n",
                     opath, npath, errno);
        }
    }
    if (abort_flag) {
        char opath[256], npath[256];
        
        sprintf(opath, "%s/%lx/%lx/Brange-%ld", HVFS_MDSL_HOME, 
                hmo.site_id, duuid, (fde->mdisk.new_range + i)->range_id);
        sprintf(npath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME,
                hmo.site_id, duuid, (fde->mdisk.new_range + i)->range_id);
        err = rename(opath, npath);
        if (err) {
            hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d, request "
                     "for human involving\n",
                     opath, npath, errno);
        }
    }
    /* reset flag and size for rollback of ranges! */
    abort_flag = 0;
    i = fde->mdisk.size;
rollback_ranges:
    for (j = 0; j < i; j++) {
        char opath[256], npath[256];
        
        sprintf(opath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME, 
                hmo.site_id, duuid, (fde->mdisk.ranges + j)->range_id);
        sprintf(npath, "%s/%lx/%lx/Grange-%ld", HVFS_MDSL_HOME,
                hmo.site_id, duuid, (fde->mdisk.ranges + j)->range_id);
        err = rename(opath, npath);
        if (err) {
            hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d, request "
                     "for human involving\n",
                     opath, npath, errno);
        }

        sprintf(opath, "%s/%lx/%lx/Brange-%ld", HVFS_MDSL_HOME, 
                hmo.site_id, duuid, (fde->mdisk.ranges + j)->range_id);
        sprintf(npath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME,
                hmo.site_id, duuid, (fde->mdisk.ranges + j)->range_id);
        err = rename(opath, npath);
        if (err) {
            hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d, request "
                     "for human involving\n",
                     opath, npath, errno);
        }
    }
    if (abort_flag) {
        char opath[256], npath[256];
        
        sprintf(opath, "%s/%lx/%lx/Brange-%ld", HVFS_MDSL_HOME, 
                hmo.site_id, duuid, (fde->mdisk.ranges + i)->range_id);
        sprintf(npath, "%s/%lx/%lx/range-%ld", HVFS_MDSL_HOME,
                hmo.site_id, duuid, (fde->mdisk.ranges + i)->range_id);
        err = rename(opath, npath);
        if (err) {
            hvfs_err(mdsl, "Rename '%s' to '%s' failed w/ %d, request "
                     "for human involving\n",
                     opath, npath, errno);
        }
    }
    /* rollback mdisk */
    omd->state = FDE_OPEN;
    err = mdsl_storage_fd_mdisk(omd, NULL);
    if (err) {
        hvfs_err(mdsl, "Reload mdisk from disk failed w/ %d. CRASH is "
                 "nearing\n", err);
    }
    err = -EABORT;
    hvfs_info(mdsl, "ABORT GC-MD transaction on directory %ld gen %d\n",
              duuid, gen);
    goto out_put;
}

/* GC one round of the metadata
 *
 * Return Value: >=0: # of ITBs we handled; <0: error
 */
int mdsl_gc_md_round(u64 duuid, int master, struct fdhash_entry *md, 
                     struct fdhash_entry *fde, u64 *gc_offset)
{
    struct itb_info ii = {
        .duuid = duuid,
    };
    struct itb *itb;
    struct fdhash_entry *nmd;
    range_t *range;
    struct iovec itb_iov;
    struct mdsl_storage_access msa = {
        .iov = &itb_iov,
        .iov_nr = 1,
    };
    struct mmap_args ma;
    u64 offset = *gc_offset, location = 0;
    int err = 0, data_len, compacted = 0, handled = 0, missed = 0;

    if (offset == -1UL) {
        return -EINVAL;
    }
    if (!offset) {
        /* adjust offset to 1! */
        offset = 1;
    }

    if (!md->mdisk.ranges && !md->mdisk.new_range) {
        return -ENOENT;
    }

    itb = xmalloc(sizeof(struct itb) + ITB_SIZE * sizeof(struct ite));
    if (!itb) {
        hvfs_err(mdsl, "xzalloc() itb failed.\n");
        return -ENOMEM;
    }
    
    while (1) {
        /* prepare itb */
        memset(itb, 0, sizeof(struct itb) + ITB_SIZE * sizeof(struct ite));
        /* read the header */
        msa.offset = offset;
        itb_iov.iov_base = itb;
        itb_iov.iov_len = sizeof(itb->h);
        if (offset >= fde->abuf.file_offset + fde->abuf.offset) {
            break;
        }
        err = mdsl_storage_fd_read(fde, &msa);
        if (err) {
            hvfs_err(mdsl, "fd read failed w/ %d\n", err);
            goto out_free;
        }
        if (!atomic_read(&itb->h.len)) {
            /* we should seek to next active page! */
            u64 last_offset;

        reskip:
            last_offset = offset;
            offset = PAGE_ROUNDUP(offset, getpagesize());
            if (offset == last_offset) {
                offset += 1;
                goto reskip;
            }
            continue;
        }

        handled++;
        /* compare this itb with the latest offset */
        ma.win = MDSL_STORAGE_DEFAULT_RANGE_SIZE;

        err = __mdisk_lookup(md, MDSL_MDISK_RANGE, itb->h.itbid, &range);
        if (err == -ENOENT) {
            /* it is ok */
            missed++;
            hvfs_warning(mdsl, "lookup itbid %ld in md file failed w/ %d\n",
                         itb->h.itbid, err);
            goto compact_it;
        } else if (err) {
            hvfs_err(mdsl, "lookup itbid %ld in old md file failed w/ %d\n",
                     itb->h.itbid, err);
            goto out_free;
        }
        ma.foffset = 0;
        ma.range_id = range->range_id;
        ma.range_begin = range->begin;
        ma.flag = MA_OFFICIAL;

        err = __range_lookup(duuid, itb->h.itbid, &ma, &location);
        if (err) {
            goto out_free;
        }
        /* if location is ZERO, it means that the racer has not been update
         * the range file, we just compact this entry to the new itb file to
         * not lose any itb. */
        if (!location) {
            missed++;
            hvfs_warning(mdsl, "lookup itbid %ld in range file "
                         "failed w/ ENOENT\n",
                         itb->h.itbid);
            goto compact_it;
        }

        if (offset == location) {
            compacted++;
        compact_it:
            /* read in the data region of itb */
            data_len = atomic_read(&itb->h.len) - sizeof(itb->h);
            if (data_len > 0) {
                msa.offset = offset + sizeof(itb->h);
                msa.iov->iov_base = &itb->lock;
                msa.iov->iov_len = data_len;
                err = mdsl_storage_fd_read(fde, &msa);
                if (err) {
                    hvfs_err(mdsl, "fd read failed w/ %d\n", err);
                    goto out_free;
                }
            } else {
                hvfs_err(mdsl, "data_len %d is minus, internal error!\n", 
                         data_len);
                err = -EFAULT;
                goto out_free;
            }
            /* ok, we should copy this itb entry to the new itb file */
            ii.itbid = itb->h.itbid;
            ASSERT(itb->h.puuid == duuid, mdsl);
            err = itb_gc_append(master + 1, itb, &ii);
            if (err) {
                hvfs_err(mdsl, "GC append itb %ld to new file failed w/ %d\n",
                         itb->h.itbid, err);
                goto out_free;
            }
            /* open the gc_md file and update the gc range region */
            nmd = mdsl_storage_fd_lookup_create(duuid, MDSL_STORAGE_MD, 
                                                master + 1);
            if (IS_ERR(nmd)) {
                hvfs_err(mdsl, "lookup create gc MD file failed w/ %ld\n",
                         PTR_ERR(nmd));
                err = PTR_ERR(nmd);
                goto out_free;
            }
            ma.win = MDSL_STORAGE_DEFAULT_RANGE_SIZE;
        relookup:
            xlock_lock(&nmd->lock);
            err = __mdisk_lookup_nolock(nmd, MDSL_MDISK_RANGE, 
                                        itb->h.itbid, &range);
            if (err == -ENOENT) {
                /* create a new range now */
                u64 i;

                i = MDSL_STORAGE_idx2range(itb->h.itbid);
                __mdisk_add_range_nolock(nmd, i * MDSL_STORAGE_RANGE_SLOTS,
                                         (i + 1) * MDSL_STORAGE_RANGE_SLOTS - 1,
                                         nmd->mdisk.range_aid++);
                __mdisk_range_sort(nmd->mdisk.new_range, nmd->mdisk.new_size);
                xlock_unlock(&nmd->lock);
                goto relookup;
            } else if (err) {
                hvfs_err(mdsl, "mdisk_lookup_nolock failed w/ %d\n", err);
                xlock_unlock(&nmd->lock);
                goto put_fde;
            }
            xlock_unlock(&nmd->lock);

            ma.foffset = 0;
            ma.range_id = range->range_id;
            ma.range_begin = range->begin;
            ma.flag = MA_GC;

            err = __range_write(duuid, itb->h.itbid, &ma, ii.location);
            if (err) {
                hvfs_err(mdsl, "range write failed w/ %d\n", err);
                goto put_fde;
            }
            err = __mdisk_write(nmd, NULL);
            if (err) {
                hvfs_err(mdsl, "sync gc md file failed w/ %d\n", err);
            }
            
        put_fde:
            mdsl_storage_fd_put(nmd);
            if (err) {
                goto out_free;
            }
        }
        /* ok, this itb is done. handle the next one */
        hvfs_warning(mdsl, "Process itb %ld len %d (%s) done. LOC %ld\n", 
                     itb->h.itbid, atomic_read(&itb->h.len), 
                     (offset == location ? "+" : "."),
                     ii.location);
        offset += atomic_read(&itb->h.len);
    }
    err = compacted;
    hvfs_info(mdsl, "This round handled %s%d%s, compacted %s%d%s "
              "and missed %s%d%s ITBs.\n",
              HVFS_COLOR_RED, handled, HVFS_COLOR_END,
              HVFS_COLOR_GREEN, compacted, HVFS_COLOR_END,
              HVFS_COLOR_YELLOW, missed, HVFS_COLOR_END);
    
out_free:
    xfree(itb);
    *gc_offset = offset;

    return err;
}

/* GC the medata of directory duuid
 */
int mdsl_gc_md(u64 duuid)
{
    struct fdhash_entry *fde, *itbf;
    u64 last_offset, gc_offset;
    int err = 0, round = 0, master;
    
    /* Step 0: prepare: open the md file */
    fde = mdsl_storage_fd_lookup_create(duuid, MDSL_STORAGE_MD, 0);
    if (IS_ERR(fde)) {
        hvfs_err(mdsl, "lookup create MD file failed w/ %ld\n", PTR_ERR(fde));
        err = PTR_ERR(fde);
        goto out;
    }

    /* Step 1: issue one round to scan the itb file */
    master = fde->mdisk.itb_master;
    itbf = mdsl_storage_fd_lookup_create(duuid, MDSL_STORAGE_ITB, master);
    if (IS_ERR(itbf)) {
        hvfs_err(mdsl, "lookup create ITB file failed w/ %ld\n", PTR_ERR(itbf));
        err = PTR_ERR(itbf);
        goto out_put;
    }
    
    gc_offset = fde->mdisk.gc_offset;
    last_offset = gc_offset;

redo_round:
    hvfs_warning(mdsl, "GC directory %lx in round %d\n", duuid, round++);
    err = mdsl_gc_md_round(duuid, master, fde, itbf, &gc_offset);
    if (err < 0) {
        hvfs_err(mdsl, "GC metadata in stage %d's round %d failed w/ %d\n",
                 master, round - 1, err);
        goto out_cleanup;
    } else if (err == 0) {
        /* nothing has been handled, we just stop gc */
        hvfs_warning(mdsl, "Not redundant ITB to be compacted!\n");
        goto out_cleanup;
    }
    
    /* Step 2: do check */
    last_offset = mdsl_storage_fd_max_offset(itbf);
    if (last_offset == -1UL) {
        hvfs_err(mdsl, "get the ITB file offset failed\n");
        err = -EINVAL;
        goto out_cleanup;
    }
    
    if (gc_offset < last_offset) {
        /* we need another gc round! */
        goto redo_round;
    }
    
    /* Step 3: lockup the fde and recheck */
    err = mdsl_storage_fd_lockup(fde);
    if (err) {
        hvfs_err(mdsl, "lockup the MD file failed w/ %d\n",
                 err);
        goto out_cleanup;
    }
    /* wait for the last reference of ITB file */
    while (atomic_read(&itbf->ref) > 1) {
        xsleep(100);
    }
    last_offset = mdsl_storage_fd_max_offset(itbf);
    if (last_offset == -1UL) {
        hvfs_err(mdsl, "get the ITB file offset failed\n");
        err = -EINVAL;
        goto out_unlock;
    }
    if (gc_offset < last_offset) {
        /* we need the final gc round! */
        hvfs_warning(mdsl, "GC directory %lx in round %d(final)\n", 
                     duuid, round++);
        err = mdsl_gc_md_round(duuid, master, fde, itbf, &gc_offset);
        if (err) {
            hvfs_err(mdsl, "GC metadata in stage %d's round %d failed w/ %d\n",
                     master, round, err);
            goto out_unlock;
        }
        ASSERT(gc_offset >= last_offset, mdsl);
    }
    /* ok, we should release the memory resouce of ITB file */
    append_buf_destroy_async(itbf); /* already close the fd */
    mdsl_storage_fd_remove(itbf);
    xfree(itbf);
    mdsl_storage_evict_rangef(duuid);
    
    /* Step 4: do GC-TX */
    err = mdsl_gc_tx_itb(duuid, master + 1, fde);
    if (err) {
        hvfs_err(mdsl, "mdsl_gc_tx_itb(%d) failed w/ %d\n",
                 master, err);
        goto out_release;
    }

    /* finally, release the MD's fde */
    fde->mdisk.gc_offset = 0;
    
out_release:
    mdsl_storage_fd_unlock(fde);
    
out_put:
    mdsl_storage_fd_put(fde);
    
out:
    return err;
out_unlock:
    mdsl_storage_fd_unlock(fde);
out_cleanup:
    /* cleanup the round result? do not do that, we remember the gc_offset */
    fde->mdisk.gc_offset = gc_offset;
    mdsl_storage_fd_put(itbf);
    goto out_put;
}