/**
 * scfs_file_release
 *
 * Description:
 * - Release the upper file: write back cluster metadata if it was opened
 *   for writing, then drop the lower file reference and free the file info.
 */
static int scfs_file_release(struct inode *inode, struct file *file)
{
	int ret;

	SCFS_PRINT("f:%s calling fput with lower_file\n",
			file->f_path.dentry->d_name.name);

	if (file->f_flags & (O_RDWR | O_WRONLY)) {
		CLEAR_WROPENED(SCFS_I(inode));
		ret = scfs_write_meta(file);
		if (ret)
			return ret;
	}

	fput(SCFS_F(file)->lower_file);
	kmem_cache_free(scfs_file_info_cache, SCFS_F(file));
	profile_sub_kmcached(sizeof(struct scfs_file_info), SCFS_S(inode->i_sb));

	return 0;
}
/**
 * scfs_readpages
 *
 * Parameters:
 * @file: upper file
 * @mapping: address_space struct for the file
 * @pages: list of pages to read in
 * @nr_pages: number of pages to read in
 *
 * Return:
 * SCFS_SUCCESS on success, an error code otherwise
 *
 * Description:
 * - Asynchronously read pages for readahead. A scaling number of background threads
 *   will read & decompress them in a slightly deferred but parallelized manner.
 */
static int
scfs_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct scfs_inode_info *sii = SCFS_I(file->f_mapping->host);
	struct scfs_sb_info *sbi = SCFS_S(file->f_mapping->host->i_sb);
	struct file *lower_file = NULL;
	struct page *page;
	struct scfs_cinfo cinfo;
	loff_t i_size;
	pgoff_t start, end;
	int page_idx, page_idx_readahead = 1024, ret = 0;
	int readahead_page = 0;
	int prev_cbi = 0;
	int prev_cluster = -1, cur_cluster = -1;
	int cluster_idx = 0;

	i_size = i_size_read(&sii->vfs_inode);
	if (!i_size) {
		SCFS_PRINT("file %s: i_size is zero, "
			"flags 0x%x sii->cinfo_array_size %d\n",
			file->f_path.dentry->d_name.name, sii->flags,
			sii->cinfo_array_size);
		return 0;
	}

#ifdef SCFS_ASYNC_READ_PROFILE
	atomic_add(nr_pages, &sbi->scfs_standby_readpage_count);
#endif

#ifdef SCFS_NOTIFY_RANDOM_READ
	lower_file = scfs_lower_file(file);
	if (!lower_file) {
		SCFS_PRINT_ERROR("file %s: lower file is null!\n",
		        file->f_path.dentry->d_name.name);
		return -EINVAL;
	}

	/* If the read request was random (enough), hint it to the lower file.
	 * scfs_sequential_page_number is the tunable threshold.
	 * filemap.c will later refer to this FMODE_RANDOM flag.
	 */
	spin_lock(&lower_file->f_lock);
	if (nr_pages > sbi->scfs_sequential_page_number)
		lower_file->f_mode &= ~FMODE_RANDOM;
	else
		lower_file->f_mode |= FMODE_RANDOM;
	spin_unlock(&lower_file->f_lock);
#endif
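	/* Determine the lower-file page range to read ahead: map the first
	 * requested page to its cluster and, for compressed files, translate
	 * the cluster to its on-disk offset via the cluster info.
	 */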
	lower_file = scfs_lower_file(file);
	page = list_entry(pages->prev, struct page, lru);
	cluster_idx = page->index / (sii->cluster_size / PAGE_SIZE);

	if (sii->compressed) {
		mutex_lock(&sii->cinfo_mutex);
		ret = get_cluster_info(file, cluster_idx, &cinfo);
		mutex_unlock(&sii->cinfo_mutex);
		if (ret) {
			SCFS_PRINT_ERROR("err in get_cluster_info, ret : %d,"
				"i_size %lld\n", ret, i_size);
			return ret;
		}

		if (!cinfo.size || cinfo.size > sii->cluster_size) {
			SCFS_PRINT_ERROR("file %s: cinfo is invalid, "
				"clust %u cinfo.size %u\n",
				file->f_path.dentry->d_name.name,
				cluster_idx, cinfo.size);
			return -EINVAL;
		}
		start = (pgoff_t)(cinfo.offset / PAGE_SIZE);
	} else {
		start = (pgoff_t)(cluster_idx * sii->cluster_size / PAGE_SIZE);
	}

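	/* Repeat the lookup for the cluster holding the last requested page,
	 * which gives the end of the lower-file range.
	 */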
	cluster_idx = (page->index + nr_pages - 1) / (sii->cluster_size / PAGE_SIZE);
	if (sii->compressed) {
		mutex_lock(&sii->cinfo_mutex);
		ret = get_cluster_info(file, cluster_idx, &cinfo);
		mutex_unlock(&sii->cinfo_mutex);
		if (ret) {
			SCFS_PRINT_ERROR("err in get_cluster_info, ret : %d,"
				"i_size %lld\n", ret, i_size);
			return ret;
		}

		if (!cinfo.size || cinfo.size > sii->cluster_size) {
			SCFS_PRINT_ERROR("file %s: cinfo is invalid, "
				"clust %u cinfo.size %u\n",
				file->f_path.dentry->d_name.name,
				cluster_idx, cinfo.size);
			return -EINVAL;
		}
		end = (pgoff_t)((cinfo.offset + cinfo.size - 1) / PAGE_SIZE);
	} else {
		end = (pgoff_t)(((cluster_idx + 1) * sii->cluster_size - 1) / PAGE_SIZE);

		/* Clamp to the upper inode size. Note that an out-of-range
		 * cluster on a compressed file is handled by returning an
		 * error instead; it is unclear which behavior is correct.
		 */
		if (end > (i_size / PAGE_SIZE))
			end = (i_size / PAGE_SIZE);
	}
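	/* Kick off readahead on the lower file over the computed range so the
	 * compressed data is (hopefully) resident by the time _scfs_readpage
	 * or the smb threads need it.
	 */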
	force_page_cache_readahead(lower_file->f_mapping, lower_file,
		start, (unsigned long)(end - start + 1));

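	/* For each upper page: add it to the page cache, then either read it
	 * synchronously here (queue full, or the page precedes the readahead
	 * marker) or hand it to the smb threads for asynchronous reading.
	 */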
	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		page = list_entry(pages->prev, struct page, lru);
		list_del(&page->lru);

		if (PageReadahead(page))
			page_idx_readahead = page_idx;

		ret = add_to_page_cache_lru(page, mapping,
				      page->index, GFP_KERNEL);
		if (ret) {
			SCFS_PRINT("adding to page cache failed, "
				"page %p page->index %lu ret %d\n",
				page, page->index, ret);
			page_cache_release(page);
			continue;
		}

		/* memory buffer is full or synchronous read request -
		   call scfs_readpage to read now */
		if (sbi->page_buffer_next_filling_index_smb ==
				MAX_PAGE_BUFFER_SIZE_SMB || page_idx < page_idx_readahead) {
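			/* Reuse the cluster buffer index returned by the
			 * previous call when this page belongs to the same
			 * cluster, so the cluster is decompressed only once.
			 */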
			cur_cluster = PAGE_TO_CLUSTER_INDEX(page, sii);

			if (prev_cluster == cur_cluster && prev_cbi > 0)
				prev_cbi = _scfs_readpage(file, page, prev_cbi - 1);
			else
				prev_cbi = _scfs_readpage(file, page, -1);

			prev_cluster = cur_cluster;
			page_cache_release(page); /* drop the readpages ref; add_to_page_cache_lru holds its own */
		} else {
			spin_lock(&sbi->spinlock_smb);

			/* Queue is not full, so add the page to the queue.
			   We also bump f_count on both the upper and lower
			   file to keep the file structs alive until the smb
			   thread has finished with them */
			atomic_long_inc(&SCFS_F(file)->lower_file->f_count);
			atomic_long_inc(&file->f_count);
			sbi->page_buffer_smb[sbi->page_buffer_next_filling_index_smb] = page;
			sbi->file_buffer_smb[sbi->page_buffer_next_filling_index_smb++] = file;

			/* If the ring just filled up, mark it full by setting the
			   filling index to MAX_PAGE_BUFFER_SIZE_SMB (the "full"
			   sentinel); if it merely ran past the end, wrap it to 0 */
			if (((sbi->page_buffer_next_filling_index_smb == MAX_PAGE_BUFFER_SIZE_SMB) &&
				sbi->page_buffer_next_io_index_smb == 0) ||
				(sbi->page_buffer_next_filling_index_smb ==
				sbi->page_buffer_next_io_index_smb))
				sbi->page_buffer_next_filling_index_smb = MAX_PAGE_BUFFER_SIZE_SMB;
			else if (sbi->page_buffer_next_filling_index_smb == MAX_PAGE_BUFFER_SIZE_SMB)
				sbi->page_buffer_next_filling_index_smb = 0;
			spin_unlock(&sbi->spinlock_smb);
			++readahead_page;
		}
	}

	if (readahead_page > 0)
		wakeup_smb_thread(sbi);

	SCFS_PRINT("<e>\n");

#ifdef SCFS_ASYNC_READ_PROFILE
	atomic_sub(nr_pages, &sbi->scfs_standby_readpage_count);
#endif
	return 0;
}
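
/**
 * smb_thread
 *
 * Parameters:
 * @data: scfs_sb_info of the superblock this thread services
 *
 * Return:
 * 0 once the kthread is stopped
 *
 * Description:
 * - Background thread that drains the shared page/file ring buffer filled by
 *   scfs_readpages, reading and decompressing the queued pages. It sleeps
 *   when the buffer is empty and is woken again via wakeup_smb_thread().
 */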
int smb_thread(void *data)
{
	u32 length = 0, io_index, filling_index;
	struct scfs_sb_info *sbi = (struct scfs_sb_info *)data;
	struct page *page;
	struct page *temp_page;
	struct page *page_buffer[3] = {NULL, NULL, NULL};
	struct file *file;
	struct file *temp_file = NULL;
	struct scfs_inode_info *sii;
	int cluster_number = -1;
	int page_buffer_count = 0;
	int i;
	int prev_cbi = 0;

	set_freezable();

	/* handle any queued-up read requests, or else go back to sleep */
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		spin_lock(&sbi->spinlock_smb);

		/* calculate the number of pages queued in the page buffer */
		io_index = sbi->page_buffer_next_io_index_smb;
		filling_index = sbi->page_buffer_next_filling_index_smb;

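		/* A filling index of MAX_PAGE_BUFFER_SIZE_SMB is the "buffer
		 * full" sentinel set by scfs_readpages: treat the whole ring
		 * as pending and pull the filling index back to the I/O index.
		 */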
		if (filling_index == MAX_PAGE_BUFFER_SIZE_SMB) {
			length = MAX_PAGE_BUFFER_SIZE_SMB;
			sbi->page_buffer_next_filling_index_smb =
				sbi->page_buffer_next_io_index_smb;
		} else if (filling_index > io_index)
			length = filling_index - io_index;
		else if (filling_index < io_index)
			length = (MAX_PAGE_BUFFER_SIZE_SMB - io_index) + filling_index;
		else if (filling_index == io_index) 
			length = 0;

		page_buffer_count = 0;

		/* The first queued page is serviced, and any subsequent queued
		 * pages from the same cluster of the same file are serviced
		 * right after it, reusing the decompressed cluster buffer.
		 */
		if (length > 0) {
			__set_current_state(TASK_RUNNING);
			page = sbi->page_buffer_smb[sbi->page_buffer_next_io_index_smb];
			file = sbi->file_buffer_smb[sbi->page_buffer_next_io_index_smb];
			sbi->page_buffer_next_io_index_smb++;

			if (sbi->page_buffer_next_io_index_smb >= MAX_PAGE_BUFFER_SIZE_SMB)
				sbi->page_buffer_next_io_index_smb = 0;

			length--;
			sii = SCFS_I(page->mapping->host);
			cluster_number = PAGE_TO_CLUSTER_INDEX(page, sii);

			while (length-- > 0) {
				temp_page = sbi->page_buffer_smb[sbi->page_buffer_next_io_index_smb];
				temp_file = sbi->file_buffer_smb[sbi->page_buffer_next_io_index_smb];

				if ((temp_file == file) &&
					(cluster_number == PAGE_TO_CLUSTER_INDEX(temp_page, sii))) {
					page_buffer[page_buffer_count++] = temp_page;
					sbi->page_buffer_next_io_index_smb++;

					if (sbi->page_buffer_next_io_index_smb >=
						MAX_PAGE_BUFFER_SIZE_SMB)
						sbi->page_buffer_next_io_index_smb = 0;
				} else
					break;
			}
			spin_unlock(&sbi->spinlock_smb);

			/* read the first page, then drop the file and page
			 * references taken in scfs_readpages */
			prev_cbi = _scfs_readpage(file, page, -1);
			fput(SCFS_F(file)->lower_file);
			fput(file);
			page_cache_release(page);

			/* read the remaining queued pages from the same cluster
			 * as the first page, reusing its cluster buffer */
			for (i = 0; i < page_buffer_count; i++) {
				prev_cbi = _scfs_readpage(file, page_buffer[i], prev_cbi - 1);
				fput(SCFS_F(file)->lower_file);
				fput(file);
				page_cache_release(page_buffer[i]);
			}
		} else {
			//sbi->smb_task_status[xx] = 0;
			spin_unlock(&sbi->spinlock_smb);
			schedule();
			//sbi->smb_task_status[xx] = 1;
		}
	}

	return 0;
}