/* * SCFS doesn't have a writepage, so write with mmap has no effect. * First implementation was returning error when having VM_WRITE, * but some process in boot sequence uses mmap with VM_WRITE * - without write, just with flag - so, now using VM_WRITE is * available. */ static int scfs_mmap(struct file *file, struct vm_area_struct *vma) { struct address_space *mapping = file->f_mapping; if (!mapping->a_ops->readpage) return -ENOEXEC; SCFS_PRINT("file %s\n", file->f_path.dentry->d_name.name); //if (file->f_mode & FMODE_WRITE) { /* if (vma->vm_flags & VM_WRITE) { SCFS_PRINT_ERROR("f_mode WRITE was set! error. " "f_mode %x (FMODE_READ: %x FMODE_WRITE %x)\n", file->f_mode, file->f_mode & FMODE_READ, file->f_mode & FMODE_WRITE); SCFS_PRINT_ERROR("filename : %s\n", file->f_path.dentry->d_name.name); return -EPERM; } */ file_accessed(file); vma->vm_ops = &scfs_file_vm_ops; #if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) vma->vm_flags |= VM_CAN_NONLINEAR; #endif SCFS_PRINT("VM flags: %lx " "EXEC %lx IO %lx " "SEQ %lx RAND %lx " "READ %lx MAYREAD %lx " "WRITE %lx MAYWRITE %lx " "SHARED %lx MAYSHARE %lx\n", vma->vm_flags, vma->vm_flags & VM_EXECUTABLE, vma->vm_flags & VM_IO, vma->vm_flags & VM_SEQ_READ, vma->vm_flags & VM_RAND_READ, vma->vm_flags & VM_READ, vma->vm_flags & VM_MAYREAD, vma->vm_flags & VM_WRITE, vma->vm_flags & VM_MAYWRITE, vma->vm_flags & VM_SHARED, vma->vm_flags & VM_MAYSHARE); if (vma->vm_flags & VM_WRITE) { SCFS_PRINT("VM_WRITE: file %s flags %lx VM_MAYWRITE %lx\n", file->f_path.dentry->d_name.name, vma->vm_flags, vma->vm_flags & VM_MAYWRITE); } return 0; }
/** * compr_init - initialize a compressor. * @compr: compressor description object * * This function initializes the requested compressor and returns zero in case * of success or a negative error code in case of failure. */ static int compr_init(struct scfs_compressor *compr) { if (compr->capi_name) { compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); if (IS_ERR(compr->cc)) { SCFS_PRINT_ERROR("cannot initialize compressor %s, error %ld", compr->name, PTR_ERR(compr->cc)); return PTR_ERR(compr->cc); } } scfs_compressors[compr->compr_type] = compr; SCFS_PRINT("compr name %s(%d) got cc(%x)\n", compr->capi_name, compr->compr_type, compr->cc); return 0; }
/* * Check validity of cinfo data(array). * It is called in scfs_open, failed, the file is treated non-compressed, * such as the one have no 'footer'. */ int scfs_check_cinfo(struct scfs_inode_info *sii, void *buf) { struct scfs_cinfo *cinfo = buf; int prev_last_offset = 0; int cinfo_size = sii->cinfo_array_size; for (cinfo = buf; (unsigned long)cinfo < (unsigned long)buf + cinfo_size; cinfo++) { if (cinfo->offset < prev_last_offset || !cinfo->size || cinfo->size > sii->cluster_size) { SCFS_PRINT("invalid cinfo, prev_last_offset : %d, " "offset : %d, size : %d\n", prev_last_offset, cinfo->offset, cinfo->size); return -1; } prev_last_offset = cinfo->offset + cinfo->size; } return 0; }
/* * scfs_file_release */ static int scfs_file_release(struct inode *inode, struct file *file) { int ret; SCFS_PRINT("f:%s calling fput with lower_file\n", file->f_path.dentry->d_name.name); if (file->f_flags & (O_RDWR | O_WRONLY)) { CLEAR_WROPENED(SCFS_I(inode)); ret = scfs_write_meta(file); if (ret) return ret; } fput(SCFS_F(file)->lower_file); kmem_cache_free(scfs_file_info_cache, SCFS_F(file)); profile_sub_kmcached(sizeof(struct scfs_file_info), SCFS_S(inode->i_sb)); return 0; }
static int scfs_open(struct inode *inode, struct file *file) { struct scfs_sb_info *sbi = SCFS_S(inode->i_sb); struct scfs_inode_info *sii = SCFS_I(inode); struct scfs_file_info *fi; int ret = 0; struct file *lower_file; if (IS_WROPENED(sii)) { SCFS_PRINT("This file is already opened with 'WRITE' flag\n"); return -EPERM; } fi = kmem_cache_zalloc(scfs_file_info_cache, GFP_KERNEL); if (!fi) return -ENOMEM; profile_add_kmcached(sizeof(struct scfs_file_info), sbi); file->private_data = fi; mutex_lock(&sii->cinfo_mutex); if (IS_INVALID_META(sii)) { SCFS_PRINT("meta is invalid, so we should re-load it\n"); ret = scfs_reload_meta(file); if (ret) { SCFS_PRINT_ERROR("error in re-reading footer, err : %d\n", ret); goto out; } } else if (sii->compressed && !sii->cinfo_array) { /* 1st lower-open is for getting cinfo */ ret = scfs_initialize_lower_file(file->f_dentry, &lower_file, O_RDONLY); if (ret) { SCFS_PRINT_ERROR("err in get_lower_file %s\n", file->f_dentry->d_name.name); goto out; } scfs_set_lower_file(file, lower_file); SCFS_PRINT("info size = %d \n", sii->cinfo_array_size); ret = scfs_load_cinfo(sii, lower_file); if (ret) { SCFS_PRINT_ERROR("err in loading cinfo, ret : %d\n", file->f_dentry->d_name.name); fput(lower_file); goto out; } fput(lower_file); } ret = scfs_initialize_lower_file(file->f_dentry, &lower_file, file->f_flags); if (ret) { SCFS_PRINT_ERROR("err in get_lower_file %s\n", file->f_dentry->d_name.name); goto out; } scfs_set_lower_file(file, lower_file); out: if (!ret) { fsstack_copy_attr_all(inode, scfs_lower_inode(inode)); if (file->f_flags & (O_RDWR | O_WRONLY)) MAKE_WROPENED(sii); } else { scfs_set_lower_file(file, NULL); kmem_cache_free(scfs_file_info_cache, file->private_data); profile_sub_kmcached(sizeof(struct scfs_file_info), sbi); sii->cinfo_array = NULL; } mutex_unlock(&sii->cinfo_mutex); SCFS_PRINT("lower, dentry name : %s, count : %d, ret : %d\n", file->f_dentry->d_name.name, file->f_dentry->d_count, ret); return ret; }
/**
 * scfs_readpage
 *
 * Parameters:
 * @file: upper file
 * @page: upper page from SCFS inode mapping, data will be copied in here
 * @pref_index: preferred buffer_cache slot (-1 if none); callers that just
 *              read the same cluster pass the slot returned last time so the
 *              cached cluster can be reused without a search
 *
 * Return:
 * SCFS_SUCCESS if success, otherwise if error.
 * On a cache hit or a cached fill, returns (slot index + 1) so the caller can
 * pass it back as @pref_index; returns 0 when a one-shot mempool buffer was
 * used, or a negative errno on failure.
 *
 * Description:
 * - Read in a page by reading a cluster from the file's lower file.
 *   (Reading in a cluster for just a single page read is inevitable, but this
 *   "amplified read" and decompressing overhead should be amortized when
 *   other pages in that same cluster is accessed later, and only incurs
 *   memcpy from the cached cluster buffer.)
 * - Recently accessed clusters ("buffer_cache") are cached for later reads.
 *
 * Locking: buffer_cache slots are claimed with a double-checked pattern -
 * a lock-free is_used peek followed by a re-check under buffer_cache_lock -
 * so a slot stolen between the peek and the lock is detected and retried.
 */
static inline int _scfs_readpage(struct file *file, struct page *page,
		int pref_index)
{
	struct scfs_inode_info *sii = SCFS_I(page->mapping->host);
	struct scfs_sb_info *sbi = SCFS_S(page->mapping->host->i_sb);
	struct scfs_cluster_buffer buffer = {NULL, NULL, NULL, NULL, 0};
	int ret = 0, compressed = 0;
	/* alloc_membuffer == 1 means we fell back to a one-shot mempool
	 * buffer (no cache slot) and must free it at "out" */
	int alloc_membuffer = 1;
	int allocated_index = -1;
	int i;
	char *virt;

	SCFS_PRINT("f:%s i:%d c:0x%x u:0x%x\n",
		file->f_path.dentry->d_name.name, page->index,
		buffer.c_buffer, buffer.u_buffer);
	ASSERT(sii->cluster_size <= SCFS_CLUSTER_SIZE_MAX);

#ifdef SCFS_ASYNC_READ_PROFILE
	sbi->scfs_readpage_total_count++;
#endif

#if MAX_BUFFER_CACHE
	/* fast path: caller-provided slot hint still holds our cluster */
	if (pref_index >= 0 &&
	    sbi->buffer_cache[pref_index].inode_number == sii->vfs_inode.i_ino &&
	    sbi->buffer_cache[pref_index].cluster_number ==
		PAGE_TO_CLUSTER_INDEX(page, sii) &&
	    atomic_read(&sbi->buffer_cache[pref_index].is_used) != 1) {
		spin_lock(&sbi->buffer_cache_lock);
		/* this pref_index is used for another page */
		if (atomic_read(&sbi->buffer_cache[pref_index].is_used) == 1) {
			spin_unlock(&sbi->buffer_cache_lock);
			sbi->buffer_cache_reclaimed_before_used_count++;
			goto pick_slot;
		}
		atomic_set(&sbi->buffer_cache[pref_index].is_used, 1);
		spin_unlock(&sbi->buffer_cache_lock);
		virt = kmap_atomic(page);
		/* note: is_compressed selects u_page (decompressed data) vs
		 * c_page; for compressed clusters the usable data lives in
		 * u_page, for uncompressed ones in c_page */
		if (sbi->buffer_cache[pref_index].is_compressed)
			memcpy(virt,
				page_address(sbi->buffer_cache[pref_index].u_page) +
				PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE,
				PAGE_SIZE);
		else
			memcpy(virt,
				page_address(sbi->buffer_cache[pref_index].c_page) +
				PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE,
				PAGE_SIZE);
		atomic_set(&sbi->buffer_cache[pref_index].is_used, 0);
		kunmap_atomic(virt);
		SetPageUptodate(page);
		unlock_page(page);
		SCFS_PRINT("%s<h> %d\n", file->f_path.dentry->d_name.name,
			page->index);
		return pref_index + 1;
	} else if (pref_index >= 0) {
		/* hint was stale (slot reclaimed before we used it) */
		sbi->buffer_cache_reclaimed_before_used_count++;
		goto pick_slot;
	}

	/* search buffer_cache first in case the cluster is left cached */
	for (i = 0; i < MAX_BUFFER_CACHE; i++) {
		if (sbi->buffer_cache[i].inode_number == sii->vfs_inode.i_ino &&
		    sbi->buffer_cache[i].cluster_number ==
			PAGE_TO_CLUSTER_INDEX(page, sii) &&
		    atomic_read(&sbi->buffer_cache[i].is_used) != 1) {
			spin_lock(&sbi->buffer_cache_lock);
			/* re-check under lock; slot may have been claimed */
			if (atomic_read(&sbi->buffer_cache[i].is_used) == 1) {
				spin_unlock(&sbi->buffer_cache_lock);
				goto pick_slot;
			}
			atomic_set(&sbi->buffer_cache[i].is_used, 1);
			spin_unlock(&sbi->buffer_cache_lock);
			virt = kmap_atomic(page);
			if (sbi->buffer_cache[i].is_compressed)
				memcpy(virt,
					page_address(sbi->buffer_cache[i].u_page) +
					PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE,
					PAGE_SIZE);
			else
				memcpy(virt,
					page_address(sbi->buffer_cache[i].c_page) +
					PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE,
					PAGE_SIZE);
			atomic_set(&sbi->buffer_cache[i].is_used, 0);
			kunmap_atomic(virt);
			SetPageUptodate(page);
			unlock_page(page);
			SCFS_PRINT("%s<h> %d\n",
				file->f_path.dentry->d_name.name,
				page->index);
			return i + 1;
		}
	}

pick_slot:
	/* pick a slot in buffer_cache to use */
	if (atomic_read(&sbi->buffer_cache[sbi->read_buffer_index].is_used) != 1) {
		spin_lock(&sbi->buffer_cache_lock);
		/* this index is used for another page */
		if (atomic_read(&sbi->buffer_cache[sbi->read_buffer_index].is_used)
				== 1) {
			spin_unlock(&sbi->buffer_cache_lock);
			goto pick_slot_full;
		}
		atomic_set(&sbi->buffer_cache[sbi->read_buffer_index].is_used, 1);
		/* claim current rotor position, advance rotor (wraps) */
		allocated_index = sbi->read_buffer_index++;

		if (sbi->read_buffer_index >= MAX_BUFFER_CACHE)
			sbi->read_buffer_index = 0;
		spin_unlock(&sbi->buffer_cache_lock);
		buffer.c_page = sbi->buffer_cache[allocated_index].c_page;
		buffer.u_page = sbi->buffer_cache[allocated_index].u_page;
		sbi->buffer_cache[allocated_index].inode_number =
			sii->vfs_inode.i_ino;
		sbi->buffer_cache[allocated_index].cluster_number =
			PAGE_TO_CLUSTER_INDEX(page, sii);
		alloc_membuffer = 0;

		goto real_io;
	}

pick_slot_full:
	/* rotor slot busy: linear scan for any free slot */
	for (i = 0; i < MAX_BUFFER_CACHE; i++) {
		if (atomic_read(&sbi->buffer_cache[i].is_used) != 1) {
			spin_lock(&sbi->buffer_cache_lock);
			/* this index is used for another page */
			if (atomic_read(&sbi->buffer_cache[i].is_used) == 1) {
				spin_unlock(&sbi->buffer_cache_lock);
				continue;
			}
			atomic_set(&sbi->buffer_cache[i].is_used, 1);
			sbi->read_buffer_index = i + 1;
			if (sbi->read_buffer_index >= MAX_BUFFER_CACHE)
				sbi->read_buffer_index = 0;
			spin_unlock(&sbi->buffer_cache_lock);
			buffer.c_page = sbi->buffer_cache[i].c_page;
			buffer.u_page = sbi->buffer_cache[i].u_page;
			sbi->buffer_cache[i].inode_number = sii->vfs_inode.i_ino;
			sbi->buffer_cache[i].cluster_number =
				PAGE_TO_CLUSTER_INDEX(page, sii);
			allocated_index = i;
			alloc_membuffer = 0;
			break;
		}
	}
#endif

real_io:
#ifdef SCFS_ASYNC_READ_PROFILE
	sbi->scfs_readpage_io_count++;
#endif
	/* sanity check & prepare buffers for scfs_read_cluster */
	if (alloc_membuffer == 1 && (buffer.c_page || buffer.c_buffer))
		ASSERT(0);

	/* no cache slot was claimed: fall back to one-shot mempool pages */
	if (!buffer.c_page)
		buffer.c_page = scfs_alloc_mempool_buffer(sbi);

	if (!buffer.c_page) {
		SCFS_PRINT_ERROR("c_page malloc failed\n");
		ret = -ENOMEM;
		goto out;
	}

	if (!buffer.c_buffer)
		buffer.c_buffer = page_address(buffer.c_page);

	if (!buffer.c_buffer) {
		SCFS_PRINT_ERROR("c_buffer malloc failed\n");
		ret = -ENOMEM;
		goto out;
	}

	if (!buffer.u_page)
		buffer.u_page = scfs_alloc_mempool_buffer(sbi);

	if (!buffer.u_page) {
		SCFS_PRINT_ERROR("u_page malloc failed\n");
		ret = -ENOMEM;
		goto out;
	}

	if (!buffer.u_buffer)
		buffer.u_buffer = page_address(buffer.u_page);

	if (!buffer.u_buffer) {
		SCFS_PRINT_ERROR("u_buffer malloc failed\n");
		ret = -ENOMEM;
		goto out;
	}

	/* read cluster from lower; scfs_read_cluster reports via *compressed
	 * whether the cluster needed (and got) decompression into u_buffer */
	ret = scfs_read_cluster(file, page, buffer.c_buffer, &buffer.u_buffer,
		&compressed);

	if (ret) {
		if (ret == -ERANGE)
			SCFS_PRINT_ERROR("file %s error on readpage, OOB. ret %x\n",
				file->f_path.dentry->d_name.name, ret);
		else
			SCFS_PRINT_ERROR("read cluster failed, "
				"file %s page->index %u ret %d\n",
				file->f_path.dentry->d_name.name,
				page->index, ret);
		goto out;
	}

#if MAX_BUFFER_CACHE
	/* don't need to spinlock, we have is_used=1 for this buffer */
	if (alloc_membuffer != 1)
		sbi->buffer_cache[allocated_index].is_compressed = compressed;
#endif

#ifdef SCFS_REMOVE_NO_COMPRESSED_UPPER_MEMCPY
	/* fill page cache with the decompressed or original page
	 * (uncompressed copy assumed done elsewhere in this config -
	 * TODO confirm against scfs_read_cluster) */
	if (compressed) {
		virt = kmap_atomic(page);
		memcpy(virt, page_address(buffer.u_page) +
			PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
		kunmap_atomic(virt);
	}
#else
	/* fill page cache with the decompressed/original data */
	virt = kmap_atomic(page);
	if (compressed)
		memcpy(virt, page_address(buffer.u_page) +
			PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
	else
		memcpy(virt, page_address(buffer.c_page) +
			PGOFF_IN_CLUSTER(page, sii) * PAGE_SIZE, PAGE_SIZE);
	kunmap_atomic(virt);
#endif
	SetPageUptodate(page);

#if MAX_BUFFER_CACHE
#ifndef SCFS_REMOVE_NO_COMPRESSED_UPPER_MEMCPY
	/* release our claim so the slot is reusable as a cache entry */
	if (alloc_membuffer != 1) {
		atomic_set(&sbi->buffer_cache[allocated_index].is_used, 0);
	}
#else
	if (alloc_membuffer != 1 && compressed) {
		atomic_set(&sbi->buffer_cache[allocated_index].is_used, 0);
	} else if (alloc_membuffer != 1) {
		/* uncompressed cluster in this config: invalidate the slot
		 * instead of caching it */
		spin_lock(&sbi->buffer_cache_lock);
		sbi->buffer_cache[allocated_index].inode_number = -1;
		sbi->buffer_cache[allocated_index].cluster_number = -1;
		sbi->buffer_cache[allocated_index].is_compressed = -1;
		atomic_set(&sbi->buffer_cache[allocated_index].is_used, -1);
		spin_unlock(&sbi->buffer_cache_lock);
	}
#endif
#endif

out:
	unlock_page(page);

	/* one-shot mempool pages are freed here; cache-slot pages persist */
	if (alloc_membuffer == 1) {
		sbi->buffer_cache_overflow_count_smb++;
		scfs_free_mempool_buffer(buffer.c_page, sbi);
		scfs_free_mempool_buffer(buffer.u_page, sbi);
	}

	SCFS_PRINT("-f:%s i:%d c:0x%x u:0x%x\n",
		file->f_path.dentry->d_name.name, page->index,
		buffer.c_buffer, buffer.u_buffer);
	SCFS_PRINT("%s<r> %d\n", file->f_path.dentry->d_name.name,
		page->index);

	if (ret < 0)
		return ret;
	else if (alloc_membuffer != 1)
		return allocated_index + 1;
	else
		return 0;
}
/**
 * scfs_readpages
 *
 * Parameters:
 * @file: upper file
 * @*mapping: address_space struct for the file
 * @*pages: list of pages to read in
 * @nr_pages: number of pages to read in
 *
 * Return:
 * SCFS_SUCCESS if success, otherwise if error
 *
 * Description:
 * - Asynchronously read pages for readahead. A scaling number of background
 *   threads will read & decompress them in a slightly deferred but
 *   parallelized manner.
 * - Pages up to the first readahead-marked page are read synchronously via
 *   _scfs_readpage; the rest are queued into the shared SMB page buffer and
 *   handled by the SMB threads woken at the end.
 */
static int scfs_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct scfs_inode_info *sii = SCFS_I(file->f_mapping->host);
	struct scfs_sb_info *sbi = SCFS_S(file->f_mapping->host->i_sb);
	struct file *lower_file = NULL;
	struct page *page;
	struct scfs_cinfo cinfo;
	loff_t i_size;
	pgoff_t start, end;
	int page_idx, page_idx_readahead = 1024, ret = 0;
	int readahead_page = 0;
	/* prev_cbi carries _scfs_readpage's returned slot hint (+1) so
	 * consecutive pages of the same cluster reuse the cached buffer */
	int prev_cbi = 0;
	int prev_cluster = -1, cur_cluster = -1;
	int cluster_idx = 0;

	i_size = i_size_read(&sii->vfs_inode);
	if (!i_size) {
		SCFS_PRINT("file %s: i_size is zero, "
			"flags 0x%x sii->clust_info_size %d\n",
			file->f_path.dentry->d_name.name, sii->flags,
			sii->cinfo_array_size);
		return 0;
	}

#ifdef SCFS_ASYNC_READ_PROFILE
	atomic_add(nr_pages, &sbi->scfs_standby_readpage_count);
#endif

#ifdef SCFS_NOTIFY_RANDOM_READ
	lower_file = scfs_lower_file(file);
	if (!lower_file) {
		SCFS_PRINT_ERROR("file %s: lower file is null!\n",
			file->f_path.dentry->d_name.name);
		return -EINVAL;
	}

	/* if the read request was random (enough), hint it to the lower file.
	 * scfs_sequential_page_number is the tunable threshold.
	 * filemap.c will later on refer to this FMODE_RANDOM flag.
	 */
	spin_lock(&lower_file->f_lock);
	if (nr_pages > sbi->scfs_sequential_page_number)
		lower_file->f_mode &= ~FMODE_RANDOM;
	else
		lower_file->f_mode |= FMODE_RANDOM;
	spin_unlock(&lower_file->f_lock);
#endif
	lower_file = scfs_lower_file(file);
	page = list_entry(pages->prev, struct page, lru);
	cluster_idx = page->index / (sii->cluster_size / PAGE_SIZE);

	/* compute the lower-file page range [start, end] spanned by the
	 * requested upper pages; compressed files map through cinfo offsets */
	if (sii->compressed) {
		mutex_lock(&sii->cinfo_mutex);
		ret = get_cluster_info(file, cluster_idx, &cinfo);
		mutex_unlock(&sii->cinfo_mutex);
		if (ret) {
			SCFS_PRINT_ERROR("err in get_cluster_info, ret : %d,"
				"i_size %lld\n", ret, i_size);
			return ret;
		}

		if (!cinfo.size || cinfo.size > sii->cluster_size) {
			SCFS_PRINT_ERROR("file %s: cinfo is invalid, "
				"clust %u cinfo.size %u\n",
				file->f_path.dentry->d_name.name,
				cluster_idx, cinfo.size);
			return -EINVAL;
		}
		start = (pgoff_t)(cinfo.offset / PAGE_SIZE);
	} else {
		start = (pgoff_t)(cluster_idx * sii->cluster_size / PAGE_SIZE);
	}

	cluster_idx = (page->index + nr_pages - 1) /
		(sii->cluster_size / PAGE_SIZE);
	if (sii->compressed) {
		mutex_lock(&sii->cinfo_mutex);
		ret = get_cluster_info(file, cluster_idx, &cinfo);
		mutex_unlock(&sii->cinfo_mutex);
		if (ret) {
			SCFS_PRINT_ERROR("err in get_cluster_info, ret : %d,"
				"i_size %lld\n", ret, i_size);
			return ret;
		}

		if (!cinfo.size || cinfo.size > sii->cluster_size) {
			SCFS_PRINT_ERROR("file %s: cinfo is invalid, "
				"clust %u cinfo.size %u\n",
				file->f_path.dentry->d_name.name,
				cluster_idx, cinfo.size);
			return -EINVAL;
		}
		end = (pgoff_t)((cinfo.offset + cinfo.size -1) / PAGE_SIZE);
	} else {
		end = (pgoff_t)(((cluster_idx + 1) * sii->cluster_size - 1) /
			PAGE_SIZE);
		/* check upper inode size */
		/* out of range? on compressed file, it is handled returning
		 * error, which one is right? */
		if (end > (i_size / PAGE_SIZE))
			end = (i_size / PAGE_SIZE);
	}
	/* pre-fault the lower pages so sync/async reads below hit the cache */
	force_page_cache_readahead(lower_file->f_mapping, lower_file,
		start, (unsigned long)(end - start +1));

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		page = list_entry(pages->prev, struct page, lru);
		list_del(&page->lru);

		/* remember where async readahead begins */
		if (PageReadahead(page))
			page_idx_readahead = page_idx;

		ret = add_to_page_cache_lru(page, mapping,
			page->index, GFP_KERNEL);
		if (ret) {
			SCFS_PRINT("adding to page cache failed, "
				"page %x page->idx %d ret %d\n",
				page, page->index, ret);
			page_cache_release(page);
			continue;
		}

		/* memory buffer is full or synchronous read request -
		   call scfs_readpage to read now */
		if (sbi->page_buffer_next_filling_index_smb ==
				MAX_PAGE_BUFFER_SIZE_SMB ||
				page_idx < page_idx_readahead) {
			cur_cluster = PAGE_TO_CLUSTER_INDEX(page, sii);

			if (prev_cluster == cur_cluster && prev_cbi > 0)
				prev_cbi = _scfs_readpage(file, page,
					prev_cbi - 1);
			else
				prev_cbi = _scfs_readpage(file, page, -1);

			prev_cluster = cur_cluster;
			page_cache_release(page); /* refer line 701 */
		} else {
			spin_lock(&sbi->spinlock_smb);

			/* Queue is not full so add the page into the queue.
			   Also, here we increase file->f_count to protect
			   the file structs from multi-threaded accesses */
			atomic_long_inc(&SCFS_F(file)->lower_file->f_count);
			atomic_long_inc(&file->f_count);
			sbi->page_buffer_smb[sbi->page_buffer_next_filling_index_smb] =
				page;
			sbi->file_buffer_smb[sbi->page_buffer_next_filling_index_smb++] =
				file;

			/* check whether page buffer is full and set page
			 * buffer full if needed; filling index pinned at
			 * MAX_PAGE_BUFFER_SIZE_SMB acts as the "full" marker,
			 * otherwise it wraps to 0 */
			if (((sbi->page_buffer_next_filling_index_smb ==
					MAX_PAGE_BUFFER_SIZE_SMB) &&
					sbi->page_buffer_next_io_index_smb == 0) ||
					(sbi->page_buffer_next_filling_index_smb ==
					sbi->page_buffer_next_io_index_smb))
				sbi->page_buffer_next_filling_index_smb =
					MAX_PAGE_BUFFER_SIZE_SMB;
			else if (sbi->page_buffer_next_filling_index_smb ==
					MAX_PAGE_BUFFER_SIZE_SMB)
				sbi->page_buffer_next_filling_index_smb = 0;
			spin_unlock(&sbi->spinlock_smb);
			++readahead_page;
		}
		//page_cache_release(page);
	}

	/* queued pages exist: wake the SMB worker threads to process them */
	if (readahead_page > 0)
		wakeup_smb_thread(sbi);

	SCFS_PRINT("<e>\n");
#ifdef SCFS_ASYNC_READ_PROFILE
	atomic_sub(nr_pages, &sbi->scfs_standby_readpage_count);
#endif
	return 0;
}