// Generate the control-program (CP) instruction stream for a fully-connected
// "classify" layer on an accelerator with three scratchpads: SB (synapses),
// NBin (input neurons) and NBout (output neurons).  Instructions are printed
// to stdout as they are generated; the function returns the layer's config
// entry ("CLASS ...") as a string.
//
// Parameters:
//   num_input_neurons / num_output_neurons  - layer dimensions
//   num_*_per_entry                         - neurons packed per buffer entry
//   num_sb/nbin/nbout_entries               - buffer depths, in entries
//   bit_width                               - neuron/synapse width in bits
//   sb_addr / nbin_addr / nbout_addr        - base memory addresses
//   verbose                                 - emit human-readable progress
//
// per_entry values must be less than or equal to the whole value
std::string generate_classify_layer_code (unsigned num_input_neurons, unsigned num_output_neurons, unsigned num_input_neurons_per_entry, unsigned num_output_neurons_per_entry, unsigned num_sb_entries, unsigned num_nbin_entries, unsigned num_nbout_entries, unsigned bit_width, unsigned sb_addr, unsigned nbin_addr, unsigned nbout_addr, bool verbose)
{
  // various performance counter
  unsigned cycles = 0; // cycles needed to execute current control instruction
  stat_keeper stat;    // accumulates per-instruction and whole-code statistics

  // intermediate data
  unsigned word_size = div_roundup(bit_width,8); // bytes per neuron/synapse word
  unsigned remaining_input_neurons = num_input_neurons;
  unsigned remaining_output_neurons = num_output_neurons;
  unsigned current_sb_pointer = sb_addr;       // next memory address for synapses
  unsigned current_nbin_pointer = nbin_addr;   // next memory address for inputs
  unsigned current_nbout_pointer = nbout_addr; // next memory address for outputs
  unsigned num_nbout_to_write;

  // Outer loop: one iteration per block of output neurons that fits in NBout.
  do {
    bool is_new_block = true; // first instruction of a block resets the partial sums
    remaining_input_neurons = num_input_neurons; // every output block re-reads all inputs
    // outputs produced this block, capped by NBout capacity
    num_nbout_to_write = std::min(remaining_output_neurons, num_output_neurons_per_entry * num_nbout_entries);
    unsigned total_num_nbin_entry = div_roundup(num_input_neurons, num_input_neurons_per_entry);
    unsigned num_input_to_load = std::min(remaining_input_neurons, num_input_neurons_per_entry * num_nbin_entries);
    unsigned nbin_entry_loaded = 0; // NBin entries still unconsumed from the last fill
    if (verbose) {
      unsigned output_from = num_output_neurons - remaining_output_neurons;
      unsigned output_to = output_from + num_nbout_to_write - 1;
      std::cout << std::endl << "Output Neuron " << output_from << " - " << output_to << std::endl;
    }

    // go through the neuron inputs an entry at a time
    // NOTE(review): loop index is int while total_num_nbin_entry is unsigned
    // (signed/unsigned comparison) — benign for realistic sizes, worth cleaning up.
    for (int current_nbin_entry = 0; current_nbin_entry < total_num_nbin_entry; current_nbin_entry++) {
      // instruction to print out
      cp_inst inst;
      inst.cp_end = cp_inst::NOP;

      // calculate how many SB entries to load
      // (one entry's worth of inputs against every output written this block)
      unsigned num_sb_to_load = num_input_neurons_per_entry * num_nbout_to_write;
      // load SB buffer
      load_sb(inst, num_sb_to_load, current_sb_pointer, false, word_size);
      // update SB pointer
      current_sb_pointer += num_sb_to_load * word_size;
      // cycles so far: SB load time for this instruction
      cycles = div_roundup(num_sb_to_load / num_input_neurons_per_entry, num_output_neurons_per_entry);

      // check to see if NBin is filled or not
      // if not, fill it
      if (nbin_entry_loaded == 0) {
        num_input_to_load = std::min(remaining_input_neurons, num_input_neurons_per_entry * num_nbin_entries);
        nbin_entry_loaded = div_roundup(num_input_to_load, num_input_neurons_per_entry);
        load_nbin(inst, num_input_to_load, current_nbin_pointer, true, word_size);
        if (verbose) {
          unsigned output_from = num_output_neurons - remaining_output_neurons;
          unsigned output_to = output_from + num_nbout_to_write - 1;
          unsigned input_from = num_input_neurons - remaining_input_neurons;
          unsigned input_to = input_from + num_input_to_load - 1;
          std::cout << std::endl << "Output Neuron " << output_from << " - " << output_to << ": Input Neuron " << input_from << " - " << input_to << std::endl;
        }
        remaining_input_neurons -= num_input_to_load;
      } else {
        // NBin already holds this entry; just replay it from the buffer
        read_nbin(inst, current_nbin_pointer, true, word_size);
      }

      // update counters
      nbin_entry_loaded--;
      // NOTE(review): current_nbin_pointer is never reset to nbin_addr between
      // output blocks, so successive blocks read inputs from successively higher
      // addresses — confirm this matches the intended memory layout.
      current_nbin_pointer += num_input_neurons_per_entry * word_size;
      // instruction time is the max of its SB-load and NBin-load components
      cycles = std::max(cycles, div_roundup(inst.nbin_size / word_size, num_input_neurons_per_entry));

      // prepare for output if it is last entry to finalize the sum
      if (current_nbin_entry == total_num_nbin_entry - 1) {
        // final entry: apply activation and commit the block's outputs
        sigmoid_NFU(inst, is_new_block ? cp_inst::RESET : cp_inst::NBOUT);
        output_NBout(inst, num_nbout_to_write, current_nbout_pointer, word_size);
        // update counters
        current_nbout_pointer += num_nbout_to_write * word_size;
        remaining_output_neurons -= num_nbout_to_write;
      } else {
        // intermediate entry: keep accumulating partial sums, no NBout traffic
        partial_sum_NFU(inst, is_new_block ? cp_inst::RESET : cp_inst::NBOUT);
        nop_NBout(inst);
      }

      // stat for the instruction
      cycles = std::max(cycles, div_roundup(num_nbout_to_write,num_output_neurons_per_entry));
      stat.update(inst,cycles);
      is_new_block = false;
      // output load instruction
      std::cout << inst << stat.inst_report(verbose) << std::endl;
    }
  } while (remaining_output_neurons > 0);

  // print out summary
  std::cout << stat.code_report(verbose);

  // config entry for classify layer
  std::stringstream ss;
  ss << "CLASS 0 0 0 0 " << num_input_neurons << " " << num_output_neurons << std::endl;
  return ss.str();
}
static int tux3_fill_super(struct super_block *sb, void *data, int silent) { struct sb *sbi; int err, blocksize; sbi = kzalloc(sizeof(struct sb), GFP_KERNEL); if (!sbi) return -ENOMEM; sbi->vfs_sb = sb; sb->s_fs_info = sbi; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = TUX3_SUPER_MAGIC; sb->s_op = &tux3_super_ops; sb->s_time_gran = 1; mutex_init(&sbi->loglock); INIT_LIST_HEAD(&sbi->alloc_inodes); err = -EIO; blocksize = sb_min_blocksize(sb, BLOCK_SIZE); if (!blocksize) { if (!silent) printk(KERN_ERR "TUX3: unable to set blocksize\n"); goto error; } if ((err = load_sb(tux_sb(sb)))) { if (!silent) { if (err == -EINVAL) warn("invalid superblock [%Lx]", (L)from_be_u64(*(be_u64 *)sbi->super.magic)); else warn("Unable to read superblock"); } goto error; } if (sbi->blocksize != blocksize) { if (!sb_set_blocksize(sb, sbi->blocksize)) { printk(KERN_ERR "TUX3: blocksize too small for device.\n"); goto error; } } warn("s_blocksize %lu", sb->s_blocksize); err = -ENOMEM; sbi->volmap = tux_new_volmap(tux_sb(sb)); if (!sbi->volmap) goto error; insert_inode_hash(sbi->volmap); sbi->logmap = tux_new_logmap(tux_sb(sb)); if (!sbi->logmap) goto error_logmap; err = load_itable(sbi); if (err) goto error_bitmap; // struct inode *vtable; sbi->bitmap = tux3_iget(sb, TUX_BITMAP_INO); err = PTR_ERR(sbi->bitmap); if (IS_ERR(sbi->bitmap)) goto error_bitmap; sbi->rootdir = tux3_iget(sb, TUX_ROOTDIR_INO); err = PTR_ERR(sbi->rootdir); if (IS_ERR(sbi->rootdir)) goto error_rootdir; sbi->atable = tux3_iget(sb, TUX_ATABLE_INO); err = PTR_ERR(sbi->atable); if (IS_ERR(sbi->atable)) goto error_atable; sb->s_root = d_alloc_root(sbi->rootdir); if (!sb->s_root) goto error_alloc_root; return 0; error_alloc_root: iput(sbi->atable); error_atable: iput(sbi->rootdir); error_rootdir: iput(sbi->bitmap); error_bitmap: iput(sbi->logmap); error_logmap: iput(sbi->volmap); error: kfree(sbi); return err; }