// Emits the control instruction stream for a classify layer and returns the
// layer's config entry.
//
// Output neurons are processed in blocks of at most
// num_output_neurons_per_entry * num_nbout_entries. For each block every
// input entry (num_input_neurons_per_entry neurons) is visited once, and one
// cp_inst is built per input entry:
//   - SB is loaded with num_input_neurons_per_entry * <block outputs> words,
//   - NBin is (re)loaded when no loaded entries remain, otherwise read,
//   - the NFU accumulates a partial sum; on the last input entry it applies
//     the sigmoid and the block's outputs are written to NBout.
// Each instruction, followed by its stat report, is printed to std::cout; a
// summary report is printed at the end.
//
// Parameters:
//   num_input_neurons / num_output_neurons  - layer dimensions.
//   num_*_neurons_per_entry                 - neurons held by one buffer entry;
//       per_entry values must be less than or equal to the whole value.
//   num_sb_entries                          - not referenced by this function.
//   num_nbin_entries / num_nbout_entries    - buffer capacities, in entries.
//   bit_width                               - neuron width in bits; the word
//       size is div_roundup(bit_width, 8) and is used as a divisor, so this
//       is presumably required to be nonzero (TODO confirm with callers).
//   sb_addr / nbin_addr / nbout_addr        - starting pointers for the
//       SB / NBin / NBout streams.
//   verbose                                 - print neuron ranges and pass the
//       flag on to the stat reports.
//
// Returns "CLASS 0 0 0 0 <num_input_neurons> <num_output_neurons>\n".
//
// If a pass over the inputs cannot retire any output neuron (for example no
// input entries, or an NBout capacity of zero), generation stops with a
// message on std::cerr instead of looping forever.
std::string generate_classify_layer_code (unsigned num_input_neurons,
                                          unsigned num_output_neurons,
                                          unsigned num_input_neurons_per_entry,
                                          unsigned num_output_neurons_per_entry,
                                          unsigned num_sb_entries,
                                          unsigned num_nbin_entries,
                                          unsigned num_nbout_entries,
                                          unsigned bit_width,
                                          unsigned sb_addr,
                                          unsigned nbin_addr,
                                          unsigned nbout_addr,
                                          bool verbose) {

    // various performance counter
    unsigned cycles = 0;            // cycles needed to execute current control instruction
    stat_keeper stat;

    // intermediate data
    unsigned word_size = div_roundup(bit_width,8);

    unsigned remaining_input_neurons  = num_input_neurons;
    unsigned remaining_output_neurons = num_output_neurons;

    unsigned current_sb_pointer = sb_addr;
    // NOTE(review): this pointer keeps advancing across output blocks and is
    // never rewound to nbin_addr, although every block re-reads all inputs.
    // Left as in the original; confirm whether that is intended.
    unsigned current_nbin_pointer = nbin_addr;
    unsigned current_nbout_pointer = nbout_addr;

    unsigned num_nbout_to_write;

    do {

      // remembered so we can detect a pass that retires no output neuron
      const unsigned outputs_before_block = remaining_output_neurons;

      bool is_new_block = true;
      remaining_input_neurons  = num_input_neurons;
      num_nbout_to_write = std::min(remaining_output_neurons, num_output_neurons_per_entry * num_nbout_entries);

      unsigned total_num_nbin_entry = div_roundup(num_input_neurons, num_input_neurons_per_entry);
      unsigned nbin_entry_loaded = 0;

      if (verbose) {
       unsigned output_from = num_output_neurons - remaining_output_neurons;
       unsigned output_to = output_from + num_nbout_to_write - 1;
       std::cout << std::endl << "Output Neuron " << output_from << " - " << output_to << std::endl;
      }

      // go through the neuron inputs an entry at a time
      // (unsigned index: it is compared against unsigned counts below)
      for (unsigned current_nbin_entry = 0;
           current_nbin_entry < total_num_nbin_entry; current_nbin_entry++) {

        // instruction to print out
        cp_inst inst;
        inst.cp_end = cp_inst::NOP;

        // calculate how many SB entries to load
        unsigned num_sb_to_load = num_input_neurons_per_entry * num_nbout_to_write;

        // load SB buffer
        load_sb(inst, num_sb_to_load, current_sb_pointer, false, word_size);

        // update SB pointer
        current_sb_pointer += num_sb_to_load * word_size;
        cycles = div_roundup(num_sb_to_load / num_input_neurons_per_entry,
                             num_output_neurons_per_entry);

        // check to see if NBin is filled or not
        // if not, fill it
        if (nbin_entry_loaded == 0) {
          // load as many inputs as NBin can hold, or whatever is left
          unsigned num_input_to_load = std::min(remaining_input_neurons, num_input_neurons_per_entry * num_nbin_entries);
          nbin_entry_loaded = div_roundup(num_input_to_load, num_input_neurons_per_entry);
          load_nbin(inst, num_input_to_load, current_nbin_pointer, true, word_size);

          if (verbose) {
           unsigned output_from = num_output_neurons - remaining_output_neurons;
           unsigned output_to = output_from + num_nbout_to_write - 1;
           unsigned input_from = num_input_neurons - remaining_input_neurons;
           unsigned input_to = input_from + num_input_to_load - 1;
           std::cout << std::endl << "Output Neuron " << output_from << " - " << output_to
                     << ": Input Neuron " << input_from << " - " << input_to << std::endl;
          }

          remaining_input_neurons  -= num_input_to_load;
        } else {
          read_nbin(inst, current_nbin_pointer, true, word_size);
        }

        // update counters
        nbin_entry_loaded--;
        current_nbin_pointer += num_input_neurons_per_entry * word_size;
        cycles = std::max(cycles,
            div_roundup(inst.nbin_size / word_size, num_input_neurons_per_entry));

        // prepare for output if it is last entry to finalize the sum
        // (total_num_nbin_entry >= 1 here, so the subtraction cannot wrap)
        if (current_nbin_entry == total_num_nbin_entry - 1) {

          sigmoid_NFU(inst, is_new_block ? cp_inst::RESET : cp_inst::NBOUT);
          output_NBout(inst, num_nbout_to_write, current_nbout_pointer, word_size);

          // update counters
          current_nbout_pointer += num_nbout_to_write * word_size;
          remaining_output_neurons -= num_nbout_to_write;

        } else {

          partial_sum_NFU(inst, is_new_block ? cp_inst::RESET : cp_inst::NBOUT);
          nop_NBout(inst);

        }

        // stat for the instruction
        cycles = std::max(cycles,
           div_roundup(num_nbout_to_write,num_output_neurons_per_entry));
        stat.update(inst,cycles);
        is_new_block = false;

        // output load instruction
        std::cout << inst << stat.inst_report(verbose) << std::endl;
      }

      // A pass that retired no outputs will never retire any: the inputs of
      // the next pass would be identical. Stop instead of spinning forever.
      if (remaining_output_neurons == outputs_before_block) {
        if (remaining_output_neurons > 0) {
          std::cerr << "generate_classify_layer_code: unable to make progress, "
                    << remaining_output_neurons
                    << " output neuron(s) left ungenerated" << std::endl;
        }
        break;
      }

    } while (remaining_output_neurons > 0);

    // print out summary
    std::cout << stat.code_report(verbose);

    // config entry for classify layer
    std::stringstream ss;
    ss << "CLASS 0 0 0 0 " << num_input_neurons << " " << num_output_neurons << std::endl;

    return ss.str();
}
// Example no. 2
// 0
// File: super.c  Project: cdkamat/tux3
/*
 * tux3_fill_super - initialise a VFS super_block for a tux3 volume.
 * @sb:     VFS superblock to populate
 * @data:   mount data (not used by this function)
 * @silent: when nonzero, suppress the diagnostics guarded by it
 *
 * Allocates the tux3 private superblock, negotiates the block size, loads
 * the on-disk superblock, creates the volmap and logmap inodes, loads the
 * inode table, then looks up the bitmap, root directory and atable inodes
 * and allocates the root dentry.
 *
 * Returns 0 on success or a negative errno on failure. On failure every
 * inode reference taken so far is dropped, the private superblock is freed
 * and sb->s_fs_info is cleared.
 */
static int tux3_fill_super(struct super_block *sb, void *data, int silent)
{
	struct sb *sbi;
	int err, blocksize;

	sbi = kzalloc(sizeof(struct sb), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	sbi->vfs_sb = sb;
	sb->s_fs_info = sbi;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_magic = TUX3_SUPER_MAGIC;
	sb->s_op = &tux3_super_ops;
	sb->s_time_gran = 1;

	mutex_init(&sbi->loglock);
	INIT_LIST_HEAD(&sbi->alloc_inodes);

	err = -EIO;
	blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
	if (!blocksize) {
		if (!silent)
			printk(KERN_ERR "TUX3: unable to set blocksize\n");
		goto error;
	}

	if ((err = load_sb(tux_sb(sb)))) {
		if (!silent) {
			if (err == -EINVAL)
				warn("invalid superblock [%Lx]",
					(L)from_be_u64(*(be_u64 *)sbi->super.magic));
			else
				warn("Unable to read superblock");
		}
		goto error;
	}

	if (sbi->blocksize != blocksize) {
		if (!sb_set_blocksize(sb, sbi->blocksize)) {
			printk(KERN_ERR "TUX3: blocksize too small for device.\n");
			/*
			 * err is 0 here (load_sb succeeded); without this the
			 * mount would report success after freeing sbi.
			 */
			err = -EINVAL;
			goto error;
		}
	}
	warn("s_blocksize %lu", sb->s_blocksize);

	err = -ENOMEM;
	sbi->volmap = tux_new_volmap(tux_sb(sb));
	if (!sbi->volmap)
		goto error;
	insert_inode_hash(sbi->volmap);

	sbi->logmap = tux_new_logmap(tux_sb(sb));
	if (!sbi->logmap)
		goto error_logmap;

	/* Nothing beyond logmap/volmap is held yet, so unwind from there. */
	err = load_itable(sbi);
	if (err)
		goto error_bitmap;

//	struct inode *vtable;
	sbi->bitmap = tux3_iget(sb, TUX_BITMAP_INO);
	if (IS_ERR(sbi->bitmap)) {
		err = PTR_ERR(sbi->bitmap);
		goto error_bitmap;
	}

	sbi->rootdir = tux3_iget(sb, TUX_ROOTDIR_INO);
	if (IS_ERR(sbi->rootdir)) {
		err = PTR_ERR(sbi->rootdir);
		goto error_rootdir;
	}

	sbi->atable = tux3_iget(sb, TUX_ATABLE_INO);
	if (IS_ERR(sbi->atable)) {
		err = PTR_ERR(sbi->atable);
		goto error_atable;
	}

	/*
	 * Set the code explicitly: previously err held PTR_ERR() of the
	 * valid atable pointer at this point, i.e. a meaningless value.
	 */
	err = -ENOMEM;
	sb->s_root = d_alloc_root(sbi->rootdir);
	if (!sb->s_root)
		goto error_alloc_root;

	return 0;

	/*
	 * Unwind ladder: each label releases what was acquired before the
	 * step it is named after failed, then falls through to the next.
	 */
error_alloc_root:
	iput(sbi->atable);
error_atable:
	iput(sbi->rootdir);
error_rootdir:
	iput(sbi->bitmap);
error_bitmap:
	iput(sbi->logmap);
error_logmap:
	iput(sbi->volmap);
error:
	/* Do not leave the VFS superblock pointing at freed memory. */
	sb->s_fs_info = NULL;
	kfree(sbi);
	return err;
}