コード例 #1
0
  // Adds this convolution layer's function to the function tree.
  //
  // Looks up the entry registered under the layer's first bottom name,
  // defines the convolution on top of it (plus the bias term when enabled),
  // derives the output extents from the pad/stride/kernel parameters, applies
  // the schedule, and inserts the result under the layer's first top name.
  // CHECK-fails if init() fails or the bottom name is not in the tree.
  void ConvolutionLayer::build_tree(map<string, BoundedFunc>* func_tree) {
    LOG(INFO) << "Starting to build " << this->name() << endl;
    CHECK(init()) << "Cannot build tree because " << this->name()
                  << " could not be initialized." << endl;

    // Locate the entry that feeds this layer.
    const auto& bottom_name = layer_param_.bottom(0);
    const auto producer = func_tree->find(bottom_name);
    CHECK(producer != func_tree->end()) << "Could not find " <<
      bottom_name << " in the function tree" << endl;

    Func& bottom_func = producer->second.first;
    // Copied on purpose: the extents are rewritten below for this layer.
    array<int, 4> out_dims = producer->second.second;

    const auto& top_name = layer_param_.top(0);
    const ConvolutionParameter& conv_param = layer_param_.convolution_param();

    // Extents of the three weight dimensions that are reduced over.
    const auto kernel_x = weights_.extent(0);
    const auto kernel_y = weights_.extent(1);
    const auto kernel_z = weights_.extent(2);

    Func conv(top_name);
    RDom r(0, kernel_x, 0, kernel_y, 0, kernel_z);

    // The weights are read in reversed order along the first two dimensions
    // (index extent-1-r), and the input is offset by the same amount.
    conv(x, y, c, n) = sum(weights_(kernel_x - 1 - r.x,
                                    kernel_y - 1 - r.y, r.z, c) *
                           bottom_func(x + kernel_x - 1 - r.x,
                                       y + kernel_y - 1 - r.y, r.z, n));
    if (conv_param.bias_term()) {
      conv(x, y, c, n) += bias_(c, 0, 0, 0);
    }

    // Output extents: (in + 2*pad - kernel) / stride + 1 for the first two
    // dimensions; the third takes num() of the first parameter blob.
    const auto& weight_blob = layer_param_.blobs(0);
    const auto pad = conv_param.pad();
    const auto stride = conv_param.stride();
    out_dims[0] = (out_dims[0] + 2*pad - weight_blob.width())/
                  stride + 1;
    out_dims[1] = (out_dims[1] + 2*pad - weight_blob.height())/
                  stride + 1;
    out_dims[2] = weight_blob.num();

    conv.compute_root().vectorize(x, 8).parallel(n);
    func_tree->insert(make_pair(top_name,
                                make_pair(conv, out_dims)));

    LOG(INFO) << top_name << ": {" << out_dims[0] << ", "
              << out_dims[1] << ", " << out_dims[2] << ", " << out_dims[3]
              << "}" << endl;

    LOG(INFO) << "Completed building " << this->name() << endl;
  }
コード例 #2
0
ファイル: shmem_delta.c プロジェクト: neonatura/share
/*
 * Run one xdelta3 command over the primary input file.
 *
 * Configures an xd3_stream for `cmd`, then repeatedly reads up to one window
 * of input from `ifile`, feeds it to the selected input routine and hands
 * every XD3_OUTPUT event to the command's output routine.  After the loop,
 * `ifile` and `sfile` (if non-NULL) are closed, `ofile` is closed (unless it
 * is NULL or option_no_output is set), and the stream is closed.
 *
 *   cmd    command to run; an unsupported value logs "internal error".
 *   ifile  primary input; dereferenced unconditionally, must be non-NULL.
 *   ofile  output file, opened lazily on the first XD3_OUTPUT; may be NULL.
 *   sfile  source file; may be NULL.
 *
 * Returns 0 on success, SHERR_NOMEM if the window buffer cannot be
 * allocated, SHERR_IO if reading the primary input fails, and SHERR_INVAL
 * for every other failure.
 *
 * Once xd3_config_stream() has succeeded, every exit path (success or
 * failure) releases the stream with xd3_free_stream().  Failures inside the
 * read loop leave the files open.  NOTE(review): confirm the caller closes
 * them.  The window buffer is stored in main_bdata/main_bsize and is not
 * released here.  NOTE(review): confirm it is freed elsewhere.
 */
static int _xd3_main_input (xd3_cmd     cmd,
	    _xd3_file   *ifile,
	    _xd3_file   *ofile,
	    _xd3_file   *sfile)
{
  int        ret;
  /* Value returned through the `cleanup` label; only the read failure and
   * the success path override this default. */
  int        status = SHERR_INVAL;
  xd3_stream stream;
  size_t     nread = 0;
  usize_t    winsize;
  int        stream_flags = 0;
  xd3_config config;
  xd3_source source;
  long       start_time;
  int        stdout_only = 0;
  int (*input_func) (xd3_stream*);
  int (*output_func) (xd3_stream*, _xd3_file *);

  memset (& stream, 0, sizeof (stream));
  memset (& source, 0, sizeof (source));
  memset (& config, 0, sizeof (config));

  config.alloc = _xd3_alloc;
  config.freef = _xd3_free1;

  config.iopt_size = option_iopt_size;
  config.sprevsz = option_sprevsz;

  /* The value is not read again in this function; the call is kept as is. */
  start_time = get_millisecs_now ();

  if (option_use_checksum) { stream_flags |= XD3_ADLER32; }

  /* main_input setup. */
  switch ((int) cmd)
  {
#if VCDIFF_TOOLS
    /* The case labels sit inside the if/else bodies on purpose: the switch
     * jumps straight to one label, that single flag is set, the remaining
     * else-branches are skipped, and control falls into the setup shared by
     * all three print commands. */
    if (1) { case CMD_PRINTHDR:   stream_flags |= XD3_JUST_HDR; }
    else if (1) { case CMD_PRINTHDRS:  stream_flags |= XD3_SKIP_WINDOW; }
    else        { case CMD_PRINTDELTA: stream_flags |= XD3_SKIP_EMIT; }
    ifile->flags |= RD_NONEXTERNAL;
    input_func    = xd3_decode_input;
    output_func   = main_print_func;
    stream_flags |= XD3_ADLER32_NOVER;
    stdout_only   = 1;
    break;

    case CMD_RECODE:
    case CMD_MERGE:
    case CMD_MERGE_ARG:
    /* No source will be read */
    stream_flags |= XD3_ADLER32_NOVER | XD3_SKIP_EMIT;
    ifile->flags |= RD_NONEXTERNAL;
    input_func = xd3_decode_input;

    if ((ret = main_init_recode_stream ()))
    {
      return (SHERR_INVAL);
    }

    if (cmd == CMD_RECODE) { output_func = main_recode_func; }
    else                   { output_func = _xd3_merge_func; }
    break;
#endif /* VCDIFF_TOOLS */

#if XD3_ENCODER
    case CMD_ENCODE:
    input_func  = xd3_encode_input;
    output_func = _xd3_write_output;

    if (option_no_compress)      { stream_flags |= XD3_NOCOMPRESS; }

    /* Map the numeric compression level onto a string-matcher config. */
    {
      if (option_level == 0)
      {
        stream_flags |= XD3_NOCOMPRESS;
        config.smatch_cfg = XD3_SMATCH_FASTEST;
      }
      else if (option_level == 1)
      { config.smatch_cfg = XD3_SMATCH_FASTEST; }
      else if (option_level == 2)
      { config.smatch_cfg = XD3_SMATCH_FASTER; }
      else if (option_level <= 5)
      { config.smatch_cfg = XD3_SMATCH_FAST; }
      else if (option_level == 6)
      { config.smatch_cfg = XD3_SMATCH_DEFAULT; }
      else
      { config.smatch_cfg = XD3_SMATCH_SLOW; }
    }
    break;
#endif
    case CMD_DECODE:
    if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; }
    ifile->flags |= RD_NONEXTERNAL;
    input_func    = xd3_decode_input;
    output_func   = _xd3_write_output;
    break;
    default:
    XPR(NT "internal error\n");
    return (SHERR_INVAL);
  }

  main_bsize = winsize = _xd3_get_winsize (ifile);

  if (!(main_bdata = (uint8_t*)_xd3_alloc(NULL, 1, winsize))) {
    return (SHERR_NOMEM);
  }

  config.winsize = winsize;
  config.getblk = _xd3_getblk_func;
  config.flags = stream_flags;

  /* A failed configuration returns directly, as before.  From here on every
   * exit goes through `cleanup` so the stream is always freed. */
  if ((ret = xd3_config_stream (& stream, & config)))
  {
    return (SHERR_INVAL);
  }

#if VCDIFF_TOOLS
  if ((cmd == CMD_MERGE || cmd == CMD_MERGE_ARG) &&
      (ret = xd3_whole_state_init (& stream)))
  {
    XPR(NT XD3_LIB_ERRMSG (& stream, ret));
    goto cleanup;
  }
#endif

  if (cmd != CMD_DECODE)
  {
    /* When not decoding, set source now.  The decoder delays this
     * step until XD3_GOTHEADER. */
    if (sfile && sfile->filename != NULL)
    {
      if ((ret = _xd3_set_source (& stream, cmd, sfile, & source)))
      {
        goto cleanup;
      }

      XD3_ASSERT(stream.src != NULL);
    }
  }

  /* This times each window. */
  get_millisecs_since ();

  /* Main input loop. */
  do
  {
    xoff_t input_offset;
    xoff_t input_remain;
    usize_t try_read;

    input_offset = ifile->nread;

    input_remain = XOFF_T_MAX - input_offset;

    /* Read at most one window, and never past the largest xoff_t offset. */
    try_read = (usize_t) min ((xoff_t) config.winsize, input_remain);

    ret = _xd3_read_primary_input(ifile, main_bdata, try_read, &nread);
    if (ret != 0)
    {
      status = SHERR_IO;
      goto cleanup;
    }

    /* If we've reached EOF tell the stream to flush. */
    if (nread < try_read)
    {
      stream.flags |= XD3_FLUSH;
    }

#if XD3_ENCODER
    /* After the first _xd3_read_primary_input completes, we know
     * all the information needed to encode the application
     * header. */
    if (cmd == CMD_ENCODE &&
        (ret = _xd3_set_appheader (& stream, ifile, sfile)))
    {
      goto cleanup;
    }
#endif
    xd3_avail_input (& stream, main_bdata, nread);

    /* If we read zero bytes after encoding at least one window... */
    if (nread == 0 && stream.current_window > 0) {
      break;
    }

again:
    ret = input_func (& stream);

    switch (ret)
    {
      case XD3_INPUT:
        /* The stream wants more input: jump to the loop condition, which
         * reads another window only if the last read filled a whole one. */
        continue;

      case XD3_GOTHEADER:
        {
          XD3_ASSERT (stream.current_window == 0);

          /* Need to process the appheader as soon as possible.  It may
           * contain a suggested default filename/decompression routine for
           * the ofile, and it may contain default/decompression routine for
           * the sources. */
          if (cmd == CMD_DECODE)
          {
            /* May need to set the sfile->filename if none was given. */
            _xd3_get_appheader (& stream, ifile, ofile, sfile);

            /* Now open the source file.  sfile is treated as optional
             * elsewhere in this function, so check it for NULL before
             * reading its filename. */
            if ((sfile != NULL) &&
                (sfile->filename != NULL) &&
                (ret = _xd3_set_source (& stream, cmd, sfile, & source)))
            {
              goto cleanup;
            }
          }
        }
        /* FALLTHROUGH */
      case XD3_WINSTART:
        {
          /* e.g., set or unset XD3_SKIP_WINDOW. */
          goto again;
        }

      case XD3_OUTPUT:
        {
          /* Defer opening the output file until the stream produces its
           * first output for both encoder and decoder, this way we
           * delay long enough for the decoder to receive the
           * application header.  (Or longer if there are skipped
           * windows, but I can't think of any reason not to delay
           * open.) */
          if (ofile != NULL &&
              ! _xd3_file_isopen (ofile) &&
              (ret = _xd3_open_output (& stream, ofile)) != 0)
          {
            goto cleanup;
          }

          if ((ret = output_func (& stream, ofile)) &&
              (ret != PRINTHDR_SPECIAL))
          {
            goto cleanup;
          }

          /* PRINTHDR_SPECIAL is not an error: stop processing early and
           * finish through the normal shutdown sequence. */
          if (ret == PRINTHDR_SPECIAL)
          {
            xd3_abort_stream (& stream);
            ret = 0;
            goto done;
          }

          ret = 0;

          xd3_consume_output (& stream);
          goto again;
        }

      case XD3_WINFINISH:
        {
          goto again;
        }

      default:
        /* input_func() error */
        if (! option_quiet && ret == XD3_INVALID_INPUT)
        {
          XPR(NT "normally this indicates that the source file is incorrect\n");
          XPR(NT "please verify the source file with sha1sum or equivalent\n");
        }
        goto cleanup;
    }
  }
  while (nread == config.winsize);

done:
  /* Close the inputs. (ifile must be open, sfile may be open) */
  _xd3_file_close (ifile);
  if (sfile != NULL)
  {
    _xd3_file_close (sfile);
  }


  /* If output file is not open yet because of delayed-open, it means
   * we never encountered a window in the delta, but it could have had
   * a VCDIFF header?  TODO: solve this elsewhere.  For now, it prints
   * "nothing to output" below, but the check doesn't happen in case
   * of option_no_output.  */
  if (! option_no_output && ofile != NULL)
  {
    if (!stdout_only && ! _xd3_file_isopen (ofile))
    {
      XPR(NT "nothing to output: %s\n", ifile->filename);
      goto cleanup;
    }

    /* Have to close the output before calling
     * main_external_compression_finish, or else it hangs. */
    if (_xd3_file_close (ofile) != 0)
    {
      goto cleanup;
    }
  }

#if EXTERNAL_COMPRESSION
  if ((ret = main_external_compression_finish ()))
  {
    XPR(NT "external compression commands failed\n");
    goto cleanup;
  }
#endif

  if ((ret = xd3_close_stream (& stream)))
  {
    goto cleanup;
  }

  status = 0;

cleanup:
  /* Single exit for everything after a successful xd3_config_stream(). */
  xd3_free_stream (& stream);

  return (status);
}