// Builds the Func for this convolution layer and registers it in the
// function tree.
//
// The producer is looked up in `func_tree` under the layer's first bottom
// name; the resulting Func is inserted under the layer's first top name
// together with its updated 4-element dims array.  Both a failed init() and
// a missing producer abort through CHECK.
//
// NOTE(review): pad and stride only feed the reported output dims below; the
// Func definition itself indexes the producer at unit step with no padding
// term -- confirm this is intended for pad != 0 or stride != 1.
void ConvolutionLayer::build_tree(map<string, BoundedFunc>* func_tree) {
  LOG(INFO) << "Starting to build " << this->name() << endl;
  CHECK(init()) << "Cannot build tree because " << this->name()
                << " could not be initialized." << endl;

  const string& bottom_name = layer_param_.bottom(0);
  const string& top_name = layer_param_.top(0);
  const ConvolutionParameter& params = layer_param_.convolution_param();

  // Find the Func (and its dims) that this layer consumes.
  typedef map<string, BoundedFunc>::iterator TreeIterator;
  TreeIterator producer = func_tree->find(bottom_name);
  CHECK(producer != func_tree->end())
      << "Could not find " << bottom_name << " in the function tree" << endl;

  BoundedFunc& producer_entry = producer->second;
  Func& input_func = producer_entry.first;
  // Work on a copy of the dims so the producer's entry is left untouched.
  array<int, 4> out_dims = producer_entry.second;

  // Reduction domain over the first three extents of the weights.
  Func layer_func(top_name);
  RDom taps(0, weights_.extent(0),
            0, weights_.extent(1),
            0, weights_.extent(2));

  // The first two weight coordinates are walked from the last index down to
  // zero, and the same offset is added to the producer coordinates.
  layer_func(x, y, c, n) = sum(
      weights_(weights_.extent(0)-1-taps.x,
               weights_.extent(1)-1-taps.y,
               taps.z,
               c) *
      input_func(x+weights_.extent(0)-1-taps.x,
                 y+weights_.extent(1)-1-taps.y,
                 taps.z,
                 n));

  if (params.bias_term()) {
    layer_func(x, y, c, n) += bias_(c, 0, 0, 0);
  }

  // Output extents; the arithmetic is kept exactly as written so integer
  // promotion and truncation are unchanged.
  out_dims[0] = (out_dims[0] + 2*params.pad() - layer_param_.blobs(0).width())/
      params.stride() + 1;
  out_dims[1] = (out_dims[1] + 2*params.pad() - layer_param_.blobs(0).height())/
      params.stride() + 1;
  out_dims[2] = layer_param_.blobs(0).num();

  layer_func.compute_root().vectorize(x, 8).parallel(n);

  func_tree->insert(make_pair(top_name, make_pair(layer_func, out_dims)));

  LOG(INFO) << top_name << ": {"
            << out_dims[0] << ", "
            << out_dims[1] << ", "
            << out_dims[2] << ", "
            << out_dims[3] << "}" << endl;
  LOG(INFO) << "Completed building " << this->name() << endl;
}
/*
 * Drives one xd3 stream over the primary input file.
 *
 * The function (1) selects the stream flags and the input/output callbacks
 * from `cmd`, (2) allocates the shared input buffer (main_bdata/main_bsize)
 * and configures the stream, (3) repeatedly reads up to one window of input
 * and dispatches on the code returned by input_func until a short read, and
 * (4) closes the files and the stream.
 *
 * Parameters:
 *   cmd    selects the command-specific setup in the first switch.
 *   ifile  primary input; read through _xd3_read_primary_input and closed
 *          at `done:`.
 *   ofile  output; may be NULL.  It is opened lazily on the first
 *          XD3_OUTPUT and closed at the end unless option_no_output is set.
 *   sfile  source file.  Most sites here guard against NULL, see the
 *          NOTE(review) in the XD3_GOTHEADER case for the one that does not.
 *
 * Returns 0 on success, SHERR_NOMEM when the input buffer cannot be
 * allocated, SHERR_IO when reading the primary input fails, and SHERR_INVAL
 * for every other failure.
 *
 * NOTE(review): every error `return` below leaves directly and does not run
 * the close/free sequence after `done:`; whether a caller cleans up
 * afterwards is not visible from this function -- confirm.
 */
static int _xd3_main_input (xd3_cmd cmd, _xd3_file *ifile, _xd3_file *ofile, _xd3_file *sfile) {
  int ret;
  xd3_stream stream;
  size_t nread = 0;
  usize_t winsize;
  int stream_flags = 0;
  xd3_config config;
  xd3_source source;
  /* NOTE(review): last_total_in and last_total_out are never referenced
   * again in this function, and start_time is assigned but never read. */
  xoff_t last_total_in = 0;
  xoff_t last_total_out = 0;
  long start_time;
  /* Set for the print commands; suppresses the "nothing to output" check. */
  int stdout_only = 0;
  /* Per-command callbacks chosen by the switch below. */
  int (*input_func) (xd3_stream*);
  int (*output_func) (xd3_stream*, _xd3_file *);

  memset (& stream, 0, sizeof (stream));
  memset (& source, 0, sizeof (source));
  memset (& config, 0, sizeof (config));

  config.alloc = _xd3_alloc;
  config.freef = _xd3_free1;
  config.iopt_size = option_iopt_size;
  config.sprevsz = option_sprevsz;

  start_time = get_millisecs_now ();

  if (option_use_checksum) { stream_flags |= XD3_ADLER32; }

  /* Command-specific setup: stream flags and input/output callbacks. */
  switch ((int) cmd) {
#if VCDIFF_TOOLS
    /* Each case label sits inside its own if/else arm: the switch jumps
     * straight to the label, that arm sets its one flag, the remaining
     * else arms are skipped, and control reaches the shared statements
     * that follow. */
    if (1) { case CMD_PRINTHDR: stream_flags |= XD3_JUST_HDR; }
    else if (1) { case CMD_PRINTHDRS: stream_flags |= XD3_SKIP_WINDOW; }
    else { case CMD_PRINTDELTA: stream_flags |= XD3_SKIP_EMIT; }

    ifile->flags |= RD_NONEXTERNAL;
    input_func = xd3_decode_input;
    output_func = main_print_func;
    stream_flags |= XD3_ADLER32_NOVER;
    stdout_only = 1;
    break;

    case CMD_RECODE:
    case CMD_MERGE:
    case CMD_MERGE_ARG:
      /* No source will be read */
      stream_flags |= XD3_ADLER32_NOVER | XD3_SKIP_EMIT;
      ifile->flags |= RD_NONEXTERNAL;
      input_func = xd3_decode_input;

      if ((ret = main_init_recode_stream ())) { return (SHERR_INVAL); }

      if (cmd == CMD_RECODE) { output_func = main_recode_func; }
      else { output_func = _xd3_merge_func; }
      break;
#endif /* VCDIFF_TOOLS */

#if XD3_ENCODER
    case CMD_ENCODE:
      input_func = xd3_encode_input;
      output_func = _xd3_write_output;

      if (option_no_compress) { stream_flags |= XD3_NOCOMPRESS; }

      /* Map option_level onto a string-match configuration:
       * 0 -> FASTEST plus XD3_NOCOMPRESS, 1 -> FASTEST, 2 -> FASTER,
       * any other value <= 5 -> FAST, 6 -> DEFAULT, otherwise SLOW. */
      {
        if (option_level == 0) { stream_flags |= XD3_NOCOMPRESS; config.smatch_cfg = XD3_SMATCH_FASTEST; }
        else if (option_level == 1) { config.smatch_cfg = XD3_SMATCH_FASTEST; }
        else if (option_level == 2) { config.smatch_cfg = XD3_SMATCH_FASTER; }
        else if (option_level <= 5) { config.smatch_cfg = XD3_SMATCH_FAST; }
        else if (option_level == 6) { config.smatch_cfg = XD3_SMATCH_DEFAULT; }
        else { config.smatch_cfg = XD3_SMATCH_SLOW; }
      }
      break;
#endif

    case CMD_DECODE:
      if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; }
      ifile->flags |= RD_NONEXTERNAL;
      input_func = xd3_decode_input;
      output_func = _xd3_write_output;
      break;

    default:
      XPR(NT "internal error\n");
      return (SHERR_INVAL);
  }

  /* The input buffer holds exactly one window. */
  main_bsize = winsize = _xd3_get_winsize (ifile);

  if (!(main_bdata = (uint8_t*)_xd3_alloc(NULL, 1, winsize))) { return (SHERR_NOMEM); }

  config.winsize = winsize;
  config.getblk = _xd3_getblk_func;
  config.flags = stream_flags;

  if ((ret = xd3_config_stream (& stream, & config))) { return (SHERR_INVAL); }

#if VCDIFF_TOOLS
  if ((cmd == CMD_MERGE || cmd == CMD_MERGE_ARG) && (ret = xd3_whole_state_init (& stream))) { XPR(NT XD3_LIB_ERRMSG (& stream, ret)); return (SHERR_INVAL); }
#endif

  if (cmd != CMD_DECODE) {
    /* When not decoding, set source now.  The decoder delays this
     * step until XD3_GOTHEADER. */
    if (sfile && sfile->filename != NULL) {
      if ((ret = _xd3_set_source (& stream, cmd, sfile, & source))) { return (SHERR_INVAL); }

      XD3_ASSERT(stream.src != NULL);
    }
  }

  /* This times each window. */
  get_millisecs_since ();

  /* Main input loop: one iteration per chunk read from ifile.  The loop
   * condition ends it after the first read that does not fill a window. */
  do {
    xoff_t input_offset;
    xoff_t input_remain;
    usize_t try_read;

    input_offset = ifile->nread;

    /* Never request more bytes than remain before XOFF_T_MAX. */
    input_remain = XOFF_T_MAX - input_offset;

    try_read = (usize_t) min ((xoff_t) config.winsize, input_remain);

    ret = _xd3_read_primary_input(ifile, main_bdata, try_read, &nread);
    if (ret != 0) return (SHERR_IO);

    /* If we've reached EOF tell the stream to flush. */
    if (nread < try_read) { stream.flags |= XD3_FLUSH; }

#if XD3_ENCODER
    /* After the first _xd3_read_primary_input completes, we know
     * all the information needed to encode the application
     * header. */
    if (cmd == CMD_ENCODE && (ret = _xd3_set_appheader (& stream, ifile, sfile))) { return (SHERR_INVAL); }
#endif

    xd3_avail_input (& stream, main_bdata, nread);

    /* If we read zero bytes after encoding at least one window, stop
     * without calling input_func again. */
    if (nread == 0 && stream.current_window > 0) { break; }

    /* Retry point: input_func is called again on the same input after
     * every code other than XD3_INPUT has been handled. */
  again:
    ret = input_func (& stream);

    switch (ret) {
      case XD3_INPUT:
        /* `continue` in a do/while jumps to the loop condition, so the
         * next chunk is read only if the last read filled a whole window. */
        continue;

      case XD3_GOTHEADER: {
        XD3_ASSERT (stream.current_window == 0);

        /* Need to process the appheader as soon as possible.  It may
         * contain a suggested default filename/decompression routine for
         * the ofile, and it may contain default/decompression routine for
         * the sources. */
        if (cmd == CMD_DECODE) {
          /* May need to set the sfile->filename if none was given. */
          _xd3_get_appheader (& stream, ifile, ofile, sfile);

          /* Now open the source file.
           * NOTE(review): sfile is dereferenced here without the NULL check
           * used at the other sfile sites in this function -- confirm that
           * decode callers always pass a non-NULL sfile. */
          if ((sfile->filename != NULL) && (ret = _xd3_set_source (& stream, cmd, sfile, & source))) { return (SHERR_INVAL); }
        }
      }
      /* FALLTHROUGH */
      case XD3_WINSTART: {
        /* e.g., set or unset XD3_SKIP_WINDOW. */
        goto again;
      }

      case XD3_OUTPUT: {
        /* Defer opening the output file until the stream produces its
         * first output for both encoder and decoder, this way we
         * delay long enough for the decoder to receive the
         * application header.  (Or longer if there are skipped
         * windows, but I can't think of any reason not to delay
         * open.) */
        if (ofile != NULL && ! _xd3_file_isopen (ofile) && (ret = _xd3_open_output (& stream, ofile)) != 0) { return (SHERR_INVAL); }

        /* PRINTHDR_SPECIAL is not an error: it is handled just below. */
        if ((ret = output_func (& stream, ofile)) && (ret != PRINTHDR_SPECIAL)) { return (SHERR_INVAL); }

        /* Abort the stream and skip the rest of the input, but still run
         * the normal close sequence at `done:`. */
        if (ret == PRINTHDR_SPECIAL) { xd3_abort_stream (& stream); ret = 0; goto done; }

        ret = 0;

        xd3_consume_output (& stream);
        goto again;
      }

      case XD3_WINFINISH: {
        goto again;
      }

      default:
        /* input_func() error */
        if (! option_quiet && ret == XD3_INVALID_INPUT) {
          XPR(NT "normally this indicates that the source file is incorrect\n");
          XPR(NT "please verify the source file with sha1sum or equivalent\n");
        }
        return (SHERR_INVAL);
    }
  } while (nread == config.winsize);

done:
  /* Close the inputs.  (ifile must be open, sfile may be open) */
  _xd3_file_close (ifile);
  if (sfile != NULL) { _xd3_file_close (sfile); }

  /* If output file is not open yet because of delayed-open, it means
   * we never encountered a window in the delta, but it could have had
   * a VCDIFF header?  TODO: solve this elsewhere.  For now, it prints
   * "nothing to output" below, but the check doesn't happen in case
   * of option_no_output. */
  if (! option_no_output && ofile != NULL) {
    if (!stdout_only && ! _xd3_file_isopen (ofile)) {
      XPR(NT "nothing to output: %s\n", ifile->filename);
      return (SHERR_INVAL);
    }

    /* Have to close the output before calling
     * main_external_compression_finish, or else it hangs. */
    if (_xd3_file_close (ofile) != 0) { return (SHERR_INVAL); }
  }

#if EXTERNAL_COMPRESSION
  if ((ret = main_external_compression_finish ())) {
    XPR(NT "external compression commands failed\n");
    return (SHERR_INVAL);
  }
#endif

  if ((ret = xd3_close_stream (& stream))) { return (SHERR_INVAL); }

  xd3_free_stream (& stream);

  return (0);
}