Example #1: Parallel::Forward
// Runs forward propagation of activations on the input line.
// See NetworkCpp for a detailed discussion of the arguments.
void Parallel::Forward(bool debug, const NetworkIO& input,
                       const TransposedArray* input_transpose,
                       NetworkScratch* scratch, NetworkIO* output) {
  bool parallel_debug = false;
  // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair,
  // or a 2-d LSTM quad, do debug locally, and don't pass the flag on.
  if (debug && type_ != NT_PARALLEL) {
    parallel_debug = true;
    debug = false;
  }
  int stack_size = stack_.size();
  if (type_ == NT_PAR_2D_LSTM) {
    // Special case, run parallel in parallel.
    GenericVector<NetworkScratch::IO> results;
    results.init_to_size(stack_size, NetworkScratch::IO());
    for (int i = 0; i < stack_size; ++i) {
      results[i].Resize(input, stack_[i]->NumOutputs(), scratch);
    }
#ifdef _OPENMP
#pragma omp parallel for num_threads(stack_size)
#endif
    for (int i = 0; i < stack_size; ++i) {
      stack_[i]->Forward(debug, input, nullptr, scratch, results[i]);
    }
    // Now pack all the results (serially) into the output.
    int out_offset = 0;
    output->Resize(*results[0], NumOutputs());
    for (int i = 0; i < stack_size; ++i) {
      out_offset = output->CopyPacking(*results[i], out_offset);
    }
  } else {
    // Revolving intermediate result.
    NetworkScratch::IO result(input, scratch);
    // Source for divided replicated.
    NetworkScratch::IO source_part;
    TransposedArray* src_transpose = nullptr;
    if (IsTraining() && type_ == NT_REPLICATED) {
      // Make a transposed copy of the input.
      input.Transpose(&transposed_input_);
      src_transpose = &transposed_input_;
    }
    // Run each network, putting the outputs into result.
    int out_offset = 0;
    for (int i = 0; i < stack_size; ++i) {
      stack_[i]->Forward(debug, input, src_transpose, scratch, result);
      // All networks must have the same output width.
      if (i == 0) {
        output->Resize(*result, NumOutputs());
      } else {
        ASSERT_HOST(result->Width() == output->Width());
      }
      out_offset = output->CopyPacking(*result, out_offset);
    }
  }
  if (parallel_debug) {
    DisplayForward(*output);
  }
}
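
The non-2-D branch above runs each stacked network in turn and packs its output next to the others at every timestep; CopyPacking advances out_offset by each network's output size, so the combined width is the sum of the sub-network output sizes. A minimal standalone sketch of that packing idea, using plain std::vector rather than the NetworkIO/CopyPacking API (all names here are illustrative):

#include <cassert>
#include <vector>

// Packs the per-timestep outputs of several sub-networks side by side, so
// timestep t of the result holds [net0 outputs | net1 outputs | ...].
// Hypothetical simplification of the packing done by CopyPacking above.
std::vector<std::vector<float>> PackOutputs(
    const std::vector<std::vector<std::vector<float>>>& per_net) {
  assert(!per_net.empty());
  const size_t width = per_net[0].size();  // Timesteps; must match across nets.
  std::vector<std::vector<float>> packed(width);
  for (size_t t = 0; t < width; ++t) {
    for (const auto& net : per_net) {
      assert(net.size() == width);  // All networks share the same output width.
      packed[t].insert(packed[t].end(), net[t].begin(), net[t].end());
    }
  }
  return packed;
}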
Example #2: LSTM::Forward
// Runs forward propagation of activations on the input line.
// See NetworkCpp for a detailed discussion of the arguments.
void LSTM::Forward(bool debug, const NetworkIO& input,
                   const TransposedArray* input_transpose,
                   NetworkScratch* scratch, NetworkIO* output) {
  input_map_ = input.stride_map();
  input_width_ = input.Width();
  if (softmax_ != NULL)
    output->ResizeFloat(input, no_);
  else if (type_ == NT_LSTM_SUMMARY)
    output->ResizeXTo1(input, no_);
  else
    output->Resize(input, no_);
  ResizeForward(input);
  // Temporary storage of forward computation for each gate.
  NetworkScratch::FloatVec temp_lines[WT_COUNT];
  for (int i = 0; i < WT_COUNT; ++i) temp_lines[i].Init(ns_, scratch);
  // Single timestep buffers for the current/recurrent output and state.
  NetworkScratch::FloatVec curr_state, curr_output;
  curr_state.Init(ns_, scratch);
  ZeroVector<double>(ns_, curr_state);
  curr_output.Init(ns_, scratch);
  ZeroVector<double>(ns_, curr_output);
  // Rotating buffers of width buf_width allow storage of the state and output
  // for the other dimension, used only when working in true 2D mode. The width
  // is enough to hold an entire strip of the major direction.
  int buf_width = Is2D() ? input_map_.Size(FD_WIDTH) : 1;
  GenericVector<NetworkScratch::FloatVec> states, outputs;
  if (Is2D()) {
    states.init_to_size(buf_width, NetworkScratch::FloatVec());
    outputs.init_to_size(buf_width, NetworkScratch::FloatVec());
    for (int i = 0; i < buf_width; ++i) {
      states[i].Init(ns_, scratch);
      ZeroVector<double>(ns_, states[i]);
      outputs[i].Init(ns_, scratch);
      ZeroVector<double>(ns_, outputs[i]);
    }
  }
  // Used only if a softmax LSTM.
  NetworkScratch::FloatVec softmax_output;
  NetworkScratch::IO int_output;
  if (softmax_ != NULL) {
    softmax_output.Init(no_, scratch);
    ZeroVector<double>(no_, softmax_output);
    int rounded_softmax_inputs = gate_weights_[CI].RoundInputs(ns_);
    if (input.int_mode())
      int_output.Resize2d(true, 1, rounded_softmax_inputs, scratch);
    softmax_->SetupForward(input, NULL);
  }
  NetworkScratch::FloatVec curr_input;
  curr_input.Init(na_, scratch);
  StrideMap::Index src_index(input_map_);
  // Used only by NT_LSTM_SUMMARY.
  StrideMap::Index dest_index(output->stride_map());
  do {
    int t = src_index.t();
    // True if there is a valid old state for the 2nd dimension.
    bool valid_2d = Is2D();
    if (valid_2d) {
      StrideMap::Index dim_index(src_index);
      if (!dim_index.AddOffset(-1, FD_HEIGHT)) valid_2d = false;
    }
    // Index of the 2-D revolving buffers (outputs, states).
    int mod_t = Modulo(t, buf_width);      // Current timestep.
    // Setup the padded input in source.
    source_.CopyTimeStepGeneral(t, 0, ni_, input, t, 0);
    if (softmax_ != NULL) {
      source_.WriteTimeStepPart(t, ni_, nf_, softmax_output);
    }
    source_.WriteTimeStepPart(t, ni_ + nf_, ns_, curr_output);
    if (Is2D())
      source_.WriteTimeStepPart(t, ni_ + nf_ + ns_, ns_, outputs[mod_t]);
    if (!source_.int_mode()) source_.ReadTimeStep(t, curr_input);
    // Matrix multiply the inputs with the source.
    PARALLEL_IF_OPENMP(GFS)
    // It looks inefficient to create the threads on each t iteration, but the
    // alternative (a parallel region outside the t loop, a single around the
    // t-loop, and tasks in place of the sections) is a *lot* slower.
    // Cell inputs.
    if (source_.int_mode())
      gate_weights_[CI].MatrixDotVector(source_.i(t), temp_lines[CI]);
    else
      gate_weights_[CI].MatrixDotVector(curr_input, temp_lines[CI]);
    FuncInplace<GFunc>(ns_, temp_lines[CI]);

    SECTION_IF_OPENMP
    // Input Gates.
    if (source_.int_mode())
      gate_weights_[GI].MatrixDotVector(source_.i(t), temp_lines[GI]);
    else
      gate_weights_[GI].MatrixDotVector(curr_input, temp_lines[GI]);
    FuncInplace<FFunc>(ns_, temp_lines[GI]);

    SECTION_IF_OPENMP
    // 1-D forget gates.
    if (source_.int_mode())
      gate_weights_[GF1].MatrixDotVector(source_.i(t), temp_lines[GF1]);
    else
      gate_weights_[GF1].MatrixDotVector(curr_input, temp_lines[GF1]);
    FuncInplace<FFunc>(ns_, temp_lines[GF1]);

    // 2-D forget gates.
    if (Is2D()) {
      if (source_.int_mode())
        gate_weights_[GFS].MatrixDotVector(source_.i(t), temp_lines[GFS]);
      else
        gate_weights_[GFS].MatrixDotVector(curr_input, temp_lines[GFS]);
      FuncInplace<FFunc>(ns_, temp_lines[GFS]);
    }

    SECTION_IF_OPENMP
    // Output gates.
    if (source_.int_mode())
      gate_weights_[GO].MatrixDotVector(source_.i(t), temp_lines[GO]);
    else
      gate_weights_[GO].MatrixDotVector(curr_input, temp_lines[GO]);
    FuncInplace<FFunc>(ns_, temp_lines[GO]);
    END_PARALLEL_IF_OPENMP

    // Apply forget gate to state.
    MultiplyVectorsInPlace(ns_, temp_lines[GF1], curr_state);
    if (Is2D()) {
      // Max-pool the forget gates (in 2-d) instead of blindly adding.
      inT8* which_fg_col = which_fg_[t];
      memset(which_fg_col, 1, ns_ * sizeof(which_fg_col[0]));
      if (valid_2d) {
        const double* stepped_state = states[mod_t];
        for (int i = 0; i < ns_; ++i) {
          if (temp_lines[GF1][i] < temp_lines[GFS][i]) {
            curr_state[i] = temp_lines[GFS][i] * stepped_state[i];
            which_fg_col[i] = 2;
          }
        }
      }
    }
    MultiplyAccumulate(ns_, temp_lines[CI], temp_lines[GI], curr_state);
    // Clip curr_state to a sane range.
    ClipVector<double>(ns_, -kStateClip, kStateClip, curr_state);
    if (IsTraining()) {
      // Save the gate node values.
      node_values_[CI].WriteTimeStep(t, temp_lines[CI]);
      node_values_[GI].WriteTimeStep(t, temp_lines[GI]);
      node_values_[GF1].WriteTimeStep(t, temp_lines[GF1]);
      node_values_[GO].WriteTimeStep(t, temp_lines[GO]);
      if (Is2D()) node_values_[GFS].WriteTimeStep(t, temp_lines[GFS]);
    }
    FuncMultiply<HFunc>(curr_state, temp_lines[GO], ns_, curr_output);
    if (IsTraining()) state_.WriteTimeStep(t, curr_state);
    if (softmax_ != NULL) {
      if (input.int_mode()) {
        int_output->WriteTimeStepPart(0, 0, ns_, curr_output);
        softmax_->ForwardTimeStep(NULL, int_output->i(0), t, softmax_output);
      } else {
        softmax_->ForwardTimeStep(curr_output, NULL, t, softmax_output);
      }
      output->WriteTimeStep(t, softmax_output);
      if (type_ == NT_LSTM_SOFTMAX_ENCODED) {
        CodeInBinary(no_, nf_, softmax_output);
      }
    } else if (type_ == NT_LSTM_SUMMARY) {
      // Output only at the end of a row.
      if (src_index.IsLast(FD_WIDTH)) {
        output->WriteTimeStep(dest_index.t(), curr_output);
        dest_index.Increment();
      }
    } else {
      output->WriteTimeStep(t, curr_output);
    }
    // Save states for use by the 2nd dimension only if needed.
    if (Is2D()) {
      CopyVector(ns_, curr_state, states[mod_t]);
      CopyVector(ns_, curr_output, outputs[mod_t]);
    }
    // Always zero the states at the end of every row, but only for the major
    // direction. The 2-D state remains intact.
    if (src_index.IsLast(FD_WIDTH)) {
      ZeroVector<double>(ns_, curr_state);
      ZeroVector<double>(ns_, curr_output);
    }
  } while (src_index.Increment());
#if DEBUG_DETAIL > 0
  tprintf("Source:%s\n", name_.string());
  source_.Print(10);
  tprintf("State:%s\n", name_.string());
  state_.Print(10);
  tprintf("Output:%s\n", name_.string());
  output->Print(10);
#endif
  if (debug) DisplayForward(*output);
}
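
At each timestep the loop above computes the four gate pre-activations with matrix-vector products, squashes them in place, and then updates the cell state and output. A minimal single-cell sketch of that update, assuming the conventional nonlinearities (tanh for the cell input and the state squashing via GFunc/HFunc, logistic sigmoid for the gates via FFunc); the weight handling, softmax feedback, and 2-D forget-gate max-pooling are omitted, and the names are illustrative rather than Tesseract API:

#include <algorithm>
#include <cmath>

struct CellStep {
  double state;   // New cell state (curr_state above).
  double output;  // New cell output (curr_output above).
};

inline double Sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

// ci, gi, gf, go are the raw gate pre-activations for one cell;
// prev_state is the cell state from the previous timestep.
CellStep LstmStep(double ci, double gi, double gf, double go,
                  double prev_state, double state_clip) {
  CellStep next;
  // Forget-gated previous state plus input-gated cell input.
  next.state = Sigmoid(gf) * prev_state + Sigmoid(gi) * std::tanh(ci);
  // Clip the state to a sane range (kStateClip in the code above).
  next.state = std::min(state_clip, std::max(-state_clip, next.state));
  // Output gate applied to the squashed state.
  next.output = Sigmoid(go) * std::tanh(next.state);
  return next;
}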
Example #3: LSTMRecognizer::RecognizeLine
// Recognizes the image_data, returning the labels,
// scores, and corresponding pairs of start, end x-coords in coords.
bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
                                   bool debug, bool re_invert,
                                   float* scale_factor, NetworkIO* inputs,
                                   NetworkIO* outputs) {
  // Maximum width of image to train on.
  const int kMaxImageWidth = 2560;
  // This ensures consistent recognition results.
  SetRandomSeed();
  int min_width = network_->XScaleFactor();
  Pix* pix = Input::PrepareLSTMInputs(image_data, network_, min_width,
                                      &randomizer_, scale_factor);
  if (pix == NULL) {
    tprintf("Line cannot be recognized!!\n");
    return false;
  }
  if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
    tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
            pixGetHeight(pix));
    pixDestroy(&pix);
    return false;
  }
  // Reduction factor from image to coords.
  *scale_factor = min_width / *scale_factor;
  inputs->set_int_mode(IsIntMode());
  SetRandomSeed();
  Input::PreparePixInput(network_->InputShape(), pix, &randomizer_, inputs);
  network_->Forward(debug, *inputs, NULL, &scratch_space_, outputs);
  // Check for auto inversion.
  float pos_min, pos_mean, pos_sd;
  OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
  if (invert && pos_min < 0.5) {
    // Run again inverted and see if it is any better.
    NetworkIO inv_inputs, inv_outputs;
    inv_inputs.set_int_mode(IsIntMode());
    SetRandomSeed();
    pixInvert(pix, pix);
    Input::PreparePixInput(network_->InputShape(), pix, &randomizer_,
                           &inv_inputs);
    network_->Forward(debug, inv_inputs, NULL, &scratch_space_, &inv_outputs);
    float inv_min, inv_mean, inv_sd;
    OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
    if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
      // Inverted did better. Use inverted data.
      if (debug) {
        tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
                pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
      }
      *outputs = inv_outputs;
      *inputs = inv_inputs;
    } else if (re_invert) {
      // Inverting was not an improvement, so undo and run again, so the
      // outputs match the best forward result.
      SetRandomSeed();
      network_->Forward(debug, *inputs, NULL, &scratch_space_, outputs);
    }
  }
  pixDestroy(&pix);
  if (debug) {
    GenericVector<int> labels, coords;
    LabelsFromOutputs(*outputs, &labels, &coords);
    DisplayForward(*inputs, labels, coords, "LSTMForward", &debug_win_);
    DebugActivationPath(*outputs, labels, coords);
  }
  return true;
}
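
The inversion check above re-runs the line with inverted polarity when the minimum output activation is weak (below 0.5), and keeps the inverted result only if it is better on every statistic at once. That decision, isolated as a hedged sketch (the struct and names are illustrative, not Tesseract's API):

// The inverted pass wins only if it improves all three output statistics:
// higher minimum activation, higher mean activation, lower standard deviation.
struct ActivationStats {
  float min;
  float mean;
  float sd;
};

bool PreferInverted(const ActivationStats& normal,
                    const ActivationStats& inverted) {
  return inverted.min > normal.min && inverted.mean > normal.mean &&
         inverted.sd < normal.sd;
}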