// Backward substitution on the linear system U*X = B, filling in an existing Matrix X.
//    U and B remain unchanged in this operation; X holds the result.
//    B and X may have multiple columns.
//
//    Requirements (checked): U is square, B has as many rows as U, and X has
//    the same dimensions as B.
//
//    Matrix entries are addressed as data[column][row].  The substitution
//    itself reads only the diagonal of U and the entries above it.
//
//    Returns 0 on success, 1 on a dimension mismatch or when a diagonal entry
//    of U is exactly zero or smaller in magnitude than STOL*InfNorm(U).  In
//    the singular case X has already been overwritten (copy of B, partially
//    processed), so its contents should not be used.
int BackSub(const Matrix& U, Matrix& X, const Matrix& B) {

  // check that matrix sizes match
  if (U.Rows() != B.Rows() || U.Rows() != U.Cols() || 
      B.Cols() != X.Cols() || X.Rows() != U.Rows()) {
    cerr << "BackSub error, incompatible matrix/vector dimensions\n";
    cerr << "  Matrix is " << U.Rows() << " x " << U.Cols() 
         << ",  rhs is " << B.Rows() << " x " << B.Cols()
         << ",  solution is " << X.Rows() << " x " << X.Cols() << endl;
    return 1;
  }
  
  // copy B into X; the solve then proceeds in place on X
  X.Copy(B);

  // analyze matrix for magnitude (scales the singularity tolerance)
  double Umax = InfNorm(U);

  // perform column-oriented Backward Substitution algorithm,
  // working from the last unknown up to the first
  for (long int j=U.Rows()-1; j>=0; j--) {

    const double pivot = U.data[j][j];

    // check for nonzero matrix diagonal; the explicit zero test is needed
    // because the relative test can never trigger when Umax is 0 (all-zero
    // matrix), which would otherwise lead to a division by zero below
    if (pivot == 0.0 || fabs(pivot) < STOL*Umax) {
      cerr << "BackSub error: numerically singular matrix!\n";
      return 1;
    }

    // right-hand-side columns are independent, so handle each one fully
    for (long int k=0; k<X.Cols(); k++) {

      // solve for this row of solution
      X.data[k][j] /= pivot;

      // update all remaining rhs: eliminate unknown j from the rows above it
      for (long int i=0; i<j; i++)
        X.data[k][i] -= U.data[j][i]*X.data[k][j];
    }

  }

  // return success
  return 0;
}
// Forward substitution on the linear system L*X = B, filling in the input Matrix X.
//    L and B remain unchanged in this operation; X holds the result.
//    B and X may have multiple columns.
//
//    Requirements (checked): L is square, B has as many rows as L, and X has
//    the same dimensions as B.
//
//    Matrix entries are addressed as data[column][row].  The substitution
//    itself reads only the diagonal of L and the entries below it.
//
//    Returns 0 on success, 1 on a dimension mismatch or when a diagonal entry
//    of L is exactly zero or smaller in magnitude than STOL*InfNorm(L).  In
//    the singular case X has already been overwritten (copy of B, partially
//    processed), so its contents should not be used.
int FwdSub(const Matrix& L, Matrix& X, const Matrix& B) {

  // check that matrix sizes match
  if (L.Rows() != B.Rows() || L.Rows() != L.Cols() || 
      B.Cols() != X.Cols() || X.Rows() != L.Rows()) {
    cerr << "FwdSub error, illegal matrix/vector dimensions\n";
    cerr << "  Matrix is " << L.Rows() << " x " << L.Cols() 
         << ",  rhs is " << B.Rows() << " x " << B.Cols()
         << ",  solution is " << X.Rows() << " x " << X.Cols() << endl;
    return 1;
  }
  
  // copy B into X; the solve then proceeds in place on X
  X.Copy(B);

  // analyze matrix magnitude (scales the singularity tolerance)
  double Lmax = InfNorm(L);

  // perform column-oriented Forwards Substitution algorithm,
  // working from the first unknown down to the last
  for (long int j=0; j<L.Rows(); j++) {

    const double pivot = L.data[j][j];

    // check for nonzero matrix diagonal; the explicit zero test is needed
    // because the relative test can never trigger when Lmax is 0 (all-zero
    // matrix), which would otherwise lead to a division by zero below
    if (pivot == 0.0 || fabs(pivot) < STOL*Lmax) {
      cerr << "FwdSub error: singular matrix!\n";
      return 1;
    }

    // right-hand-side columns are independent, so handle each one fully
    for (long int k=0; k<X.Cols(); k++) {

      // solve for this row of solution
      X.data[k][j] /= pivot;

      // update all remaining rhs: eliminate unknown j from the rows below it
      for (long int i=j+1; i<L.Rows(); i++)
        X.data[k][i] -= L.data[j][i]*X.data[k][j];
    }

  }

  // return success
  return 0;
}
// Example #3
// 0
// Evaluates a trained network on a numeric data set and prints the average
// cross-entropy loss together with the corresponding perplexity.
//
// Command line: <config> <model> <numeric-data> <order> <batch-size> [lnZ]
//   With the optional lnZ (must be > 0), the network output is shifted by
//   -lnZ and exponentiated before the cross-entropy is computed, instead of
//   using the output directly.
//
// Returns EXIT_SUCCESS after printing the result; EXIT_FAILURE when the
// argument count is wrong or when the data set yields no examples.
int main( int argc, char** argv ) {

    // wrong number of arguments: print usage and bail out
    if ( argc != 6 && argc != 7 ) {
        cerr << KRED;
        cerr << "Usage: " << argv[0] << " <config> <model> <numeric-data> <order> <batch-size> [lnZ]" << endl;
        cerr << "    <config>       : network configuration" << endl;
        cerr << "    <model>        : trained model corresponding to config" << endl;
        cerr << "    <numeric-data> : data set to be evaluated, in numeric form" << endl;
        cerr << "    <order>        : length of context window" << endl;
        cerr << "    <batch-size>   : mini-batch size" << endl;
        cerr << "    [lnZ]          : optional, if set, softmax is not applied" << endl;
        cerr << KNRM;
        return EXIT_FAILURE;
    }

    const char* config = argv[1];
    const char* model = argv[2];
    const char* data = argv[3];
    int order = atoi( argv[4] );
    int batch = atoi( argv[5] );

    // lnZ == 0 doubles as the "not supplied" marker, hence the requirement
    // that a supplied value be strictly positive
    float lnZ = 0.0f;
    if ( argc > 6 ) {
        lnZ = atof( argv[6] );
        ASSERT( lnZ > 0.0f );
    }

    BatchConstructor bc( data, order, batch, 0, false );
    Network network( config );
    network.LoadParam( model );

    Matrix placeholder;
    Matrix buffer;      // scratch space, only used when lnZ is supplied
    double loss = 0.0;  // cross-entropy accumulated over all batches
    int nExample = 0;   // number of output rows accumulated over all batches

    while( bc.HasNext() ) {
        bc.PrepareNext();

        const SubMatrix input = bc.GetInput();
        const SubMatrix target = bc.GetTarget();

        ExtraInfo info( input.Rows(),
                        bc.GetSentenceLength(),
                        false,
                        target,            // actually not used
                        placeholder );    // not used either
        network.Prepare( info );

        const MatrixBase& output = network.Compute( input );

        if ( lnZ == 0.0f ) {
            loss += output.Xent( target );
        }
        else {
            // NOTE(review): presumably the output holds unnormalized
            // log-scores here and lnZ is a fixed log-normalizer -- confirm
            buffer.Reshape( output.Rows(), output.Columns() );
            buffer.Copy( output );
            buffer.Shift( -lnZ );
            buffer.Exp( buffer );
            loss += buffer.Xent( target );
        }
        nExample += output.Rows();
    }

    // without any example the average below would be 0/0 (NaN)
    if ( nExample == 0 ) {
        cerr << KRED << "No examples were read from " << data << KNRM << endl;
        return EXIT_FAILURE;
    }

    double avgLoss = loss / (double)nExample;
    double ppl = exp(avgLoss);
    cout << right
         << CurrentTime() << ") average cross-entropy loss of " << nExample
         << " examples: " << KGRN << avgLoss << KNRM << " or "
         << KGRN << ppl << KNRM << " in PPL " << endl;

    return EXIT_SUCCESS;
}