// backward substitution on the linear system U*X = B, filling in an existing Matrix X int BackSub(const Matrix& U, Matrix& X, const Matrix& B) { // check that matrix sizes match if (U.Rows() != B.Rows() || U.Rows() != U.Cols() || B.Cols() != X.Cols() || X.Rows() != U.Rows()) { cerr << "BackSub error, incompatible matrix/vector dimensions\n"; cerr << " Matrix is " << U.Rows() << " x " << U.Cols() << ", rhs is " << B.Rows() << " x " << B.Cols() << ", solution is " << X.Rows() << " x " << X.Cols() << endl; return 1; } // copy B into X X.Copy(B); // analyze matrix for magnitude double Umax = InfNorm(U); // perform column-oriented Backward Substitution algorithm for (long int j=U.Rows()-1; j>=0; j--) { // check for nonzero matrix diagonal if (fabs(U.data[j][j]) < STOL*Umax) { cerr << "BackSub error: numerically singular matrix!\n"; return 1; } // solve for this row of solution for (long int k=0; k<X.Cols(); k++) X.data[k][j] /= U.data[j][j]; // update all remaining rhs for (long int k=0; k<X.Cols(); k++) for (long int i=0; i<j; i++) X.data[k][i] -= U.data[j][i]*X.data[k][j]; } // return success return 0; }
// forward substitution on the linear system L*X = B, filling in the input Matrix X // L and B remain unchanged in this operation; X holds the result // B and X may have multiple columns int FwdSub(const Matrix& L, Matrix& X, const Matrix& B) { // check that matrix sizes match if (L.Rows() != B.Rows() || L.Rows() != L.Cols() || B.Cols() != X.Cols() || X.Rows() != L.Rows()) { cerr << "FwdSub error, illegal matrix/vector dimensions\n"; cerr << " Matrix is " << L.Rows() << " x " << L.Cols() << ", rhs is " << B.Rows() << " x " << B.Cols() << ", solution is " << X.Rows() << " x " << X.Cols() << endl; return 1; } // copy B into X X.Copy(B); // analyze matrix magnitude double Lmax = InfNorm(L); // perform column-oriented Forwards Substitution algorithm for (long int j=0; j<L.Rows(); j++) { // check for nonzero matrix diagonal if (fabs(L.data[j][j]) < STOL*Lmax) { cerr << "FwdSub error: singular matrix!\n"; return 1; } // solve for this row of solution for (long int k=0; k<X.Cols(); k++) X.data[k][j] /= L.data[j][j]; // update all remaining rhs for (long int k=0; k<X.Cols(); k++) for (long int i=j+1; i<L.Rows(); i++) X.data[k][i] -= L.data[j][i]*X.data[k][j]; } // return success return 0; }
int main( int argc, char** argv ) { if ( argc == 6 || argc == 7 ) { const char* config = argv[1]; const char* model = argv[2]; const char* data = argv[3]; int order = atoi( argv[4] ); int batch = atoi( argv[5] ); float lnZ = 0.0f; if ( argc > 6 ) { lnZ = atof( argv[6] ); ASSERT( lnZ > 0.0f ); } BatchConstructor bc( data, order, batch, 0, false ); Network network( config ); network.LoadParam( model ); Matrix placeholder; Matrix buffer; double loss = 0.0; int nExample = 0; while( bc.HasNext() ) { bc.PrepareNext(); const SubMatrix input = bc.GetInput(); const SubMatrix target = bc.GetTarget(); ExtraInfo info( input.Rows(), bc.GetSentenceLength(), false, target, // actually not used placeholder ); // not used either network.Prepare( info ); const MatrixBase& output = network.Compute( input ); if ( lnZ == 0.0f ) { loss += output.Xent( target ); nExample += output.Rows(); } else { buffer.Reshape( output.Rows(), output.Columns() ); buffer.Copy( output ); buffer.Shift( -lnZ ); buffer.Exp( buffer ); loss += buffer.Xent( target ); nExample += output.Rows(); } } double avgLoss = loss / (double)nExample; double ppl = exp(avgLoss); cout << right << CurrentTime() << ") average cross-entropy loss of " << nExample << " examples: " << KGRN << avgLoss << KNRM << " or " << KGRN << ppl << KNRM << " in PPL " << endl; return EXIT_SUCCESS; } else { cerr << KRED; cerr << "Usag: " << argv[0] << " <config> <model> <numeric-data> <order> <batch-size> [lnZ]" << endl; cerr << " <config> : network configuration" << endl; cerr << " <model> : trained model corresponding to config" << endl; cerr << " <numeric-data> : data set to be evaluated, in numeric form" << endl; cerr << " <order> : length of context window" << endl; cerr << " <batch-size> : mini-batch size" << endl; cerr << " [lnZ] : optional, if set, softmax is not applied" << endl; cerr << KNRM; exit( EXIT_FAILURE ); } }