void Feature:: createLam(int which, ECString tmp, ECString path) { ECString npath("/ltmp/discrim/FOLDS/"); int b,f; int tot = Feature::total[which]; for(int i = 0 ; i < 20 ; i++) { ECString ftstr(npath); ftstr += intToString(i) + "/"; ftstr += tmp; ftstr += ".lambdas"; ifstream fts(ftstr.c_str()); assert(fts); for(b = 1; b < 15 ; b++) { int bb; if(!fts) { cerr << "Trouble reading lambs for " << which << " in " << ftstr << endl; assert(fts); } fts >> bb ; //cerr << bb << endl; if(bb != b) { cerr << tmp << " " << b << " " << bb << endl; assert(bb == b); } for(f = 2; f <= tot ; f++) { float lam; assert(fts); fts >> lam; //cerr << which << " " << f << " " << b << " " << lam << endl; if(i == 0) Feature::setLambda(which,f,b,lam); else lamVal(which,f,b) += lam; } } } for(b = 1; b < 15 ; b++) for(f = 2; f <= tot ; f++) lamVal(which,f,b) /= 20.0; ECString ftstr(path); ftstr += tmp; ftstr += ".lambdas"; ofstream res(ftstr.c_str()); assert(res); res.precision(3); printLambdas(res); }
// Loads the lambda table for calculation `which` from
// "<path><tmp>.lambdas" and stores every value with setLambda().
//
//   which - index of the calculation; Feature::total[which] gives the
//           highest feature index f that is read (f runs from 2).
//   tmp   - base name of the lambda file (".lambdas" is appended).
//   path  - directory prefix of the file.
//
// Every extraction is validated *after* it is performed, so a file whose
// last value is missing or unparsable trips an assertion instead of
// silently storing a bogus lambda.
void
Feature::
readLam(int which, ECString tmp, ECString path)
{
  ECString ftstr(path);
  ftstr += tmp;
  ftstr += ".lambdas";
  ifstream fts(ftstr.c_str());
  assert(fts);
  const int tot = Feature::total[which];

  // wul: the lambdas file has 14 rows; each row is formatted as follows
  // wul: (the first column is the row index):
  // wul: 2 0 0.329 0.336 0.00178 0.353 0.00129 0.158 0.417
  for(int b = 1 ; b < 15 ; b++)
    {
      int bb = 0;
      fts >> bb;
      assert(fts);
      assert(bb == b);   // the row label must match the expected row
      // wul: read the remaining columns.  Different `which` values
      // wul: (e.g. ru, u, ...) have different numbers of columns,
      // wul: determined by their number of features.
      for(int f = 2 ; f <= tot ; f++)
        {
          float lam = 0;
          fts >> lam;
          assert(fts);
          // wul: the lambda table is a bit like a database; record the
          // wul: value here.  b denotes one instance (the row number).
          Feature::setLambda(which,f,b,lam);
        }
    }
}
// Sets the parser up for PARSE usage: registers the sub-feature functions
// and then, for every calculation index below Feature::numCalcs, runs
// Feature::init() and (depending on the calculation's name) loads its
// "<path><name>.g" feature tree and its lambda table.
//
//   path - directory prefix of all data files read here.
void
MeChart::
init(ECString path)
{
  Feat::Usage = PARSE;
  addEdgeSubFeatureFns();
  addSubFeatureFns();

  // file-name stem of each calculation, indexed by calculation number
  ECString tmpA[MAXNUMCALCS] = {"r","h","u","m","l","lm","ru","rm","tt",
                                "s","t","ww","dummy","dummy","dummy"};

  for(int calc = 0 ; calc < Feature::numCalcs ; ++calc)
    {
      ECString name = tmpA[calc];
      Feature::init(path, name);

      // "s" and "t" get neither a feature tree nor lambdas
      const bool wantsTree = !(name == "s" || name == "t");
      if(wantsTree)
        {
          ECString treeFile(path);
          treeFile += name;
          treeFile += ".g";
          ifstream treeIn(treeFile.c_str());
          if(!treeIn)
            {
              cerr << "could not find " << treeFile << endl;
            }
          assert(treeIn);
          // Per the original note the constructor "puts it in root";
          // the pointer itself is not used any further in this function.
          FeatureTree* tree = new FeatureTree(treeIn);

          // "ww" has a tree but no lambda file
          const bool wantsLambdas = !(name == "ww");
          if(wantsLambdas)
            {
              Feature::readLam(calc, name, path);
            }
        }
    }

  // Larger of the TCALC and SCALC totals; only referenced by the
  // disabled sanity checks below.
  const int tSz = Feature::total[TCALC];
  const int sSz = Feature::total[SCALC];
  int cntxSzReq = (sSz > tSz) ? sSz : tSz;
  //assert(CntxArray::sz == cntxSzReq);
  //assert(CntxArray::sz == (Feature::total[UCALC] -1));
}
void Feature:: readLam(int which, ECString tmp, ECString path) { ECString ftstr(path); ftstr += tmp; ftstr += ".lambdas"; ifstream fts(ftstr.c_str()); assert(fts); int b,f; int tot = Feature::total[which]; /* The standard training programs never read in lambdas. Only getProbs does, and it uses the new bucketing, which uses the next for loop */ for(f = 2 ; f <= tot ; f++) { float logBase; fts >> logBase; /*JT: this used to be logFacs[f][which] = 1.0/log(logBase), but that occurs in bucket */ logFacs[which][f] = logBase; } for(b = 1; b < 15 ; b++) { int bb; if(!fts) { cerr << "Trouble reading lambs for " << which << " in " << ftstr << endl; assert(fts); } fts >> bb ; //cerr << bb << endl; if(bb != b) { cerr << tmp << " " << b << " " << bb << endl; assert(bb == b); } for(f = 2; f <= tot ; f++) { float lam; assert(fts); fts >> lam; //cerr << which << " " << f << " " << b << " " << lam << endl; Feature::setLambda(which,f,b,lam); } } }
void Feature:: readLam(int which, ECString tmp, ECString path) { ECString ftstr(path); ftstr += tmp; ftstr += ".lambdas"; ifstream fts(ftstr.c_str()); assert(fts); int b,f; int tot = Feature::total[which]; if(Feature::isLM or Feature::useExtraConditioning) { /* This for loop is removed for old bucketing; */ for(f = 2 ; f <= tot ; f++) { float logBase; fts >> logBase; logFacs[which][f] = 1.0/log(logBase); } } for(b = 1; b < 15 ; b++) { int bb; if(!fts) { cerr << "Trouble reading lambs for " << which << " in " << ftstr << endl; assert(fts); } fts >> bb ; //cerr << bb << endl; if(bb != b) { cerr << tmp << " " << b << " " << bb << endl; assert(bb == b); } for(f = 2; f <= tot ; f++) { float lam; assert(fts); fts >> lam; //cerr << which << " " << f << " " << b << " " << lam << endl; Feature::setLambda(which,f,b,lam); } } }
int main(int argc, char *argv[]) { struct rlimit core_limits; core_limits.rlim_cur = 0; core_limits.rlim_max = 0; setrlimit( RLIMIT_CORE, &core_limits ); ECArgs args( argc, argv ); assert(args.nargs() == 2); conditionedType = args.arg(0); cerr << "start trainRs: " << conditionedType << endl; ECString path( args.arg( 1 ) ); if(args.isset('L')) Feature::setLM(); Term::init(path); readHeadInfo(path); Pst pst(path); if(Feature::isLM) ClassRule::readCRules(path); addSubFeatureFns(); Feature::init(path, conditionedType); whichInt = Feature::whichInt; int ceFunInt = Feature::conditionedFeatureInt[Feature::whichInt]; Feature::conditionedEvent = SubFeature::Funs[ceFunInt]; Feat::Usage = PARSE; ECString ftstr(path); ftstr += conditionedType; ftstr += ".g"; ifstream fts(ftstr.c_str()); if(!fts) { cerr << "Could not find " << ftstr << endl; assert(fts); } tRoot = new FeatureTree(fts); //puts it in root; cout.precision(3); cerr.precision(3); lamInit(); InputTree* trainingData[1001]; int usedCount = 0; sentenceCount = 0; for( ; ; sentenceCount++) { if(sentenceCount%10000 == 1) { // cerr << conditionedType << ".tr " //<< sentenceCount << endl; } if(usedCount >= 1000) break; InputTree* correct = new InputTree; cin >> (*correct); if(correct->length() == 0) break; if(!cin) break; EcSPairs wtList; correct->make(wtList); InputTree* par; par = correct; trainingData[usedCount++] = par; } if(Feature::isLM) pickLogBases(trainingData,sentenceCount); procGSwitch = true; for(pass = 0 ; pass < 10 ; pass++) { if(pass%2 == 1) cout << "Pass " << pass << endl; goThroughSents(trainingData, sentenceCount); updateLambdas(); //printLambdas(cout); zeroData(); } ECString resS(path); resS += conditionedType; resS += ".lambdas"; ofstream res(resS.c_str()); res.precision(3); printLambdas(res); printLambdas(cout); cout << "Total params = " << FeatureTree::totParams << endl; cout << "Done: " << (int)sbrk(0) << endl; }