void TTransCorpus::SaveTxt(const TStr& OutFBase, const TStr& OutOrgFNm, const TStr& OutTransFNm, const TStr& OutRefTransFNm, TStrV& OrgFNmV, TStrV& TransFNmV, TStrV& RefTransFNmV, const int& LinesPerFile) { // prepare filenames OrgFNmV.Clr(); TransFNmV.Clr(); RefTransFNmV.Clr(); if (!OutOrgFNm.Empty()) { OrgFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutOrgFNm)); } if (!OutTransFNm.Empty()) { TransFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutTransFNm)); } if (!OutRefTransFNm.Empty()) { RefTransFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutRefTransFNm)); } // open files PSOut OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OrgFNmV.Last()) : PSOut(); PSOut TransSOut = !OutTransFNm.Empty() ? TFOut::New(TransFNmV.Last()) : PSOut(); PSOut RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(RefTransFNmV.Last()) : PSOut(); // check which are given const bool IsOrgP = !OrgSOut.Empty() && IsOrg(); const bool IsTransP = !TransSOut.Empty() && IsTrans(); const bool IsRefTransP = !RefTransSOut.Empty() && IsRefTrans(); // print warnings if (!IsOrgP) { printf("No original sentences!\n"); } if (!IsTransP) { printf("No machine translation sentences!\n"); } if (!IsRefTransP) { printf("No reference translation sentences!\n"); } // go over all the sentences and store them in the file TIntV SentIdV; GetSentIdV(SentIdV); for (int SentIdN = 0; SentIdN < SentIdV.Len(); SentIdN++) { const int SentId = SentIdV[SentIdN]; if (IsOrgP) { OrgSOut->PutStrLn(GetOrgStr(SentId)); } if (IsTransP) { TransSOut->PutStrLn(GetTransStr(SentId)); } if (IsRefTransP) { RefTransSOut->PutStrLn(GetRefTransStrV(SentId)[0]); } // should we break and go to next file? if ((LinesPerFile!=-1) && (SentIdN>0) && (SentIdN % LinesPerFile == 0)) { // prepare filenames if (!OutOrgFNm.Empty()) { OrgFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutOrgFNm)); } if (!OutTransFNm.Empty()) { TransFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutTransFNm)); } if (!OutRefTransFNm.Empty()) { RefTransFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutRefTransFNm)); } // open next files files OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OrgFNmV.Last()) : PSOut(); TransSOut = !OutTransFNm.Empty() ? TFOut::New(TransFNmV.Last()) : PSOut(); RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(RefTransFNmV.Last()) : PSOut(); } } }
void TTransCorpus::SaveTransScript(const TStr& OutFBase, const TStr& OrgLang, const TStr& TransLang, const TStrV& OrgFNmV, const TStrV& RefTransFNmV) { const int FNms = OrgFNmV.Len(); IAssert(OrgFNmV.Len() == RefTransFNmV.Len()); // make tokenize PSOut TokSOut = TFOut::New(OutFBase + "_tokenize.sh"); for (int FNmN = 0; FNmN < FNms; FNmN++) { TokSOut->PutStrLn("tokenize.pl -lang=" + OrgLang + " " + OrgFNmV[FNmN].GetFBase() + " " + TStr::PutFExt(OrgFNmV[FNmN], ".tok").GetFBase()); TokSOut->PutStrLn("tokenize.pl -lang=" + TransLang + " " + RefTransFNmV[FNmN].GetFBase() + " " + TStr::PutFExt(RefTransFNmV[FNmN], ".tok").GetFBase()); } TokSOut->Flush(); // make align PSOut AlignSOut = TFOut::New(OutFBase + "_align.sh"); for (int FNmN = 0; FNmN < FNms; FNmN++) { AlignSOut->PutStrLn("ssal -f " + TStr::PutFExt(OrgFNmV[FNmN], ".tok").GetFBase() + " " + TStr::PutFExt(RefTransFNmV[FNmN], ".tok").GetFBase()); AlignSOut->PutStrLn("mv " + TStr::PutFExt(OrgFNmV[FNmN], ".tok.al").GetFBase() + " " + TStr::PutFExt(OrgFNmV[FNmN], ".al").GetFBase()); AlignSOut->PutStrLn("mv " + TStr::PutFExt(RefTransFNmV[FNmN], ".tok.al").GetFBase() + " " + TStr::PutFExt(RefTransFNmV[FNmN], ".al").GetFBase()); } AlignSOut->Flush(); // make lowercase PSOut LcSOut = TFOut::New(OutFBase + "_lowercase.sh"); for (int FNmN = 0; FNmN < FNms; FNmN++) { LcSOut->PutStrLn("lc-latin.pl " + TStr::PutFExt(OrgFNmV[FNmN], ".al").GetFBase() + " " + TStr::PutFExt(OrgFNmV[FNmN], ".lc").GetFBase()); LcSOut->PutStrLn("lc-latin.pl " + TStr::PutFExt(RefTransFNmV[FNmN], ".al").GetFBase() + " " + TStr::PutFExt(RefTransFNmV[FNmN], ".lc").GetFBase()); } LcSOut->Flush(); // rest -- script }
int main(int argc, char* argv[]) { TEnv Env(argc, argv); TStr PrefixPath = Env.GetArgs() > 1 ? Env.GetArg(1) : TStr(""); double ts1 = Tick(); TTableContext Context; TVec<TPair<PTable,TStr> > NodeTblV = TVec<TPair<PTable,TStr> >(); TVec<TPair<PTable, int> > EdgeTblV = TVec<TPair<PTable, int> >(); Schema NodeSchema = Schema(); Schema EdgeSchema = Schema(); LoadFlickrTables(PrefixPath, Context, NodeTblV, NodeSchema, EdgeTblV, EdgeSchema); double ts2 = Tick(); THash<TStr,TStrH> NStrH; TIntStrH NIdH; CreateIdHashes(NodeTblV, NStrH, NIdH); double ts3 = Tick(); PSVNet Graph = LoadGraphMNet<PSVNet>(NodeTblV, EdgeTblV, NStrH, NIdH); double ts4 = Tick(); int nExps = 10; int nTriads = 0; for (int i = 0; i < nExps; i++) { nTriads = TSnap::GetTriads(Graph); } double ts5 = Tick(); StdOut->PutStrFmtLn("Triads %d", nTriads); PSOut TimeOut = TFOut::New(PrefixPath + TStr("time.txt"), true); TimeOut->PutStrFmtLn("===== Triad Counting - PSVNet ====="); TimeOut->PutStrLn(Env.GetCmLn()); TimeOut->PutStrFmtLn("Input Time = %f", GetCPUTimeUsage(ts1, ts2)); TimeOut->PutStrFmtLn("Preprocessing Time = %f", GetCPUTimeUsage(ts2, ts3)); TimeOut->PutStrFmtLn("Conversion Time = %f", GetCPUTimeUsage(ts3, ts4)); TimeOut->PutStrFmtLn("Computing Time = %f", GetCPUTimeUsage(ts4, ts5)/nExps); return 0; }
void TTransCorpus::SaveSgm(const TStr& OutOrgFNm, const TStr& OutTransFNm, const TStr& OutRefTransFNm) { // open files PSOut OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OutOrgFNm) : NULL; PSOut TransSOut = !OutTransFNm.Empty() ? TFOut::New(OutTransFNm) : NULL; PSOut RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(OutRefTransFNm) : NULL; // check which are given const bool IsOrgP = !OrgSOut.Empty() && IsOrg(); const bool IsTransP = !TransSOut.Empty() && IsTrans(); const bool IsRefTransP = !RefTransSOut.Empty() && IsRefTrans(); // prepare headers if (IsOrgP) { OrgSOut->PutStrLn("<srcset setid=\"tmp\" srclang=\"source\" trglang=\"target\">"); OrgSOut->PutStrLn("<DOC docid=\"tmpdoc\">"); } if (IsTransP) { TransSOut->PutStrLn("<tstset setid=\"tmp\" srclang=\"source\" trglang=\"target\">"); TransSOut->PutStrLn("<DOC docid=\"tmpdoc\" sysid=\"trans\">"); } if (IsRefTransP) { RefTransSOut->PutStrLn("<refset setid=\"tmp\" srclang=\"source\" trglang=\"target\">"); RefTransSOut->PutStrLn("<DOC docid=\"tmpdoc\" sysid=\"ref\">"); } // output sentences TIntV SentIdV; GetSentIdV(SentIdV); for (int SentIdN = 0; SentIdN < SentIdV.Len(); SentIdN++) { const int SentId = SentIdV[SentIdN]; if (IsOrgP) { OrgSOut->PutStrLn(TStr::Fmt( "<p><seg id=\"%d\">%s</seg></p>", SentId, GetOrgStr(SentId).CStr())); } if (IsTransP) { TransSOut->PutStrLn(TStr::Fmt( "<p><seg id=\"%d\">%s</seg></p>", SentId, GetTransStr(SentId).CStr())); } if (IsRefTransP) { RefTransSOut->PutStrLn(TStr::Fmt( "<p><seg id=\"%d\">%s</seg></p>", SentId, GetRefTransStrV(SentId)[0].CStr())); } } // prepare footers if (IsOrgP) { OrgSOut->PutStrLn("</DOC>"); OrgSOut->PutStrLn("</srcset>"); } if (IsTransP) { TransSOut->PutStrLn("</DOC>"); TransSOut->PutStrLn("</tstset>"); } if (IsRefTransP) { RefTransSOut->PutStrLn("</DOC>"); RefTransSOut->PutStrLn("</refset>"); } }