void TTransCorpus::SaveSgm(const TStr& OutOrgFNm, const TStr& OutTransFNm, const TStr& OutRefTransFNm) { // open files PSOut OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OutOrgFNm) : NULL; PSOut TransSOut = !OutTransFNm.Empty() ? TFOut::New(OutTransFNm) : NULL; PSOut RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(OutRefTransFNm) : NULL; // check which are given const bool IsOrgP = !OrgSOut.Empty() && IsOrg(); const bool IsTransP = !TransSOut.Empty() && IsTrans(); const bool IsRefTransP = !RefTransSOut.Empty() && IsRefTrans(); // prepare headers if (IsOrgP) { OrgSOut->PutStrLn("<srcset setid=\"tmp\" srclang=\"source\" trglang=\"target\">"); OrgSOut->PutStrLn("<DOC docid=\"tmpdoc\">"); } if (IsTransP) { TransSOut->PutStrLn("<tstset setid=\"tmp\" srclang=\"source\" trglang=\"target\">"); TransSOut->PutStrLn("<DOC docid=\"tmpdoc\" sysid=\"trans\">"); } if (IsRefTransP) { RefTransSOut->PutStrLn("<refset setid=\"tmp\" srclang=\"source\" trglang=\"target\">"); RefTransSOut->PutStrLn("<DOC docid=\"tmpdoc\" sysid=\"ref\">"); } // output sentences TIntV SentIdV; GetSentIdV(SentIdV); for (int SentIdN = 0; SentIdN < SentIdV.Len(); SentIdN++) { const int SentId = SentIdV[SentIdN]; if (IsOrgP) { OrgSOut->PutStrLn(TStr::Fmt( "<p><seg id=\"%d\">%s</seg></p>", SentId, GetOrgStr(SentId).CStr())); } if (IsTransP) { TransSOut->PutStrLn(TStr::Fmt( "<p><seg id=\"%d\">%s</seg></p>", SentId, GetTransStr(SentId).CStr())); } if (IsRefTransP) { RefTransSOut->PutStrLn(TStr::Fmt( "<p><seg id=\"%d\">%s</seg></p>", SentId, GetRefTransStrV(SentId)[0].CStr())); } } // prepare footers if (IsOrgP) { OrgSOut->PutStrLn("</DOC>"); OrgSOut->PutStrLn("</srcset>"); } if (IsTransP) { TransSOut->PutStrLn("</DOC>"); TransSOut->PutStrLn("</tstset>"); } if (IsRefTransP) { RefTransSOut->PutStrLn("</DOC>"); RefTransSOut->PutStrLn("</refset>"); } }
void TTransCorpus::SaveTxt(const TStr& OutFBase, const TStr& OutOrgFNm, const TStr& OutTransFNm, const TStr& OutRefTransFNm, TStrV& OrgFNmV, TStrV& TransFNmV, TStrV& RefTransFNmV, const int& LinesPerFile) { // prepare filenames OrgFNmV.Clr(); TransFNmV.Clr(); RefTransFNmV.Clr(); if (!OutOrgFNm.Empty()) { OrgFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutOrgFNm)); } if (!OutTransFNm.Empty()) { TransFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutTransFNm)); } if (!OutRefTransFNm.Empty()) { RefTransFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutRefTransFNm)); } // open files PSOut OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OrgFNmV.Last()) : PSOut(); PSOut TransSOut = !OutTransFNm.Empty() ? TFOut::New(TransFNmV.Last()) : PSOut(); PSOut RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(RefTransFNmV.Last()) : PSOut(); // check which are given const bool IsOrgP = !OrgSOut.Empty() && IsOrg(); const bool IsTransP = !TransSOut.Empty() && IsTrans(); const bool IsRefTransP = !RefTransSOut.Empty() && IsRefTrans(); // print warnings if (!IsOrgP) { printf("No original sentences!\n"); } if (!IsTransP) { printf("No machine translation sentences!\n"); } if (!IsRefTransP) { printf("No reference translation sentences!\n"); } // go over all the sentences and store them in the file TIntV SentIdV; GetSentIdV(SentIdV); for (int SentIdN = 0; SentIdN < SentIdV.Len(); SentIdN++) { const int SentId = SentIdV[SentIdN]; if (IsOrgP) { OrgSOut->PutStrLn(GetOrgStr(SentId)); } if (IsTransP) { TransSOut->PutStrLn(GetTransStr(SentId)); } if (IsRefTransP) { RefTransSOut->PutStrLn(GetRefTransStrV(SentId)[0]); } // should we break and go to next file? if ((LinesPerFile!=-1) && (SentIdN>0) && (SentIdN % LinesPerFile == 0)) { // prepare filenames if (!OutOrgFNm.Empty()) { OrgFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutOrgFNm)); } if (!OutTransFNm.Empty()) { TransFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutTransFNm)); } if (!OutRefTransFNm.Empty()) { RefTransFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutRefTransFNm)); } // open next files files OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OrgFNmV.Last()) : PSOut(); TransSOut = !OutTransFNm.Empty() ? TFOut::New(TransFNmV.Last()) : PSOut(); RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(RefTransFNmV.Last()) : PSOut(); } } }