// Writes the corpus as plain text, one sentence per line, into up to three
// parallel outputs: original sentences, machine translations and reference
// translations. An output is produced only when its file-name argument
// (OutOrgFNm / OutTransFNm / OutRefTransFNm) is non-empty; sentences are
// written to it only if the corpus also reports having that kind of data
// (IsOrg() / IsTrans() / IsRefTrans()), otherwise a warning is printed.
//
// When LinesPerFile is positive, the output is split so that every file
// holds exactly LinesPerFile sentences (the last one may hold fewer). Any
// non-positive value (-1 is the conventional "do not split" value) writes
// everything into a single file per output.
//
// OrgFNmV, TransFNmV and RefTransFNmV are output parameters: they are
// cleared and then filled with the names of all files that were opened,
// in the order they were opened. Names come from GetOutFNm(), which is
// given the base name, the index of the first sentence of the file, the
// chunk size and the per-output name.
//
// For reference translations only the first entry of GetRefTransStrV()
// is written.
void TTransCorpus::SaveTxt(const TStr& OutFBase, const TStr& OutOrgFNm, 
        const TStr& OutTransFNm, const TStr& OutRefTransFNm, TStrV& OrgFNmV, 
        TStrV& TransFNmV, TStrV& RefTransFNmV, const int& LinesPerFile) {
    
    // split only for a positive chunk size; this also keeps the modulo
    // below well defined when LinesPerFile is 0
    const bool SplitP = (LinesPerFile > 0);
    // prepare filenames
    OrgFNmV.Clr(); TransFNmV.Clr(); RefTransFNmV.Clr();
    if (!OutOrgFNm.Empty()) { OrgFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutOrgFNm)); }
    if (!OutTransFNm.Empty()) { TransFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutTransFNm)); }
    if (!OutRefTransFNm.Empty()) { RefTransFNmV.Add(GetOutFNm(OutFBase, 0, LinesPerFile, OutRefTransFNm)); }
    // open files
    PSOut OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OrgFNmV.Last()) : PSOut();
    PSOut TransSOut = !OutTransFNm.Empty() ? TFOut::New(TransFNmV.Last()) : PSOut();
    PSOut RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(RefTransFNmV.Last()) : PSOut();
    // check which are given
    const bool IsOrgP = !OrgSOut.Empty() && IsOrg();
    const bool IsTransP = !TransSOut.Empty() && IsTrans();
    const bool IsRefTransP = !RefTransSOut.Empty() && IsRefTrans();
    // print warnings
    if (!IsOrgP) { printf("No original sentences!\n"); }
    if (!IsTransP) { printf("No machine translation sentences!\n"); }
    if (!IsRefTransP) { printf("No reference translation sentences!\n"); }
    // go over all the sentences and store them in the file
    TIntV SentIdV; GetSentIdV(SentIdV);
    for (int SentIdN = 0; SentIdN < SentIdV.Len(); SentIdN++) {
        // should we move to the next file? checked BEFORE writing, so that
        // sentence number k*LinesPerFile becomes the first line of a new
        // file instead of an extra line at the end of the previous one, and
        // so that a new file is opened only when there is a sentence for it
        if (SplitP && (SentIdN > 0) && (SentIdN % LinesPerFile == 0)) {
            // prepare filenames
            if (!OutOrgFNm.Empty()) { OrgFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutOrgFNm)); }
            if (!OutTransFNm.Empty()) { TransFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutTransFNm)); }
            if (!OutRefTransFNm.Empty()) { RefTransFNmV.Add(GetOutFNm(OutFBase, SentIdN, LinesPerFile, OutRefTransFNm)); }
            // open next files; assigning replaces the previous stream handles
            OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OrgFNmV.Last()) : PSOut();
            TransSOut = !OutTransFNm.Empty() ? TFOut::New(TransFNmV.Last()) : PSOut();
            RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(RefTransFNmV.Last()) : PSOut();
        }
        const int SentId = SentIdV[SentIdN];
        if (IsOrgP) { OrgSOut->PutStrLn(GetOrgStr(SentId)); }
        if (IsTransP) { TransSOut->PutStrLn(GetTransStr(SentId)); }
        if (IsRefTransP) { RefTransSOut->PutStrLn(GetRefTransStrV(SentId)[0]); }
    }
}
void TTransCorpus::SaveTransScript(const TStr& OutFBase, const TStr& OrgLang,
        const TStr& TransLang, const TStrV& OrgFNmV, const TStrV& RefTransFNmV) {

    const int FNms = OrgFNmV.Len();
    IAssert(OrgFNmV.Len() == RefTransFNmV.Len());

    // make tokenize
    PSOut TokSOut = TFOut::New(OutFBase + "_tokenize.sh");
    for (int FNmN = 0; FNmN < FNms; FNmN++) {
        TokSOut->PutStrLn("tokenize.pl -lang=" + OrgLang + " " + 
            OrgFNmV[FNmN].GetFBase() + " " + 
            TStr::PutFExt(OrgFNmV[FNmN], ".tok").GetFBase());
        TokSOut->PutStrLn("tokenize.pl -lang=" + TransLang + " " + 
            RefTransFNmV[FNmN].GetFBase() + " " + 
            TStr::PutFExt(RefTransFNmV[FNmN], ".tok").GetFBase());           
    }
    TokSOut->Flush();

    // make align
    PSOut AlignSOut = TFOut::New(OutFBase + "_align.sh");
    for (int FNmN = 0; FNmN < FNms; FNmN++) {
        AlignSOut->PutStrLn("ssal -f " + 
            TStr::PutFExt(OrgFNmV[FNmN], ".tok").GetFBase() + " " + 
            TStr::PutFExt(RefTransFNmV[FNmN], ".tok").GetFBase());
        AlignSOut->PutStrLn("mv " + 
            TStr::PutFExt(OrgFNmV[FNmN], ".tok.al").GetFBase() + " " + 
            TStr::PutFExt(OrgFNmV[FNmN], ".al").GetFBase());
        AlignSOut->PutStrLn("mv " + 
            TStr::PutFExt(RefTransFNmV[FNmN], ".tok.al").GetFBase() + " " + 
            TStr::PutFExt(RefTransFNmV[FNmN], ".al").GetFBase());
    }
    AlignSOut->Flush();

    // make lowercase
    PSOut LcSOut = TFOut::New(OutFBase + "_lowercase.sh");
    for (int FNmN = 0; FNmN < FNms; FNmN++) {
        LcSOut->PutStrLn("lc-latin.pl " + 
            TStr::PutFExt(OrgFNmV[FNmN], ".al").GetFBase() + " " + 
            TStr::PutFExt(OrgFNmV[FNmN], ".lc").GetFBase());
        LcSOut->PutStrLn("lc-latin.pl " + 
            TStr::PutFExt(RefTransFNmV[FNmN], ".al").GetFBase() + " " + 
            TStr::PutFExt(RefTransFNmV[FNmN], ".lc").GetFBase());
    }
    LcSOut->Flush();

    // rest -- script
}
// Example #3
// 0
int main(int argc, char* argv[])
{
  TEnv Env(argc, argv);
  TStr PrefixPath = Env.GetArgs() > 1 ? Env.GetArg(1) : TStr("");

  double ts1 = Tick();
  TTableContext Context;
  TVec<TPair<PTable,TStr> > NodeTblV = TVec<TPair<PTable,TStr> >();
  TVec<TPair<PTable, int> > EdgeTblV = TVec<TPair<PTable, int> >();
  Schema NodeSchema = Schema();
  Schema EdgeSchema = Schema();
  LoadFlickrTables(PrefixPath, Context, NodeTblV, NodeSchema, EdgeTblV, EdgeSchema);

  double ts2 = Tick();

  THash<TStr,TStrH> NStrH;
  TIntStrH NIdH;
  CreateIdHashes(NodeTblV, NStrH, NIdH);

  double ts3 = Tick();
  PSVNet Graph = LoadGraphMNet<PSVNet>(NodeTblV, EdgeTblV, NStrH, NIdH);

  double ts4 = Tick();

  int nExps = 10;
  int nTriads = 0;
  for (int i = 0; i < nExps; i++) {
    nTriads = TSnap::GetTriads(Graph);
  }
  double ts5 = Tick();
  StdOut->PutStrFmtLn("Triads %d", nTriads);

  PSOut TimeOut = TFOut::New(PrefixPath + TStr("time.txt"), true);
  TimeOut->PutStrFmtLn("===== Triad Counting - PSVNet =====");
  TimeOut->PutStrLn(Env.GetCmLn());
  TimeOut->PutStrFmtLn("Input Time = %f", GetCPUTimeUsage(ts1, ts2));
  TimeOut->PutStrFmtLn("Preprocessing Time = %f", GetCPUTimeUsage(ts2, ts3));
  TimeOut->PutStrFmtLn("Conversion Time = %f", GetCPUTimeUsage(ts3, ts4));
  TimeOut->PutStrFmtLn("Computing Time = %f", GetCPUTimeUsage(ts4, ts5)/nExps);

	return 0;
}
void TTransCorpus::SaveSgm(const TStr& OutOrgFNm, 
        const TStr& OutTransFNm, const TStr& OutRefTransFNm) {

    // open files
    PSOut OrgSOut = !OutOrgFNm.Empty() ? TFOut::New(OutOrgFNm) : NULL;
    PSOut TransSOut = !OutTransFNm.Empty() ? TFOut::New(OutTransFNm) : NULL;
    PSOut RefTransSOut = !OutRefTransFNm.Empty() ? TFOut::New(OutRefTransFNm) : NULL;
    // check which are given
    const bool IsOrgP = !OrgSOut.Empty() && IsOrg();
    const bool IsTransP = !TransSOut.Empty() && IsTrans();
    const bool IsRefTransP = !RefTransSOut.Empty() && IsRefTrans();
    // prepare headers
    if (IsOrgP) {
        OrgSOut->PutStrLn("<srcset setid=\"tmp\" srclang=\"source\" trglang=\"target\">");
        OrgSOut->PutStrLn("<DOC docid=\"tmpdoc\">");
    }
    if (IsTransP) {
        TransSOut->PutStrLn("<tstset setid=\"tmp\" srclang=\"source\" trglang=\"target\">");
        TransSOut->PutStrLn("<DOC docid=\"tmpdoc\" sysid=\"trans\">");
    }
    if (IsRefTransP) {
        RefTransSOut->PutStrLn("<refset setid=\"tmp\" srclang=\"source\" trglang=\"target\">");
        RefTransSOut->PutStrLn("<DOC docid=\"tmpdoc\" sysid=\"ref\">");
    }
    // output sentences
    TIntV SentIdV; GetSentIdV(SentIdV);
    for (int SentIdN = 0; SentIdN < SentIdV.Len(); SentIdN++) {
        const int SentId = SentIdV[SentIdN];
        if (IsOrgP) { 
            OrgSOut->PutStrLn(TStr::Fmt(
                "<p><seg id=\"%d\">%s</seg></p>", 
                SentId, GetOrgStr(SentId).CStr())); 
        }
        if (IsTransP) { 
            TransSOut->PutStrLn(TStr::Fmt(
                "<p><seg id=\"%d\">%s</seg></p>", 
                SentId, GetTransStr(SentId).CStr()));
        }
        if (IsRefTransP) { 
            RefTransSOut->PutStrLn(TStr::Fmt(
                "<p><seg id=\"%d\">%s</seg></p>", 
                SentId, GetRefTransStrV(SentId)[0].CStr()));
        }
    }
    // prepare footers
    if (IsOrgP) {
        OrgSOut->PutStrLn("</DOC>");
        OrgSOut->PutStrLn("</srcset>");
    }
    if (IsTransP) {
        TransSOut->PutStrLn("</DOC>");
        TransSOut->PutStrLn("</tstset>");
    }
    if (IsRefTransP) {
        RefTransSOut->PutStrLn("</DOC>");
        RefTransSOut->PutStrLn("</refset>");
    }
}