bool GetBoolArg(const PXmlTok& QueryXml, const TStr& ArgNm, const bool DfVal)
{
	if (QueryXml.Empty()) return DfVal;
	TStr val = QueryXml->GetArgVal(ArgNm, "");
	if (val == "") return DfVal;
	val = val.GetLc();
	if (val == "true" || val == "1")
		return true;
	return false;
}
// Example no. 2
// 0
// Loads a sentence-aligned corpus from the XML file InXmlFNm.
//
// The opening tags of the enclosing elements (noted below as TEI, text and
// body) are skipped and the header element is read over; the remaining
// top-level elements are then parsed one at a time. For every <div>, each
// token on the tag path "linkGrp|link" whose "type" attribute is "1:1"
// contributes one sentence pair built from its single s1 and s2 children.
//
// InXmlFNm: path of the XML file to read.
// MxSents:  maximal number of sentence pairs to load; -1 means no limit.
//           The limit is enforced per sentence pair, so the returned corpus
//           never holds more than MxSents sentences.
// Returns the populated corpus.
PTransCorpus TTransCorpus::LoadAC(const TStr& InXmlFNm, const int& MxSents) {
    // prepare result structures
    PTransCorpus TransCorpus = TTransCorpus::New();
    const bool LimitSents = (MxSents != -1);
    // we load xml by skipping the first tags
    PSIn XmlSIn=TFIn::New(InXmlFNm);
    TXmlDoc::SkipTopTag(XmlSIn); // ignore TEI
    printf("Ignoring: %s\n", TXmlDoc::LoadTxt(XmlSIn)->GetTok()->GetTagNm().CStr()); // ignore teiHeader
    TXmlDoc::SkipTopTag(XmlSIn); // ignore text
    TXmlDoc::SkipTopTag(XmlSIn); // ignore body
    int SentId = 0;
    forever{
        // progress report, overwritten in place on the console
        printf("%7d Sentences \r", SentId);
        // load the next xml tree
        PXmlDoc XmlDoc = TXmlDoc::LoadTxt(XmlSIn);
        // a tree that fails to load is treated as the end of the input
        if (!XmlDoc->IsOk()) { break; }
        // extract documents from xml-trees
        PXmlTok TopTok=XmlDoc->GetTok();
        if (TopTok->IsTag("div")){
            // document Id (read as in the original code; not used further here)
            TStr DocNm = TopTok->GetArgVal("n");
            // and the aligned links
            TXmlTokV LinkTokV; TopTok->GetTagTokV("linkGrp|link", LinkTokV);
            for (int LinkTokN = 0; LinkTokN < LinkTokV.Len(); LinkTokN++) {
                // stop as soon as the requested number of sentences is reached
                if (LimitSents && (TransCorpus->GetSentences() >= MxSents)) { break; }
                PXmlTok LinkTok = LinkTokV[LinkTokN];
                TStr LinkType = LinkTok->GetArgVal("type");
                // only one-to-one alignments are used; other link types are skipped
                if (LinkType == "1:1") {
                    TXmlTokV S1TokV; LinkTok->GetTagTokV("s1", S1TokV);
                    TXmlTokV S2TokV; LinkTok->GetTagTokV("s2", S2TokV);
                    IAssert(S1TokV.Len() == 1); IAssert(S2TokV.Len() == 1);
                    TStr ParaStr1 = S1TokV[0]->GetTagTokStr("");
                    TStr ParaStr2 = S2TokV[0]->GetTagTokStr("");
                    TransCorpus->AddSentenceNoTrans(SentId, ParaStr1, ParaStr2); SentId++;
                }
            }
        } else {
            printf("Unknow tag: %s\n", TopTok->GetTagNm().CStr());
        }
        // no need to parse further elements once the limit is reached
        if (LimitSents && (TransCorpus->GetSentences() >= MxSents)) { break; }
    }
    printf("\n");
    // finish
    return TransCorpus;
}
// Returns the value of attribute ArgNm on QueryXml.
// DfVal is returned when QueryXml is empty; otherwise it is handed to
// GetArgVal as the fallback value.
TStr GetStrArg(const PXmlTok& QueryXml, const TStr& ArgNm, const TStr& DfVal)
{
	return QueryXml.Empty() ? DfVal : QueryXml->GetArgVal(ArgNm, DfVal);
}
// Returns the value of attribute ArgNm on the token found at TagPath below
// QueryXml.
// DfVal is returned when QueryXml is empty or when no token is found at
// TagPath; otherwise it is handed to GetArgVal as the fallback value.
TStr GetStrArg(const PXmlTok& QueryXml, const TStr& TagPath, const TStr& ArgNm, const TStr& DfVal)
{
	// guard the dereference below, matching the other argument helpers
	if (QueryXml.Empty()) return DfVal;
	PXmlTok XmlTok = QueryXml->GetTagTok(TagPath);
	if (XmlTok.Empty()) return DfVal;
	return XmlTok->GetArgVal(ArgNm, DfVal);
}