bool GetBoolArg(const PXmlTok& QueryXml, const TStr& ArgNm, const bool DfVal) { if (QueryXml.Empty()) return DfVal; TStr val = QueryXml->GetArgVal(ArgNm, ""); if (val == "") return DfVal; val = val.GetLc(); if (val == "true" || val == "1") return true; return false; }
// Loads sentence pairs from the XML file InXmlFNm into a new TTransCorpus.
//
// Reading sequence, as implemented below:
//   1. one top tag is skipped (TEI),
//   2. the next element (teiHeader) is loaded, its tag name is printed
//      and the element is discarded,
//   3. two more top tags are skipped (text, body),
//   4. the remaining elements are loaded one at a time until a load
//      reports !IsOk().
// For every loaded `div` element, all tokens matching "linkGrp|link" are
// visited; a link whose `type` attribute equals "1:1" must contain exactly
// one `s1` and one `s2` tag (enforced with IAssert), and their texts are
// added as one sentence pair under a running sentence id starting at 0.
// Links of any other type are skipped. Non-`div` elements are reported on
// stdout and ignored.
//
// MxSents: -1 disables the limit. Otherwise loading stops once the corpus
// reports more than MxSents sentences.
// NOTE(review): the limit is tested once per loaded element and with `>`,
// so the returned corpus can hold more than MxSents sentences - confirm
// this is intended before tightening it.
//
// Progress ("%7d Sentences") is written to stdout while loading.
PTransCorpus TTransCorpus::LoadAC(const TStr& InXmlFNm, const int& MxSents) {
  // prepare the result structure
  PTransCorpus TransCorpus = TTransCorpus::New();
  // we load xml by skipping the first tags
  PSIn XmlSIn = TFIn::New(InXmlFNm);
  TXmlDoc::SkipTopTag(XmlSIn); // ignore TEI
  printf("Ignoring: %s\n", TXmlDoc::LoadTxt(XmlSIn)->GetTok()->GetTagNm().CStr()); // ignore teiHeader
  TXmlDoc::SkipTopTag(XmlSIn); // ignore text
  TXmlDoc::SkipTopTag(XmlSIn); // ignore body
  PXmlDoc XmlDoc;
  int SentId = 0;
  forever {
    // report progress, then load the next xml tree
    printf("%7d Sentences \r", SentId);
    XmlDoc = TXmlDoc::LoadTxt(XmlSIn);
    // stop if at the last tag
    if (!XmlDoc->IsOk()) {
      /*printf("Error: %s\n", XmlDoc->GetMsgStr().CStr());*/
      break;
    }
    // extract sentence pairs from the xml tree
    PXmlTok TopTok = XmlDoc->GetTok();
    if (TopTok->IsTag("div")) {
      // document id; not used below, the lookup is kept as in the original
      TStr DocNm = TopTok->GetArgVal("n");
      // alignment links of this document
      TXmlTokV LinkTokV;
      TopTok->GetTagTokV("linkGrp|link", LinkTokV);
      for (int LinkTokN = 0; LinkTokN < LinkTokV.Len(); LinkTokN++) {
        PXmlTok LinkTok = LinkTokV[LinkTokN];
        TStr LinkType = LinkTok->GetArgVal("type");
        // only one-to-one alignments are loaded; other link types are skipped
        if (LinkType == "1:1") {
          TXmlTokV S1TokV;
          LinkTok->GetTagTokV("s1", S1TokV);
          TXmlTokV S2TokV;
          LinkTok->GetTagTokV("s2", S2TokV);
          // a 1:1 link is required to carry exactly one tag per side
          IAssert(S1TokV.Len() == 1);
          IAssert(S2TokV.Len() == 1);
          TStr ParaStr1 = S1TokV[0]->GetTagTokStr("");
          TStr ParaStr2 = S2TokV[0]->GetTagTokStr("");
          TransCorpus->AddSentenceNoTrans(SentId, ParaStr1, ParaStr2);
          SentId++;
        }
      }
    } else {
      printf("Unknow tag: %s\n", TopTok->GetTagNm().CStr());
    }
    // honour the sentence limit (checked after each loaded element)
    if ((MxSents != -1) && (TransCorpus->GetSentences() > MxSents)) {
      break;
    }
  }
  // terminate the "\r"-style progress line
  printf("\n");
  // finish
  return TransCorpus;
}
// Returns the value of attribute ArgNm on QueryXml.
//
// DfVal is returned when QueryXml is an empty handle; otherwise the result
// is whatever GetArgVal yields when given DfVal as its fallback.
TStr GetStrArg(const PXmlTok& QueryXml, const TStr& ArgNm, const TStr& DfVal) {
  if (!QueryXml.Empty()) {
    return QueryXml->GetArgVal(ArgNm, DfVal);
  }
  // no token to read from
  return DfVal;
}
// Returns the value of attribute ArgNm on the tag found at TagPath below
// QueryXml.
//
// DfVal is returned when QueryXml is an empty handle, when GetTagTok finds
// no token for TagPath (it hands back an empty handle), or - through
// GetArgVal's fallback argument - when the attribute lookup falls back.
TStr GetStrArg(const PXmlTok& QueryXml, const TStr& TagPath, const TStr& ArgNm, const TStr& DfVal) {
  // Same guard as GetBoolArg and the three-argument GetStrArg: never
  // dereference an empty handle, fall back to the default instead.
  if (QueryXml.Empty()) {
    return DfVal;
  }
  PXmlTok XmlTok = QueryXml->GetTagTok(TagPath);
  if (XmlTok.Empty()) {
    return DfVal;
  }
  return XmlTok->GetArgVal(ArgNm, DfVal);
}