예제 #1
0
// Builds a TAmazonItem from an XML document rooted at <AmazonItem>.
// Reads the item id, the title, every Authors|Name entry and every
// XSell|ItemId entry, each as token text via GetTokStr(false), and returns
// the populated item.
PAmazonItem TAmazonItem::New(const PXmlDoc& XmlDoc){
  PAmazonItem Item=PAmazonItem(new TAmazonItem());
  // scalar fields: identifier and title
  Item->ItemId=XmlDoc->GetTagTok("AmazonItem|ItemId")->GetTokStr(false);
  Item->TitleStr=XmlDoc->GetTagTok("AmazonItem|Title")->GetTokStr(false);
  // author names, appended in the order GetTagTokV returns them
  TXmlTokV AuthorTokV;
  XmlDoc->GetTagTokV("AmazonItem|Authors|Name", AuthorTokV);
  const int Authors=AuthorTokV.Len();
  for (int AuthorN=0; AuthorN<Authors; AuthorN++){
    const TStr AuthorNm=AuthorTokV[AuthorN]->GetTokStr(false);
    Item->AuthorNmV.Add(AuthorNm);
  }
  // cross-sell item ids, appended in the order GetTagTokV returns them
  TXmlTokV XSellTokV;
  XmlDoc->GetTagTokV("AmazonItem|XSell|ItemId", XSellTokV);
  const int XSells=XSellTokV.Len();
  for (int XSellN=0; XSellN<XSells; XSellN++){
    const TStr XSellItemId=XSellTokV[XSellN]->GetTokStr(false);
    Item->NextItemIdV.Add(XSellItemId);
  }
  return Item;
}
// Loads a sentence-aligned parallel corpus from the XML file InXmlFNm.
// The enclosing tags are stepped over first (the inline notes name them TEI,
// teiHeader, text and body), then top-level elements are read one at a time.
// For every <div>, each linkGrp|link whose "type" is "1:1" contributes one
// sentence pair (the text of its <s1> and <s2>) through AddSentenceNoTrans,
// with ids assigned consecutively from 0. Reading stops at the first element
// that does not load cleanly, or - when MxSents is not -1 - once
// GetSentences() exceeds MxSents; that limit is tested after each top-level
// element, not after each sentence.
PTransCorpus TTransCorpus::LoadAC(const TStr& InXmlFNm, const int& MxSents) {
    PTransCorpus Corpus = TTransCorpus::New();
    // position the input stream on the first element inside the body
    PSIn SIn = TFIn::New(InXmlFNm);
    TXmlDoc::SkipTopTag(SIn); // TEI
    printf("Ignoring: %s\n", TXmlDoc::LoadTxt(SIn)->GetTok()->GetTagNm().CStr()); // teiHeader
    TXmlDoc::SkipTopTag(SIn); // text
    TXmlDoc::SkipTopTag(SIn); // body
    int ElemsSeen = 0, NextSentId = 0;
    forever {
        // progress report, then the next top-level element
        ElemsSeen++;
        printf("%7d Sentences \r", NextSentId);
        PXmlDoc ElemDoc = TXmlDoc::LoadTxt(SIn);
        // a document that is not ok marks the end of the usable input
        if (!ElemDoc->IsOk()) { break; }
        PXmlTok ElemTok = ElemDoc->GetTok();
        if (!ElemTok->IsTag("div")) {
            printf("Unknow tag: %s\n", ElemTok->GetTagNm().CStr());
        } else {
            // document name is read but not used further in this function
            TStr DocNm = ElemTok->GetArgVal("n");
            TXmlTokV LinkTokV;
            ElemTok->GetTagTokV("linkGrp|link", LinkTokV);
            const int Links = LinkTokV.Len();
            for (int LinkN = 0; LinkN < Links; LinkN++) {
                PXmlTok LinkTok = LinkTokV[LinkN];
                TStr LinkType = LinkTok->GetArgVal("type");
                // only one-to-one alignments are kept
                if (LinkType != "1:1") { continue; }
                TXmlTokV SrcTokV; LinkTok->GetTagTokV("s1", SrcTokV);
                TXmlTokV DstTokV; LinkTok->GetTagTokV("s2", DstTokV);
                IAssert(SrcTokV.Len() == 1);
                IAssert(DstTokV.Len() == 1);
                TStr SrcStr = SrcTokV[0]->GetTagTokStr("");
                TStr DstStr = DstTokV[0]->GetTagTokStr("");
                Corpus->AddSentenceNoTrans(NextSentId, SrcStr, DstStr);
                NextSentId++;
            }
        }
        // optional cap on the corpus size
        if ((MxSents != -1) && (Corpus->GetSentences() > MxSents)) { break; }
    }
    printf("\n");
    return Corpus;
}
// Loads a parallel corpus from the "tmx" files that TFFile enumerates for
// InTmxFPath. Each file is first passed through CleanTmx into
// "<name>.xml", which is then parsed as a whole. Every tmx|body|tu element
// must hold exactly two <tuv> children (asserted); the <seg> text of the one
// whose xml:lang equals OrgLang becomes the original sentence and the one
// matching RefTransLang the reference translation (either stays empty when
// no language matches). Sentence ids run consecutively from 0 across all
// files. A file that fails to parse is reported and skipped.
PTransCorpus TTransCorpus::LoadTMX(const TStr& InTmxFPath,
        const TStr& OrgLang, const TStr& RefTransLang) {

    PTransCorpus Corpus = TTransCorpus::New();
    TFFile TmxFiles(InTmxFPath, "tmx", false);
    TStr SrcFNm;
    int NextSentId = 0;
    while (TmxFiles.Next(SrcFNm)) {
        printf("Loading %s ...\n", SrcFNm.CStr());
        // write a cleaned copy next to the source file and parse that copy
        TStr CleanFNm = SrcFNm + ".xml";
        CleanTmx(SrcFNm, CleanFNm);
        PSIn CleanSIn = TFIn::New(CleanFNm);
        PXmlDoc TmxDoc = TXmlDoc::LoadTxt(CleanSIn);
        if (!TmxDoc->IsOk()) {
            // report the parser message and move on to the next file
            printf(" error: %s\n", TmxDoc->GetMsgStr().CStr());
            continue;
        }
        // one translation unit per sentence pair
        TXmlTokV UnitTokV;
        TmxDoc->GetTagTokV("tmx|body|tu", UnitTokV);
        const int Units = UnitTokV.Len();
        for (int UnitN = 0; UnitN < Units; UnitN++) {
            if (UnitN % 100 == 0) { printf(" %d / %d\r", UnitN, Units); }
            TXmlTokV VariantTokV;
            UnitTokV[UnitN]->GetTagTokV("tuv", VariantTokV);
            IAssert(VariantTokV.Len() == 2);
            TStr OrgSent, RefTransSent;
            for (int VariantN = 0; VariantN < VariantTokV.Len(); VariantN++) {
                PXmlTok VariantTok = VariantTokV[VariantN];
                TStr Lang = VariantTok->GetStrArgVal("xml:lang", "");
                TStr Sent = CleanRtf(VariantTok->GetTagTok("seg")->GetTokStr(false));
                if (Lang == OrgLang) {
                    OrgSent = Sent;
                } else if (Lang == RefTransLang) {
                    RefTransSent = Sent;
                }
            }
            Corpus->AddSentenceNoTrans(NextSentId, OrgSent, RefTransSent);
            NextSentId++;
        }
        printf(" %d / %d\n", Units, Units);
    }
    return Corpus;
}
예제 #4
0
void TDzsBsDoc::GetDocParts(
 const TStr& FNm, const PXmlDoc& XmlDoc,
 const TStr& FPath, const TStr& WebAlias,
 bool& Ok, TStr& IdStr, TStr& TitleStr, TStr& DataStr, int& YearN){
  Ok=false;
  if (!XmlDoc->IsOk()){return;}
  // id
  IdStr=FNm;
  IdStr.ChangeStr(FPath, WebAlias);
//  PXmlTok IdTok;
//  if (XmlDoc->IsTagTok("term|metadata|identifier", IdTok)){
//    IdStr=IdTok->GetTokStr(false);}
//  else {return;}
  // title
  PXmlTok TitleTok;
  if (XmlDoc->IsTagTok("term|metadata|title", TitleTok)){
    TitleStr=TitleTok->GetTokStr(false);}
  else {return;}
  // timedata
  TXmlTokV TimeDataTokV;
  XmlDoc->GetTagTokV("term|data|frame|timedata|fromyear", TimeDataTokV);
  TStr TimeDataStr=TXmlTok::GetTokVStr(TimeDataTokV, false);
  if (TimeDataStr.IsInt(YearN)){} else {YearN=0;}
  // locdata
  TXmlTokV LocDataTokV;
  XmlDoc->GetTagTokV("term|data|frame|locdata", LocDataTokV);
  TStr LocDataStr=TXmlTok::GetTokVStr(LocDataTokV, true);
  // pages
  TXmlTokV PageTokV; XmlDoc->GetTagTokV("term|data|frame|page", PageTokV);
  DataStr=GetDataTokVStr(PageTokV, "\n")+" "+LocDataStr;
  // character-set transformation
  TitleStr=THtmlLxChDef::GetCSZFromWin1250(TitleStr);
  DataStr=THtmlLxChDef::GetCSZFromWin1250(DataStr);
  // success
  Ok=true;
}
예제 #5
0
// Creates a TDzsBsDoc from a parsed XML document.
// Returns NULL when XmlDoc is not ok or when GetDocParts reports failure;
// otherwise returns a document built from the extracted id, title, data
// text and year.
PDzsBsDoc TDzsBsDoc::GetDzsBsDoc(
 const TStr& FNm, const PXmlDoc& XmlDoc,
 const TStr& FPath, const TStr& WebAlias){
  if (!XmlDoc->IsOk()){return NULL;}
  // outputs filled in by GetDocParts
  TStr IdStr; TStr TitleStr; TStr DataStr;
  int YearN;
  bool PartsOk;
  GetDocParts(FNm, XmlDoc, FPath, WebAlias, PartsOk, IdStr, TitleStr, DataStr, YearN);
  if (PartsOk){
    return TDzsBsDoc::New(IdStr, TitleStr, DataStr, YearN);
  }
  return NULL;
}
예제 #6
0
void TSAppSrvFun::Exec(const TStrKdV& FldNmValPrV, const PSAppSrvRqEnv& RqEnv) {
	const PNotify& Notify = RqEnv->GetWebSrv()->GetNotify();
	PHttpResp HttpResp;
	try {
        // log the call
		if (NotifyOnRequest)
			Notify->OnStatus(TStr::Fmt("RequestStart  %s", FunNm.CStr()));
		TTmStopWatch StopWatch(true);
		// execute the actual function, according to the type
		PSIn BodySIn; TStr ContTypeVal;
		if (GetFunOutType() == saotXml) {
			PXmlDoc ResXmlDoc = ExecXml(FldNmValPrV, RqEnv);        
			TStr ResXmlStr; ResXmlDoc->SaveStr(ResXmlStr);
			BodySIn = TMIn::New(XmlHdStr + ResXmlStr);
			ContTypeVal = THttp::TextXmlFldVal;
		} else if (GetFunOutType() == saotJSon) {
			TStr ResStr = ExecJSon(FldNmValPrV, RqEnv);
			BodySIn = TMIn::New(ResStr);
			ContTypeVal = THttp::AppJSonFldVal;
		} else {
			BodySIn = ExecSIn(FldNmValPrV, RqEnv, ContTypeVal);
		}
		if (ReportResponseSize)
			Notify->OnStatusFmt("Response size: %.1f KB", BodySIn->Len() / (double) TInt::Kilo);
		// log finish of the call
		if (NotifyOnRequest)
			Notify->OnStatus(TStr::Fmt("RequestFinish %s [request took %d ms]", FunNm.CStr(), StopWatch.GetMSecInt()));
		// prepare response
		HttpResp = THttpResp::New(THttp::OkStatusCd, 
			ContTypeVal, false, BodySIn);
    } catch (PExcept Except) {
        // known internal error
        Notify->OnStatusFmt("Exception: %s", Except->GetMsgStr().CStr());
        Notify->OnStatusFmt("Location: %s", Except->GetLocStr().CStr());
        TStr ResStr, ContTypeVal = THttp::TextPlainFldVal;
		if (GetFunOutType() == saotXml) {
			PXmlTok TopTok = TXmlTok::New("error");
			TopTok->AddSubTok(TXmlTok::New("message", Except->GetMsgStr()));
			TopTok->AddSubTok(TXmlTok::New("location", Except->GetLocStr()));
			PXmlDoc ErrorXmlDoc = TXmlDoc::New(TopTok); 
			ResStr = XmlHdStr + ErrorXmlDoc->SaveStr();
            ContTypeVal = THttp::TextXmlFldVal;
		} else if (GetFunOutType() == saotJSon) {
			PJsonVal ResVal = TJsonVal::NewObj();
			ResVal->AddToObj("message", Except->GetMsgStr());
			ResVal->AddToObj("location", Except->GetLocStr());
			ResStr = TJsonVal::NewObj("error", ResVal)->SaveStr();
            ContTypeVal = THttp::AppJSonFldVal;
		}
        // prepare response
        HttpResp = THttpResp::New(THttp::InternalErrStatusCd, 
            ContTypeVal, false, TMIn::New(ResStr));        
    } catch (...) {
		// unknown internal error
		TStr ResStr, ContTypeVal = THttp::TextPlainFldVal;
		if (GetFunOutType() == saotXml) {
			PXmlDoc ErrorXmlDoc = TXmlDoc::New(TXmlTok::New("error")); 
			ResStr = XmlHdStr + ErrorXmlDoc->SaveStr();
            ContTypeVal = THttp::TextXmlFldVal;            
		} else if (GetFunOutType() == saotJSon) {
			ResStr = TJsonVal::NewObj("error", "Unknown")->SaveStr();
            ContTypeVal = THttp::AppJSonFldVal;
		}
		// prepare response
        HttpResp = THttpResp::New(THttp::InternalErrStatusCd, 
            ContTypeVal, false, TMIn::New(ResStr));
    }

	if (LogRqToFile)
		LogReqRes(FldNmValPrV, HttpResp);
	// send response
	RqEnv->GetWebSrv()->SendHttpResp(RqEnv->GetSockId(), HttpResp); 
}
예제 #7
0
파일: sappsrv.cpp 프로젝트: edgeflip/dmoz
// Handles one incoming HTTP request on socket SockId.
//  - "favicon.ico" is answered directly from Favicon.
//  - A non-empty first path segment must name a function registered in
//    FunNmToFunH; that function's Exec is called with the URL parameters.
//  - An empty first path segment lists the registered functions as XML, but
//    only when ListFunP is set.
// Failures are raised as exceptions inside the try block and answered by the
// handlers at the bottom with an XML <error> document. ErrStatusCd tracks
// which HTTP status that answer should carry: bad-request while the request
// is still being validated, not-found for unknown functions/pages, and
// internal-error once parsing has succeeded.
void TSAppSrv::OnHttpRq(const uint64& SockId, const PHttpRq& HttpRq) {
	// last appropriate error code, start with bad request
	int ErrStatusCd = THttp::BadRqStatusCd;
    try {
        // check http-request correctness
        // (EAssertR presumably throws a PExcept handled below - confirm against its definition)
        EAssertR(HttpRq->IsOk(), "Bad HTTP request!");
        // check url correctness
        PUrl RqUrl = HttpRq->GetUrl();
        EAssertR(RqUrl->IsOk(), "Bad request URL!");
        // extract function name: the first segment of the URL path
        PUrl HttpRqUrl = HttpRq->GetUrl();
        TStr FunNm = HttpRqUrl->GetPathSeg(0);
		// check if we have the function registered
		if (FunNm == "favicon.ico") {
			// serve the icon and finish; no function lookup is needed
			PHttpResp HttpResp = THttpResp::New(THttp::OkStatusCd,
				THttp::ImageIcoFldVal, false, Favicon.GetSIn());
			SendHttpResp(SockId, HttpResp); 
			return;
		} else if (!FunNm.Empty() && !FunNmToFunH.IsKey(FunNm)) { 
			// unknown function: switch the status to not-found before throwing
			ErrStatusCd = THttp::ErrNotFoundStatusCd;
			GetNotify()->OnStatusFmt("[AppSrv] Unknown function '%s'!", FunNm.CStr());
			TExcept::Throw("Unknown function '" + FunNm + "'!");
		}
        // extract parameters: one (name, value) pair per value of every key
        TStrKdV FldNmValPrV;
        PUrlEnv HttpRqUrlEnv = HttpRq->GetUrlEnv();
        const int Keys = HttpRqUrlEnv->GetKeys();
        for (int KeyN = 0; KeyN < Keys; KeyN++) {
            TStr KeyNm = HttpRqUrlEnv->GetKeyNm(KeyN);
            const int Vals = HttpRqUrlEnv->GetVals(KeyN);
            for (int ValN = 0; ValN < Vals; ValN++) {
                TStr Val = HttpRqUrlEnv->GetVal(KeyN, ValN);
                FldNmValPrV.Add(TStrKd(KeyNm, Val));
            }
        }
		// report call
		if (ShowParamP) {  GetNotify()->OnStatus(" " + HttpRq->GetUrl()->GetUrlStr()); }
		// request parsed well, from now on it's internal error
		ErrStatusCd = THttp::InternalErrStatusCd;
		// process the requested function
		if (!FunNm.Empty()) {
			// prepare request environment
			PSAppSrvRqEnv RqEnv = TSAppSrvRqEnv::New(this, SockId, HttpRq, FunNmToFunH);
			// retrieve function
			PSAppSrvFun SrvFun = FunNmToFunH.GetDat(FunNm);
			// call function; no response is sent here on this path
			SrvFun->Exec(FldNmValPrV, RqEnv);
		} else {
			// internal SAppSrv call
			if (!ListFunP) {
				// we are not allowed to list functions
				ErrStatusCd = THttp::ErrNotFoundStatusCd;
				TExcept::Throw("Unknown page");
			}
			// prepare a list of registered functions
			PXmlTok TopTok = TXmlTok::New("registered-functions");
			int KeyId = FunNmToFunH.FFirstKeyId();
			while (FunNmToFunH.FNextKeyId(KeyId)) {
				PXmlTok FunTok = TXmlTok::New("function");
				FunTok->AddArg("name", FunNmToFunH.GetKey(KeyId));
				TopTok->AddSubTok(FunTok);
			}
			TStr ResXmlStr; TXmlDoc::New(TopTok)->SaveStr(ResXmlStr);
			PSIn BodySIn = TMIn::New(TSAppSrvFun::XmlHdStr + ResXmlStr);
			// prepare response
			PHttpResp HttpResp = THttpResp::New(THttp::OkStatusCd, 
				THttp::TextXmlFldVal, false, BodySIn);
			// send response
			SendHttpResp(SockId, HttpResp); 
		}
    } catch (PExcept Except) {
		// known error: report its message and location
		PXmlTok TopTok = TXmlTok::New("error");
		TopTok->AddSubTok(TXmlTok::New("message", Except->GetMsgStr()));
		TopTok->AddSubTok(TXmlTok::New("location", Except->GetLocStr()));
		PXmlDoc ErrorXmlDoc = TXmlDoc::New(TopTok); 
        TStr ResXmlStr; ErrorXmlDoc->SaveStr(ResXmlStr);
        // prepare response with the status recorded in ErrStatusCd
        // NOTE(review): the body is XML but the content type is TextHtmlFldVal - confirm this is intended
		PHttpResp HttpResp = THttpResp::New(ErrStatusCd, 
            THttp::TextHtmlFldVal, false, 
			TMIn::New(TSAppSrvFun::XmlHdStr + ResXmlStr));
        // send response
	    SendHttpResp(SockId, HttpResp); 
    } catch (...) {
		// unknown error: a bare <error> element, no details available
		PXmlDoc ErrorXmlDoc = TXmlDoc::New(TXmlTok::New("error")); 
        TStr ResXmlStr; ErrorXmlDoc->SaveStr(ResXmlStr);
        // prepare response with the status recorded in ErrStatusCd
        PHttpResp HttpResp = THttpResp::New(ErrStatusCd, 
            THttp::TextHtmlFldVal, false, 
			TMIn::New(TSAppSrvFun::XmlHdStr + ResXmlStr));
        // send response
	    SendHttpResp(SockId, HttpResp); 
    }
}
예제 #8
0
void TSAppSrv::OnHttpRq(const int& SockId, const PHttpRq& HttpRq) {
    PHttpResp HttpResp;
    try {
        // check http-request correctness - return if error
        EAssertR(HttpRq->IsOk(), "Bad HTTP request!");
        // check url correctness - return if error
        PUrl RqUrl = HttpRq->GetUrl();
        EAssertR(RqUrl->IsOk(), "Bad request URL!");
        // extract function name
        PUrl HttpRqUrl = HttpRq->GetUrl();
        TStr FunNm = HttpRqUrl->GetPathSeg(0);
        EAssertR(FunNmToFunH.IsKey(FunNm) || FunNm.Empty(), "Unknown function '" + FunNm + "' !");
        // extract parameters
        TStrKdV FldNmValPrV;
        PUrlEnv HttpRqUrlEnv = HttpRq->GetUrlEnv();
        const int Keys = HttpRqUrlEnv->GetKeys();
        for (int KeyN = 0; KeyN < Keys; KeyN++) {
            TStr KeyNm = HttpRqUrlEnv->GetKeyNm(KeyN);
            const int Vals = HttpRqUrlEnv->GetVals(KeyN);
            for (int ValN = 0; ValN < Vals; ValN++) {
                TStr Val = HttpRqUrlEnv->GetVal(KeyN, ValN);
                FldNmValPrV.Add(TStrKd(KeyNm, Val));
            }
        }
        // log the call
        TStr TimeNow = TTm::GetCurLocTm().GetWebLogDateTimeStr(true);
        GetNotify()->OnStatus(TStr::Fmt("[%s] Request %s", TimeNow.CStr(), FunNm.CStr()));
		// prepare request environment
		PSAppSrvRqEnv RqEnv = TSAppSrvRqEnv::New(this, SockId, HttpRq);
		PSIn BodySIn; TStr ContTypeVal;
		if (!FunNm.Empty()) {
			// call function
			PSAppSrvFun SrvFun = FunNmToFunH.GetDat(FunNm);
			if (SrvFun->GetFunOutType() == saotXml) {
				PXmlDoc ResXmlDoc = SrvFun->Exec(FldNmValPrV, RqEnv);        
				TStr ResXmlStr; ResXmlDoc->SaveStr(ResXmlStr);
				//ResXmlDoc->SaveTxt(TFile::GetUniqueFNm("test.xml"));
				BodySIn = TMIn::New(XmlHdStr + ResXmlStr);
				ContTypeVal = THttp::TextXmlFldVal;
			} else if (SrvFun->GetFunOutType() == saotJSon) {
				TStr ResStr = SrvFun->ExecJSon(FldNmValPrV, RqEnv);
				BodySIn = TMIn::New(ResStr);
				//ContTypeVal = THttp::TextHtmlFldVal;
				ContTypeVal = THttp::AppJSonFldVal;
			} else {
				BodySIn = SrvFun->ExecCustom(FldNmValPrV, RqEnv, ContTypeVal);
			}
		} else {
			PXmlTok TopTok = TXmlTok::New("registered-functions");
			int KeyId = FunNmToFunH.FFirstKeyId();
			while (FunNmToFunH.FNextKeyId(KeyId)) {
				PXmlTok FunTok = TXmlTok::New("function");
				FunTok->AddArg("name", FunNmToFunH.GetKey(KeyId));
				TopTok->AddSubTok(FunTok);
			}
			PXmlDoc ResXmlDoc = TXmlDoc::New(TopTok);
			TStr ResXmlStr; ResXmlDoc->SaveStr(ResXmlStr);
			BodySIn = TMIn::New(XmlHdStr + ResXmlStr);
			ContTypeVal = THttp::TextXmlFldVal;
		}
        // prepare response
        HttpResp = THttpResp::New(THttp::OkStatusCd, 
            ContTypeVal, false, BodySIn);
        // send response
    } catch (PExcept Except) {
		PXmlTok TopTok = TXmlTok::New("error");
		TopTok->AddSubTok(TXmlTok::New("message", Except->GetMsgStr()));
		TopTok->AddSubTok(TXmlTok::New("location", Except->GetLocStr()));
		PXmlDoc ErrorXmlDoc = TXmlDoc::New(TopTok); 
        TStr ResXmlStr; ErrorXmlDoc->SaveStr(ResXmlStr);
        HttpResp = THttpResp::New(THttp::OkStatusCd, 
            THttp::TextHtmlFldVal, false, 
			TMIn::New(XmlHdStr + ResXmlStr));
    } catch (...) {
		PXmlDoc ErrorXmlDoc = TXmlDoc::New(TXmlTok::New("error")); 
        TStr ResXmlStr; ErrorXmlDoc->SaveStr(ResXmlStr);
        HttpResp = THttpResp::New(THttp::OkStatusCd, 
            THttp::TextHtmlFldVal, false, 
			TMIn::New(XmlHdStr + ResXmlStr));
    }
    SendHttpResp(SockId, HttpResp); 
}
예제 #9
0
// Builds a Cyc base from four XML dump files located in directory FPath:
//   lexicon-dump.xml   - <word string=.. cycl=..> : name-string edges
//   taxonomy-dump.xml  - <term cycl=..> with <isa>/<genl> children
//   relevance-dump.xml - <term cyc=.. thcl=.. irrel=.. clarifying=..> : flags
//   kb-dump.xml        - <sentence cycl=..> with <arg> children : relations
// Each file is read as a sequence of top-level elements terminated by an
// <end> element. Every relation is stored twice: a forward edge, and a
// backward edge whose predicate is the forward one prefixed with "~".
// If an element fails to load, the last record that was read successfully is
// printed as a hint to where the input is broken, and the function aborts
// through Fail. Unexpected tags are rejected with IAssert/Fail.
// Side effects: progress is printed to stdout, and the knowledge-base pass
// writes predicate/arity frequencies to "CycKB-CycLFq.Stat.Txt" (a relative
// path, so it lands in the current working directory).
PCycBs TCycBs::LoadCycXmlDump(const TStr& FPath){
  // file-names
  TStr NrFPath=TStr::GetNrFPath(FPath);
  TStr CycLexiconFNm=NrFPath+"lexicon-dump.xml";
  TStr CycTaxonomyFNm=NrFPath+"taxonomy-dump.xml";
  TStr CycRelevanceFNm=NrFPath+"relevance-dump.xml";
  TStr CycKBaseFNm=NrFPath+"kb-dump.xml";

  // create cyc-base
  PCycBs CycBs=TCycBs::New();

  // lexicon
  {printf("Processing Lexicon %s ...\n", CycLexiconFNm.CStr());
  PSIn CycLexiconSIn=TFIn::New(CycLexiconFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  // last successfully read record, kept only for the failure diagnostic
  TStr PrevCycWStr; TStr PrevCycLStr;
  forever{
    // statistics
    XmlDocs++; if (XmlDocs%1000==0){printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycLexiconSIn);
    if (!XmlDoc->IsOk()){
      printf("%s - %s\n", PrevCycWStr.CStr(), PrevCycLStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("word"));
    TStr CycWStr=TopTok->GetArgVal("string");
    TStr CycLStr=TopTok->GetArgVal("cycl");
    // remember both fields; previously the CycL string was never stored
    // (a no-effect expression statement), so the diagnostic above always
    // printed it empty
    PrevCycWStr=CycWStr; PrevCycLStr=CycLStr;
    // insert data
    CycBs->AddEdge(CycLStr, "#$nameString", CycWStr);
    CycBs->AddEdge(CycWStr, "~#$nameString", CycLStr);
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // taxonomy
  {printf("Processing Taxonomy %s ...\n", CycTaxonomyFNm.CStr());
  PSIn CycTaxonomySIn=TFIn::New(CycTaxonomyFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  // last successfully read term, kept only for the failure diagnostic
  TStr PrevSrcCycLStr;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycTaxonomySIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevSrcCycLStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("term"));
    TStr SrcCycLStr=TopTok->GetArgVal("cycl");
    PrevSrcCycLStr=SrcCycLStr;
    // every child must be <isa> or <genl>; anything else aborts
    for (int SubTokN=0; SubTokN<TopTok->GetSubToks(); SubTokN++){
      PXmlTok SubTok=TopTok->GetSubTok(SubTokN);
      TStr DstCycLStr=SubTok->GetTagNm();
      if (SubTok->IsTag("isa")){
        DstCycLStr=SubTok->GetArgVal("value");
        CycBs->AddEdge(SrcCycLStr, "#$isa", DstCycLStr);
        CycBs->AddEdge(DstCycLStr, "~#$isa", SrcCycLStr);
      } else
      if (SubTok->IsTag("genl")){
        DstCycLStr=SubTok->GetArgVal("value");
        CycBs->AddEdge(SrcCycLStr, "#$genls", DstCycLStr);
        CycBs->AddEdge(DstCycLStr, "~#$genls", SrcCycLStr);
      } else {
        Fail;
      }
    }
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // relevance
  {printf("Processing Relevance %s ...\n", CycRelevanceFNm.CStr());
  PSIn CycRelevanceSIn=TFIn::New(CycRelevanceFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  // last successfully read term, kept only for the failure diagnostic
  TStr PrevCycStr;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycRelevanceSIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevCycStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("term"));
    TStr CycStr=TopTok->GetArgVal("cyc");
    PrevCycStr=CycStr;
    // flags are applied only to terms already present in the base;
    // unknown terms are silently skipped
    //IAssert(CycBs->IsVNm(CycStr));
    if (CycBs->IsVNm(CycStr)){
      if (TopTok->GetArgVal("thcl")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cvfHumanRelevant, true);}
      if (TopTok->GetArgVal("irrel")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cfvHumanIrrelevant, true);}
      if (TopTok->GetArgVal("clarifying")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cfvHumanClarifying, true);}
      // "ok" is set when the term is marked thcl or clarifying
      if ((TopTok->GetArgVal("thcl")=="T")||(TopTok->GetArgVal("clarifying")=="T")){
        CycBs->GetVrtx(CycStr).SetFlag(cvfHumanOk, true);}
    } else {
      //printf("%s\n", CycStr.CStr());
    }
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // knowledge-base
  {printf("Processing KBase %s ...\n", CycKBaseFNm.CStr());
  PSIn CycKBaseSIn=TFIn::New(CycKBaseFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  // last successfully read sentence, kept only for the failure diagnostic
  TStr PrevCycLStr; TStrV PrevArgCycLStrV;
  // frequency of each "<first argument> - <number of remaining arguments>" key
  TStrIntH HdCycLToFq;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    //if (XmlDocs>10000){break;}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycKBaseSIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevCycLStr.CStr());
      for (int ArgN=0; ArgN<PrevArgCycLStrV.Len(); ArgN++){
        printf(" [%s]", PrevArgCycLStrV[ArgN].CStr());}
      printf("\n");
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("sentence"));
    TStr CycLStr=TopTok->GetArgVal("cycl");
    TXmlTokV ArgXmlTokV; XmlDoc->GetTagTokV("sentence|arg", ArgXmlTokV);
    // one entry per <arg>; arguments without a "cycl" attribute are
    // recorded as the placeholder "Empty"
    TStrV ArgCycLStrV;
    for (int ArgN=0; ArgN<ArgXmlTokV.Len(); ArgN++){
      PXmlTok Tok=ArgXmlTokV[ArgN];
      IAssert(Tok->IsTag("arg"));
      if (Tok->IsArg("cycl")){
        TStr ArgCycLStr=Tok->GetArgVal("cycl");
        ArgCycLStrV.Add(ArgCycLStr);
      } else {
        ArgCycLStrV.Add("Empty");
      }
    }
    PrevCycLStr=CycLStr;
    PrevArgCycLStrV=ArgCycLStrV;
    if (ArgCycLStrV.Len()>0){
      HdCycLToFq.AddDat(ArgCycLStrV[0]+" - "+TInt::GetStr(ArgCycLStrV.Len()-1))++;}
    // insert: only three-argument sentences become edges, and the
    // predicates #$isa, #$termOfUnit and #$genls are skipped here
    if (ArgCycLStrV.Len()==3){
      TStr PredNm=ArgCycLStrV[0];
      if ((PredNm!="#$isa")&&(PredNm!="#$termOfUnit")&&(PredNm!="#$genls")){
        TStr BackLinkPredNm=TStr("~")+PredNm;
        TStr Arg1=ArgCycLStrV[1];
        TStr Arg2=ArgCycLStrV[2];
        CycBs->AddEdge(Arg1, PredNm, Arg2);
        CycBs->AddEdge(Arg2, BackLinkPredNm, Arg1);
      }
    }
  }
  // output top cycl relations, sorted with Sort(false)
  // NOTE(review): the printed number is the stored value plus one - confirm
  // whether the "1+" is intentional
  {TFOut CycLSOut("CycKB-CycLFq.Stat.Txt"); FILE* fCycL=CycLSOut.GetFileId();
  TIntStrPrV FqCycLStrPrV; HdCycLToFq.GetDatKeyPrV(FqCycLStrPrV); 
  FqCycLStrPrV.Sort(false);
  for (int CycLN=0; CycLN<FqCycLStrPrV.Len(); CycLN++){
    fprintf(fCycL, "%6d. %s\n", 1+FqCycLStrPrV[CycLN].Val1, FqCycLStrPrV[CycLN].Val2.CStr());
  }}
  printf("%d Docs\nDone.\n", XmlDocs);}

  // return cyc-base
  return CycBs;
}