/// Fills DIdV with the document id of every currently selected point,
/// in the order in which the points are stored in SelPointV.
void TVizMapContext::GetSelectDIdV(TIntV& DIdV) {
  // reset the output; one entry per selected point is appended below
  DIdV.Gen(SelPointV.Len(), 0);
  const int Selected = SelPointV.Len();
  int PointN = 0;
  while (PointN < Selected) {
    // selected point -> point record in the frame -> its document id
    DIdV.Add(VizMapFrame->GetPoint(SelPointV[PointN])->GetDocId());
    PointN++;
  }
}
void TNmObjBs::GetNmObjDIdV( const PBowDocBs& BowDocBs, TIntV& BowDIdV, const TStr& NmObjStr1, const TStr& NmObjStr2) const { // get first named-object-id int NmObjId1=GetNmObjId(NmObjStr1); TIntV NmObjDocIdV1; GetNmObjDocIdV(NmObjId1, NmObjDocIdV1); NmObjDocIdV1.Sort(); // get second named-object-id TIntV NmObjDocIdV2; if (!NmObjStr2.Empty()){ int NmObjId2=GetNmObjId(NmObjStr2); GetNmObjDocIdV(NmObjId2, NmObjDocIdV2); NmObjDocIdV2.Sort(); } // create joint doc-id-vector TIntV NmObjDocIdV; if (NmObjDocIdV2.Empty()){ NmObjDocIdV=NmObjDocIdV1; } else { NmObjDocIdV1.Intrs(NmObjDocIdV2, NmObjDocIdV); } // traverse named-object-documents to collect bow-document-ids BowDIdV.Gen(NmObjDocIdV.Len(), 0); for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocIdV.Len(); NmObjDocIdN++){ TStr DocNm=GetDocNm(NmObjDocIdV[NmObjDocIdN]); int DId=BowDocBs->GetDId(DocNm); if (DId!=-1){ BowDIdV.Add(DId); } } }
/// Stores the ids of all nodes in NIdV: first the keys of LeftH,
/// then the keys of RightH, each in hash-table iteration order.
void TBPGraph::GetNIdV(TIntV& NIdV) const {
  NIdV.Gen(GetNodes(), 0);
  // keys of the left node table
  int KeyId = LeftH.FFirstKeyId();
  while (LeftH.FNextKeyId(KeyId)) {
    NIdV.Add(LeftH.GetKey(KeyId));
  }
  // keys of the right node table
  KeyId = RightH.FFirstKeyId();
  while (RightH.FNextKeyId(KeyId)) {
    NIdV.Add(RightH.GetKey(KeyId));
  }
}
/// Reads the block-length vector from FBlobBs into BlockLenV.
/// The expected layout, as read here, is: the BlockLenVNm marker string,
/// an element count, that many integers, and a closing -1.
/// Both the marker and the closing -1 are verified with EAssert.
void TBlobBs::GetBlockLenV(const PFRnd& FBlobBs, TIntV& BlockLenV){
  // leading marker must match
  EAssert(FBlobBs->GetStr(BlockLenVNm.Len())==BlockLenVNm);
  // element count, followed by the elements themselves
  BlockLenV.Gen(FBlobBs->GetInt());
  const int BlockLens=BlockLenV.Len();
  int BlockLenN=0;
  while (BlockLenN<BlockLens){
    BlockLenV[BlockLenN]=FBlobBs->GetInt();
    BlockLenN++;
  }
  // trailing terminator must be -1
  EAssert(FBlobBs->GetInt()==-1);
}
void TYInvIx::GetDocIdV( const PYWordDs& WordDs, const int& MnDocFq, TIntV& DocIdV){ IAssert(MnDocFq>=0); if (MnDocFq==0){ DocIdV=AllDocIdV; } else { TIntIntH DocIdFqH(100); int MxDocFq=0; int WordIdN=WordDs->FFirstWordId(); int WordId; double WordFq; while (WordDs->FNextWordId(WordIdN, WordId, WordFq)){ if (WordIdToFirstDocIdNH.IsKey(WordId)){ int DocIdN=FFirstDocId(WordId); int DocId; while (FNextWordId(DocIdN, DocId)){ DocIdFqH.AddDat(DocId)+=int(WordFq); MxDocFq=TInt::GetMx(MxDocFq, DocIdFqH.GetDat(DocId)); } } } int NewMnDocFq=(MnDocFq<=MxDocFq) ? MnDocFq : MxDocFq-3; DocIdV.Gen(DocIdFqH.Len(), 0); int DocIdP=DocIdFqH.FFirstKeyId(); while (DocIdFqH.FNextKeyId(DocIdP)){ int DocId=DocIdFqH.GetKey(DocIdP); int DocFq=DocIdFqH[DocIdP]; if (DocFq>=NewMnDocFq){DocIdV.Add(DocId);} } } }
void TYFSelBs::GetBestWordIdV( const int& DocId, const double& EstExp, const double& SumEstPrb, const PYWordDs& IntrsWordDs, TIntV& BestWordIdV){ TIntFltKdV& WordIdEstKdV=DocIdToWordIdEstVV[DocId]; TFltIntKdV WordEstIdKdV(WordIdEstKdV.Len(), 0); double MnWordEst=TFlt::Mx; for (int WordIdN=0; WordIdN<WordIdEstKdV.Len(); WordIdN++){ int WordId=WordIdEstKdV[WordIdN].Key; double WordEst=pow(WordIdEstKdV[WordIdN].Dat, EstExp); if (IntrsWordDs->IsWordId(WordId)){ WordEstIdKdV.Add(TFltIntKd(WordEst, WordId)); MnWordEst=TFlt::GetMn(WordEst, MnWordEst); } } double SumWordEst=0; {for (int WordIdN=0; WordIdN<WordEstIdKdV.Len(); WordIdN++){ SumWordEst+=(WordEstIdKdV[WordIdN].Key-=MnWordEst);}} WordEstIdKdV.Sort(false); {BestWordIdV.Gen(WordEstIdKdV.Len(), 0); SumWordEst*=SumEstPrb; int WordIdN=0; while ((SumWordEst>=0)&&(WordIdN<WordEstIdKdV.Len())){ double WordEst=WordEstIdKdV[WordIdN].Key; int WordId=WordEstIdKdV[WordIdN].Dat; SumWordEst-=WordEst; BestWordIdV.Add(WordId); WordIdN++; }} }
///Generate sequence from Power law void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) { SzSeq.Gen(SeqLen, 0); while (SzSeq.Len() < SeqLen) { int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha)); if (Sz >= Min && Sz <= Max) { SzSeq.Add(Sz); } } }
/// Writes the node ids of Graph to SortedNIdV in topological order
/// (a node appears before the nodes its out-edges point to).
/// Uses an explicit stack instead of recursion. Raises via EAssertR when an
/// out-edge leads to a node that is still being processed, i.e. on a cycle.
void TGraphCascade::TopologicalSort(TIntV& SortedNIdV) {
  const int Nodes = Graph.GetNodes();
  SortedNIdV.Gen(Nodes, 0); // result
  THash<TInt, TBool> Marks(Nodes);     // node id -> already visited
  THash<TInt, TBool> TempMarks(Nodes); // node id -> visited but not yet finished
  THash<TInt, TBool> Emitted(Nodes);   // node id -> already written to the result
  TIntV NIdV;
  Graph.GetNIdV(NIdV); // all node ids

  // every node starts unvisited
  for (int NodeN = 0; NodeN < Nodes; NodeN++) {
    const int NId = NIdV[NodeN];
    Marks.AddDat(NId, false);
    TempMarks.AddDat(NId, false);
    Emitted.AddDat(NId, false);
  }

  TSStack<TInt> Stack;
  for (int NodeN = 0; NodeN < Nodes; NodeN++) {
    const int RootNId = NIdV[NodeN];
    // start a traversal only from nodes no earlier traversal reached
    if (Marks.GetDat(RootNId)) { continue; }
    Stack.Push(RootNId);
    while (!Stack.Empty()) {
      // visit the node on top of the stack (it stays there until finished)
      const int TopNId = Stack.Top();
      Marks.GetDat(TopNId) = true;
      TempMarks.GetDat(TopNId) = true;
      // push all unvisited children; a child that is still in progress means a cycle
      TNGraph::TNodeI NI = Graph.GetNI(TopNId);
      const int OutDeg = NI.GetOutDeg();
      bool AllChildrenVisited = true;
      for (int ChildN = 0; ChildN < OutDeg; ChildN++) {
        const int ChildId = NI.GetOutNId(ChildN);
        EAssertR(!TempMarks.GetDat(ChildId), "TGraphCascade::TopologicalSort: the graph is not a DAG!");
        if (!Marks.GetDat(ChildId)) {
          AllChildrenVisited = false;
          Stack.Push(ChildId);
        }
      }
      if (!AllChildrenVisited) { continue; }
      // node is finished: emit it once (it may sit on the stack several times)
      if (!Emitted.GetDat(TopNId)) {
        SortedNIdV.Add(TopNId);
        Emitted.GetDat(TopNId) = true;
      }
      TempMarks.GetDat(TopNId) = false;
      Stack.Pop();
    }
  }
  // nodes were emitted children-first; flip to get parents-first
  SortedNIdV.Reverse();
}
///////////////////////////////////////////////// // SkyGrid-Entity void TSkyGridEnt::GetDocIdV(const TSkyGridBs* SkyGridBs, const uint64& MnTm, const uint64& MxTm, TIntV& DocIdV) const { DocIdV.Gen(GetDocIds(), 0); for (int DocN=0; DocN<GetDocIds(); DocN++){ int DocId=GetDocId(DocN); PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId); uint64 DocTm=Doc->GetTm(); if (((MnTm==0)||(MnTm<=DocTm))&&((MxTm==0)||(DocTm<MxTm))){ DocIdV.Add(DocId); } } }
/// Builds the merge map of named-objects: after the call NewNmObjIdV[Id] is
/// the id of the named-object that Id is merged into (possibly Id itself).
/// Named-objects that are not made of one, two or three words are not
/// touched by any pass and keep the initial value -1.
///
/// Passes, in this order:
///   1. single words merged among themselves,
///   2. double words merged among themselves,
///   3. triple words (w0, w1, w2) mapped onto the target of the existing
///      double (w1, w2), when such a double exists,
///   4. remaining triple words merged among themselves.
/// In passes 1, 2 and 4 candidates are first bucketed by a key built from
/// GetSubStr(0, 2) of each word; inside a bucket two named-objects match when
/// IsMatchPfx accepts every corresponding word pair. A group of matching
/// named-objects collapses into its most frequent member (most documents).
void TNmObjBs::GetMergedNmObj(TIntV& NewNmObjIdV){
  // matching constraints
  const int MnPfxLen=3;
  const int MxSfxLen=2;
  // create transformation vector; -1 means "not assigned yet"
  const int NmObjs=NmObjWordStrVToDocIdVH.Len();
  NewNmObjIdV.Gen(NmObjs);
  NewNmObjIdV.PutAll(-1);

  for (int Words=1; Words<=3; Words++){
    if (Words==3){
      // merging triples to doubles:
      // (prefix, first-name, last-name) to (first-name, last-name)
      for (int NmObjId=0; NmObjId<NmObjs; NmObjId++){
        if (NewNmObjIdV[NmObjId]!=-1){continue;}
        const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId);
        if (WordStrV.Len()!=3){continue;}
        TStrV DbWordStrV(2, 0);
        DbWordStrV.Add(WordStrV[1]);
        DbWordStrV.Add(WordStrV[2]);
        const int DbNmObjId=NmObjWordStrVToDocIdVH.GetKeyId(DbWordStrV);
        if (DbNmObjId!=-1){
          // follow the double to whatever it was merged into
          const int NewDbNmObjId=NewNmObjIdV[DbNmObjId];
          NewNmObjIdV[NmObjId]=NewDbNmObjId;
        }
      }
    }

    // single words use MxSfxLen, multi-word named-objects use MxSfxLen+1
    const int PassMxSfxLen=(Words==1) ? MxSfxLen : MxSfxLen+1;

    // collect still-unassigned named-objects of this word count by prefix key
    TStrIntVH PfxStrToNmObjIdVH;
    for (int NmObjId=0; NmObjId<NmObjs; NmObjId++){
      if (NewNmObjIdV[NmObjId]!=-1){continue;}
      const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId);
      if (WordStrV.Len()!=Words){continue;}
      TStr PfxStr;
      for (int WordN=0; WordN<Words; WordN++){
        PfxStr=PfxStr+WordStrV[WordN].GetSubStr(0, 2);
      }
      PfxStrToNmObjIdVH.AddDat(PfxStr).Add(NmObjId);
    }

    // traverse word-groups with the same prefix key
    const int Pfxs=PfxStrToNmObjIdVH.Len();
    for (int PfxId=0; PfxId<Pfxs; PfxId++){
      const TIntV& NmObjIdV=PfxStrToNmObjIdVH[PfxId];
      const int GroupLen=NmObjIdV.Len();
      for (int NmObjIdN=0; NmObjIdN<GroupLen; NmObjIdN++){
        const int NmObjId=NmObjIdV[NmObjIdN];
        if (NewNmObjIdV[NmObjId]!=-1){continue;}
        // open a new equivalence group seeded with this named-object
        NewNmObjIdV[NmObjId]=NmObjId;
        const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId);
        const int Fq=NmObjWordStrVToDocIdVH[NmObjId].Len();
        TIntPrV FqNmObjIdPrV(GroupLen, 0);
        FqNmObjIdPrV.Add(TIntPr(Fq, NmObjId));
        // traverse rest of the word-group for matching named-objects
        for (int SubNmObjIdN=NmObjIdN+1; SubNmObjIdN<GroupLen; SubNmObjIdN++){
          const int SubNmObjId=NmObjIdV[SubNmObjIdN];
          if (NewNmObjIdV[SubNmObjId]!=-1){continue;}
          const TStrV& SubWordStrV=NmObjWordStrVToDocIdVH.GetKey(SubNmObjId);
          // every word pair must match; stop at the first mismatch
          bool IsMatch=true;
          for (int WordN=0; IsMatch&&(WordN<Words); WordN++){
            IsMatch=IsMatchPfx(
             WordStrV[WordN], SubWordStrV[WordN], MnPfxLen, PassMxSfxLen);
          }
          if (IsMatch){
            NewNmObjIdV[SubNmObjId]=NmObjId;
            const int SubFq=NmObjWordStrVToDocIdVH[SubNmObjId].Len();
            FqNmObjIdPrV.Add(TIntPr(SubFq, SubNmObjId));
          }
        }
        // collapse matching named-objects into the most frequent one
        if (FqNmObjIdPrV.Len()>1){
          FqNmObjIdPrV.Sort(false);
          const int MainNmObjId=FqNmObjIdPrV[0].Val2;
          NewNmObjIdV[MainNmObjId]=MainNmObjId;
          for (int FqNmObjIdPrN=1; FqNmObjIdPrN<FqNmObjIdPrV.Len(); FqNmObjIdPrN++){
            const int SubNmObjId=FqNmObjIdPrV[FqNmObjIdPrN].Val2;
            NewNmObjIdV[SubNmObjId]=MainNmObjId;
          }
        }
      }
    }
  }
}
void TNmObjBs::FilterCandToNmObjIdV( const TStrV& CandWordStrV, TIntV& NmObjIdV, const bool& DumpP){ // prepare candidate traversal TVec<TStrV> NmObjIdWordStrVV; int CandWordStrN=0; int CandWordStrs=CandWordStrV.Len(); while (CandWordStrN<CandWordStrs){ // get candidate TStr WordStr=CandWordStrV[CandWordStrN]; //printf("%s ", WordStr.CStr()); // simple filters if (WordStr.Len()<=1){CandWordStrN++; continue;} if (WordStr==ParagraphTagStr){CandWordStrN++; continue;} if (WordStr==BreakTagStr){CandWordStrN++; continue;} if (WordStr==EofTagStr){CandWordStrN++; break;} if (IsNumStr(WordStr)){CandWordStrN++; continue;} TStr UcWordStr=ChDef->GetUcStr(WordStr); //if (SwSet->IsIn(UcWordStr, true)){ // CandWordStrN++; continue;} if ((WordStr==UcWordStr)&&((WordStr.Len()>4)&&(!IsNmObjAttr(WordStr, noaAcronym)))){ CandWordStrN++; continue;} // unperiod if (IsNmObjAttr(WordStr, noaUnperiod)&&(CandWordStrV[CandWordStrN+1]==PeriodTagStr)){ CandWordStrN+=1; } // period if (WordStr==PeriodTagStr){ CandWordStrN++; WordStr=CandWordStrV[CandWordStrN]; if (IsTagStr(WordStr)){continue;} if (IsNmObjAttr(WordStr, noaDefined)){ continue; } else if ((CandWordStrN>1)&&(IsNmObjAttr(CandWordStrV[CandWordStrN-2], noaUnperiod))){ continue; } else { TStr NextWordStr=CandWordStrV[CandWordStrN+1]; if (IsFirstCapWordStr(NextWordStr)||IsNmObjAttr(NextWordStr, noaAsCapitalized)){ continue; } else if (!IsNmObj(WordStr)){ CandWordStrN++; continue; } } } // if (WordStr=="British"){ // printf("");} // ignore if (IsNmObjAttr(WordStr, noaIgnore)){ CandWordStrN++; continue; } // collect named-object words TStrV WordStrV; forever { WordStrV.Add(WordStr); CandWordStrN++; WordStr=CandWordStrV[CandWordStrN]; if (IsTagStr(WordStr)){break;} if (WordStr.Len()<=1){break;} if (IsNmObjAttr(WordStr, noaIgnore)){CandWordStrN++; break;} if (IsNmObjAttr(WordStr, noaStandalone)){break;} if (IsNmObjAttr(WordStrV, noaStandalone)){break;} } // get normalized version of named-object TStrV NrWordStrV; GetNrNmObjStrV(WordStrV, 
NrWordStrV); // simple filters if (IsNmObjAttr(NrWordStrV, noaIgnore)){continue;} if (IsNmObjAttr(NrWordStrV, noaFirstName)){continue;} if (NrWordStrV.Len()>5){ while (NrWordStrV.Len()>2){NrWordStrV.Del(0);}} if (NrWordStrV.Len()==1){ TStr UcWordStr=ChDef->GetUcStr(NrWordStrV[0]); if (SwSet->IsIn(UcWordStr, true)){continue;} } // add named object NmObjIdWordStrVV.Add(NrWordStrV); } // merge similar words for (int NmObjN=0; NmObjN<NmObjIdWordStrVV.Len(); NmObjN++){ TStrV& WordStrV=NmObjIdWordStrVV[NmObjN]; if (WordStrV.Len()==1){ // merge single words for (int SubNmObjN=0; SubNmObjN<NmObjIdWordStrVV.Len(); SubNmObjN++){ TStrV& SubWordStrV=NmObjIdWordStrVV[SubNmObjN]; if (SubWordStrV.Len()==1){ if (WordStrV[0]!=SubWordStrV[0]){ if (IsMatchPfx(WordStrV[0], SubWordStrV[0], 3, 4)){ // normalize to shorter string if (WordStrV[0].Len()<SubWordStrV[0].Len()){SubWordStrV=WordStrV;} else {WordStrV=SubWordStrV;} } } } } } else if (WordStrV.Len()>=2){ TStr LastNm=WordStrV.Last(); for (int SubNmObjN=0; SubNmObjN<NmObjIdWordStrVV.Len(); SubNmObjN++){ TStrV& SubWordStrV=NmObjIdWordStrVV[SubNmObjN]; if (SubWordStrV.Len()==1){ // merge last-name with [first-name,last-name] pairs TStr SubLastNm=SubWordStrV[0]; if (LastNm!=SubLastNm){ if (IsMatchPfx(LastNm, SubLastNm, 3, 4)){ if (LastNm.Len()<SubLastNm.Len()){SubWordStrV=WordStrV;} else {WordStrV=SubWordStrV;} } } } else if (false&&(SubWordStrV.Len()==2)){ // merge [first-name,last-name] with [first-name,last-name] pairs if ((WordStrV[0]!=SubWordStrV[0])||(WordStrV[1]!=SubWordStrV[1])){ if ((IsMatchPfx(WordStrV[0], SubWordStrV[0], 3, 4))&& (IsMatchPfx(WordStrV[1], SubWordStrV[1], 3, 4))){ // normalize to shorter string (first word) if (WordStrV[0].Len()<SubWordStrV[0].Len()){ SubWordStrV[0]=WordStrV[0];} else {WordStrV[0]=SubWordStrV[0];} // normalize to shorter string (second word) if (WordStrV[1].Len()<SubWordStrV[1].Len()){ SubWordStrV[1]=WordStrV[1];} else {WordStrV[1]=SubWordStrV[1];} } } } } } } // get named-objects-ids 
NmObjIdV.Gen(NmObjIdWordStrVV.Len(), 0); {for (int NmObjN=0; NmObjN<NmObjIdWordStrVV.Len(); NmObjN++){ TStrV& NmObjWordStrV=NmObjIdWordStrVV[NmObjN]; int NmObjId=GetNmObjId(NmObjWordStrV, true); NmObjIdV.Add(NmObjId); }} // dump if (DumpP){ printf("Named-Objects: "); for (int NmObjN=0; NmObjN<NmObjIdV.Len(); NmObjN++){ int NmObjId=NmObjIdV[NmObjN]; TStr NmObjStr=GetNmObjStr(NmObjId); printf("%s ", NmObjStr.CStr()); } printf("\n"); } }
/// Stores the ids of all nodes (the keys of NodeH) in NIdV,
/// in hash-table iteration order.
void TUNGraph::GetNIdV(TIntV& NIdV) const {
  NIdV.Gen(GetNodes(), 0);
  int KeyId = NodeH.FFirstKeyId();
  while (NodeH.FNextKeyId(KeyId)) {
    NIdV.Add(NodeH.GetKey(KeyId));
  }
}
/// Stores the ids of all edges (the keys of EdgeH) in EIdV,
/// in hash-table iteration order.
void TNEGraph::GetEIdV(TIntV& EIdV) const {
  EIdV.Gen(GetEdges(), 0);
  int KeyId = EdgeH.FFirstKeyId();
  while (EdgeH.FNextKeyId(KeyId)) {
    EIdV.Add(EdgeH.GetKey(KeyId));
  }
}
/// Stores the ids of all nodes (the keys of NodeToModeMapping) in NIdV,
/// in hash-table iteration order.
void TMultimodalGraphImplB::GetNIdV(TIntV& NIdV) const {
  NIdV.Gen(GetNodes(), 0);
  int KeyId = NodeToModeMapping.FFirstKeyId();
  while (NodeToModeMapping.FNextKeyId(KeyId)) {
    NIdV.Add(NodeToModeMapping.GetKey(KeyId));
  }
}