Example #1
0
// Fills DIdV with the document id of every currently selected point,
// in the order in which the points appear in SelPointV.
void TVizMapContext::GetSelectDIdV(TIntV& DIdV) {
    const int NumSelected = SelPointV.Len();
    // reserve one slot per selected point, but start with an empty vector
    DIdV.Gen(NumSelected, 0);
    int SelN = 0;
    while (SelN < NumSelected) {
        DIdV.Add(VizMapFrame->GetPoint(SelPointV[SelN])->GetDocId());
        ++SelN;
    }
}
void TNmObjBs::GetNmObjDIdV(
 const PBowDocBs& BowDocBs, TIntV& BowDIdV, 
 const TStr& NmObjStr1, const TStr& NmObjStr2) const {
  // get first named-object-id
  int NmObjId1=GetNmObjId(NmObjStr1);
  TIntV NmObjDocIdV1; GetNmObjDocIdV(NmObjId1, NmObjDocIdV1);
  NmObjDocIdV1.Sort();
  // get second named-object-id
  TIntV NmObjDocIdV2;
  if (!NmObjStr2.Empty()){
    int NmObjId2=GetNmObjId(NmObjStr2);
    GetNmObjDocIdV(NmObjId2, NmObjDocIdV2);
    NmObjDocIdV2.Sort();
  }
  // create joint doc-id-vector
  TIntV NmObjDocIdV;
  if (NmObjDocIdV2.Empty()){
    NmObjDocIdV=NmObjDocIdV1;
  } else {
    NmObjDocIdV1.Intrs(NmObjDocIdV2, NmObjDocIdV);
  }
  // traverse named-object-documents to collect bow-document-ids
  BowDIdV.Gen(NmObjDocIdV.Len(), 0);
  for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocIdV.Len(); NmObjDocIdN++){
    TStr DocNm=GetDocNm(NmObjDocIdV[NmObjDocIdN]);
    int DId=BowDocBs->GetDId(DocNm);
    if (DId!=-1){
      BowDIdV.Add(DId);
    } 
  }
}
Example #3
0
// Fills NIdV with the keys of LeftH followed by the keys of RightH.
void TBPGraph::GetNIdV(TIntV& NIdV) const {
  // reserve room for all nodes, start with an empty vector
  NIdV.Gen(GetNodes(), 0);
  int LeftKeyId=LeftH.FFirstKeyId();
  while (LeftH.FNextKeyId(LeftKeyId)) {
    NIdV.Add(LeftH.GetKey(LeftKeyId));
  }
  int RightKeyId=RightH.FFirstKeyId();
  while (RightH.FNextKeyId(RightKeyId)) {
    NIdV.Add(RightH.GetKey(RightKeyId));
  }
}
Example #4
0
// Reads the block-length vector from FBlobBs.
// Expected layout, in read order: the BlockLenVNm marker string, the number
// of entries, the entries themselves, and a terminating -1.
// EAssert fails when the marker or the terminator does not match.
void TBlobBs::GetBlockLenV(const PFRnd& FBlobBs, TIntV& BlockLenV){
  // leading marker
  EAssert(FBlobBs->GetStr(BlockLenVNm.Len())==BlockLenVNm);
  // entry count followed by that many lengths
  const int BlockLens=FBlobBs->GetInt();
  BlockLenV.Gen(BlockLens);
  int BlockLenN=0;
  while (BlockLenN<BlockLenV.Len()){
    BlockLenV[BlockLenN]=FBlobBs->GetInt();
    BlockLenN++;
  }
  // trailing terminator
  EAssert(FBlobBs->GetInt()==-1);
}
Example #5
0
void TYInvIx::GetDocIdV(
 const PYWordDs& WordDs, const int& MnDocFq, TIntV& DocIdV){
  IAssert(MnDocFq>=0);
  if (MnDocFq==0){
    DocIdV=AllDocIdV;
  } else {
    TIntIntH DocIdFqH(100); int MxDocFq=0;
    int WordIdN=WordDs->FFirstWordId(); int WordId; double WordFq;
    while (WordDs->FNextWordId(WordIdN, WordId, WordFq)){
      if (WordIdToFirstDocIdNH.IsKey(WordId)){
        int DocIdN=FFirstDocId(WordId); int DocId;
        while (FNextWordId(DocIdN, DocId)){
          DocIdFqH.AddDat(DocId)+=int(WordFq);
          MxDocFq=TInt::GetMx(MxDocFq, DocIdFqH.GetDat(DocId));
        }
      }
    }
    int NewMnDocFq=(MnDocFq<=MxDocFq) ? MnDocFq : MxDocFq-3;
    DocIdV.Gen(DocIdFqH.Len(), 0);
    int DocIdP=DocIdFqH.FFirstKeyId();
    while (DocIdFqH.FNextKeyId(DocIdP)){
      int DocId=DocIdFqH.GetKey(DocIdP);
      int DocFq=DocIdFqH[DocIdP];
      if (DocFq>=NewMnDocFq){DocIdV.Add(DocId);}
    }
  }
}
Example #6
0
void TYFSelBs::GetBestWordIdV(
 const int& DocId, const double& EstExp, const double& SumEstPrb,
 const PYWordDs& IntrsWordDs, TIntV& BestWordIdV){
  TIntFltKdV& WordIdEstKdV=DocIdToWordIdEstVV[DocId];
  TFltIntKdV WordEstIdKdV(WordIdEstKdV.Len(), 0);
  double MnWordEst=TFlt::Mx;
  for (int WordIdN=0; WordIdN<WordIdEstKdV.Len(); WordIdN++){
    int WordId=WordIdEstKdV[WordIdN].Key;
    double WordEst=pow(WordIdEstKdV[WordIdN].Dat, EstExp);
    if (IntrsWordDs->IsWordId(WordId)){
      WordEstIdKdV.Add(TFltIntKd(WordEst, WordId));
      MnWordEst=TFlt::GetMn(WordEst, MnWordEst);
    }
  }
  double SumWordEst=0;
  {for (int WordIdN=0; WordIdN<WordEstIdKdV.Len(); WordIdN++){
    SumWordEst+=(WordEstIdKdV[WordIdN].Key-=MnWordEst);}}
  WordEstIdKdV.Sort(false);

  {BestWordIdV.Gen(WordEstIdKdV.Len(), 0);
  SumWordEst*=SumEstPrb; int WordIdN=0;
  while ((SumWordEst>=0)&&(WordIdN<WordEstIdKdV.Len())){
    double WordEst=WordEstIdKdV[WordIdN].Key;
    int WordId=WordEstIdKdV[WordIdN].Dat;
    SumWordEst-=WordEst;
    BestWordIdV.Add(WordId);
    WordIdN++;
  }}
}
Example #7
0
/// Fills SzSeq with SeqLen values drawn via Rnd.GetPowerDev(Alpha), each rounded
/// and accepted only when it lies in [Min, Max]. Draws outside the range are
/// discarded and redrawn, so the loop ends only once SeqLen values were accepted.
void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) {
    SzSeq.Gen(SeqLen, 0);
    for (;;) {
        if (SzSeq.Len() >= SeqLen) { break; }
        const int Draw = (int) TMath::Round(Rnd.GetPowerDev(Alpha));
        // reject values outside the requested range
        if (Draw < Min || Draw > Max) { continue; }
        SzSeq.Add(Draw);
    }
}
Example #8
0
// Writes the node ids of Graph into SortedNIdV in topological order, using a
// depth-first traversal driven by an explicit stack. Nodes are appended once
// all their out-neighbours have been visited, and the result is reversed at
// the end. EAssertR fails if an out-neighbour is found on the active path.
void TGraphCascade::TopologicalSort(TIntV& SortedNIdV) {
    const int Nodes = Graph.GetNodes();
    TIntV NIdV;  Graph.GetNIdV(NIdV); // all node ids

    SortedNIdV.Gen(Nodes, 0); // result
    THash<TInt, TBool> VisitedH(Nodes); // node id -> already visited
    THash<TInt, TBool> OnPathH(Nodes);  // node id -> currently being expanded
    THash<TInt, TBool> EmittedH(Nodes); // node id -> already written to the result

    // initialise all flags to false
    for (int NodeN = 0; NodeN < Nodes; NodeN++) {
        const int NId = NIdV[NodeN];
        VisitedH.AddDat(NId, false);
        OnPathH.AddDat(NId, false);
        EmittedH.AddDat(NId, false);
    }

    TSStack<TInt> PendingS;
    for (int NodeN = 0; NodeN < Nodes; NodeN++) {
        const int RootNId = NIdV[NodeN];
        // start a traversal only from nodes not reached so far
        if (VisitedH.GetDat(RootNId)) { continue; }
        PendingS.Push(RootNId);
        while (!PendingS.Empty()) {
            // look at the node on top without removing it
            const int CurNId = PendingS.Top();
            VisitedH.GetDat(CurNId) = true;
            OnPathH.GetDat(CurNId) = true;
            // schedule every out-neighbour that has not been visited yet
            TNGraph::TNodeI CurNI = Graph.GetNI(CurNId);
            const int OutDeg = CurNI.GetOutDeg();
            bool AllChildrenDone = true;
            for (int ChildN = 0; ChildN < OutDeg; ChildN++) {
                const int ChildNId = CurNI.GetOutNId(ChildN);
                EAssertR(!OnPathH.GetDat(ChildNId), "TGraphCascade::TopologicalSort: the graph is not a DAG!");
                if (VisitedH.GetDat(ChildNId)) { continue; }
                AllChildrenDone = false;
                PendingS.Push(ChildNId);
            }
            // children still pending: revisit this node after they are done
            if (!AllChildrenDone) { continue; }
            // a node can sit on the stack more than once; emit it only once
            if (!EmittedH.GetDat(CurNId)) {
                SortedNIdV.Add(CurNId);
                EmittedH.GetDat(CurNId) = true;
            }
            OnPathH.GetDat(CurNId) = false;
            PendingS.Pop();
        }
    }
    // nodes were appended children-first; flip to get parents-first
    SortedNIdV.Reverse();
}
Example #9
0
/////////////////////////////////////////////////
// SkyGrid-Entity
// Fills DocIdV with the ids of this entity's documents whose time lies in
// [MnTm, MxTm). A bound equal to 0 disables the check on that side.
void TSkyGridEnt::GetDocIdV(const TSkyGridBs* SkyGridBs,
 const uint64& MnTm, const uint64& MxTm, TIntV& DocIdV) const {
  DocIdV.Gen(GetDocIds(), 0);
  int DocN=0;
  while (DocN<GetDocIds()){
    const int DocId=GetDocId(DocN);
    DocN++;
    PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId);
    const uint64 DocTm=Doc->GetTm();
    // reject documents before the lower or at/after the upper bound
    const bool TooEarlyP=(MnTm!=0)&&(DocTm<MnTm);
    const bool TooLateP=(MxTm!=0)&&(DocTm>=MxTm);
    if (TooEarlyP||TooLateP){continue;}
    DocIdV.Add(DocId);
  }
}
// Computes a merge map over all named-objects.
// On return NewNmObjIdV has one entry per named-object; entry NmObjId holds
// the id of the named-object NmObjId is merged into (its own id when it is
// the representative of its group). Named-objects whose word count is not
// 1, 2 or 3 are left at -1.
//
// Processing order (later steps only touch entries that are still -1):
//   1. single-word named-objects are merged among themselves,
//   2. double-word named-objects are merged among themselves,
//   3. triple-word named-objects whose last two words form an existing
//      double-word named-object are mapped to that double's representative,
//   4. the remaining triple-word named-objects are merged among themselves.
// Within steps 1, 2 and 4 candidates are bucketed by the concatenated
// 3-character prefixes of their words, compared word by word with
// IsMatchPfx, and every matching group is collapsed onto the member that
// sorts first by (number of documents, id) in descending order.
void TNmObjBs::GetMergedNmObj(TIntV& NewNmObjIdV){
  // matching constraints
  const int MnPfxLen=3; const int MxSfxLen=2;

  // create transformation vector; -1 marks "not assigned yet"
  const int NmObjs=NmObjWordStrVToDocIdVH.Len();
  NewNmObjIdV.Gen(NmObjs); NewNmObjIdV.PutAll(-1);

  // merge single, double and triple words (in this order)
  for (int Words=1; Words<=3; Words++){
    if (Words==3){
      // merging triples to doubles
      // ... (prefix, first-name, last-name) to (first-name, last-name)
      for (int NmObjId=0; NmObjId<NmObjs; NmObjId++){
        if (NewNmObjIdV[NmObjId]!=-1){continue;}
        const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId);
        if (WordStrV.Len()!=3){continue;}
        TStrV DbWordStrV(2, 0);
        DbWordStrV.Add(WordStrV[1]); DbWordStrV.Add(WordStrV[2]);
        int DbNmObjId=NmObjWordStrVToDocIdVH.GetKeyId(DbWordStrV);
        if (DbNmObjId!=-1){
          // follow the double to whatever it was merged into
          int NewDbNmObjId=NewNmObjIdV[DbNmObjId];
          NewNmObjIdV[NmObjId]=NewDbNmObjId;
        }
      }
    }

    // single words are matched with a tighter suffix bound than
    // double and triple words
    const int CurMxSfxLen=(Words==1) ? MxSfxLen : MxSfxLen+1;

    // collect unassigned named-objects with the current word count
    // according to the concatenated prefixes of their words
    TStrIntVH PfxStrToNmObjIdVH;
    for (int NmObjId=0; NmObjId<NmObjs; NmObjId++){
      if (NewNmObjIdV[NmObjId]!=-1){continue;}
      const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId);
      if (WordStrV.Len()!=Words){continue;}
      TStr PfxStr;
      for (int WordN=0; WordN<Words; WordN++){
        PfxStr=PfxStr+WordStrV[WordN].GetSubStr(0, 2);
      }
      PfxStrToNmObjIdVH.AddDat(PfxStr).Add(NmObjId);
    }

    // traverse word-groups with the same prefix
    const int Pfxs=PfxStrToNmObjIdVH.Len();
    for (int PfxId=0; PfxId<Pfxs; PfxId++){
      // get & traverse word-group
      TIntV& NmObjIdV=PfxStrToNmObjIdVH[PfxId];
      for (int NmObjIdN=0; NmObjIdN<NmObjIdV.Len(); NmObjIdN++){
        int NmObjId=NmObjIdV[NmObjIdN];
        if (NewNmObjIdV[NmObjId]!=-1){continue;}
        NewNmObjIdV[NmObjId]=NmObjId;
        const TStrV& WordStrV=NmObjWordStrVToDocIdVH.GetKey(NmObjId);
        int Fq=NmObjWordStrVToDocIdVH[NmObjId].Len();
        // (frequency, id) pairs of the equivalence group seeded by NmObjId
        TIntPrV FqNmObjIdPrV(NmObjIdV.Len(), 0);
        FqNmObjIdPrV.Add(TIntPr(Fq, NmObjId));
        // traverse rest of the word-group for matching words
        for (int SubNmObjIdN=NmObjIdN+1; SubNmObjIdN<NmObjIdV.Len(); SubNmObjIdN++){
          int SubNmObjId=NmObjIdV[SubNmObjIdN];
          if (NewNmObjIdV[SubNmObjId]!=-1){continue;}
          const TStrV& SubWordStrV=NmObjWordStrVToDocIdVH.GetKey(SubNmObjId);
          // test matching: every word position has to match; stop at the
          // first mismatch
          bool MatchP=true;
          for (int WordN=0; MatchP&&(WordN<Words); WordN++){
            MatchP=IsMatchPfx(
             WordStrV[WordN], SubWordStrV[WordN], MnPfxLen, CurMxSfxLen);
          }
          if (MatchP){
            NewNmObjIdV[SubNmObjId]=NmObjId;
            int SubFq=NmObjWordStrVToDocIdVH[SubNmObjId].Len();
            FqNmObjIdPrV.Add(TIntPr(SubFq, SubNmObjId));
          }
        }
        // collapse matching words into most frequent word
        if (FqNmObjIdPrV.Len()>1){
          FqNmObjIdPrV.Sort(false);
          int MainNmObjId=FqNmObjIdPrV[0].Val2;
          NewNmObjIdV[MainNmObjId]=MainNmObjId;
          for (int FqNmObjIdPrN=1; FqNmObjIdPrN<FqNmObjIdPrV.Len(); FqNmObjIdPrN++){
            int SubNmObjId=FqNmObjIdPrV[FqNmObjIdPrN].Val2;
            NewNmObjIdV[SubNmObjId]=MainNmObjId;
          }
        }
      }
    }
  }
}
// Converts a stream of candidate words into a vector of named-object ids.
//   CandWordStrV - candidate words interleaved with tag strings (paragraph,
//                  break, period and end-of-file tags are tested below)
//   NmObjIdV     - output; one id per named-object word sequence kept
//   DumpP        - when true, the resulting named-objects are printed to stdout
// Steps: (1) scan the candidates, filter them and group consecutive words
// into word sequences; (2) normalize each sequence with GetNrNmObjStrV and
// apply further filters; (3) merge similar sequences using IsMatchPfx;
// (4) translate every sequence to an id with GetNmObjId.
// NOTE(review): the scan reads CandWordStrV[CandWordStrN] after incrementing
// the index and CandWordStrV[CandWordStrN+1] as look-ahead without explicit
// bounds checks; this presumably relies on the input being terminated by
// EofTagStr - confirm against the caller.
void TNmObjBs::FilterCandToNmObjIdV(
 const TStrV& CandWordStrV, TIntV& NmObjIdV, const bool& DumpP){
  // prepare candidate traversal
  TVec<TStrV> NmObjIdWordStrVV;
  int CandWordStrN=0; int CandWordStrs=CandWordStrV.Len();
  while (CandWordStrN<CandWordStrs){
    // get candidate
    TStr WordStr=CandWordStrV[CandWordStrN];
    //printf("%s ", WordStr.CStr());
    // simple filters
    // (skip very short words, paragraph/break tags and numbers; the
    // end-of-file tag terminates the scan)
    if (WordStr.Len()<=1){CandWordStrN++; continue;}
    if (WordStr==ParagraphTagStr){CandWordStrN++; continue;}
    if (WordStr==BreakTagStr){CandWordStrN++; continue;}
    if (WordStr==EofTagStr){CandWordStrN++; break;}
    if (IsNumStr(WordStr)){CandWordStrN++; continue;}
    TStr UcWordStr=ChDef->GetUcStr(WordStr);
    //if (SwSet->IsIn(UcWordStr, true)){
    //  CandWordStrN++; continue;}
    // skip all-upper-case words longer than 4 characters unless they carry
    // the acronym attribute
    if ((WordStr==UcWordStr)&&((WordStr.Len()>4)&&(!IsNmObjAttr(WordStr, noaAcronym)))){
      CandWordStrN++; continue;}
    // unperiod
    // (a word with the unperiod attribute followed by a period tag: step
    // onto the period tag; WordStr itself is left unchanged)
    if (IsNmObjAttr(WordStr, noaUnperiod)&&(CandWordStrV[CandWordStrN+1]==PeriodTagStr)){
      CandWordStrN+=1;
    }
    // period
    // (decide how to treat the word that follows a period tag)
    if (WordStr==PeriodTagStr){
      CandWordStrN++; WordStr=CandWordStrV[CandWordStrN];
      // in the 'continue' cases below the index is not advanced, so the
      // word after the period is examined again from the top of the loop
      if (IsTagStr(WordStr)){continue;}
      if (IsNmObjAttr(WordStr, noaDefined)){
        continue;
      } else 
      if ((CandWordStrN>1)&&(IsNmObjAttr(CandWordStrV[CandWordStrN-2], noaUnperiod))){
        continue;
      } else {
        TStr NextWordStr=CandWordStrV[CandWordStrN+1];
        if (IsFirstCapWordStr(NextWordStr)||IsNmObjAttr(NextWordStr, noaAsCapitalized)){
          continue;
        } else 
        if (!IsNmObj(WordStr)){
          // the word after the period is not a known named-object: drop it
          CandWordStrN++; continue;
        }
      }
    }
//    if (WordStr=="British"){
//      printf("");}
    // ignore
    if (IsNmObjAttr(WordStr, noaIgnore)){
      CandWordStrN++; continue;
    } 
    // collect named-object words
    // (append words until a tag, a very short word, an ignored word or a
    // standalone word/sequence ends the current named-object)
    TStrV WordStrV;
    forever {
      WordStrV.Add(WordStr);
      CandWordStrN++; WordStr=CandWordStrV[CandWordStrN];
      if (IsTagStr(WordStr)){break;}
      if (WordStr.Len()<=1){break;}
      if (IsNmObjAttr(WordStr, noaIgnore)){CandWordStrN++; break;}
      if (IsNmObjAttr(WordStr, noaStandalone)){break;}
      if (IsNmObjAttr(WordStrV, noaStandalone)){break;}
    }
    // get normalized version of named-object
    TStrV NrWordStrV; GetNrNmObjStrV(WordStrV, NrWordStrV);
    // simple filters
    if (IsNmObjAttr(NrWordStrV, noaIgnore)){continue;}
    if (IsNmObjAttr(NrWordStrV, noaFirstName)){continue;}
    // sequences longer than 5 words are cut down to their last 2 words
    if (NrWordStrV.Len()>5){
      while (NrWordStrV.Len()>2){NrWordStrV.Del(0);}}
    // a single word found in SwSet (looked up by its upper-case form) is dropped
    if (NrWordStrV.Len()==1){
      TStr UcWordStr=ChDef->GetUcStr(NrWordStrV[0]);
      if (SwSet->IsIn(UcWordStr, true)){continue;}
    }
    // add named object
    NmObjIdWordStrVV.Add(NrWordStrV);
  }
  // merge similar words
  // (WordStrV and SubWordStrV are references into NmObjIdWordStrVV, so the
  // assignments below rewrite the collected sequences in place)
  for (int NmObjN=0; NmObjN<NmObjIdWordStrVV.Len(); NmObjN++){
    TStrV& WordStrV=NmObjIdWordStrVV[NmObjN];
    if (WordStrV.Len()==1){
      // merge single words
      for (int SubNmObjN=0; SubNmObjN<NmObjIdWordStrVV.Len(); SubNmObjN++){
        TStrV& SubWordStrV=NmObjIdWordStrVV[SubNmObjN];
        if (SubWordStrV.Len()==1){
          if (WordStrV[0]!=SubWordStrV[0]){
            if (IsMatchPfx(WordStrV[0], SubWordStrV[0], 3, 4)){
              // normalize to shorter string
              if (WordStrV[0].Len()<SubWordStrV[0].Len()){SubWordStrV=WordStrV;}
              else {WordStrV=SubWordStrV;}
            }
          }
        }
      }
    } else
    if (WordStrV.Len()>=2){
      // LastNm is a copy taken before the loop; it stays unchanged even if
      // WordStrV is overwritten below
      TStr LastNm=WordStrV.Last();
      for (int SubNmObjN=0; SubNmObjN<NmObjIdWordStrVV.Len(); SubNmObjN++){
        TStrV& SubWordStrV=NmObjIdWordStrVV[SubNmObjN];
        if (SubWordStrV.Len()==1){
          // merge last-name with [first-name,last-name] pairs
          TStr SubLastNm=SubWordStrV[0];
          if (LastNm!=SubLastNm){
            if (IsMatchPfx(LastNm, SubLastNm, 3, 4)){
              if (LastNm.Len()<SubLastNm.Len()){SubWordStrV=WordStrV;} 
              else {WordStrV=SubWordStrV;}
            }
          }
        } else
        // this branch is disabled by the constant 'false' in its condition
        if (false&&(SubWordStrV.Len()==2)){
          // merge [first-name,last-name] with [first-name,last-name] pairs
          if ((WordStrV[0]!=SubWordStrV[0])||(WordStrV[1]!=SubWordStrV[1])){
            if ((IsMatchPfx(WordStrV[0], SubWordStrV[0], 3, 4))&&
             (IsMatchPfx(WordStrV[1], SubWordStrV[1], 3, 4))){
              // normalize to shorter string (first word)
              if (WordStrV[0].Len()<SubWordStrV[0].Len()){
                SubWordStrV[0]=WordStrV[0];}
              else {WordStrV[0]=SubWordStrV[0];}
              // normalize to shorter string (second word)
              if (WordStrV[1].Len()<SubWordStrV[1].Len()){
                SubWordStrV[1]=WordStrV[1];}
              else {WordStrV[1]=SubWordStrV[1];}
            }
          }
        }
      }
    }
  }
  // get named-objects-ids
  // NOTE(review): the 'true' argument of GetNmObjId presumably requests
  // creation of a missing named-object - confirm against its declaration
  NmObjIdV.Gen(NmObjIdWordStrVV.Len(), 0);
  {for (int NmObjN=0; NmObjN<NmObjIdWordStrVV.Len(); NmObjN++){
    TStrV& NmObjWordStrV=NmObjIdWordStrVV[NmObjN];
    int NmObjId=GetNmObjId(NmObjWordStrV, true);
    NmObjIdV.Add(NmObjId);
  }}
  // dump
  if (DumpP){
    printf("Named-Objects: ");
    for (int NmObjN=0; NmObjN<NmObjIdV.Len(); NmObjN++){
      int NmObjId=NmObjIdV[NmObjN];
      TStr NmObjStr=GetNmObjStr(NmObjId);
      printf("%s ", NmObjStr.CStr());
    }
    printf("\n");
  }
}
Example #12
0
// Fills NIdV with the keys of NodeH, in the hash table's iteration order.
void TUNGraph::GetNIdV(TIntV& NIdV) const {
  // reserve room for all nodes, start with an empty vector
  NIdV.Gen(GetNodes(), 0);
  int KeyId=NodeH.FFirstKeyId();
  while (NodeH.FNextKeyId(KeyId)) {
    NIdV.Add(NodeH.GetKey(KeyId));
  }
}
Example #13
0
// Fills EIdV with the keys of EdgeH, in the hash table's iteration order.
void TNEGraph::GetEIdV(TIntV& EIdV) const {
  // reserve room for all edges, start with an empty vector
  EIdV.Gen(GetEdges(), 0);
  int KeyId=EdgeH.FFirstKeyId();
  while (EdgeH.FNextKeyId(KeyId)) {
    EIdV.Add(EdgeH.GetKey(KeyId));
  }
}
// Fills NIdV with the keys of NodeToModeMapping, in the hash table's
// iteration order.
void TMultimodalGraphImplB::GetNIdV(TIntV& NIdV) const {
  // reserve room for all nodes, start with an empty vector
  NIdV.Gen(GetNodes(), 0);
  int KeyId=NodeToModeMapping.FFirstKeyId();
  while (NodeToModeMapping.FNextKeyId(KeyId)) {
    NIdV.Add(NodeToModeMapping.GetKey(KeyId));
  }
}