Ejemplo n.º 1
0
void TNmObjBs::GetFqNmObjIdPrV(
 const TStr& TargetNmObjStr, TIntPrV& FqNmObjIdPrV) const {
  //printf("Searching %s ...", TargetNmObjStr.CStr());
  // get target named-object-id
  int TargetNmObjId=GetNmObjId(TargetNmObjStr);
  // collect target named-object frequencies
  TIntIntH NmObjIdToFqH;
  // traverse target named-object documents
  int NmObjDocs=GetNmObjDocs(TargetNmObjId);
  for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocs; NmObjDocIdN++){
    // get document-id
    int DocId=GetNmObjDocId(TargetNmObjId, NmObjDocIdN);
    // traverse named-object in document
    int DocNmObjs=GetDocNmObjs(DocId);
    for (int DocNmObjN=0; DocNmObjN<DocNmObjs; DocNmObjN++){
      // get named-object & frequency
      int NmObjId; int TermFq;
      GetDocNmObjId(DocId, DocNmObjN, NmObjId, TermFq);
      // increment named-object document frequency
      NmObjIdToFqH.AddDat(NmObjId)++;
    }
  }
  // get & sort frequency table
  FqNmObjIdPrV.Clr(); NmObjIdToFqH.GetDatKeyPrV(FqNmObjIdPrV);
  FqNmObjIdPrV.Sort(false);
}
Ejemplo n.º 2
0
void TSkyGridEnt::GetSorted_LinkWgtDstEntIdPrV(
 const uint64& MnTm, const double& TopWgtSumPrc, TIntPrV& LinkWgtDstEntIdPrV) const {
  double AllLinkWgtSum=0;
  TIntIntH DstEntIdLinkWgtH;
  int LinkEnts=GetLinkEnts();
  for (int LinkEntN=0; LinkEntN<LinkEnts; LinkEntN++){
    int DstEntId=GetLinkEntId(LinkEntN);
    int EntLinks=GetEntLinks(LinkEntN);
    int EntLinkWgtSum=0;
    for (int EntLinkN=0; EntLinkN<EntLinks; EntLinkN++){
      const TSkyGridEntLinkCtx& EntLinkCtx=GetEntLinkCtx(LinkEntN, EntLinkN);
      if (EntLinkCtx.Tm>=MnTm){
        EntLinkWgtSum+=EntLinkCtx.LinkWgt;}
    }
    DstEntIdLinkWgtH.AddDat(DstEntId, EntLinkWgtSum);
    AllLinkWgtSum+=EntLinkWgtSum;
  }
  LinkWgtDstEntIdPrV.Clr(); DstEntIdLinkWgtH.GetDatKeyPrV(LinkWgtDstEntIdPrV);
  LinkWgtDstEntIdPrV.Sort(false);
  // cut long-tail
  if ((TopWgtSumPrc>0.0)&&(LinkWgtDstEntIdPrV.Len()>0)){
    int TopLinkWgt=LinkWgtDstEntIdPrV[0].Val1;
    if (TopLinkWgt>(3*AllLinkWgtSum)/LinkWgtDstEntIdPrV.Len()){
      double CutWgtSum=AllLinkWgtSum*(1-TopWgtSumPrc);
      int LastValN=LinkWgtDstEntIdPrV.Len()-1;
      while ((LastValN>0)&&(CutWgtSum>0)){
        CutWgtSum-=LinkWgtDstEntIdPrV[LastValN].Val1;
        LastValN--;
      }
      LinkWgtDstEntIdPrV.Trunc(LastValN+1);
    }
  }
}
Ejemplo n.º 3
0
// Computes the size distribution of the 1-connected components of Graph.
// Algorithm: find all bridges, remove them from the graph, take the largest
// remaining component K, add back every bridge that does not touch K, and
// count the connected components of the result. K itself is then removed
// from the distribution.
//   Graph  - input undirected graph (not modified; a copy is edited)
//   SzCntV - output (component size, number of components) pairs; cleared
//            when the graph has no bridges
void Get1CnComSzCnt(const PUNGraph& Graph, TIntPrV& SzCntV) {
  //TCnCom::GetWccCnt(Graph, SzCntV);  IAssertR(SzCntV.Len() == 1, "Graph is not connected.");
  TIntPrV EdgeV;
  GetEdgeBridges(Graph, EdgeV);
  if (EdgeV.Empty()) { SzCntV.Clr(false); return; }
  // work on a copy with all bridges removed
  PUNGraph TmpG = TUNGraph::New();
  *TmpG = *Graph;
  for (int e = 0; e < EdgeV.Len(); e++) {
    TmpG->DelEdge(EdgeV[e].Val1, EdgeV[e].Val2);
  }
  TCnComV CnComV;  GetWccs(TmpG, CnComV);
  IAssert(CnComV.Len() >= 2);
  // NOTE(review): CnComV[0] is taken to be the largest component K -- this
  // presumes GetWccs returns components ordered by decreasing size; confirm.
  const TIntV& MxWcc = CnComV[0].NIdV;
  TIntSet MxCcSet(MxWcc.Len());
  for (int i = 0; i < MxWcc.Len(); i++) {
    MxCcSet.AddKey(MxWcc[i]);
  }
  // create new graph: bridges not touching MxCc of G with no bridges
  for (int e = 0; e < EdgeV.Len(); e++) {
    if (! MxCcSet.IsKey(EdgeV[e].Val1) &&  ! MxCcSet.IsKey(EdgeV[e].Val2)) {
      TmpG->AddEdge(EdgeV[e].Val1, EdgeV[e].Val2);
    }
  }
  GetWccSzCnt(TmpG, SzCntV);
  // Remove exactly one component of size |K| from the distribution. Other
  // components (possibly merged by re-added bridges) may have the same size
  // as K, so the entry is only deleted when K was the sole component of
  // that size; otherwise its count is decremented.
  for (int c = 0; c < SzCntV.Len(); c++) {
    if (SzCntV[c].Val1 == MxCcSet.Len()) {
      const int SameSzCnt = SzCntV[c].Val2;
      if (SameSzCnt > 1) { SzCntV[c].Val2 = SameSzCnt - 1; }
      else { SzCntV.Del(c); }
      break;
    }
  }
}
Ejemplo n.º 4
0
/////////////////////////////////////////////////
// SkyGrid-Base
void TSkyGridBs::GetSorted_DocsEntIdPrV(TIntPrV& DocsEntIdPrV){
  // Fills DocsEntIdPrV with one pair per entity-id in [0, GetEnts()), built
  // from the value of GetEnt(id).GetDocIds(), and sorts it with Sort(false).
  TIntIntH DocsByEntIdH;
  int CurEntId=0;
  while (CurEntId<GetEnts()){
    int EntDocs=GetEnt(CurEntId).GetDocIds();
    DocsByEntIdH.AddDat(CurEntId, EntDocs);
    CurEntId++;
  }
  // export the table as pairs and order it
  DocsEntIdPrV.Clr();
  DocsByEntIdH.GetDatKeyPrV(DocsEntIdPrV);
  DocsEntIdPrV.Sort(false);
}
Ejemplo n.º 5
0
// Test GetTriadParticip
// Number of nodes with x number of triangles it participates in.
// The same verification is run against the undirected, directed and
// multi-edge directed test graphs.
TEST(triad, TestGetTriadParticip) {
  // Test TUNGraph
  PUNGraph GraphTUN = TriadGetTestTUNGraph();
  TIntPrV TriadCntV;
  TSnap::GetTriadParticip(GraphTUN, TriadCntV);
  VerifyGetTriadParticip(TriadCntV);

  // Test TNGraph which is same as undirected.
  PNGraph GraphTN = TriadGetTestTNGraph();
  TriadCntV.Clr();
  TSnap::GetTriadParticip(GraphTN, TriadCntV);
  VerifyGetTriadParticip(TriadCntV);

  // Test TNEGraph which is same as undirected.
  // (pass GraphTNE here: using GraphTN again would leave the TNEGraph
  // instantiation untested)
  PNEGraph GraphTNE = TriadGetTestTNEGraph();
  TriadCntV.Clr();
  TSnap::GetTriadParticip(GraphTNE, TriadCntV);
  VerifyGetTriadParticip(TriadCntV);
}
Ejemplo n.º 6
0
void TNGramBs::GetNGramStrV(
 const TStr& HtmlStr, TStrV& NGramStrV, TIntPrV& NGramBEChXPrV) const {
  TIntV NGramIdV; NGramStrV.Clr(); NGramBEChXPrV.Clr();
  TNGramBs::GetNGramIdV(HtmlStr, NGramIdV, NGramBEChXPrV);
  NGramStrV.Gen(NGramIdV.Len(), 0);
  for (int NGramIdN=0; NGramIdN<NGramIdV.Len(); NGramIdN++){
    TStr NGramStr=GetNGramStr(NGramIdV[NGramIdN]);
    NGramStrV.Add(NGramStr);
  }
}
Ejemplo n.º 7
0
// Set actor's language and country by majority vote over the movies the
// actor points to.
// For every actor node the language (then, in a second pass, the country)
// values of its out-neighbors are counted, ignoring the value 0 ("not set").
// If the most frequent value covers at least half of the neighbors that have
// a value, it is assigned to the actor. Assignments are collected first and
// applied after the pass, so votes within a pass never see values set by it.
// Prints how many actors were set and how many remain without a value.
void TImdbNet::SetActorCntryLangByMajority() {
  // set language
  TIntPrV NIdToVal;
  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
    if (! NI().IsActor()) { continue; }
    IAssert(NI().GetLang() == 0); // no language set
    IAssert(NI.GetInDeg() == 0);  // actors point to movies
    int Nbhs=0;  TIntH LangCntH;
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      LangCntH.AddDat(NI.GetOutNDat(e).GetLang()) += 1;  Nbhs++; }
    // an actor without out-edges has an empty table; GetKey(0) and
    // operator[](0) below would index out of range, so skip it
    if (LangCntH.Len() == 0) { continue; }
    // drop the "not set" votes from both the neighbor total and the table
    if (LangCntH.IsKey(0)) { Nbhs-=LangCntH.GetDat(0); LangCntH.GetDat(0)=0; }
    LangCntH.SortByDat(false);
    // key 0 on top means no neighbor has a language set
    if (LangCntH.GetKey(0) == 0) { continue; }
    if (LangCntH[0]*2 >= Nbhs) {
      NIdToVal.Add(TIntPr(NI.GetId(), LangCntH.GetKey(0))); }
  }
  for (int i = 0; i < NIdToVal.Len(); i++) {
    GetNDat(NIdToVal[i].Val1).Lang = NIdToVal[i].Val2; }
  printf("  language set: %d\n", NIdToVal.Len());

  int cnt=0;
  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().IsActor() && NI().GetLang()==0) cnt++; }
  printf("  Actors NO language: %d\n\n", cnt);
  // set country
  NIdToVal.Clr(true);
  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
    if (! NI().IsActor()) { continue; }
    IAssert(NI().GetCntry() == 0); // no country set
    IAssert(NI.GetInDeg() == 0);   // actors point to movies
    int Nbhs=0; TIntH CntryCntH;
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      CntryCntH.AddDat(NI.GetOutNDat(e).GetCntry()) += 1;  Nbhs++; }
    // same empty-table guard as in the language pass
    if (CntryCntH.Len() == 0) { continue; }
    // drop the "not set" votes from both the neighbor total and the table
    if (CntryCntH.IsKey(0)) { Nbhs-=CntryCntH.GetDat(0); CntryCntH.GetDat(0)=0; }
    CntryCntH.SortByDat(false);
    // key 0 on top means no neighbor has a country set
    if (CntryCntH.GetKey(0) == 0) { continue; }
    if (CntryCntH[0]*2 >= Nbhs) {
      NIdToVal.Add(TIntPr(NI.GetId(), CntryCntH.GetKey(0))); }
  }
  for (int i = 0; i < NIdToVal.Len(); i++) {
    GetNDat(NIdToVal[i].Val1).Cntry = NIdToVal[i].Val2; }
  printf("  country set: %d\n", NIdToVal.Len());
  cnt=0;
  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().IsActor() && NI().GetCntry()==0) cnt++; }
  printf("  Actors NO country: %d\n\n", cnt);
}
Ejemplo n.º 8
0
/////////////////////////////////////////////////
// Context-Tree
// Builds a tree-shaped context graph around the named object RootNmObjStr.
// Named objects are taken from a queue starting with the root; for each one,
// the pairs returned by NmObjBs->GetFqNmObjIdPrV() are visited in the order
// returned, and every object not reached before becomes a child vertex joined
// to its parent by an edge labelled with the pair's frequency value.
//   NmObjBs      - named-object base supplying ids, strings and frequencies
//   RootNmObjStr - string of the named object placed at the root
//   MxDist       - objects popped with a distance above this are not expanded
// Returns the graph after edge width, star placement and rescaling were applied.
// NOTE(review): the function also reads ContexterF, which is not a parameter
// (presumably a global form object) -- confirm it is valid whenever this runs.
PGraph GetCtxTreeGraph(
 const PNmObjBs& NmObjBs, const TStr& RootNmObjStr, const int& MxDist){
  // create distance graph
  PGraph Graph=TGraph::New();
  // create root node
  int RootNmObjId=NmObjBs->GetNmObjId(RootNmObjStr);
  PVrtx RootVrtx=TGVrtx::New(RootNmObjId, RootNmObjStr);
  Graph->AddVrtx(RootVrtx);
  // create distance vector (-1 marks a named object that was not reached yet)
  TIntV NmObjDistV(NmObjBs->GetNmObjs()); NmObjDistV.PutAll(-1);
  NmObjDistV[RootNmObjId]=0;
  // create queue of (named-object-id, distance) pairs, seeded with the root
  TIntPrQ OpenNmObjIdDistPrQ; OpenNmObjIdDistPrQ.Push(TIntPr(RootNmObjId, 0));
  while (!OpenNmObjIdDistPrQ.Empty()){
    // get name-object-id from queue
    int NmObjId=OpenNmObjIdDistPrQ.Top().Val1;
    int NmObjDist=OpenNmObjIdDistPrQ.Top().Val2;
    OpenNmObjIdDistPrQ.Pop();
    // the queued distance must agree with the distance recorded at push time
    IAssert(NmObjDistV[NmObjId]==NmObjDist);
    // get named-object string
    TStr NmObjStr=NmObjBs->GetNmObjStr(NmObjId);
    TStr UcNmObjStr=NmObjStr.GetUc();
    printf("[%s:%d] ", NmObjStr.CStr(), NmObjDist);
    // check distance
    if (NmObjDist>MxDist){continue;}
    // get named-object vertex
    PVrtx SrcVrtx=Graph->GetVrtx(NmObjId);
    // get named-object children
    TIntPrV FqNmObjIdPrV; NmObjBs->GetFqNmObjIdPrV(NmObjStr, FqNmObjIdPrV);
    int SubNmObjs=FqNmObjIdPrV.Len();
    // traverse named-object children
    int CreatedSubNmObjs=0;
    for (int SubNmObjN=0; SubNmObjN<SubNmObjs; SubNmObjN++){
      // get child data (Val1 is used as the frequency, Val2 as the id)
      int SubNmObjFq=FqNmObjIdPrV[SubNmObjN].Val1;
      int SubNmObjId=FqNmObjIdPrV[SubNmObjN].Val2;
      TStr SubNmObjStr=NmObjBs->GetNmObjStr(SubNmObjId);
      TStr UcSubNmObjStr=SubNmObjStr.GetUc();
      TStr SubNmObjVNm=SubNmObjStr;
      // calculate and add context string formed from coref-named-objects
      // (the inner FqNmObjIdPrV shadows the outer one inside this scope)
      // NOTE(review): Clr() directly after Trunc() leaves the vector without
      // elements, so the loop below does not execute and SubNmObjVNm keeps the
      // plain SubNmObjStr -- presumably a deliberate switch-off; confirm.
      {TChA CtxChA; TIntPrV FqNmObjIdPrV;
      ContexterF->State->NmObjBs->GetFqNmObjIdPrV(SubNmObjStr, FqNmObjIdPrV);
      FqNmObjIdPrV.Sort(false);
      FqNmObjIdPrV.Trunc(ContexterF->State->EnCtxLen); FqNmObjIdPrV.Clr();
      for (int NmObjN=0; NmObjN<FqNmObjIdPrV.Len(); NmObjN++){
        TStr CoNmObjStr=ContexterF->State->NmObjBs->GetNmObjStr(FqNmObjIdPrV[NmObjN].Val2);
        if (SubNmObjStr!=CoNmObjStr){
          CtxChA+='['; CtxChA+=CoNmObjStr; CtxChA+=']'; CtxChA+='\\';}
      }
      if (!CtxChA.Empty()){
        SubNmObjVNm=SubNmObjStr+"\\"+CtxChA;}}
      // push child named-object-id if necessary
      if (NmObjDistV[SubNmObjId]==-1){
        // check number of subnodes: the limit depends on the parent's distance;
        // parents at distance 5 or more get no children
        int MxCreatedSubNmObjs=0;
        switch (NmObjDist){
          case 0: MxCreatedSubNmObjs=/*20;*/ContexterF->State->EnSubNodes; break;
          case 1: MxCreatedSubNmObjs=4; break;
          case 2: MxCreatedSubNmObjs=2; break;
          case 3: MxCreatedSubNmObjs=1; break;
          case 4: MxCreatedSubNmObjs=1; break;
          default: MxCreatedSubNmObjs=0; break;
        }
        // check if stop creating branches (remaining children are skipped too)
        CreatedSubNmObjs++;
        if (CreatedSubNmObjs>MxCreatedSubNmObjs){break;}
        // push edge
        OpenNmObjIdDistPrQ.Push(TIntPr(SubNmObjId, NmObjDist+1));
        NmObjDistV[SubNmObjId]=NmObjDist+1;
        // create vertex
        TStr VNm=SubNmObjVNm;
        PVrtx DstVrtx=TGVrtx::New(SubNmObjId, VNm);
        Graph->AddVrtx(DstVrtx);
        // create edge (its name starts out as the frequency value)
        //TStr ENm=TStr("_")+TInt::GetStr(NmObjId)+"-"+TInt::GetStr(SubNmObjId);
        TStr ENm=TInt::GetStr(SubNmObjFq);
        // calculate and add context string formed from coref-named-objects
        if (ContexterF->EnInterNmObjContextCb->Checked){
          TChA CtxChA;
          TStr SrcNmObjStr=NmObjStr;
          // strip everything from the first backslash on to recover the plain
          // destination name from the vertex name
          TChA DstNmObjChA=DstVrtx->GetVNm();
          if (DstNmObjChA.IsChIn('\\')){
            DstNmObjChA.Trunc(DstNmObjChA.SearchCh('\\'));}
          TStr DstNmObjStr=DstNmObjChA;
          PBowSpV ConceptSpV=ContexterF->State->NmObjBs->GetNmObjConcept(
           ContexterF->State->BowDocBs, ContexterF->State->BowDocWgtBs,
           SrcNmObjStr, DstNmObjStr);
          TStrFltPrV WordStrWgtPrV;
          ConceptSpV->GetWordStrWgtPrV(
           ContexterF->State->BowDocBs, -1, 1, WordStrWgtPrV);
          // words accepted so far for this edge
          TStrV UcWordStrSfV;
          for (int WordN=0; WordN<WordStrWgtPrV.Len(); WordN++){
            // get word (presumably already upper-case, as the name suggests -- TODO confirm)
            TStr UcWordStr=WordStrWgtPrV[WordN].Val1;
            // remove duplicates: skip words that contain, or are contained in,
            // either endpoint name or a word accepted earlier
            if (UcWordStr.IsStrIn(UcNmObjStr)){continue;}
            if (UcWordStr.IsStrIn(UcSubNmObjStr)){continue;}
            if (UcNmObjStr.IsStrIn(UcWordStr)){continue;}
            if (UcSubNmObjStr.IsStrIn(UcWordStr)){continue;}
            bool Ok=true;
            for (int WordSfN=0; WordSfN<UcWordStrSfV.Len(); WordSfN++){
              if (UcWordStrSfV[WordSfN].IsStrIn(UcWordStr)){Ok=false; break;}
              if (UcWordStr.IsStrIn(UcWordStrSfV[WordSfN])){Ok=false; break;}
            }
            if (!Ok){continue;}
            // add word
            UcWordStrSfV.Add(UcWordStr);
            CtxChA+='['; CtxChA+=UcWordStr; CtxChA+=']'; CtxChA+='\n';
            // finish if limit reached
            if (UcWordStrSfV.Len()>=ContexterF->State->EnCtxLen){break;}
          }
          ENm=ENm+"\n"+CtxChA;
        }
        // create and add edge to the graph; the weight grows with the
        // logarithm of the frequency value
        PEdge Edge=TGEdge::New(SrcVrtx, DstVrtx, ENm);
        Edge->PutWgt(1+log(SubNmObjFq));
        Graph->AddEdge(Edge);
      }
    }
  }
  // final layout: edge width, star placement, rescaling relative to the root
  Graph->SetEdgeWidth(5);
  Graph->PlaceTreeAsStar();
  Graph->RescaleXY(0.1, RootVrtx);
  // return graph
  return Graph;
}