Exemple #1
0
void TSkyGridBs::GetLinkWgtDstEntIdPrVDiff(
 const TIntPrV& OldLinkWgtDstEntIdPrV, const TIntPrV& NewLinkWgtDstEntIdPrV,
 TIntPrV& NegDiffLinkWgtDstEntIdPrV, TIntPrV& PosDiffLinkWgtDstEntIdPrV){
  TIntIntH DstEntIdToLinkWgtH;
  // set previous-vector
  for (int WordN=0; WordN<NewLinkWgtDstEntIdPrV.Len(); WordN++){
    int LinkWgt=NewLinkWgtDstEntIdPrV[WordN].Val1;
    int DstEntId=NewLinkWgtDstEntIdPrV[WordN].Val2;
    DstEntIdToLinkWgtH.AddDat(DstEntId, LinkWgt);
  }
  // diff current-vector
  for (int WordN=0; WordN<OldLinkWgtDstEntIdPrV.Len(); WordN++){
    int LinkWgt=OldLinkWgtDstEntIdPrV[WordN].Val1;
    int DstEntId=OldLinkWgtDstEntIdPrV[WordN].Val2;
    int CurLinkWgt=DstEntIdToLinkWgtH.AddDat(DstEntId);
    DstEntIdToLinkWgtH.AddDat(DstEntId, CurLinkWgt-LinkWgt);
  }
  // extract vector
  TIntPrV _DiffLinkWgtDstEntIdPrV;
  DstEntIdToLinkWgtH.GetDatKeyPrV(_DiffLinkWgtDstEntIdPrV);
  // clean zeros
  TIntPrV DiffLinkWgtDstEntIdPrV(_DiffLinkWgtDstEntIdPrV.Len(), 0);
  for (int EntN=0; EntN<_DiffLinkWgtDstEntIdPrV.Len(); EntN++){
    int LinkWgt=_DiffLinkWgtDstEntIdPrV[EntN].Val1;
    if (LinkWgt!=0){
      DiffLinkWgtDstEntIdPrV.Add(_DiffLinkWgtDstEntIdPrV[EntN]);}
  }
  // positive-vector
  DiffLinkWgtDstEntIdPrV.Sort(true);
  NegDiffLinkWgtDstEntIdPrV=DiffLinkWgtDstEntIdPrV;
  // negative-vector
  DiffLinkWgtDstEntIdPrV.Sort(false);
  PosDiffLinkWgtDstEntIdPrV=DiffLinkWgtDstEntIdPrV;
}
Exemple #2
0
// Learns one embedding vector per node from a matrix of walks.
//
// WalksVV      - walk matrix, one walk per row; it is modified in place:
//                every node id is replaced by a consecutive index 0..NNodes-1.
// Dimensions   - passed to InitPosEmb/InitNegEmb/TrainModel.
// WinSize      - passed to TrainModel.
// Iter         - number of passes over all walks.
// Verbose      - passed to TrainModel; when set, a newline is printed after
//                training.
// EmbeddingsHV - output: for every row i of SynPos, the row is stored under
//                the original node id that was renamed to i.
//
// NOTE(review): WordCntAll, Alpha and Rnd are handed to TrainModel from
// inside an OpenMP parallel loop; whether TrainModel synchronizes access to
// them is not visible from this function.
void LearnEmbeddings(TVVec<TInt, int64>& WalksVV, int& Dimensions, int& WinSize,
 int& Iter, bool& Verbose, TIntFltVH& EmbeddingsHV) {
  TIntIntH RnmH;      // original node id -> consecutive index
  TIntIntH RnmBackH;  // consecutive index -> original node id
  int64 NNodes = 0;
  //renaming nodes into consecutive numbers
  // the row index is int64, matching the training loop below and the
  // int64-indexed walk matrix
  for (int64 i = 0; i < WalksVV.GetXDim(); i++) {
    for (int64 j = 0; j < WalksVV.GetYDim(); j++) {
      if ( RnmH.IsKey(WalksVV(i, j)) ) {
        WalksVV(i, j) = RnmH.GetDat(WalksVV(i, j));
      } else {
        RnmH.AddDat(WalksVV(i,j),NNodes);
        RnmBackH.AddDat(NNodes,WalksVV(i, j));
        WalksVV(i, j) = NNodes++;
      }
    }
  }
  TIntV Vocab(NNodes);
  LearnVocab(WalksVV, Vocab);
  TIntV KTable(NNodes);
  TFltV UTable(NNodes);
  TVVec<TFlt, int64> SynNeg;
  TVVec<TFlt, int64> SynPos;
  TRnd Rnd(time(NULL));
  InitPosEmb(Vocab, Dimensions, Rnd, SynPos);
  InitNegEmb(Vocab, Dimensions, SynNeg);
  InitUnigramTable(Vocab, KTable, UTable);
  TFltV ExpTable(TableSize);
  double Alpha = StartAlpha;                              //learning rate
  // precompute e^x for x = -MaxExp + i/ExpTablePrecision
#pragma omp parallel for schedule(dynamic)
  for (int i = 0; i < TableSize; i++ ) {
    double Value = -MaxExp + static_cast<double>(i) / static_cast<double>(ExpTablePrecision);
    ExpTable[i] = TMath::Power(TMath::E, Value);
  }
  int64 WordCntAll = 0;
// op RS 2016/09/26, collapse does not compile on Mac OS X
//#pragma omp parallel for schedule(dynamic) collapse(2)
  for (int j = 0; j < Iter; j++) {
#pragma omp parallel for schedule(dynamic)
    for (int64 i = 0; i < WalksVV.GetXDim(); i++) {
      TrainModel(WalksVV, Dimensions, WinSize, Iter, Verbose, KTable, UTable,
       WordCntAll, ExpTable, Alpha, i, Rnd, SynNeg, SynPos); 
    }
  }
  if (Verbose) { printf("\n"); fflush(stdout); }
  // copy every row of SynPos into the output, keyed by the original node id
  for (int64 i = 0; i < SynPos.GetXDim(); i++) {
    TFltV CurrV(SynPos.GetYDim());
    for (int j = 0; j < SynPos.GetYDim(); j++) { CurrV[j] = SynPos(i, j); }
    EmbeddingsHV.AddDat(RnmBackH.GetDat(i), CurrV);
  }
}
Exemple #3
0
void TCycBs::_SaveTaxonomyTxt(FILE* fOut, 
 const int& Lev, TIntPrV& RelIdVIdPrV, TIntIntH& VIdToLevH){
  for (int VidN=0; VidN<RelIdVIdPrV.Len(); VidN++){
    int FromRelId=RelIdVIdPrV[VidN].Val1;
    int SrcVId=RelIdVIdPrV[VidN].Val2;
    TStr SrcVNm=GetVNm(SrcVId);
    TCycVrtx& SrcVrtx=GetVrtx(SrcVId);
    if (!SrcVrtx.IsFlag(cvfHumanOk)){continue;}
    TStr FlagStr=SrcVrtx.GetFlagStr();
    if (FromRelId==-1){
      if (Lev>0){fprintf(fOut, "===upper");} else {fprintf(fOut, "===lower");}
      fprintf(fOut, "=======================================================\n");
      fprintf(fOut, "%s - %s\n", SrcVNm.CStr(), FlagStr.CStr());
    } else {
      TStr FromRelNm=GetVNm(FromRelId);
      fprintf(fOut, "%*c[%s] --> %s\n", (Lev-1)*5, ' ', FromRelNm.CStr(), SrcVNm.CStr());
    }
    TIntPrV UpRelIdVIdPrV;
    for (int EdgeN=0; EdgeN<SrcVrtx.GetEdges(); EdgeN++){
      TCycEdge& Edge=SrcVrtx.GetEdge(EdgeN);
      int RelId=Edge.GetRelId();
      int DstVId=Edge.GetDstVId();
      TStr RelNm=GetVNm(RelId);
      TStr DstVNm=GetVNm(DstVId);
      if (Lev>0){
        // upper taxonomy
        if ((RelNm=="#$isa")||(RelNm=="#$genls")){
          if (!VIdToLevH.IsKey(DstVId)){
            VIdToLevH.AddDat(DstVId, Lev+1);
            UpRelIdVIdPrV.Add(TIntPr(RelId, DstVId));
          }
        }
      } else {
        // lower taxonomy
        if ((RelNm=="~#$isa")||(RelNm=="~#$genls")){
          if (!VIdToLevH.IsKey(DstVId)){
            VIdToLevH.AddDat(DstVId, Lev-1);
            UpRelIdVIdPrV.Add(TIntPr(RelId, DstVId));
          }
        }
      }
    } 
    // recursive call
    if (Lev>0){
      _SaveTaxonomyTxt(fOut, Lev+1, UpRelIdVIdPrV, VIdToLevH);
    } else {
      _SaveTaxonomyTxt(fOut, Lev-1, UpRelIdVIdPrV, VIdToLevH);
    }
  }
}
void TNmObjBs::GetFqNmObjIdPrV(
 const TStr& TargetNmObjStr, TIntPrV& FqNmObjIdPrV) const {
  //printf("Searching %s ...", TargetNmObjStr.CStr());
  // get target named-object-id
  int TargetNmObjId=GetNmObjId(TargetNmObjStr);
  // collect target named-object frequencies
  TIntIntH NmObjIdToFqH;
  // traverse target named-object documents
  int NmObjDocs=GetNmObjDocs(TargetNmObjId);
  for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocs; NmObjDocIdN++){
    // get document-id
    int DocId=GetNmObjDocId(TargetNmObjId, NmObjDocIdN);
    // traverse named-object in document
    int DocNmObjs=GetDocNmObjs(DocId);
    for (int DocNmObjN=0; DocNmObjN<DocNmObjs; DocNmObjN++){
      // get named-object & frequency
      int NmObjId; int TermFq;
      GetDocNmObjId(DocId, DocNmObjN, NmObjId, TermFq);
      // increment named-object document frequency
      NmObjIdToFqH.AddDat(NmObjId)++;
    }
  }
  // get & sort frequency table
  FqNmObjIdPrV.Clr(); NmObjIdToFqH.GetDatKeyPrV(FqNmObjIdPrV);
  FqNmObjIdPrV.Sort(false);
}
Exemple #5
0
void TSkyGridEnt::GetSorted_LinkWgtDstEntIdPrV(
 const uint64& MnTm, const double& TopWgtSumPrc, TIntPrV& LinkWgtDstEntIdPrV) const {
  double AllLinkWgtSum=0;
  TIntIntH DstEntIdLinkWgtH;
  int LinkEnts=GetLinkEnts();
  for (int LinkEntN=0; LinkEntN<LinkEnts; LinkEntN++){
    int DstEntId=GetLinkEntId(LinkEntN);
    int EntLinks=GetEntLinks(LinkEntN);
    int EntLinkWgtSum=0;
    for (int EntLinkN=0; EntLinkN<EntLinks; EntLinkN++){
      const TSkyGridEntLinkCtx& EntLinkCtx=GetEntLinkCtx(LinkEntN, EntLinkN);
      if (EntLinkCtx.Tm>=MnTm){
        EntLinkWgtSum+=EntLinkCtx.LinkWgt;}
    }
    DstEntIdLinkWgtH.AddDat(DstEntId, EntLinkWgtSum);
    AllLinkWgtSum+=EntLinkWgtSum;
  }
  LinkWgtDstEntIdPrV.Clr(); DstEntIdLinkWgtH.GetDatKeyPrV(LinkWgtDstEntIdPrV);
  LinkWgtDstEntIdPrV.Sort(false);
  // cut long-tail
  if ((TopWgtSumPrc>0.0)&&(LinkWgtDstEntIdPrV.Len()>0)){
    int TopLinkWgt=LinkWgtDstEntIdPrV[0].Val1;
    if (TopLinkWgt>(3*AllLinkWgtSum)/LinkWgtDstEntIdPrV.Len()){
      double CutWgtSum=AllLinkWgtSum*(1-TopWgtSumPrc);
      int LastValN=LinkWgtDstEntIdPrV.Len()-1;
      while ((LastValN>0)&&(CutWgtSum>0)){
        CutWgtSum-=LinkWgtDstEntIdPrV[LastValN].Val1;
        LastValN--;
      }
      LinkWgtDstEntIdPrV.Trunc(LastValN+1);
    }
  }
}
Exemple #6
0
/////////////////////////////////////////////////
// SkyGrid-Base
// Fills DocsEntIdPrV with one (GetDocIds() value, entity-id) pair per entity
// id in 0..GetEnts()-1, sorted with Sort(false) so the largest document
// counts come first.
void TSkyGridBs::GetSorted_DocsEntIdPrV(TIntPrV& DocsEntIdPrV){
  // gather into a local vector so the output is only touched at the end
  TIntPrV SortedPrV;
  for (int EntId=0; EntId<GetEnts(); EntId++){
    const int Docs=GetEnt(EntId).GetDocIds();
    SortedPrV.Add(TIntPr(Docs, EntId));
  }
  SortedPrV.Sort(false);
  DocsEntIdPrV=SortedPrV;
}
Exemple #7
0
void TCycBs::SaveTaxonomyTxt(const TStr& FNm){
  TFOut FOut(FNm); FILE* fOut=FOut.GetFileId();
  for (int VId=0; VId<GetVIds(); VId++){
    printf("%d/%d (%.1f%%)\r", 1+VId, GetVIds(), 100.0*(1+VId)/GetVIds());
    //if (VId>10){break;}
    // upper taxonomy
    {int Lev=0;
    TIntIntH VIdToLevH; VIdToLevH.AddDat(VId, Lev);
    TIntPrV UpRelIdVIdPrV; UpRelIdVIdPrV.Add(TIntPr(-1, VId));
    _SaveTaxonomyTxt(fOut, Lev+1, UpRelIdVIdPrV, VIdToLevH);}
    // lower taxonomy
    {int Lev=0;
    TIntIntH VIdToLevH; VIdToLevH.AddDat(VId, Lev);
    TIntPrV UpRelIdVIdPrV; UpRelIdVIdPrV.Add(TIntPr(-1, VId));
    _SaveTaxonomyTxt(fOut, Lev-1, UpRelIdVIdPrV, VIdToLevH);}
  }
  printf("\n");
}
Exemple #8
0
// Splits the nodes of Graph into a core and a periphery by scanning nodes in
// order of decreasing degree and choosing the prefix length that minimises a
// running cost. For every node, out receives 1 if it falls inside the chosen
// prefix (core) and 0 otherwise. Returns the size of the core.
int FastCorePeriphery(PUNGraph& Graph, TIntIntH& out) {

    // node id -> degree; the cost starts as half of the degree sum
    TIntIntH degById;
    double cost = 0;
    for (TUNGraph::TNodeI node = Graph->BegNI(); node < Graph->EndNI(); node++) {
        const int degree = node.GetDeg();
        const int nodeId = node.GetId();
        cost += degree;
        degById.AddDat(nodeId, degree);
    }
    cost = cost / 2;

    // highest degree first; after sorting, position k is addressed as degById[k]
    degById.SortByDat(false);

    // try every prefix size and remember the first one with the lowest cost
    double bestCost = 99999900000000000;
    int bestSize = 0;
    for (int pos = 0; pos < degById.Len(); pos++) {
        const int size = pos + 1;
        cost = cost + size - 1 - degById[pos];
        if (cost < bestCost) { // or <=
            bestCost = cost;
            bestSize = size;
        }
    }

    // label the first bestSize nodes (in sorted order) as core
    int seen = 0;
    for (THashKeyDatI<TInt, TInt> it = degById.BegI();  !it.IsEnd(); it++) {
        const int label = (seen < bestSize) ? 1 : 0;
        out.AddDat(it.GetKey(), label);
        seen++;
    }

    return bestSize;
}
Exemple #9
0
// Reads node ids from the whitespace-separated file InFNmNodes. For each
// line whose first field parses as an integer, the value is stored under a
// running index (0, 1, 2, ...); other lines are skipped without consuming an
// index. Returns the index -> node-id table.
TIntH LoadNodeList(TStr InFNmNodes) {
  TSsParser Parser(InFNmNodes, ssfWhiteSep, true, true, true);
  TIntIntH NodeIdByIdx;
  int NextIdx = 0;
  int NodeId;
  while (Parser.Next()) {
    if (!Parser.GetInt(0, NodeId)) { continue; }
    NodeIdByIdx.AddDat(NextIdx, NodeId);
    NextIdx++;
  }
  return NodeIdByIdx;
}
// hash table benchmark with integer keys
void HashBench(const int& n) {
  TIntIntH TableInt;
  float ft0, ft1;
  int x;
  int i;
  int Found;
  int NotFound;
  int Id;

  // build the hash table
  ft0 = GetCPUTime();
  for (i = 0; i < n; i++) {
    x = (int) (drand48() * 100000000);
    TableInt.AddDat(x,0);
  }
  printf("hash:          size %d\n", TableInt.Len());

  ft1 = GetCPUTime();
  printf("hash: %7.3fs inserting  %d numbers\n",ft1-ft0,i);

  // search the hash table
  ft0 = GetCPUTime();
  Found = 0;
  NotFound = 0;
  for (i = 0; i < n; i++) {
    x = (int) (drand48() * 100000000);
    Id = TableInt.GetKeyId(x);
    if (Id < 0) {
      NotFound++;
    } else {
      Found++;
    }
  }
  printf("hash:          found %d, notfound %d\n", Found, NotFound);

  ft1 = GetCPUTime();
  printf("hash: %7.3fs searching %d numbers\n",ft1-ft0,i);
}
// Simulates noCasacdes cascades on groundTruthGraph and registers each one
// via addCascade().
//
// noCasacdes - number of cascades to generate.
// pInit      - probability that a node starts out active (seed) at time 0.
// p          - infection probability along an edge whose endpoints have the
//              same colour id.
// q          - infection probability along an edge whose endpoints have
//              different colour ids.
//
// In every time step each node that was active at the beginning of the step
// tries once to infect each susceptible out-neighbour and then becomes
// inactive. A cascade ends when no active node is left. Does nothing when
// the graph has no nodes.
//
// Node states are stored in a hash keyed by node id. All state updates go
// through AddDat(key): THash::operator[] addresses entries by internal key
// id, not by key, so it must not be used with node ids.
void TGreedyAlg::generateCascades(const int& noCasacdes, const double& pInit, const double& p, const double& q) {
    int noNodes = groundTruthGraph->GetNodes();
    if (noNodes == 0) {
        return;
    }
    
    // set random seed
    TInt::Rnd.Randomize();
    
    for (int casacdeI = 0; casacdeI < noCasacdes; casacdeI++) {
        TCascade cascade;
    
        double globalTime = 0;
        int noActiveNodes = 0;
        int noSusceptibleNodes = 0;
        int noInactiveNodes = 0;
        TIntIntH nodeStates;
        
        // flip biased coin for each node to collect seeds
        for (TKColourNet::TNodeI NI = groundTruthGraph->BegNI(); NI < groundTruthGraph->EndNI(); NI++) {
            const int nodeId = NI.GetId();
            double flipResult = TInt::Rnd.GetUniDev();
            if (flipResult <= pInit) {
                nodeStates.AddDat(nodeId) = activeState;
                cascade.Add(nodeId, globalTime);
                noActiveNodes++;
            }
            else {
                nodeStates.AddDat(nodeId) = susceptibleState;
                noSusceptibleNodes++;
            }
        }
        
        globalTime++;
        
        while (noActiveNodes > 0) {
            // snapshot: only nodes active at the start of this step spread,
            // nodes infected during the step wait for the next one
            const TIntIntH beginningNodeStates = nodeStates;
            // for each node in the graph
            for (TKColourNet::TNodeI NI = groundTruthGraph->BegNI(); NI < groundTruthGraph->EndNI(); NI++) {
                const int nodeId = NI.GetId();
                const int nodeColourId = NI.GetDat().getColourId();
                const int nodeState = beginningNodeStates.GetDat(nodeId);
                // if node is active, infect susceptible child nodes with probability pij
                if (nodeState == activeState) {
                    const TKColourNet::TNodeI LNI = groundTruthGraph->GetNI(nodeId);
                    for (int e = 0; e < LNI.GetOutDeg(); e++) {
                        const int childNodeId = LNI.GetOutNId(e);
                        const int childNodeColourId = LNI.GetOutNDat(e).getColourId();
                        // current (not snapshot) state, so a child infected
                        // earlier in this step is not infected twice
                        const int childNodeState = nodeStates.GetDat(childNodeId);
                        if (childNodeState == susceptibleState) {
                            double probablityInfection = (nodeColourId == childNodeColourId) ? p : q;
                            double flipResult = TInt::Rnd.GetUniDev();
                            // infection occurred: childNode goes from susceptible -> active
                            if (flipResult <= probablityInfection) {
                                nodeStates.AddDat(childNodeId) = activeState;
                                cascade.Add(childNodeId, globalTime);
                                noActiveNodes++;
                                noSusceptibleNodes--;
                            }
                        }
                    }
                    // main node goes from active -> inactive
                    nodeStates.AddDat(nodeId) = inactiveState;
                    noActiveNodes--;
                    noInactiveNodes++;
                }
            }
            globalTime++;
        }
        addCascade(cascade);
    }
}
Exemple #12
0
// Builds a bag-of-words document base from SVM-light style text files.
//
// DocDefFNm    - optional document-definition file; used only if the name is
//                non-empty and the file exists.
// WordDefFNm   - optional word-definition file; used only if the name is
//                non-empty and the file exists.
// TrainDataFNm - optional training-data file; its documents are appended to
//                TrainDIdV.
// TestDataFNm  - optional test-data file; its documents are appended to
//                TestDIdV.
// MxDocs       - currently not used by this function (see TODO below).
//
// Returns the populated document base after calling AssertOk() on it.
PBowDocBs TBowFl::LoadSvmLightTxt(
 const TStr& DocDefFNm, const TStr& WordDefFNm,
 const TStr& TrainDataFNm, const TStr& TestDataFNm,
 const int& MxDocs){ //TODO: use MxDocs
  // prepare document set
  PBowDocBs BowDocBs=TBowDocBs::New();
  // the two class labels are registered up front as categories "-1" and "+1"
  int MOneCId=BowDocBs->CatNmToFqH.AddKey("-1");
  int POneCId=BowDocBs->CatNmToFqH.AddKey("+1");

  // document definition
  bool DocDefP=false;
  if (!DocDefFNm.Empty()&&(TFile::Exists(DocDefFNm))){
    // (DId "DoxNm"<eoln>)*
    // note: the parser below additionally expects a colon between the id and
    // the quoted name
    PSIn SIn=TFIn::New(DocDefFNm);
    TILx Lx(SIn, TFSet()|iloRetEoln|iloSigNum|iloExcept);
    Lx.GetSym(syInt, syEof);
    while (Lx.Sym==syInt){
      int DId=Lx.Int;
      Lx.GetSym(syColon);
      Lx.GetSym(syQStr); TStr DocNm=Lx.Str;
      Lx.GetSym(syEoln);
      Lx.GetSym(syInt, syEof);
      // the id stored in the file must equal the id assigned on insertion
      int NewDId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      EAssertR(DId==NewDId, "Document-Ids don't match.");
    }
    DocDefP=true;
  }
  // word definition
  if (!WordDefFNm.Empty()&&(TFile::Exists(WordDefFNm))){
    BowDocBs->WordStrToDescH.AddDat("Undef").Fq=0; // ... to have WId==0
    // each line: quoted word string, word id, word frequency
    PSIn SIn=TFIn::New(WordDefFNm);
    TILx Lx(SIn, TFSet()|iloRetEoln|iloSigNum|iloExcept);
    Lx.GetSym(syQStr, syEof);
    while (Lx.Sym==syQStr){
      TStr WordStr=Lx.Str;
      Lx.GetSym(syInt); int WId=Lx.Int;
      Lx.GetSym(syInt); int WordFq=Lx.Int;
      Lx.GetSym(syEoln);
      Lx.GetSym(syQStr, syEof);
      // the id stored in the file must equal the id assigned on insertion
      int NewWId=BowDocBs->WordStrToDescH.AddKey(WordStr);
      EAssertR(WId==NewWId, "Word-Ids don't match.");
      BowDocBs->WordStrToDescH[WId].Fq=WordFq;
    }
  }
  // train & test data
  // MxWId: largest word id seen in any data line (-1 while none was seen);
  // WIdToFqH: number of data-line occurrences per word id
  int MxWId=-1; TIntIntH WIdToFqH;
  // train data
  if (!TrainDataFNm.Empty()){
    PSIn SIn=TFIn::New(TrainDataFNm);
    TILx Lx(SIn, TFSet()|iloCmtAlw|iloRetEoln|iloSigNum|iloExcept);
    // skip comment lines
    while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    // parse data lines
    while (Lx.Sym==syInt){
      // document
      // the provisional document name is the current document count
      TStr DocNm=TInt::GetStr(BowDocBs->GetDocs());
      int DId;
      if (DocDefP){
        // names were preloaded from the definition file: look the id up
        DId=BowDocBs->DocNmToDescStrH.GetKeyId(DocNm);
      } else {
        DId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      }
      BowDocBs->TrainDIdV.Add(DId);
      // category (class value)
      // a label of -1 maps to category "-1", every other value to "+1"
      int CId=(Lx.Int==-1) ? MOneCId : POneCId;
      BowDocBs->DocCIdVV.Add(); IAssert(DId==BowDocBs->DocCIdVV.Len()-1);
      BowDocBs->DocCIdVV.Last().Gen(1, 0);
      BowDocBs->DocCIdVV.Last().Add(CId);
      // words (attributes)
      PBowSpV SpV=TBowSpV::New(DId);
      BowDocBs->DocSpVV.Add(SpV); IAssert(DId==BowDocBs->DocSpVV.Len()-1);
      Lx.GetSym(syInt, syEoln);
      // each attribute is "WId:weight"
      while (Lx.Sym==syInt){
        int WId=Lx.Int;
        Lx.GetSym(syColon);
        Lx.GetSym(syFlt); double WordFq=Lx.Flt;
        Lx.GetSym(syInt, syEoln);
        SpV->AddWIdWgt(WId, WordFq);
        if (MxWId==-1){MxWId=WId;} else {MxWId=TInt::GetMx(MxWId, WId);}
        WIdToFqH.AddDat(WId)++;
      }
      if (!Lx.CmtStr.Empty()){
        // change document name to 'N' if comment 'docDesc=N'
        TStr CmtStr=Lx.CmtStr;
        static TStr DocNmPrefixStr="docDesc=";
        if (CmtStr.IsPrefix(DocNmPrefixStr)){
          TStr NewDocNm=
           TStr("D")+CmtStr.GetSubStr(DocNmPrefixStr.Len(), CmtStr.Len()-1);
          // the renamed entry is asserted to receive the id of the deleted one
          BowDocBs->DocNmToDescStrH.DelKey(DocNm);
          int NewDId=BowDocBs->DocNmToDescStrH.AddKey(NewDocNm);
          IAssert(DId==NewDId);
        }
      }
      SpV->Trunc();
      // skip empty/comment lines up to the next data line or end of file
      while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    }
  }
  // test data
  // same parsing as for the training data, except that document ids are
  // appended to TestDIdV
  if (!TestDataFNm.Empty()){
    PSIn SIn=TFIn::New(TestDataFNm);
    TILx Lx(SIn, TFSet()|iloCmtAlw|iloRetEoln|iloSigNum|iloExcept);
    // skip comment lines
    while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    // parse data lines
    while (Lx.Sym==syInt){
      // document
      TStr DocNm=TInt::GetStr(BowDocBs->GetDocs());
      int DId;
      if (DocDefP){
        DId=BowDocBs->DocNmToDescStrH.GetKeyId(DocNm);
      } else {
        DId=BowDocBs->DocNmToDescStrH.AddKey(DocNm);
      }
      BowDocBs->TestDIdV.Add(DId);
      // category (class value)
      int CId=(Lx.Int==-1) ? MOneCId : POneCId;
      BowDocBs->DocCIdVV.Add(); IAssert(DId==BowDocBs->DocCIdVV.Len()-1);
      BowDocBs->DocCIdVV.Last().Gen(1, 0);
      BowDocBs->DocCIdVV.Last().Add(CId);
      // words (attributes)
      PBowSpV SpV=TBowSpV::New(DId);
      BowDocBs->DocSpVV.Add(SpV); IAssert(DId==BowDocBs->DocSpVV.Len()-1);
      Lx.GetSym(syInt, syEoln);
      while (Lx.Sym==syInt){
        int WId=Lx.Int;
        Lx.GetSym(syColon);
        Lx.GetSym(syFlt); double WordFq=Lx.Flt;
        Lx.GetSym(syInt, syEoln);
        SpV->AddWIdWgt(WId, WordFq);
        if (MxWId==-1){MxWId=WId;} else {MxWId=TInt::GetMx(MxWId, WId);}
        WIdToFqH.AddDat(WId)++;
      }
      if (!Lx.CmtStr.Empty()){
        // change document name to 'N' if comment 'docDesc=N'
        TStr CmtStr=Lx.CmtStr;
        static TStr DocNmPrefixStr="docDesc=";
        if (CmtStr.IsPrefix(DocNmPrefixStr)){
          TStr NewDocNm=
           TStr("D")+CmtStr.GetSubStr(DocNmPrefixStr.Len(), CmtStr.Len()-1);
          BowDocBs->DocNmToDescStrH.DelKey(DocNm);
          int NewDId=BowDocBs->DocNmToDescStrH.AddKey(NewDocNm);
          IAssert(DId==NewDId);
        }
      }
      SpV->Trunc();
      while (Lx.GetSym(syInt, syEoln, syEof)==syEoln){}
    }
  }
  // add missing words
  // every word id up to the largest one seen gets an entry named "W<id>";
  // its frequency is set from the occurrence counts gathered above
  for (int WId=0; WId<=MxWId; WId++){
    if (!BowDocBs->IsWId(WId)){
      TStr WordStr=TInt::GetStr(WId, "W%d");
      int _WId=BowDocBs->AddWordStr(WordStr);
      IAssert(WId==_WId);
      TInt Fq;
      if (WIdToFqH.IsKeyGetDat(WId, Fq)){
        BowDocBs->PutWordFq(WId, Fq);
      }
    }
  }

  BowDocBs->AssertOk();
  return BowDocBs;
}
Exemple #13
0
// Core-periphery split that, among nodes of equal degree, prefers the node
// with the fewest links outside the group built so far (degree minus the
// value returned by Intersect for the current group).
//
// Graph - undirected input graph.
// out   - receives, for every node id, 1 if the node was put into the core
//         group and 0 otherwise.
// Returns the number of nodes in the core group.
//
// The scratch array of group members is heap-allocated and is released
// before returning.
int FastCorePeripheryGC(PUNGraph& Graph, TIntIntH& out) {
    TIntH GroupNodes; // buildup container of group nodes
    int *NNodes = new int[Graph->GetNodes()]; // ids of the group nodes, handed to Intersect()
    int NNodes_br = 0; // number of valid entries in NNodes

    TIntIntH nodes;    // node id -> degree
    TIntIntH nodesIds; // node id -> node id, inserted in sorted order so that nodesIds[k] is the k-th node
    double Z=0;

    for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { // Calculate and store the degrees of each node.
        int deg = NI.GetDeg();
        int id = NI.GetId();
        Z += deg;
        nodes.AddDat(id,deg);

    }

    Z = Z/2;

    nodes.SortByDat(false); // Then sort the nodes in descending order of degree, to get a list of nodes {v1, v2, . . . , vn}.

    int br1=0;
    for (THashKeyDatI<TInt,TInt> NI = nodes.BegI(); NI < nodes.EndI(); NI++) {
        nodesIds.AddDat(NI.GetKey(),NI.GetKey());
        br1++;
    }

    double Zbest = 99999900000000000;
    //int kbest;
    //int olddeg;
    int br=0; // number of candidates processed so far
    for (int k=0; k<nodes.Len(); k++) {
        if (k<nodes.Len()-1) {
            if (nodes[k]==nodes[k+1]) { // go into same deg mode
                int kmin=-2;
                int knew=-1;
                // repeatedly pick the best remaining candidate of this degree
                while (kmin < 999999 && kmin !=-1 ) {
                    int kind=-1;
                    knew=k;
                    kmin=999999;
                    while(nodes[k]==nodes[knew] && knew < nodes.Len()-1) {
                        int inter = Intersect(Graph->GetNI(nodesIds[knew]),NNodes,NNodes_br);
                        int deg = nodes[knew];
                        //if (((((nodes.Len()-NNodes_br)*(nodes.Len()-NNodes_br)))-(nodes.Len()-NNodes_br))/2<(((br*br)-br)/2))
                        if ((deg-inter)<kmin && !GroupNodes.IsKey(nodesIds[knew]))
                        {
                            kmin = deg-inter;
                            kind = knew;
                        }

                        knew++;
                    }

                    if (kind!=-1) {
                        br++;
                        Z = Z + br - 1 - nodes[kind];
                        if (Z < (Zbest)) { // or <=
                            //if (olddeg>nodes[kind])

                            //olddeg = nodes[kind];
                            Zbest = Z;
                            //kbest = br;
                            int w = nodes[kind];
                            int id = nodesIds[kind];
                            GroupNodes.AddDat(id,w);
                            NNodes[NNodes_br] = id;
                            NNodes_br++;
                        }
                        else {

                            break;
                        }
                    }
                }
                // continue after the run of equal-degree nodes
                k=knew-1;
            }
            else {
                br++;
                Z = Z + br - 1 - nodes[k];
                if (Z < (Zbest)) { // or <=
                    //if (olddeg>nodes[k])

                    //olddeg = nodes[k];
                    Zbest = Z;
                    //kbest = br;
                    int w = nodes[k];
                    int id = nodesIds[k];
                    GroupNodes.AddDat(id,w);
                    NNodes[NNodes_br] = id;
                    NNodes_br++;
                }
            }
        }

        else {
            br++;
            Z = Z + br - 1 - nodes[k];
            if (Z < Zbest) { // or <=
                //if (olddeg>nodes[k])

                //olddeg = nodes[k];
                Zbest = Z;
                //kbest = br;
                int w = nodes[k];
                int id = nodesIds[k];
                GroupNodes.AddDat(id,w);
                NNodes[NNodes_br] = id;
                NNodes_br++;
            }
        }
    }

    // label every node: 1 = member of the group, 0 = not a member
    int cp = 0;
    br = 0;
    for (THashKeyDatI<TInt, TInt> it = nodes.BegI();  !it.IsEnd(); it++) {
        if (GroupNodes.IsKey(it.GetKey()))
            cp = 1;
        else
            cp = 0;
        out.AddDat(it.GetKey(), cp);
        br++;
    }

    /*for (THashKeyDatI<TInt, TInt> it = GroupNodes.BegI();  it < GroupNodes.EndI(); it++) {
      out.AddDat(it.GetKey(), 1);
      br++;
    }*/

    // release the scratch array allocated at the top of the function
    delete[] NNodes;

    //return kbest;
    return GroupNodes.Len();
}
Exemple #14
0
// Table manipulations
// Exercises a TIntIntH through its life cycle: bulk insertion, lookups by
// key / iterator / key id, deletion of every DDist-th key, assignment,
// save/load through the file FName, random removal of all elements and Clr().
// Every stored pair satisfies Dat == Key+1, which is what the per-element
// checks verify; key and data sums are compared against the sums recorded
// during insertion and deletion.
TEST(TIntIntH, ManipulateTable) {
  const int64 NElems = 1000000;
  int DDist = 10;                          // every DDist-th key is deleted
  const char *FName = "test.hashint.dat";  // file used for the save/load check
  TIntIntH TableInt;
  TIntIntH TableInt1;                      // target of the assignment check
  TIntIntH TableInt2;                      // target of the load check
  int i;
  int d;
  int n;
  int Id;
  int Key;
  int64 KeySumVal;   // sum of all inserted keys
  int64 DatSumVal;   // sum of all inserted data values
  int64 KeySum;
  int64 DatSum;
  int64 KeySumDel;   // sum of deleted keys
  int64 DatSumDel;   // sum of deleted data values
  int DelCount;
  int Count;

  // add table elements
  // keys are generated by stepping with stride d modulo NElems
  d = Prime(NElems);
  n = d;
  KeySumVal = 0;
  DatSumVal = 0;
  for (i = 0; i < NElems; i++) {
    TableInt.AddDat(n,n+1);
    KeySumVal += n;
    DatSumVal += (n+1);
    //printf("add %d %d\n", n, n+1);
    n = (n + d) % NElems;
  }
  EXPECT_EQ(0,TableInt.Empty());
  EXPECT_EQ(NElems,TableInt.Len());

  // the inserted keys must sum to 0+1+...+(NElems-1), the data to 1+...+NElems
  EXPECT_EQ(0,(NElems-1)*(NElems)/2 - KeySumVal);
  EXPECT_EQ(0,(NElems)*(NElems+1)/2 - DatSumVal);

  // verify elements by successive keys
  KeySum = 0;
  DatSum = 0;
  for (i = 0; i < NElems; i++) {
    Id = TableInt.GetKeyId(i);
    EXPECT_EQ(1,Id >= 0);
    Key = TableInt.GetKey(Id);
    EXPECT_EQ(0,TableInt.GetDat(Key)-Key-1);
    KeySum += Key;
    DatSum += TableInt.GetDat(Key);
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by distant keys
  KeySum = 0;
  DatSum = 0;
  n = Prime(d);
  for (i = 0; i < NElems; i++) {
    Id = TableInt.GetKeyId(n);
    EXPECT_EQ(1,Id >= 0);
    Key = TableInt.GetKey(Id);
    EXPECT_EQ(0,TableInt.GetDat(Key)-Key-1);
    KeySum += Key;
    DatSum += TableInt.GetDat(Key);
    n = (n + d) % NElems;
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  for (TIntIntH::TIter It = TableInt.BegI(); It < TableInt.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    KeySum += It.GetKey();
    DatSum += It.GetDat();
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by key index
  KeySum = 0;
  DatSum = 0;
  Id = TableInt.FFirstKeyId();
  while (TableInt.FNextKeyId(Id)) {
    EXPECT_EQ(1,Id >= 0);
    Key = TableInt.GetKey(Id);
    EXPECT_EQ(0,TableInt.GetDat(Key)-Key-1);
    KeySum += Key;
    DatSum += TableInt.GetDat(Key);
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // delete elements
  // removes keys 0, DDist, 2*DDist, ... and records what was removed
  DelCount = 0;
  KeySumDel = 0;
  DatSumDel = 0;
  for (n = 0; n < NElems; n += DDist) {
    Id = TableInt.GetKeyId(n);
    //printf("del %d %d %d\n", n, Id, (int) TableInt[Id]);
    KeySumDel += n;
    DatSumDel += TableInt[Id];
    TableInt.DelKeyId(Id);
    DelCount++;
  }
  EXPECT_EQ(0,TableInt.Empty());
  EXPECT_EQ(NElems-DelCount,TableInt.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt.BegI(); It < TableInt.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }

  // remaining sums must equal the inserted sums minus the deleted sums
  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // assignment
  TableInt1 = TableInt;
  EXPECT_EQ(0,TableInt1.Empty());
  EXPECT_EQ(NElems-DelCount,TableInt1.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt1.BegI(); It < TableInt1.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // saving and loading
  // the output stream is scoped so that it is destroyed before reading back
  {
    TFOut FOut(FName);
    TableInt.Save(FOut);
    FOut.Flush();
  }

  {
    TFIn FIn(FName);
    TableInt2.Load(FIn);
  }

  EXPECT_EQ(NElems-DelCount,TableInt2.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt2.BegI(); It < TableInt2.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // remove all elements
  // Count still holds the number of elements counted in the previous phase
  for (i = 0; i < Count; i++) {
    Id = TableInt.GetRndKeyId(TInt::Rnd, 0.5);
    TableInt.DelKeyId(Id);
  }
  EXPECT_EQ(0,TableInt.Len());
  EXPECT_EQ(1,TableInt.Empty());

  // verify elements by iterator
  // the table is empty, so the loop body is expected not to run
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt.BegI(); It < TableInt.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);

  // clear the table
  TableInt1.Clr();
  EXPECT_EQ(0,TableInt1.Len());
  EXPECT_EQ(1,TableInt1.Empty());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TIntIntH::TIter It = TableInt1.BegI(); It < TableInt1.EndI(); It++) {
    EXPECT_EQ(0,It.GetDat()-It.GetKey()-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += It.GetKey();
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);
}