int main(int argc, char *argv[]) {
  LogOutput Log;
  THash<TStr, TStr> Arguments;
  ArgumentParser::ParseArguments(argc, argv, Arguments, Log);

  TStr StartString = ArgumentParser::GetArgument(Arguments, "start", "2009-01-14");
  TStr EndString = ArgumentParser::GetArgument(Arguments, "end", "2012-09-30");
  TStr QBDBCDirectory = ArgumentParser::GetArgument(Arguments, "qbdbc", "/lfs/1/tmp/curis/QBDBC-final/");
  TStr QBDBDirectory = ArgumentParser::GetArgument(Arguments, "qbdb", "/lfs/1/tmp/curis/QBDB/");

  TSecTm StartDate = TSecTm::GetDtTmFromYmdHmsStr(StartString);
  TSecTm EndDate = TSecTm::GetDtTmFromYmdHmsStr(EndString);

  TQuoteBase QB;
  TDocBase DB;
  TClusterBase CB;
  PNGraph QGraph;
  TQuoteBase NewQB;
  TDocBase NewDB;

  TSecTm CurrentDate = StartDate;
  TInt NumUnprocessedQuotes = 0;
  TInt NumUnprocessedDocs = 0;
  TInt NumQuotes = 0;
  TInt NumClusters = 0;
  TInt NumTopClusters = 0;
  TInt NumDiscardedClustersByPeak = 0;
  TInt NumDiscardedClustersByVariant = 0;
  TInt NumRemainingClusters = 0;
  TInt NumVariants = 0;
  TInt NumDocs = 0;
  TInt Count = 0;
  while(CurrentDate < EndDate) {
    if (Count % 100 == 0) Err("%d days evaluated!\n", Count.Val);
    TDataLoader::LoadCumulative(QBDBCDirectory, CurrentDate.GetDtYmdStr(), QB, DB, CB, QGraph);
    TDataLoader::LoadQBDB(QBDBDirectory, CurrentDate.GetDtYmdStr(), NewQB, NewDB);

    if (NewQB.Len() > 0) {
      NumUnprocessedQuotes += NewQB.Len();
      NumUnprocessedDocs += NewDB.Len();
    }

    if (QB.Len() > 0) {
      Count++;
      Err("Loaded base for %s! Calculating stats...\n", CurrentDate.GetDtYmdStr().CStr());
      NumQuotes += QB.Len();
      NumClusters += CB.Len();
      NumDocs += DB.Len();

      TIntV TopClusters;
      CB.GetTopClusterIdsByFreq(TopClusters);

      int NmTopClusters = TopClusters.Len();
      NumTopClusters += NmTopClusters;
      for (int i = 0; i < NmTopClusters; i++) {
        TCluster C;
        CB.GetCluster(TopClusters[i], C);
        if (C.GetDiscardState() == 1) {
          NumDiscardedClustersByPeak++;
        } else if (C.GetDiscardState() == 2) {
          NumDiscardedClustersByVariant++;
        } else {
          NumRemainingClusters++;
          NumVariants += C.GetNumUniqueQuotes();
        }
      }
    }
    CurrentDate.AddDays(1);
  }

  Err("Number of quotes processed through in total: %d\n", NumUnprocessedQuotes.Val);
  Err("Number of docs processed through in total: %d\n", NumUnprocessedDocs.Val);
  Err("Number of quotes in total: %d\n", NumQuotes.Val);
  Err("Number of clusters in total: %d\n", NumClusters.Val);
  Err("Number of docs in total: %d\n", NumDocs.Val);
  Err("Number of top clusters in total: %d\n", NumTopClusters.Val);
  Err("Number of discarded clusters by peak in total: %d\n", NumDiscardedClustersByPeak.Val);
  Err("Number of discarded clusters by variant total: %d\n", NumDiscardedClustersByVariant.Val);
  Err("Number of top clusters remaining in total: %d\n", NumRemainingClusters.Val);
  Err("Number of top variants found: %d\n", NumVariants.Val);
  Err("=============\n===========\n");
  Err("Number of days: %d\n", Count.Val);
  double AvgQuotes = (NumQuotes.Val * 1.0 / Count.Val);
  double AvgClusters = (NumClusters.Val * 1.0 / Count.Val);
  double AvgDocs = (NumDocs.Val * 1.0 / Count.Val);
  double AvgTopClusters = (NumTopClusters.Val * 1.0 / Count.Val);
  double AvgDiscardedPeaks = (NumDiscardedClustersByPeak.Val * 1.0 / Count.Val);
  double AvgDiscardedVariants = (NumDiscardedClustersByVariant.Val * 1.0 / Count.Val);
  double AvgRemaining = (NumRemainingClusters.Val * 1.0 / Count.Val);
  double AvgNumVariants = (NumVariants.Val * 1.0 / Count.Val);
  Err("Average number of quotes in total: %f\n", AvgQuotes);
  Err("Average number of clusters in total: %f\n", AvgClusters);
  Err("Average number of docs in total: %f\n", AvgDocs);
  Err("Average number of top clusters in total: %f\n", AvgTopClusters);
  Err("Average number of discarded clusters by peak in total: %f\n", AvgDiscardedPeaks);
  Err("Average number of discarded clusters by variant total: %f\n", AvgDiscardedVariants);
  Err("Average number of top clusters remaining in total: %f\n", AvgRemaining);
  Err("Average number of top variants found: %f\n", AvgNumVariants);
  return 0;
}
int main(int argc, char* argv[]) {
  TTableContext Context;
  Schema NetworkS;
  NetworkS.Add(TPair<TStr, TAttrType>("Year", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("Month", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("DayOfMonth", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("DayOfWeek", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("DepTime", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("CRSDepTime", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("ArrTime", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("CRSArrTime", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("UniqueCarrier", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("FlightNum", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("TailNum", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("ActualElapsedTime", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("CRSElapsedTime", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("AirTime", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("ArrDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("DepDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("Origin", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("Dest", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("Distance", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("TaxiIn", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("TaxiOut", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("Cancelled", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("CancellationCode", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("Diverted", atStr));
  NetworkS.Add(TPair<TStr, TAttrType>("CarrierDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("WeatherDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("NASDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("SecurityDelay", atInt));
  NetworkS.Add(TPair<TStr, TAttrType>("LateAircraftDelay", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2);RelevantCols.Add(3); RelevantCols.Add(4); RelevantCols.Add(5);
  RelevantCols.Add(6); RelevantCols.Add(7); RelevantCols.Add(8); RelevantCols.Add(9); RelevantCols.Add(10); RelevantCols.Add(11);
  RelevantCols.Add(12); RelevantCols.Add(13); RelevantCols.Add(14); RelevantCols.Add(15); RelevantCols.Add(16); RelevantCols.Add(17);
  RelevantCols.Add(18); RelevantCols.Add(19); RelevantCols.Add(20); RelevantCols.Add(21); RelevantCols.Add(22); RelevantCols.Add(23);
  RelevantCols.Add(24); RelevantCols.Add(25); RelevantCols.Add(26); RelevantCols.Add(27); RelevantCols.Add(28);
  PTable P = TTable::LoadSS(NetworkS, "table/2007.csv",
  Context, RelevantCols, ',', false);

  TStrV SV;
  TStrV DV;
  TStrV VE;
  double start = omp_get_wtime();
  PNSparseNet G = TSnap::ToNetwork<PNSparseNet>(P, TStr("Origin"),
   TStr("Dest"), SV, DV, VE, aaLast);
  double end = omp_get_wtime();
  printf("Conversion time without attributes %f\n", (end-start));

  start = omp_get_wtime();
  TSnap::AddAttrTable<PNSparseNet>(P, G, TStr("Origin"),
   TStr("Dest"), SV, DV, VE, aaLast);
  end = omp_get_wtime();
  printf("Conversion time with attributes %f\n", (end-start));

  /*(PTable Table, PGraph& Graph, const TStr& SrcCol, const TStr& DstCol, 
  TStrV& SrcAttrV, TStrV& DstAttrV, TStrV& EdgeAttrV, TAttrAggr AggrPolicy, TInt DefaultInt,
  TFlt DefaultFlt, TStr DefaultStr)*/

}
Example #3
0
void TNGramBs::GetNGramIdV(
 const TStr& HtmlStr, TIntV& NGramIdV, TIntPrV& NGramBEChXPrV) const {
  // create MxNGramLen queues
  TVec<TIntQ> WIdQV(MxNGramLen);
  TVec<TIntPrQ> BEChXPrQV(MxNGramLen);
  for (int NGramLen=1; NGramLen<MxNGramLen; NGramLen++){
    WIdQV[NGramLen].Gen(100*NGramLen, NGramLen+1);
    BEChXPrQV[NGramLen].Gen(100*NGramLen, NGramLen+1);
  }
  bool AllWIdQClrP=true;
  // extract words from text-string
  PSIn HtmlSIn=TStrIn::New(HtmlStr, false);
  THtmlLx HtmlLx(HtmlSIn);
  while (HtmlLx.Sym!=hsyEof){
    if ((HtmlLx.Sym==hsyStr)||(HtmlLx.Sym==hsyNum)){
      // get word-string & word-id
      TStr WordStr=HtmlLx.UcChA;
      int WId; int SymBChX=HtmlLx.SymBChX; int SymEChX=HtmlLx.SymEChX;
      if ((SwSet.Empty())||(!SwSet->IsIn(WordStr))){
        if (!Stemmer.Empty()){
          WordStr=Stemmer->GetStem(WordStr);}
        if (IsWord(WordStr, WId)){
          if (!IsSkipWord(WId)){
            NGramIdV.Add(0+WId); // add single word
            NGramBEChXPrV.Add(TIntPr(SymBChX, SymEChX)); // add positions
            for (int NGramLen=1; NGramLen<MxNGramLen; NGramLen++){
              TIntQ& WIdQ=WIdQV[NGramLen];
              TIntPrQ& BEChXPrQ=BEChXPrQV[NGramLen];
              WIdQ.Push(WId); BEChXPrQ.Push(TIntPr(SymBChX, SymEChX));
              AllWIdQClrP=false;
              // if queue full
              if (WIdQ.Len()==NGramLen+1){
                // create sequence
                TIntV WIdV; WIdQ.GetSubValVec(0, WIdQ.Len()-1, WIdV);
                TIntPrV BEChXPrV; BEChXPrQ.GetSubValVec(0, BEChXPrQ.Len()-1, BEChXPrV);
                // add ngram-id or reset queues
                int WIdVP;
                if (WIdVToFqH.IsKey(WIdV, WIdVP)){ // if sequence is frequent
                  int NGramId=GetWords()+WIdVP; // get sequence ngram-id
                  NGramIdV.Add(NGramId); // add sequence ngram-id
                  NGramBEChXPrV.Add(TIntPr(BEChXPrV[0].Val1, BEChXPrV.Last().Val2)); // add positions
                }
              }
            }
          }
        } else {
          // break queue sequences if infrequent word occures
          if (!AllWIdQClrP){
            for (int NGramLen=1; NGramLen<MxNGramLen; NGramLen++){
              TIntQ& WIdQ=WIdQV[NGramLen];
              TIntPrQ& BEChXPrQ=BEChXPrQV[NGramLen];
              if (!WIdQ.Empty()){WIdQ.Clr(); BEChXPrQ.Clr();}
            }
            AllWIdQClrP=true;
          }
        }
      }
    }
    // get next symbol
    HtmlLx.GetSym();
  }
}
Example #4
0
void TNEANet::GetEIdV(TIntV& EIdV) const {
  EIdV.Gen(GetEdges(), 0);
  for (int E=EdgeH.FFirstKeyId(); EdgeH.FNextKeyId(E); ) {
    EIdV.Add(EdgeH.GetKey(E));
  }
}
/////////////////////////////////////////////////
// BLEU-score
double TEvalScoreBleu::Eval(const PTransCorpus& TransCorpus, const TIntV& _SentIdV) {
    // check if the corpus has translations
    IAssert(TransCorpus->IsTrans());

    // ngram counts (cliped and full)
    TIntH ClipCountNGramH, CountNGramH;
    // candidate and effective reference length
    int FullTransLen = 0, FullRefLen = 0;

    // iterate over sentences
    TIntV SentIdV = _SentIdV;
    if (SentIdV.Empty()) { TransCorpus->GetSentIdV(SentIdV); }
    const int Sents = SentIdV.Len();
    for (int SentIdN = 0; SentIdN < Sents; SentIdN++) {
        const int SentId = SentIdV[SentIdN];
        // tokenize translation
        TIntV TransWIdV; Parse(TransCorpus->GetTransStr(SentId), TransWIdV);
        TIntH TransNGramH; GetNGramH(TransWIdV, MxNGramLen, TransNGramH);
        TIntH FreeTransNGramH = TransNGramH; // number of non-matched ngrams
        // counters for getting the closest length of reference sentences
        const int TransLen = TransWIdV.Len(); 
        int BestLen = 0, BestLenDiff = TInt::Mx;
        // go over reference translations and count ngram matches
        TStrV RefTransStrV = TransCorpus->GetRefTransStrV(SentId);
        for (int RefN = 0; RefN < RefTransStrV.Len(); RefN++) {
            // parse reference translation sentence
            TIntV RefWIdV; Parse(RefTransStrV[RefN], RefWIdV);
            TIntH RefNGramH; GetNGramH(RefWIdV, MxNGramLen, RefNGramH);
            // check for matches
            int TransNGramKeyId = TransNGramH.FFirstKeyId();
            while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
                const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
                const int FreeTransNGrams = FreeTransNGramH(NGramId);
                if (RefNGramH.IsKey(NGramId) && (FreeTransNGrams>0)) {
                    // ngram match and still some free ngrams left to clip
                    const int RefNGrams = RefNGramH(NGramId);
                    FreeTransNGramH(NGramId) = TInt::GetMx(0, FreeTransNGrams - RefNGrams);
                }
            }
            // check the length difference
            const int RefLen = RefWIdV.Len();
            const int LenDiff = TInt::Abs(TransLen - RefLen);
            if (LenDiff < BestLenDiff) { 
                BestLen = RefLen; BestLenDiff = LenDiff; 
            }
        }
        // count ngrams
        int TransNGramKeyId = TransNGramH.FFirstKeyId();
        while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
            // get ngram
            const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
            IAssert(NGramId != -1);
            // check if two hash tables are aligned (should be...)
            const int FreeNGramId = FreeTransNGramH.GetKey(TransNGramKeyId);
            IAssert(NGramId == FreeNGramId);
            // get ngram count and clip-count
            const int Count = TransNGramH[TransNGramKeyId];
            const int ClipCount = Count - FreeTransNGramH[TransNGramKeyId];
            // add ngram to the coprus ngram counts
            CountNGramH.AddDat(NGramId) += Count;
            ClipCountNGramH.AddDat(NGramId) += ClipCount;
        }
        // count length
        FullTransLen += TransLen;
        FullRefLen += BestLen;
    }

    // calcualte ngram precisions
    TIntV ClipCountV(MxNGramLen); ClipCountV.PutAll(0);
    int ClipCountKeyId = ClipCountNGramH.FFirstKeyId();
    while (ClipCountNGramH.FNextKeyId(ClipCountKeyId)) {
        const int NGramId = ClipCountNGramH.GetKey(ClipCountKeyId);
        const int NGramLen = GetNGramLen(NGramId);
        IAssert(0 < NGramLen && NGramLen <= MxNGramLen);
        ClipCountV[NGramLen-1] += ClipCountNGramH[ClipCountKeyId];
    }
    TIntV CountV(MxNGramLen); CountV.PutAll(0);
    int CountKeyId = CountNGramH.FFirstKeyId();
    while (CountNGramH.FNextKeyId(CountKeyId)) {
        const int NGramId = CountNGramH.GetKey(CountKeyId);
        const int NGramLen = GetNGramLen(NGramId);
        IAssert(0 < NGramLen && NGramLen <= MxNGramLen);
        CountV[NGramLen-1] += CountNGramH[CountKeyId];
    }
    TFltV PrecV(MxNGramLen, 0);
    for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) {
        const int ClipCount = ClipCountV[NGramLen];
        const int Count = CountV[NGramLen];
        const double Prec = (Count > 0) ? double(ClipCount)/double(Count) : 0.0;
        PrecV.Add(Prec);
        //printf("%d-gram Match:%d Total:%d Prec:%.5f\n", NGramLen+1, ClipCount, Count, Prec);
    }

    // calcualte brevity penalty
    double LogBP = TFlt::GetMn(0.0, 1.0 - double(FullRefLen)/double(FullTransLen));
    double BP = exp(LogBP);

    // calculate full BLEU score
    double BleuScore = BP; 
    const double Wgt = 1.0 / double(MxNGramLen);
    for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) {
        BleuScore *= pow(PrecV[NGramLen], Wgt);
    }    
    printf("BLEU Score: %.5f\n", BleuScore);
    
    // done!
    return BleuScore;
}
Example #6
0
/////////////////////////////////////////////////
// NGram-Base
TStr TNGramBs::GetWIdVStr(const TIntV& WIdV) const {
  TChA ChA;
  for (int WIdN=0; WIdN<WIdV.Len(); WIdN++){
    if (WIdN>0){ChA+=' '/*'_'*/;} ChA+=GetWordStr(WIdV[WIdN]);}
  return ChA;
}
Example #7
0
void DemoFullBfsDfs() {
  
  const int NNodes = 500;
  
  PGraph G = GenFull<PGraph>(NNodes);
  PNGraph GOut;
  int TreeSz, TreeDepth;
  
  // Get BFS tree from first node without following links (demos different options)
  GOut = GetBfsTree(G, 1, false, false);
  GetSubTreeSz(G, 1, false, false, TreeSz, TreeDepth);
  printf("FollowOut=false, FollowIn=false, GOut->GetNodes() == %d, GOut->GetEdges() = %d\n", 
        GOut->GetNodes(), G->GetEdges());
  printf("TreeSz == %d, TreeDepth = %d\n", TreeSz, TreeDepth);
  
  GOut = GetBfsTree(G, NNodes-1, true, true);
  GetSubTreeSz(G, 1, true, true, TreeSz, TreeDepth);
  printf("FollowOut=true, FollowIn=true, GOut->GetNodes() == %d, GOut->GetEdges() = %d\n", 
        GOut->GetNodes(), G->GetEdges());
  printf("TreeSz == %d, TreeDepth = %d\n", TreeSz, TreeDepth);
  
  GOut = GetBfsTree(G, NNodes/2, true, false);
  GetSubTreeSz(G, 1, true, false, TreeSz, TreeDepth);
  printf("FollowOut=true, FollowIn=false, GOut->GetNodes() == %d, GOut->GetEdges() = %d\n", 
        GOut->GetNodes(), G->GetEdges());
  printf("TreeSz == %d, TreeDepth = %d\n", TreeSz, TreeDepth);
  
  GOut = GetBfsTree(G, 1, false, true);
  GetSubTreeSz(G, 1, false, true, TreeSz, TreeDepth);
  printf("FollowOut=false, FollowIn=true, GOut->GetNodes() == %d, GOut->GetEdges() = %d\n", 
        GOut->GetNodes(), G->GetEdges());
  printf("TreeSz == %d, TreeDepth = %d\n", TreeSz, TreeDepth);
  
  TIntV NIdV;
  int StartNId, Hop, Nodes;
  
  StartNId = 1;
  Hop = 1;
  Nodes = GetNodesAtHop(G, StartNId, Hop, NIdV, HasGraphFlag(typename PGraph::TObj, gfDirected));
  printf("StartNId = %d, Nodes = %d, GetNodesAtHop NIdV.Len() = %d\n", StartNId, Nodes, NIdV.Len());
  
  TIntPrV HopCntV;
  Nodes = GetNodesAtHops(G, StartNId, HopCntV, HasGraphFlag(typename PGraph::TObj, gfDirected));
  printf("StartNId = %d, Nodes = %d, GetNodesAtHops HopCntV.Len() = %d\n", StartNId, Nodes, HopCntV.Len());
  
  int Length, SrcNId, DstNId;
  SrcNId = 1;
  DstNId = NNodes-1;
  
  Length = GetShortPath(G, SrcNId, DstNId, HasGraphFlag(typename PGraph::TObj, gfDirected));
  printf("SPL Length = %d\n", Length);
  
  TIntH NIdToDistH;
  int MaxDist = 9;
  Length = GetShortPath(G, SrcNId, NIdToDistH, HasGraphFlag(typename PGraph::TObj, gfDirected), MaxDist);
//  for (int i = 0; i < min(5,NIdToDistH.Len()); i++) {
//    printf("NIdToDistH[%d] = %d\n", i, NIdToDistH[i].Val);
//  }
  
  int FullDiam;
  double EffDiam, AvgDiam;
  int NTestNodes = 10;
  
  for (int IsDir = 0; IsDir < 2; IsDir++) {
    printf("IsDir = %d:\n", IsDir);
    
    FullDiam = GetBfsFullDiam(G, NTestNodes, IsDir);
    printf("FullDiam = %d\n", FullDiam);
    
    EffDiam = GetBfsEffDiam (G, NTestNodes, IsDir);
    printf("EffDiam = %.3f\n", EffDiam);
    
    EffDiam = GetBfsEffDiam (G, NTestNodes, IsDir, EffDiam, FullDiam);
    printf("EffDiam = %.3f, FullDiam = %d\n", EffDiam, FullDiam);
    
    EffDiam = GetBfsEffDiam (G, NTestNodes, IsDir, EffDiam, FullDiam, AvgDiam);
    printf("EffDiam = %.3f, FullDiam = %d, AvgDiam = %.3f\n", EffDiam, FullDiam, AvgDiam);
    
    TIntV SubGraphNIdV;
    for (int i = 0; i < NTestNodes; i++) {
      SubGraphNIdV.Add(G->GetRndNId());
    }
//    for (int i = 0; i < SubGraphNIdV.Len(); i++) {
//      printf("SubGraphNIdV[%d] = %d\n", i, SubGraphNIdV[i].Val);
//    }
    
    EffDiam = GetBfsEffDiam(G, NTestNodes, SubGraphNIdV, IsDir, EffDiam, FullDiam);
    printf("For subgraph: EffDiam = %.3f, FullDiam = %d\n", EffDiam, FullDiam);
    
  }
  
}
Example #8
0
/////////////////////////////////////////////////
// Best-Paths
void GetBestPaths(
 const TStr& SrcNmObjStr, const TStr& DstNmObjStr, const PNmObjBs& NmObjBs){
  int SrcNmObjId=NmObjBs->GetNmObjId(SrcNmObjStr);
  int DstNmObjId=NmObjBs->GetNmObjId(DstNmObjStr);
  int NmObjs=NmObjBs->GetNmObjs();
  TIntPrV ParLevPrV(NmObjs); TIntPrV DstParLevPrV;
  ParLevPrV.PutAll(TIntPr(-1, -1));
  int CurLev=0;
  ParLevPrV[SrcNmObjId]=TIntPr(SrcNmObjId, CurLev);
  forever{
    CurLev++; int NewEdges=0;
    for (int NmObjId1=0; NmObjId1<NmObjs; NmObjId1++){
      if (ParLevPrV[NmObjId1].Val2==CurLev-1){
        TIntV DocIdV1; NmObjBs->GetNmObjDocIdV(NmObjId1, DocIdV1);
        for (int NmObjId2=0; NmObjId2<NmObjs; NmObjId2++){
          if ((NmObjId2==DstNmObjId)||(ParLevPrV[NmObjId2].Val2==-1)){
            TIntV DocIdV2; NmObjBs->GetNmObjDocIdV(NmObjId2, DocIdV2);
            TIntV IntrsDocIdV; DocIdV1.Intrs(DocIdV2, IntrsDocIdV);
            if (!IntrsDocIdV.Empty()){
              ParLevPrV[NmObjId2]=TIntPr(NmObjId1, CurLev); NewEdges++;
              if (NmObjId2==DstNmObjId){
                DstParLevPrV.Add(TIntPr(NmObjId1, CurLev));
              }
            }
          }
        }
      }
    }
    if ((NewEdges==0)||(ParLevPrV[DstNmObjId].Val2!=-1)){
      break;
    }
  }
  // prepare graph
  THash<TStr, PVrtx> VrtxNmToVrtxH; TStrPrV VrtxNmPrV;
  VrtxNmToVrtxH.AddKey(SrcNmObjStr);
  VrtxNmToVrtxH.AddKey(DstNmObjStr);
  // write path
  ContexterF->NmObjLinkageREd->Clear();
  for (int DstParLevPrN=0; DstParLevPrN<DstParLevPrV.Len(); DstParLevPrN++){
    ParLevPrV[DstNmObjId]=DstParLevPrV[DstParLevPrN];
    int DstParLev=ParLevPrV[DstNmObjId].Val2;
    TStr DstNmObjStr=NmObjBs->GetNmObjStr(DstNmObjId);
    ContexterF->NmObjLinkageREd->Lines->Add(DstNmObjStr.CStr());
    int ParNmObjId=DstNmObjId;
    TStr PrevNmObjStr=DstNmObjStr;
    forever {
      if (ParNmObjId==SrcNmObjId){break;}
      ParNmObjId=ParLevPrV[ParNmObjId].Val1;
      int ParLev=ParLevPrV[ParNmObjId].Val2;
      TStr CurNmObjStr=NmObjBs->GetNmObjStr(ParNmObjId);
      TStr ParNmObjStr=TStr::GetSpaceStr((DstParLev-ParLev)*4)+CurNmObjStr;
      ContexterF->NmObjLinkageREd->Lines->Add(ParNmObjStr.CStr());
      // create vertex & edge
      VrtxNmToVrtxH.AddKey(CurNmObjStr);
      if (!PrevNmObjStr.Empty()){
        if (PrevNmObjStr<CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(PrevNmObjStr, CurNmObjStr));
        } else
        if (PrevNmObjStr>CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(CurNmObjStr, PrevNmObjStr));
        }
      }
      // save curent named-object
      PrevNmObjStr=CurNmObjStr;
    }
  }
  // generate graph
  // create graph
  PGraph Graph=TGGraph::New();
  // create vertices
  for (int VrtxN=0; VrtxN<VrtxNmToVrtxH.Len(); VrtxN++){
    TStr VrtxNm=VrtxNmToVrtxH.GetKey(VrtxN);
    PVrtx Vrtx=TGVrtx::New(VrtxNm);
    VrtxNmToVrtxH.GetDat(VrtxNm)=Vrtx;
    Graph->AddVrtx(Vrtx);
  }
  // create edges
  for (int EdgeN=0; EdgeN<VrtxNmPrV.Len(); EdgeN++){
    PVrtx Vrtx1=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val1);
    PVrtx Vrtx2=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val2);
    PEdge Edge=new TGEdge(Vrtx1, Vrtx2, TStr::Fmt("_%d", EdgeN), false);
    Graph->AddEdge(Edge);
  }
  // place graph
  ContexterF->State->ElGraph=Graph;
  TRnd Rnd(1);
  ContexterF->State->ElGraph->PlaceSimAnnXY(Rnd, ContexterF->State->ElGks);
  // draw graph
  ContexterF->State->ElGks->Clr();
  ContexterF->ElPbPaint(NULL);
}
Example #9
0
// Demos BFS functions on undirected graph that is not fully connected
void DemoBFSUndirectedRandom() {
  
  PUNGraph G;
  
  TStr FName = TStr::Fmt("%s/sample_bfsdfs_unpower.txt", DIRNAME);
  
  const int NNodes = 50;
  G = GenRndPowerLaw(NNodes, 2.5);
  
  // Can save/here
//  SaveEdgeList(G, FName);
  
//  G = LoadEdgeList<PUNGraph>(FName);
  TIntStrH NodeLabelH;
  for (int i = 0; i < G->GetNodes(); i++) {
    NodeLabelH.AddDat(i, TStr::Fmt("%d", i));
  }
  DrawGViz(G, gvlNeato, TStr::Fmt("%s/sample_bfsdfs_unpower.png", DIRNAME), "Sample bfsdfs Graph", NodeLabelH);
  
  TIntV NIdV;
  int StartNId, Hop, Nodes;
  
  int IsDir = 0;
  printf("IsDir = %d:\n", IsDir);
  
  StartNId = 1;
  Hop = 1;
  Nodes = GetNodesAtHop(G, StartNId, Hop, NIdV, IsDir);
  printf("StartNId = %d, Nodes = %d, GetNodesAtHop NIdV.Len() = %d, NIdV[0] = %d\n", StartNId, Nodes, NIdV.Len(), NIdV[0].Val);
  
  TIntPrV HopCntV;
  Nodes = GetNodesAtHops(G, StartNId, HopCntV, IsDir);
  printf("StartNId = %d, Nodes = %d, GetNodesAtHops HopCntV.Len() = %d\n", StartNId , Nodes, HopCntV.Len());
//  for (int N = 0; N < HopCntV.Len(); N++) {
//    printf("HopCntV[%d] = (%d, %d)\n", N, HopCntV[N].Val1.Val, HopCntV[N].Val2.Val);
//  }
  
  
  int Length, SrcNId, DstNId;
  SrcNId = 1;
  DstNId = G->GetNodes() - 1;
  
  Length = GetShortPath(G, SrcNId, DstNId, IsDir);
  printf("%d -> %d: SPL Length = %d\n", SrcNId, DstNId, Length);
  
  SrcNId = 1;
  DstNId = 33;
  Length = GetShortPath(G, SrcNId, DstNId, IsDir);
  printf("%d -> %d: SPL Length = %d\n", SrcNId, DstNId, Length);
  
  TIntH NIdToDistH;
  int MaxDist = 9;
  Length = GetShortPath(G, SrcNId, NIdToDistH, IsDir, MaxDist);
//  for (int i = 0; i < min(5,NIdToDistH.Len()); i++) {
//    printf("NIdToDistH[%d] = %d\n", i, NIdToDistH[i].Val);
//  }
  
  TInt::Rnd.PutSeed(0);
  
  int FullDiam;
  double EffDiam, AvgSPL;
  int NTestNodes = G->GetNodes() / 3 * 2;
  
  FullDiam = GetBfsFullDiam(G, NTestNodes, IsDir);
  printf("FullDiam = %d\n", FullDiam);
  
  EffDiam = GetBfsEffDiam(G, NTestNodes, IsDir);
  printf("EffDiam = %.3f\n", EffDiam);
  
  EffDiam = GetBfsEffDiam(G, NTestNodes, IsDir, EffDiam, FullDiam);
  printf("EffDiam = %.3f, FullDiam = %d\n", EffDiam, FullDiam);
  
  EffDiam = GetBfsEffDiam(G, NTestNodes, IsDir, EffDiam, FullDiam, AvgSPL);
  printf("EffDiam = %.3f, FullDiam = %d, AvgDiam = %.3f\n", EffDiam, FullDiam, AvgSPL);
  
  TIntV SubGraphNIdV;
  SubGraphNIdV.Add(0);
  SubGraphNIdV.Add(4);
  SubGraphNIdV.Add(31);
  SubGraphNIdV.Add(45);
  SubGraphNIdV.Add(18);
  SubGraphNIdV.Add(11);
  SubGraphNIdV.Add(11);
  SubGraphNIdV.Add(48);
  SubGraphNIdV.Add(34);
  SubGraphNIdV.Add(30);
  
  EffDiam = GetBfsEffDiam(G, NTestNodes, SubGraphNIdV, IsDir, EffDiam, FullDiam);
  printf("For subgraph: EffDiam = %.4f, FullDiam = %d\n", EffDiam, FullDiam);
 
}
Example #10
0
// Demos BFS functions on directed graph that is not fully connected
void DemoBFSDirectedRandom() {

  
  PNGraph G = TNGraph::New();
  
  TStr FName = TStr::Fmt("%s/sample_bfsdfs_ngraph.txt", DIRNAME);
  
  // Create benchmark graph, initially visually to confirm values are correct
  const int NNodes = 30;
  G = GenRndGnm<PNGraph>(NNodes, NNodes*2);
  // Add some more random edges
  for (int i = 0; i < 10; i++) {
    TInt Src, Dst;
    do {
      Src = G->GetRndNId();
      Dst = G->GetRndNId();
    }
    while (Src == Dst || G->IsEdge(Src, Dst));
    G->AddEdge(Src, Dst);
  }
  // Add isolated component
  G->AddNode(NNodes);
  G->AddNode(NNodes+1);
  G->AddNode(NNodes+2);
  G->AddEdge(NNodes, NNodes+1);
  G->AddEdge(NNodes+1, NNodes+2);
  G->AddEdge(NNodes+2, NNodes+1);
  printf("G->GetNodes() = %d, G->GetEdges() = %d\n", G->GetNodes(), G->GetEdges());
  
  
  //  SaveEdgeList(G, FName);
  
  //  G = LoadEdgeList<PNGraph>(FName);
  TIntStrH NodeLabelH;
  for (int i = 0; i < G->GetNodes(); i++) {
    NodeLabelH.AddDat(i, TStr::Fmt("%d", i));
  }
  DrawGViz(G, gvlDot, TStr::Fmt("%s/sample_bfsdfs_ngraph.png", DIRNAME), "Sample BFS Graph", NodeLabelH);
  
  printf("G->GetNodes() = %d, G->GetEdges() = %d\n", G->GetNodes(), G->GetEdges());
  
  TIntV NIdV;
  int StartNId, Hop, Nodes;
  
  //  for (int IsDir = 0; IsDir < 2; IsDir++) {
  int IsDir = 1;
  printf("IsDir = %d:\n", IsDir);
  
  StartNId = 11;
  Hop = 1;
  Nodes = GetNodesAtHop(G, StartNId, Hop, NIdV, IsDir);
  printf("Nodes = %d, GetNodesAtHop NIdV.Len() = %d\n", Nodes, NIdV.Len());
  for (int i = 0; i < NIdV.Len(); i++) {
    printf("NIdV[%d] = %d\n", i, NIdV[i].Val);
  }
  printf("Nodes == 2");
  printf("NIdV.Len() == 2");
  
  TIntPrV HopCntV;
  Nodes = GetNodesAtHops(G, StartNId, HopCntV, IsDir);
  printf("Nodes = %d, GetNodesAtHops HopCntV.Len() = %d\n", Nodes, HopCntV.Len());
  printf("Nodes == 10");
  printf("HopCntV.Len() == 10");
//  for (int N = 0; N < HopCntV.Len(); N++) {
//    printf("HopCntV[%d] = (%d, %d)\n", N, HopCntV[N].Val1.Val, HopCntV[N].Val2.Val);
//  }
  
  int Length, SrcNId, DstNId;
  SrcNId = 11;
  DstNId = G->GetNodes() - 1;
  
  Length = GetShortPath(G, SrcNId, DstNId, IsDir);
  printf("%d -> %d: SPL Length = %d\n", SrcNId, DstNId, Length);
  
  SrcNId = 11;
  DstNId = 27;
  Length = GetShortPath(G, SrcNId, DstNId, IsDir);
  printf("%d -> %d: SPL Length = %d\n", SrcNId, DstNId, Length);
  
  TIntH NIdToDistH;
  int MaxDist = 9;
  Length = GetShortPath(G, SrcNId, NIdToDistH, IsDir, MaxDist);
//  for (int i = 0; i < min(5,NIdToDistH.Len()); i++) {
//    printf("NIdToDistH[%d] = %d\n", i, NIdToDistH[i].Val);
//  }
  
  TInt::Rnd.PutSeed(0);
  
  int FullDiam;
  double EffDiam, AvgSPL;
  int NTestNodes = G->GetNodes() / 2;
  
  FullDiam = GetBfsFullDiam(G, NTestNodes, IsDir);
  printf("FullDiam = %d\n", FullDiam);
  
  EffDiam = GetBfsEffDiam(G, NTestNodes, IsDir);
  printf("EffDiam = %.3f\n", EffDiam);
  
  EffDiam = GetBfsEffDiam(G, NTestNodes, IsDir, EffDiam, FullDiam);
  printf("EffDiam = %.3f, FullDiam = %d\n", EffDiam, FullDiam);
  
  EffDiam = GetBfsEffDiam(G, NTestNodes, IsDir, EffDiam, FullDiam, AvgSPL);
  printf("EffDiam = %.3f, FullDiam = %d, AvgDiam = %.3f\n", EffDiam, FullDiam, AvgSPL);
  
  TIntV SubGraphNIdV;
  SubGraphNIdV.Add(8);
  SubGraphNIdV.Add(29);
  SubGraphNIdV.Add(16);
  SubGraphNIdV.Add(0);
  SubGraphNIdV.Add(19);
  SubGraphNIdV.Add(17);
  SubGraphNIdV.Add(26);
  SubGraphNIdV.Add(14);
  SubGraphNIdV.Add(10);
  SubGraphNIdV.Add(24);
  SubGraphNIdV.Add(27);
  SubGraphNIdV.Add(2);
  SubGraphNIdV.Add(18);
  
  EffDiam = GetBfsEffDiam(G, NTestNodes, SubGraphNIdV, IsDir, EffDiam, FullDiam);
  printf("For subgraph: EffDiam = %.4f, FullDiam = %d\n", EffDiam, FullDiam);
  
}
Example #11
0
void TBlobBs::GenFFreeBlobPtV(const TIntV& BlockLenV, TBlobPtV& FFreeBlobPtV){
  FFreeBlobPtV.Gen(BlockLenV.Len()+1);
}
Example #12
0
// Create TMMNet, add modes, crossnets, create subgraphs, covert to TNEANet
void ManipulateMMNet() {
  int NNodes = 1000;
  int NEdges = 1000;
  // Create a multimodal network
  PMMNet Graph;
  Graph = PMMNet::New();
  PrintMMNetStats("Empty MMNet",Graph);

  // Add mode
  TStr TestMode1("TestMode1");
  Graph->AddModeNet(TestMode1);
  TInt TestModeId1 = Graph->GetModeId(TestMode1);

  // Add same-mode crossnet, directed
  TStr TestCross1("TestCross1");
  Graph->AddCrossNet(TestMode1, TestMode1, TestCross1, true);
  TInt TestCrossId1 = Graph->GetCrossId(TestCross1);

  // Add same-mode crossnet, undirected
  TStr TestCross2("TestCross2");
  Graph->AddCrossNet(TestModeId1, TestModeId1, TestCross2, false);
  TInt TestCrossId2 = Graph->GetCrossId(TestCross2);

  // Add mode
  TStr TestMode2("TestMode2");
  Graph->AddModeNet(TestMode2);
  TInt TestModeId2 = Graph->GetModeId(TestMode2);

  // Add crossnet, directed
  TStr TestCross3("TestCross3");
  Graph->AddCrossNet(TestMode1, TestMode2, TestCross3, true);
  TInt TestCrossId3 = Graph->GetCrossId(TestCross3);

  // Add crossnet, undirected
  TStr TestCross4("TestCross4");
  Graph->AddCrossNet(TestModeId1, TestModeId2, TestCross4, false);
  TInt TestCrossId4 = Graph->GetCrossId(TestCross4);

  PrintMMNetStats("MMNet with modes/crossnets",Graph);

  // Add Nodes
  TModeNet& ModeNet1 = Graph->GetModeNetByName(TestMode1);
  TModeNet& ModeNet2 = Graph->GetModeNetById(TestModeId2);
  for (int i=0; i < NNodes; i++) {
    ModeNet1.AddNode(i);
    ModeNet2.AddNode(i*2);
  }

  // Add edges
  TCrossNet& CrossNet1 = Graph->GetCrossNetByName(TestCross1);
  TCrossNet& CrossNet2 = Graph->GetCrossNetById(TestCrossId2);
  TCrossNet& CrossNet3 = Graph->GetCrossNetByName(TestCross3);
  TCrossNet& CrossNet4 = Graph->GetCrossNetById(TestCrossId4);
  for (int i=0; i < NEdges; i++) {
    CrossNet1.AddEdge(i, (i+1)%NNodes, i);
    CrossNet2.AddEdge((i+5)%NNodes, i, i);
    CrossNet3.AddEdge(i, (i%NNodes)*2, i);
    CrossNet4.AddEdge((i+5)%NNodes, (i%NNodes)*2, i);
  }

  //Iterate over modes
  for (TMMNet::TModeNetI MI = Graph->BegModeNetI(); MI < Graph->EndModeNetI(); MI++) {
    PrintGStats(MI.GetModeName().GetCStr(), MI.GetModeNet());
  }

  //Iterate over crossnets
  for (TMMNet::TCrossNetI CI = Graph->BegCrossNetI(); CI < Graph->EndCrossNetI(); CI++) {
    PrintGStats(CI.GetCrossName().GetCStr(), CI.GetCrossNet());
  }

  // Get subgraph
  TStrV CrossNets;
  CrossNets.Add(TestCross1);
  PMMNet Subgraph = Graph->GetSubgraphByCrossNet(CrossNets);
  PrintMMNetStats("Subgraph", Subgraph);
  TModeNet& M1 = Subgraph->GetModeNetByName(TestMode1);
  PrintGStats("M1", M1);
  // Get neighbor types
  TStrV M1Names;
  M1.GetCrossNetNames(M1Names);
  printf("Num neighbor types %d\n", M1Names.Len());
  // Get Neighbors for node 0
  TIntV Neighbors;
  M1.GetNeighborsByCrossNet(0, TestCross1, Neighbors);
  printf("Num Neighbors %d\n", Neighbors.Len());

  // Convert to TNEANet
  TIntV CrossNetIds;
  CrossNetIds.Add(TestCrossId1);
  CrossNetIds.Add(TestCrossId2);
  CrossNetIds.Add(TestCrossId3);
  CrossNetIds.Add(TestCrossId4);
  TVec<TTriple<TInt, TStr, TStr> > NodeAttrMapping; //Triples of (ModeId, OldAttrName, NewAttrName)
  TVec<TTriple<TInt, TStr, TStr> > EdgeAttrMapping; //Triples of (CrossId, OldAttrName, NewAttrName)
  PNEANet Net = Graph->ToNetwork(CrossNetIds, NodeAttrMapping, EdgeAttrMapping);
  PrintGStats("TNEANet", Net);
}
Example #13
0
/// rewire bipartite community affiliation graphs
void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) {
    THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len());
    TIntV NDegV;
    TIntV CDegV;
    for (int i = 0; i < CmtyVH.Len(); i++) {
        int CID = CmtyVH.GetKey(i);
        for (int j = 0; j < CmtyVH[i].Len(); j++) {
            int NID = CmtyVH[i][j];
            NDegV.Add(NID);
            CDegV.Add(CID);
        }
    }
    TIntPrSet CNIDSet(CDegV.Len());
    int c=0;
    while (c++ < 15 && CDegV.Len() > 1) {
        for (int i = 0; i < CDegV.Len(); i++) {
            int u = Rnd.GetUniDevInt(CDegV.Len());
            int v = Rnd.GetUniDevInt(NDegV.Len());
            if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) {
                continue;
            }
            CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
            if (u == CDegV.Len() - 1) {
                CDegV.DelLast();
            }  else {
                CDegV[u] = CDegV.Last();
                CDegV.DelLast();
            }
            if ( v == NDegV.Len() - 1) {
                NDegV.DelLast();
            }  else {
                NDegV[v] = NDegV.Last();
                NDegV.DelLast();
            }
        }
    }
    for (int i = 0; i < CNIDSet.Len(); i++) {
        TIntPr CNIDPr = CNIDSet[i];
        IAssert(CmtyVH.IsKey(CNIDPr.Val1));
        NewCmtyVH.AddDat(CNIDPr.Val1);
        NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
    }
    CmtyVH = NewCmtyVH;
}
Example #14
0
PLwOntoGround TLwOntoGround::GetOntoGround(
 const PLwOnto& LwOnto, const PBowDocBs& BowDocBs,
 const TStr& LangNm, const bool& DocCatIsTermIdP,
 const double& CutWordWgtSumPrc){
  printf("Generating Ontology-Classifier...\n");
  // shortcuts
  PLwTermBs TermBs=LwOnto->GetTermBs();
  int Terms=TermBs->GetTerms();
  PLwLinkBs LinkBs=LwOnto->GetLinkBs();
  PLwLinkTypeBs LinkTypeBs=LwOnto->GetLinkTypeBs();
  int LangId=LwOnto->GetLangBs()->GetLangId(LangNm);
  int Docs=BowDocBs->GetDocs();
  // create tfidf
  printf("  Creating BowDocWgtBs ...");
  PBowDocWgtBs BowDocWgtBs=TBowDocWgtBs::New(BowDocBs, bwwtNrmTFIDF);
  PBowSim BowSim=TBowSim::New(bstCos);
  printf(" Done.\n");
  // collect documents per ontology-term
  printf("  Collecting documents per ontology-term ...\n");
  TIntIntVH TermIdToDIdVH; int PosCats=0; int NegCats=0;
  for (int DId=0; DId<Docs; DId++){
    printf("    Docs:%d/%d Pos:%d Neg:%d\r", 1+DId, Docs, PosCats, NegCats);
    for (int DocCIdN=0; DocCIdN<BowDocBs->GetDocCIds(DId); DocCIdN++){
      // get document-category
      int CId=BowDocBs->GetDocCId(DId, DocCIdN);
      TStr CatNm=BowDocBs->GetCatNm(CId);
      // get term-id
      if (DocCatIsTermIdP){
        int TermId=CatNm.GetInt();
        if (TermBs->IsTermId(TermId)){
          TermIdToDIdVH.AddDat(TermId).Add(DId); PosCats++;
        } else {NegCats++;}
      } else {
        if (TermBs->IsTermId(CatNm, LangId)){
          int TermId=TermBs->GetTermId(CatNm, LangId);
          TermIdToDIdVH.AddDat(TermId).Add(DId); PosCats++;
        } else {NegCats++;}
      }
    }
  }
  printf("    Docs:%d/%d Pos:%d Neg:%d\n", Docs, Docs, PosCats, NegCats);
  printf("  Done.\n");
  // create sub-terms & up-terms vectors
  printf("  Creating sub-terms & up-terms vectors ...");
  TIntIntVH Const_TermIdToSubTermIdVH;
  TIntIntVH TermIdToSubTermIdVH;
  TIntIntVH TermIdToUpTermIdVH;
  for (int TermN=0; TermN<Terms; TermN++){
    int TermId=TermBs->GetTermId(TermN);
    for (int LinkN=0; LinkN<LinkBs->GetFromLinks(TermId); LinkN++){
      int LinkTypeId; int DstTermId;
      LinkBs->GetFromLink(TermId, LinkN, LinkTypeId, DstTermId);
      TStr LinkTypeNm=LinkTypeBs->GetLinkType(LinkTypeId)->GetLinkTypeNm();
      if (LinkTypeNm=="NT"){
        Const_TermIdToSubTermIdVH.AddDat(TermId).Add(DstTermId);
        TermIdToSubTermIdVH.AddDat(TermId).Add(DstTermId);
        TermIdToUpTermIdVH.AddDat(DstTermId).Add(TermId);
      }
    }
  }
  printf("   Done.\n");
  // create centroids
  printf("  Creating centroids ...\n");
  THash<TInt, PBowSpV> TermIdToConceptSpVH;
  TIntIntVH TermIdToSubTermDIdVH;
  TIntH ProcTermIdH;
  int PrevActiveTerms=-1;
  forever{
    // count active nodes for processing
    int ActiveTerms=0;
    for (int TermN=0; TermN<Terms; TermN++){
      int TermId=TermBs->GetTermId(TermN);
      if ((TermIdToSubTermIdVH.IsKey(TermId))&&
       (TermIdToSubTermIdVH.GetDat(TermId).Len()>0)){
        ActiveTerms++;
      }
    }
    // stop if no change from previous round
    printf("    Active-Terms:%d\n", ActiveTerms);
    if (ActiveTerms==PrevActiveTerms){break;}
    PrevActiveTerms=ActiveTerms;
    // reduce active-nodes with zero-ancestors
    for (int TermN=0; TermN<Terms; TermN++){
      int TermId=TermBs->GetTermId(TermN);
      if (ProcTermIdH.IsKey(TermId)){continue;}
      if ((!TermIdToSubTermIdVH.IsKey(TermId))||
       (TermIdToSubTermIdVH.GetDat(TermId).Len()==0)){
        printf("    %d/%d\r", 1+TermN, Terms);
        ProcTermIdH.AddKey(TermId);
        // collect document-ids
        TIntV TermDIdV;
        if (TermIdToDIdVH.IsKey(TermId)){
          TermDIdV.AddV(TermIdToDIdVH.GetDat(TermId));}
        if (TermIdToSubTermDIdVH.IsKey(TermId)){
          TermDIdV.AddV(TermIdToSubTermDIdVH.GetDat(TermId));}
        // create concept-vector if any documents
        if (TermDIdV.Len()>0){
          PBowSpV ConceptSpV=
           TBowClust::GetConceptSpV(BowDocWgtBs, BowSim, TermDIdV, CutWordWgtSumPrc);
          TermIdToConceptSpVH.AddDat(TermId, ConceptSpV);
        }
        // correct upper-term
        if (TermIdToUpTermIdVH.IsKey(TermId)){
          TIntV& UpTermIdV=TermIdToUpTermIdVH.GetDat(TermId);
          for (int UpTermIdN=0; UpTermIdN<UpTermIdV.Len(); UpTermIdN++){
            int UpTermId=UpTermIdV[UpTermIdN];
            TermIdToSubTermIdVH.GetDat(UpTermId).DelIfIn(TermId);
            if (TermDIdV.Len()>0){
              TermIdToSubTermDIdVH.AddDat(UpTermId).AddV(TermDIdV);}
          }
        }
      }
    }
  }
  printf("  Done.\n");
  // create & return classifier
  PLwOntoGround OntoGround=
   TLwOntoGround::New(LwOnto, BowDocBs, BowDocWgtBs, TermIdToConceptSpVH);
  printf("Done.\n");
  return OntoGround;
}
Example #15
0
File: ghash.cpp Project: pikma/Snap
TGraphKey::TGraphKey(const TIntV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) {
  SigV.Gen(GraphSigV.Len());
  for (int i = 0; i < GraphSigV.Len(); i++) {
    SigV[i] = TFlt(GraphSigV[i]());
  }
}
int main(int argc, char *argv[]) {
  // #### SETUP: Parse Arguments
  LogOutput Log;
  THash<TStr, TStr> Arguments;
  ArgumentParser::ParseArguments(argc, argv, Arguments, Log);

  TStr OutputDirectory;
  TStr StartString = ArgumentParser::GetArgument(Arguments, "start", "2009-02-01");
  TStr QBDBDirectory = ArgumentParser::GetArgument(Arguments, "qbdb", QBDB_DIR_DEFAULT);
  TStr OutDirectory = ArgumentParser::GetArgument(Arguments, "out", "/lfs/1/tmp/curis/");
  TInt WindowSize = ArgumentParser::GetArgument(Arguments, "window", "14").GetInt();

  if (ArgumentParser::GetArgument(Arguments, "nolog", "") == "") {
    Log.DisableLogging();
  } else if (!Arguments.IsKeyGetDat("directory", OutputDirectory)) {
    Log.SetupNewOutputDirectory("");
  } else {
    Log.SetDirectory(OutputDirectory);
  }

  // #### DATA LOADING: Load ALL the things!
  TQuoteBase QB;
  TDocBase DB;
  fprintf(stderr, "Loading QB and DB from file for %d days, starting from %s...\n", WindowSize.Val, StartString.CStr());
  Err("%s\n", QBDBDirectory.CStr());
  TSecTm PresentTime = TDataLoader::LoadQBDBByWindow(QBDBDirectory, StartString, WindowSize, QB, DB);
  fprintf(stderr, "QBDB successfully loaded!\n");

  TVec<TSecTm> PubTmV;
  TVec<TStr> PostUrlV;
  TVec<TStr> QuoteV;

  fprintf(stderr, "Dumping quotes to file...\n");
  TIntV QuoteIds;
  QB.GetAllQuoteIds(QuoteIds);
  for (int i = 0; i < QuoteIds.Len(); i++) {
    TQuote Q;
    QB.GetQuote(QuoteIds[i], Q);
    TStr QContentString;
    Q.GetContentString(QContentString);

    TVec<TUInt> Sources;
    Q.GetSources(Sources);
    for (int j = 0; j < Sources.Len(); j++) {
      TDoc D;
      DB.GetDoc(Sources[j], D);
      TStr PostUrl;
      D.GetUrl(PostUrl);
      TSecTm PostTime = D.GetDate();
      QuoteV.Add(QContentString);
      PubTmV.Add(PostTime);
      PostUrlV.Add(PostUrl);
    }
  }

  TFOut FOut(OutDirectory + "QuoteList" + ".bin");
  PubTmV.Save(FOut);
  PostUrlV.Save(FOut);
  QuoteV.Save(FOut);

  fprintf(stderr, "Done!\n");
  return 0;
}
Example #17
0
void TempMotifCounter::Count3TEdge3NodeStars(double delta, Counter3D& pre_counts,
                                             Counter3D& pos_counts,
                                             Counter3D& mid_counts) {
  TIntV centers;
  GetAllNodes(centers);
  pre_counts = Counter3D(2, 2, 2);
  pos_counts = Counter3D(2, 2, 2);
  mid_counts = Counter3D(2, 2, 2);
  // Get counts for each node as the center
  #pragma omp parallel for schedule(dynamic)  
  for (int c = 0; c < centers.Len(); c++) {
    // Gather all adjacent events
    int center = centers[c];
    TVec<TIntPair> ts_indices;
    TVec<StarEdgeData> events;
    TNGraph::TNodeI NI = static_graph_->GetNI(center);
    int index = 0;
    TIntV nbrs;
    GetAllNeighbors(center, nbrs);
    int nbr_index = 0;
    for (int i = 0; i < nbrs.Len(); i++) {
      int nbr = nbrs[i];
      AddStarEdgeData(ts_indices, events, index, center, nbr, nbr_index, 0);
      AddStarEdgeData(ts_indices, events, index, nbr, center, nbr_index, 1);
      nbr_index++;
    }
    ts_indices.Sort();
    TIntV timestamps;
    TVec<StarEdgeData> ordered_events;
    for (int j = 0; j < ts_indices.Len(); j++) {
      timestamps.Add(ts_indices[j].Key);
      ordered_events.Add(events[ts_indices[j].Dat]);
    }
    
    ThreeTEdgeStarCounter tesc(nbr_index);
    // dirs: outgoing --> 0, incoming --> 1
    tesc.Count(ordered_events, timestamps, delta);
    #pragma omp critical
    { // Update counts
      for (int dir1 = 0; dir1 < 2; ++dir1) {
        for (int dir2 = 0; dir2 < 2; ++dir2) {
          for (int dir3 = 0; dir3 < 2; ++dir3) {
            pre_counts(dir1, dir2, dir3) += tesc.PreCount(dir1, dir2, dir3);
            pos_counts(dir1, dir2, dir3) += tesc.PosCount(dir1, dir2, dir3);
            mid_counts(dir1, dir2, dir3) += tesc.MidCount(dir1, dir2, dir3);
          }
        }
      }
    }

    // Subtract off edge-wise counts
    for (int nbr_id = 0; nbr_id < nbrs.Len(); nbr_id++) {
      int nbr = nbrs[nbr_id];
      Counter3D edge_counts;
      Count3TEdge2Node(center, nbr, delta, edge_counts);
      #pragma omp critical
      {
        for (int dir1 = 0; dir1 < 2; ++dir1) {
          for (int dir2 = 0; dir2 < 2; ++dir2) {
            for (int dir3 = 0; dir3 < 2; ++dir3) {
              pre_counts(dir1, dir2, dir3) -= edge_counts(dir1, dir2, dir3);
              pos_counts(dir1, dir2, dir3) -= edge_counts(dir1, dir2, dir3);
              mid_counts(dir1, dir2, dir3) -= edge_counts(dir1, dir2, dir3);
            }
          }
        }
      }
    }
  }
}
/////////////////////////////////////////////////
// NIST-score
double TEvalScoreNist::Eval(const PTransCorpus& TransCorpus, const TIntV& _SentIdV) {
    // check if the corpus has translations
    IAssert(TransCorpus->IsTrans());

    // ngram counts (cliped and full)
    TIntH ClipCountNGramH, CountNGramH;
    // ngram info score
    TIntFltH NGramInfoH;
    // candidate and effective reference length
    double FullTransLen = 0.0, FullRefLen = 0.0;

    // iterate over sentences
    TIntV SentIdV = _SentIdV;
    if (SentIdV.Empty()) { TransCorpus->GetSentIdV(SentIdV); }
    const int Sents = SentIdV.Len();
    for (int SentIdN = 0; SentIdN < Sents; SentIdN++) {
        const int SentId = SentIdV[SentIdN];
        // tokenize translation
        TIntV TransWIdV; Parse(TransCorpus->GetTransStr(SentId), TransWIdV);
        TIntH TransNGramH; GetNGramH(TransWIdV, MxNGramLen, TransNGramH);
        TIntH FreeTransNGramH = TransNGramH; // number of non-matched ngrams
        // counters for getting the closest length of reference sentences
        const int TransLen = TransWIdV.Len(); int RefLenSum = 0;
        // go over reference translations and count ngram matches
        TStrV RefTransStrV = TransCorpus->GetRefTransStrV(SentId);
        // we assume that there is at least one reference translation
        IAssert(!RefTransStrV.Empty());
        for (int RefN = 0; RefN < RefTransStrV.Len(); RefN++) {
            // parse reference translation sentence
            TIntV RefWIdV; Parse(RefTransStrV[RefN], RefWIdV);
            TIntH RefNGramH; GetNGramH(RefWIdV, MxNGramLen, RefNGramH);
            // check for matches
            int TransNGramKeyId = TransNGramH.FFirstKeyId();
            while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
                const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
                const int FreeTransNGrams = FreeTransNGramH(NGramId);
                if (RefNGramH.IsKey(NGramId) && (FreeTransNGrams>0)) {
                    // ngram match and still some free ngrams left to clip
                    const int RefNGrams = RefNGramH(NGramId);
                    FreeTransNGramH(NGramId) = TInt::GetMx(0, FreeTransNGrams - RefNGrams);
                }
            }
            // check the length difference
            const int RefLen = RefWIdV.Len();
            RefLenSum += RefLen;
        }
        // count ngrams
        int TransNGramKeyId = TransNGramH.FFirstKeyId();
        while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
            // get ngram
            const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
            IAssert(NGramId != -1);
            // check if two hash tables are aligned (should be...)
            const int FreeNGramId = FreeTransNGramH.GetKey(TransNGramKeyId);
            IAssert(NGramId == FreeNGramId);
            // get ngram count and clip-count
            const int Count = TransNGramH[TransNGramKeyId];
            const int ClipCount = Count - FreeTransNGramH[TransNGramKeyId];
            // add ngram to the coprus ngram counts
            CountNGramH.AddDat(NGramId) += Count;
            ClipCountNGramH.AddDat(NGramId) += ClipCount;
        }
        // count length
        FullTransLen += double(TransLen);
        FullRefLen += double(RefLenSum) / double(RefTransStrV.Len());
    }

    // calculate ngram info scores
    int CountKeyId = CountNGramH.FFirstKeyId();
    while (CountNGramH.FNextKeyId(CountKeyId)) {
        // get the n-gram
        const int NGramId = CountNGramH.GetKey(CountKeyId);
        TIntV NGram = GetNGram(NGramId);
        // prepare counts
        if (NGram.Len() == 1) {
            // n-gram is a word
            const int WordCount = CountNGramH[CountKeyId];
            const double NGramInfoScore = TMath::Log2(FullTransLen / double(WordCount));
            NGramInfoH.AddDat(NGramId, NGramInfoScore);
        } else {
            // more then one word in the n-gram
            // get a n-gram with removed last element
            TIntV N1Gram = NGram; N1Gram.DelLast();
            const int N1GramId = NGramH.GetKeyId(N1Gram);
            // get the counts
            const int NGramCount = CountNGramH(NGramId);
            const int N1GramCount = CountNGramH(N1GramId);
            // get the score
            const double NGramInfoScore = TMath::Log2(double(N1GramCount) / double(NGramCount));
            NGramInfoH.AddDat(NGramId, NGramInfoScore);
        }
    }

    // calcualte ngram precisions
    TFltV ClipCountV(MxNGramLen); ClipCountV.PutAll(0);
    int ClipCountKeyId = ClipCountNGramH.FFirstKeyId();
    while (ClipCountNGramH.FNextKeyId(ClipCountKeyId)) {
        const int NGramId = ClipCountNGramH.GetKey(ClipCountKeyId);
        const int NGramLen = GetNGramLen(NGramId);
        const double NGramInfo = NGramInfoH(NGramId);
        IAssert(0 < NGramLen && NGramLen <= MxNGramLen);
        const int ClipCountNGram = ClipCountNGramH[ClipCountKeyId];
        ClipCountV[NGramLen-1] += double(ClipCountNGram) * NGramInfo;
    }
    TIntV CountV(MxNGramLen); CountV.PutAll(0);
    CountKeyId = CountNGramH.FFirstKeyId();
    while (CountNGramH.FNextKeyId(CountKeyId)) {
        const int NGramId = CountNGramH.GetKey(CountKeyId);
        const int NGramLen = GetNGramLen(NGramId);
        IAssert(0 < NGramLen && NGramLen <= MxNGramLen);
        CountV[NGramLen-1] += CountNGramH[CountKeyId];
    }
    TFltV PrecV(MxNGramLen, 0);
    for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) {
        const double ClipCount = ClipCountV[NGramLen];
        const int Count = CountV[NGramLen];
        const double Prec = (Count > 0) ? ClipCount / double(Count) : 0.0;
        PrecV.Add(Prec);
    }

    // calcualte brevity penalty
    const double LenFrac = double(FullTransLen)/double(FullRefLen);
    double BP = 0.0;
    if (LenFrac >= 1.0) { BP = 1.0; }
    else if (LenFrac <= 0.0) { BP = 0.0; }
    else {
        // calculate beta
        const double LenFracX = 1.5, BPX = 0.5;
        const double Beta = log(BPX) / TMath::Sqr(log(LenFracX));
        // calculate BP score
        BP = exp(Beta * TMath::Sqr(log(LenFrac)));
    }

    // calculate full NIST score
    double NistScore = 0.0; 
    for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) {
        NistScore += PrecV[NGramLen];
    }    
    NistScore *= BP;
    printf("NIST Score: %.5f\n", NistScore);
    
    // done!
    return NistScore;
}
Example #19
0
void TempMotifCounter::Count3TEdgeTriads(double delta, Counter3D& counts) {
  counts = Counter3D(2, 2, 2);

  // Get the counts on each undirected edge
  TVec< THash<TInt, TInt> > edge_counts(static_graph_->GetMxNId());
  TVec< THash<TInt, TIntV> > assignments(static_graph_->GetMxNId());
  for (TNGraph::TEdgeI it = static_graph_->BegEI();
       it < static_graph_->EndEI(); it++) {
    int src = it.GetSrcNId();
    int dst = it.GetDstNId();
    int min_node = MIN(src, dst);
    int max_node = MAX(src, dst);
    edge_counts[min_node](max_node) += temporal_data_[src](dst).Len();
    assignments[min_node](max_node) = TIntV();
  }
  
  // Assign triangles to the edge with the most events
  TIntV Us, Vs, Ws;
  GetAllStaticTriangles(Us, Vs, Ws);
  #pragma omp parallel for schedule(dynamic)
  for (int i = 0; i < Us.Len(); i++) {
    int u = Us[i];
    int v = Vs[i];
    int w = Ws[i];
    int counts_uv = edge_counts[MIN(u, v)].GetDat(MAX(u, v));
    int counts_uw = edge_counts[MIN(u, w)].GetDat(MAX(u, w));
    int counts_vw = edge_counts[MIN(v, w)].GetDat(MAX(v, w));
    if        (counts_uv >= MAX(counts_uw, counts_vw)) {
      #pragma omp critical
      {
        TIntV& assignment = assignments[MIN(u, v)].GetDat(MAX(u, v));
        assignment.Add(w);
      }
    } else if (counts_uw >= MAX(counts_uv, counts_vw)) {
      #pragma omp critical
      {
        TIntV& assignment = assignments[MIN(u, w)].GetDat(MAX(u, w));
        assignment.Add(v);      
      }
    } else if (counts_vw >= MAX(counts_uv, counts_uw)) {
      #pragma omp critical
      {
        TIntV& assignment = assignments[MIN(v, w)].GetDat(MAX(v, w));
        assignment.Add(u);              
      }
    }
  }

  TVec<TIntPair> all_edges;
  TIntV all_nodes;
  GetAllNodes(all_nodes);  
  for (int node_id = 0; node_id < all_nodes.Len(); node_id++) {
    int u = all_nodes[node_id];
    TIntV nbrs;
    GetAllNeighbors(u, nbrs);
    for (int nbr_id = 0; nbr_id < nbrs.Len(); nbr_id++) {
      int v = nbrs[nbr_id];
      if (assignments[u].IsKey(v) && assignments[u].GetDat(v).Len() > 0) {
        all_edges.Add(TIntPair(u, v));
      }
    }
  }

  // Count triangles on edges with the assigned neighbors
  #pragma omp parallel for schedule(dynamic)
  for (int edge_id = 0; edge_id < all_edges.Len(); edge_id++) {
    TIntPair edge = all_edges[edge_id];
    int u = edge.Key;
    int v = edge.Dat;
    // Continue if no assignment
    if (!assignments[u].IsKey(v)) { continue; }
    TIntV& uv_assignment = assignments[u].GetDat(v);
    // Continue if no data
    if (uv_assignment.Len() == 0) { continue; }
    // Get all events on (u, v)
    TVec<TriadEdgeData> events;
    TVec<TIntPair> ts_indices;
    int index = 0;
    int nbr_index = 0;
    // Assign indices from 0, 1, ..., num_nbrs + 2
    AddTriadEdgeData(events, ts_indices, index, u, v, nbr_index, 0, 1);
    nbr_index++;
    AddTriadEdgeData(events, ts_indices, index, v, u, nbr_index, 0, 0);
    nbr_index++;
    // Get all events on triangles assigned to (u, v)
    for (int w_id = 0; w_id < uv_assignment.Len(); w_id++) {
      int w = uv_assignment[w_id];
      AddTriadEdgeData(events, ts_indices, index, w, u, nbr_index, 0, 0);
      AddTriadEdgeData(events, ts_indices, index, w, v, nbr_index, 0, 1);
      AddTriadEdgeData(events, ts_indices, index, u, w, nbr_index, 1, 0);
      AddTriadEdgeData(events, ts_indices, index, v, w, nbr_index, 1, 1);
      nbr_index++;      
    }
    // Put events in sorted order
    ts_indices.Sort();
    TIntV timestamps(ts_indices.Len());
    TVec<TriadEdgeData> sorted_events(ts_indices.Len());
    for (int i = 0; i < ts_indices.Len(); i++) {
      timestamps[i] = ts_indices[i].Key;
      sorted_events[i] = events[ts_indices[i].Dat];
    }
    
    // Get the counts and update the counter
    ThreeTEdgeTriadCounter tetc(nbr_index, 0, 1);
    tetc.Count(sorted_events, timestamps, delta);
    #pragma omp critical
    {
      for (int dir1 = 0; dir1 < 2; dir1++) {
        for (int dir2 = 0; dir2 < 2; dir2++) {
          for (int dir3 = 0; dir3 < 2; dir3++) {        
            counts(dir1, dir2, dir3) += tetc.Counts(dir1, dir2, dir3);
          }
        }
      }
    }
  }
}
Example #20
0
double TStringKernel::KDynamic(const TIntV& s, const TIntV& t, const TFltV& lc, const double& lb) {
    const int k = lc.Len() + 1;
    int x,y,i;

    int ls = s.Len(), lt = t.Len();
    
    //TVec<TFltVV> Kd(2);
    //for (i = 0; i < 2; i++) Kd[i].Gen(ls+1, lt+1);

    // s is on X-axis and t is on Y-axis
    TVec<double *> Kd(2);
    if ((ls+1)*(lt+1) > BufN) {
        if (Buf1 != NULL) delete[] Buf1;
        if (Buf2 != NULL) delete[] Buf2;
        BufN = (ls+2)*(lt+2) + 10;
        Buf1 = new double[BufN];
        Buf2 = new double[BufN];
    }
    Kd[0] = Buf1; Kd[1] = Buf2;
    double *Kdii, *Kdi; //ii == (i-1)%2, i == i%2

    // initialize Kd for i == 0
    int MxSize = (ls+1)*(lt+1) + 10;
    for (i = 0, Kdi = Kd[0]; i < MxSize; i++) Kdi[i] = 1.0;
    //for (x = 0; x <= ls; x++) {
    //    for (y = 0; y <= lt; y++) {
    //        Kd[0](x,y) = 1.0;
    //    }
    //}

    // calculate Kd for i == 1..k-1
    double K = 0.0; 
    for (i = 1; i < k; i++) {
        Kdi = Kd[i%2]; Kdii = Kd[(i-1)%2];
        for (x = 0; x <= ls; x++) Kdi[x*(lt+1) + (i-1)] = 0.0;
        for (y = 0; y <= lt; y++) Kdi[(i-1)*(lt+1) + y] = 0.0;

        //for (x = 0; x <= ls; x++) Kd[i%2](x,i-1) = 0.0;
        //for (y = 0; y <= lt; y++) Kd[i%2](i-1,y) = 0.0;

        double Ki = 0.0;
        for (x = i; x <= ls; x++) {
            double Kdd = 0.0; int u = s[x-1];
            for (y = i; y <= lt; y++) {
                if (u == t[y-1]) {
                    Kdd = lb * (Kdd + lb*Kdii[(x-1)*(lt+1) + (y-1)]);
                    Ki += lb*lb * Kdi[(x-1)*(lt+1) + (y-1)];
                    //Kdd = lb * (Kdd + lb*Kd[(i-1)%2](x-1,y-1));
                    //Ki += lb*lb * Kd[i%2](x-1, y-1);
                } else {
                    Kdd *= lb;
                }
                Kdi[x*(lt+1) + y] = lb*Kdi[(x-1)*(lt+1) + y] + Kdd;
                //Kd[i%2](x,y) = lb*Kd[i%2](x-1, y) + Kdd;
            }
        }
        K += lc[i-1] * Ki;
    }

    return K;
}
Example #21
0
void TempMotifCounter::GetAllStaticTriangles(TIntV& Us, TIntV& Vs, TIntV& Ws) {
  Us.Clr();
  Vs.Clr();
  Ws.Clr();
  // Get degree ordering of the graph
  int max_nodes = static_graph_->GetMxNId();
  TVec<TIntPair> degrees(max_nodes);
  degrees.PutAll(TIntPair(0, 0));
  // Set the degree of a node to be the number of nodes adjacent to the node in
  // the undirected graph.
  TIntV nodes;
  GetAllNodes(nodes);
  #pragma omp parallel for schedule(dynamic)  
  for (int node_id = 0; node_id < nodes.Len(); node_id++) {
    int src = nodes[node_id];
    TIntV nbrs;
    GetAllNeighbors(src, nbrs);
    degrees[src] = TIntPair(nbrs.Len(), src);
  }
  degrees.Sort();
  TIntV order = TIntV(max_nodes);
  #pragma omp parallel for schedule(dynamic)  
  for (int i = 0; i < order.Len(); i++) {
    order[degrees[i].Dat] = i;
  }

  // Get triangles centered at a given node where that node is the smallest in
  // the degree ordering.
  #pragma omp parallel for schedule(dynamic)  
  for (int node_id = 0; node_id < nodes.Len(); node_id++) {
    int src = nodes[node_id];
    int src_pos = order[src];
    
    // Get all neighbors who come later in the ordering
    TIntV nbrs;
    GetAllNeighbors(src, nbrs);    
    TIntV neighbors_higher;
    for (int i = 0; i < nbrs.Len(); i++) {
      int nbr = nbrs[i];
      if (order[nbr] > src_pos) { neighbors_higher.Add(nbr); }
    }

    for (int ind1 = 0; ind1 < neighbors_higher.Len(); ind1++) {
      for (int ind2 = ind1 + 1; ind2 < neighbors_higher.Len(); ind2++) {
        int dst1 = neighbors_higher[ind1];
        int dst2 = neighbors_higher[ind2];
        // Check for triangle formation
        if (static_graph_->IsEdge(dst1, dst2) || static_graph_->IsEdge(dst2, dst1)) {
          #pragma omp critical
          {
            Us.Add(src);
            Vs.Add(dst1);
            Ws.Add(dst2);
          }
        }
      }
    }
  }
}
Example #22
0
void TNGramBs::ConcPass(){
  IAssert(!IsFinished());
  if (PassN==1){ // first pass
    // collect stop words and words with too low frequency
    TIntV DelWIdV; // vector for word-ids for deleting
    int WIds=WordStrToFqH.Len(); // get number of words-ids
    for (int WId=0; WId<WIds; WId++){
      TStr WordStr=WordStrToFqH.GetKey(WId); // get word string
      //if ((!SwSet.Empty())&&(SwSet->IsIn(WordStr))){
      //  WordStrToFqH[WId]=-1; // reset stop-word frequency to -1
      //} else
      if (WordStrToFqH[WId]<MnNGramFq){
        DelWIdV.Add(WId); // add infrequent word-id to delete-list
      }
    }
    // delete words
    WordStrToFqH.DelKeyIdV(DelWIdV);
    WordStrToFqH.Defrag();
  } else
  if (PassN==2){ // second pass
    int Cands=CandWIdPrToFqH.Len(); // get number of candidates
    TIntV WIdV(2); // pre-decl for frequent-candidates vector
    for (int CandId=0; CandId<Cands; CandId++){
      int CandFq=CandWIdPrToFqH[CandId]; // get candidate frequency
      if (CandFq>=MnNGramFq){ // if candidate is frequent
        const TIntPr& WIdPr=CandWIdPrToFqH.GetKey(CandId); // get word-id pair
        WIdV[0]=WIdPr.Val1; WIdV[1]=WIdPr.Val2; // assign word-id to vector
        WIdVToFqH.AddDat(WIdV, CandFq); // add frequent vector
      }
    }
    // clear candidate word-id pairs
    CandWIdPrToFqH.Clr();
  } else
  if (PassN>2){ // higher passes
    int Cands=CandWIdVToFqH.Len(); // get number of candidates
    for (int CandId=0; CandId<Cands; CandId++){
      int CandFq=CandWIdVToFqH[CandId]; // get candidate frequency
      if (CandFq>=MnNGramFq){ // if candidate is frequent
        const TIntV& CandWIdV=CandWIdVToFqH.GetKey(CandId); // get word-id vector
        WIdVToFqH.AddDat(CandWIdV, CandWIdVToFqH[CandId]); // add frequent vector
      }
    }
    // clear candidate word-id vectors
    CandWIdVToFqH.Clr();
  } else {
    Fail;
  }

  // increment pass-number
  PassN++;

  // conclude or prepare for new pass
  if (IsFinished()){
    // clear queue
    CandWIdQ.Clr();
    // reset stop-words
    int WIds=WordStrToFqH.Len(); // get number of words-ids
    for (int WId=0; WId<WIds; WId++){
      TStr WordStr=WordStrToFqH.GetKey(WId); // get word string
      if ((!SwSet.Empty())&&(SwSet->IsIn(WordStr))){
        WordStrToFqH[WId]=-1; // reset stop-word frequency to -1
      }
    }
    // reset ngrams starting with with stop-words
    for (int WIdVId=0; WIdVId<WIdVToFqH.Len(); WIdVId++){
      int FirstWId=WIdVToFqH.GetKey(WIdVId)[0]; // get first word-id
      int LastWId=WIdVToFqH.GetKey(WIdVId).Last(); // get last word-id
      if ((WordStrToFqH[FirstWId]==-1)||(WordStrToFqH[LastWId]==-1)){
        TStr NGramStr=GetWIdVStr(WIdVToFqH.GetKey(WIdVId));
        WIdVToFqH[WIdVId]=-1;
      }
    }
    // print frequent ngrams
    for (int WIdVId=0; WIdVId<WIdVToFqH.Len(); WIdVId++){
      if (WIdVToFqH.GetKey(WIdVId).Len()>1){
        //TStr NGramStr=GetWIdVStr(WIdVToFqH.GetKey(WIdVId));
        //int NGramFq=WIdVToFqH[WIdVId];
        //printf("%s: %d\n", NGramStr.CStr(), NGramFq);
      }
    }
  } else {
    // prepare new queue length
    CandWIdQ.Gen(100*PassN, PassN);
  }
}
Example #23
0
void TBPGraph::GetLNIdV(TIntV& NIdV) const {
  NIdV.Gen(GetLNodes(), 0);
  for (int N=LeftH.FFirstKeyId(); LeftH.FNextKeyId(N); ) {
    NIdV.Add(LeftH.GetKey(N)); }
}
Example #24
0
void TNEANet::GetNIdV(TIntV& NIdV) const {
  NIdV.Gen(GetNodes(), 0);
  for (int N=NodeH.FFirstKeyId(); NodeH.FNextKeyId(N); ) {
    NIdV.Add(NodeH.GetKey(N));
  }
}
Example #25
0
void TBPGraph::GetRNIdV(TIntV& NIdV) const {
  NIdV.Gen(GetRNodes(), 0);
  for (int N=RightH.FFirstKeyId(); RightH.FNextKeyId(N); ) {
    NIdV.Add(RightH.GetKey(N)); }
}
int main(int argc, char* argv[]) {
  //// what type of graph do you want to use?
  //typedef PUNGraph PGraph; // undirected graph
  typedef PNGraph PGraph;  //   directed graph
  //typedef PNEGraph PGraph;  //   directed multigraph
  //typedef TPt<TNodeNet<TInt> > PGraph;
  //typedef TPt<TNodeEdgeNet<TInt, TInt> > PGraph;

  // this code is independent of what particular graph implementation/type we use
  printf("Creating graph:\n");
  PGraph G = PGraph::TObj::New();
  for (int n = 0; n < 14; n++) {
    G->AddNode(); // if no parameter is given, node ids are 0,1,...,9
  }
  G->AddEdge(1, 4);
  printf("  Edge 1 -- 4 added\n");
  G->AddEdge(1, 3);
  printf("  Edge 1 -- 3 added\n");
  G->AddEdge(2, 5);
  printf("  Edge 2 -- 5 added\n");
  G->AddEdge(3, 2);
  printf("  Edge 3 -- 2 added\n");
  G->AddEdge(3, 5);
  printf("  Edge 3 -- 5 added\n");
  G->AddEdge(3, 10);
  printf("  Edge 3 -- 10 added\n");

  /*for (int e = 0; e < 10; e++) {
    const int NId1 = G->GetRndNId();
    const int NId2 = G->GetRndNId();
    if (G->AddEdge(NId1, NId2) != -2) {
      printf("  Edge %d -- %d added\n", NId1,  NId2); }
    else {
      printf("  Edge %d -- %d already exists\n", NId1, NId2); }
  }*/
  IAssert(G->IsOk());
  //G->Dump();
  // delete
  PGraph::TObj::TNodeI NI = G->GetNI(0);
  printf("Delete edge %d -- %d\n", NI.GetId(), NI.GetOutNId(0));
  G->DelEdge(NI.GetId(), NI.GetOutNId(0));
  const int RndNId = G->GetRndNId();
  printf("Delete node %d\n", RndNId);
  G->DelNode(RndNId);
  IAssert(G->IsOk());
  // dump the graph
  printf("Graph (%d, %d)\n", G->GetNodes(), G->GetEdges());
  for (PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
    printf("  %d: ", NI.GetId());
    for (int e = 0; e < NI.GetDeg(); e++) {
      printf(" %d", NI.GetNbrNId(e)); }
    printf("\n");
  }
  // dump subgraph
  TIntV NIdV;
  for (PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
    if (NIdV.Len() < G->GetNodes()/2) { NIdV.Add(NI.GetId()); }
  }
  PGraph SubG = TSnap::GetSubGraph(G, NIdV);
  //SubG->Dump();
  // get UNGraph
  { PUNGraph UNG = TSnap::ConvertGraph<PUNGraph>(SubG);
  UNG->Dump();
  IAssert(UNG->IsOk());
  TSnap::ConvertSubGraph<PNGraph>(G, NIdV)->Dump(); }
  // get NGraph
  { PNGraph NG = TSnap::ConvertGraph<PNGraph>(SubG);
  NG->Dump();
  IAssert(NG->IsOk());
  TSnap::ConvertSubGraph<PNGraph>(G, NIdV)->Dump(); }
  // get NEGraph
  { PNEGraph NEG = TSnap::ConvertGraph<PNEGraph>(SubG);
  NEG->Dump();
  IAssert(NEG->IsOk());
  TSnap::ConvertSubGraph<PNGraph>(G, NIdV)->Dump(); }

  TSnap::TestAnf<PUNGraph>();
  return 0;
}
Example #27
0
void ComputeMissingProperties (const TStr &Dir, const TStr &TriplesFilename)
{
  // Parse the rdf file and create the graph.
  TFIn File(TriplesFilename);
  TRDFParser DBpediaDataset(File);

  printf("Creating graph from input file...\n");
  TGraph G;
  TStrSet NodeStrs;
  TStrSet PropStrs;
  bool Parsed = TSnap::GetGraphFromRDFParser(DBpediaDataset, G, NodeStrs, PropStrs);
  if (!Parsed) {
    return;
  }

  // Store the graph and associated data
  G.Save(*TFOut::New(Dir + "graph.bin"));
  NodeStrs.Save(*TFOut::New(Dir + "nodeStrs.bin"));
  PropStrs.Save(*TFOut::New(Dir + "propStrs.bin"));

  printf("Computing objects...\n");
  // Get the objects of the graph. 
  TIntV Objects;
  // We defined the objects to be the nodes with prefix http://dbpedia.org/resource/. 
  TObjectFunctor ObjectFunctor(NodeStrs);
  TObjectUtils::GetObjects(G, ObjectFunctor, Objects);
  // Store and print the objects.
  Objects.Save(*TFOut::New(Dir + "objects.bin"));
  TObjectUtils::PrintObjects(Objects, NodeStrs, *TFOut::New(Dir + "objects.txt"));

  printf("Computing object matrix...\n");
  // Here we choose the descriptors for the objects.
  // We chose property + nbh (value) descriptors for objects
  // We could also use more complicated descriptors such as subgraphs or subnetworks.
  TSparseColMatrix ObjectMatrix1;
  TSparseColMatrix ObjectMatrix2;
  TObjectUtils::GetPropertyCount(Objects, G, ObjectMatrix1);
  TObjectUtils::GetNbhCount(Objects, G, ObjectMatrix2);
  TLAUtils::NormalizeMatrix(ObjectMatrix1);
  TLAUtils::NormalizeMatrix(ObjectMatrix2);

  TSparseColMatrix ObjectMatrix;
  TLAUtils::ConcatenateMatricesRowWise(ObjectMatrix1, ObjectMatrix2, ObjectMatrix);
  TLAUtils::NormalizeMatrix(ObjectMatrix);
  ObjectMatrix.Save(*TFOut::New(Dir + "objectMatrix.bin"));

  printf("Clustering objects...\n");
  // Partition the objects into 64 partitions (clusters).
  int K = 64;
  int NumIterations = 20;
  TIntV Assigments;
  TVec<TIntV> Clusters;
  TClusterUtils::GetClusters(ObjectMatrix, K, NumIterations, Assigments, Clusters);
  // Store the clustering data.
  Assigments.Save(*TFOut::New(Dir + "assigments.bin"));
  Clusters.Save(*TFOut::New(Dir + "clusters.bin"));
  // Print some details about the clusters.
  TClusterUtils::PrintClusterSizes(Clusters, *TFOut::New(Dir + "clusterSizes.txt"));
  TClusterUtils::PrintClusters(Clusters, Objects, NodeStrs, *TFOut::New(Dir + "clusters.txt"));

  printf("Computing similarities...\n");
  // Compute the similarity betweeen the objects.
  const int MaxNumSimilarObjects = 100;
  const int NumThreads = 10;
  TVec<TIntFltKdV> Similarities;
  TSimilarityUtils::ComputeSimilarities(ObjectMatrix, Assigments, Clusters, MaxNumSimilarObjects, NumThreads, Similarities);
  // Store the object similarities.
  Similarities.Save(*TFOut::New(Dir + "objectSimilarities.bin"));
  // Print the object similarities.
  TSimilarityUtils::PrintSimilarities(Similarities, Objects, NodeStrs, 10, *TFOut::New(Dir + "objectSimilarities.txt"));

  printf("Computing existing property matrix...\n");
  // Our goal is to compute the missing out-going properties.
  // Therefore, we create the matrix of existing out-going properties of the objects.
  TSparseColMatrix OutPropertyCountMatrix;
  TObjectUtils::GetOutPropertyCount(Objects, G, OutPropertyCountMatrix);
  TObjectUtils::PrintPropertyMatrix(OutPropertyCountMatrix, Objects, NodeStrs, PropStrs, *TFOut::New(Dir + "outPropertyCountMatrix.txt"));
  OutPropertyCountMatrix.Save(*TFOut::New(Dir + "outPropertyCountMatrix.bin"));

  printf("Computing missing properties...\n");
  // And finally, compute the missing properties.
  int MaxNumMissingProperties = 100;
  TVec<TIntFltKdV> MissingProperties;
  TPropertyUtils::GetMissingProperties(Similarities, OutPropertyCountMatrix, MaxNumMissingProperties, NumThreads, MissingProperties);
  // Store the missing properties data.
  MissingProperties.Save(*TFOut::New(Dir + "missingProperties.bin"));
  // Print missing properties.
  TPropertyUtils::PrintMissingProperties(MissingProperties, Objects, NodeStrs, PropStrs, 10, *TFOut::New(Dir + "missingProperties.txt"));
}
Example #28
0
PBowMd TBowWinnowMd::New(
 const PBowDocBs& BowDocBs, const TStr& CatNm, const double& Beta){
  // create model
  TBowWinnowMd* WinnowMd=new TBowWinnowMd(BowDocBs); PBowMd BowMd(WinnowMd);
  WinnowMd->CatNm=CatNm;
  WinnowMd->Beta=Beta;
  WinnowMd->VoteTsh=0.5;
  // prepare Winnow parameters
  const double MnExpertWgtSum=1e-15;
  // get cat-id
  int CId=BowDocBs->GetCId(CatNm);
  if (CId==-1){
    TExcept::Throw(TStr::GetStr(CatNm, "Invalid Category Name ('%s')!"));}
  // get training documents
  TIntV TrainDIdV; BowDocBs->GetAllDIdV(TrainDIdV);
  int TrainDocs=TrainDIdV.Len();
  // prepare mini-experts
  int Words=BowDocBs->GetWords();
  WinnowMd->PosExpertWgtV.Gen(Words); WinnowMd->PosExpertWgtV.PutAll(1);
  WinnowMd->NegExpertWgtV.Gen(Words); WinnowMd->NegExpertWgtV.PutAll(1);
  // winnow loop
  double PrevAcc=0; double PrevPrec=0; double PrevRec=0; double PrevF1=0;
  const double MxDiff=-0.005; const int MxWorseIters=3; int WorseIters=0;
  const int MxIters=50; int IterN=0;
  while ((IterN<MxIters)&&(WorseIters<MxWorseIters)){
    IterN++;
    int FalsePos=0; int FalseNeg=0; int TruePos=0; int TrueNeg=0;
    for (int DIdN=0; DIdN<TrainDocs; DIdN++){
      int DId=TrainDIdV[DIdN];
      bool ClassVal=BowDocBs->IsCatInDoc(DId, CId);
      double PosWgt=0; double NegWgt=0;
      double OldSum=0; double NewSum=0;
      int WIds=BowDocBs->GetDocWIds(DId);
      // change only experts of words that occur in the document
      for (int WIdN=0; WIdN<WIds; WIdN++){
        int WId=BowDocBs->GetDocWId(DId, WIdN);
        OldSum+=WinnowMd->PosExpertWgtV[WId]+WinnowMd->NegExpertWgtV[WId];
        // penalize expert giving wrong class prediction
        if (ClassVal){
          WinnowMd->NegExpertWgtV[WId]*=Beta;
        } else {
          WinnowMd->PosExpertWgtV[WId]*=Beta;
        }
        NewSum+=WinnowMd->PosExpertWgtV[WId]+WinnowMd->NegExpertWgtV[WId];
        PosWgt+=WinnowMd->PosExpertWgtV[WId];
        NegWgt+=WinnowMd->NegExpertWgtV[WId];
      }
      // normalize all experts
      if (NewSum>MnExpertWgtSum){
        for (int WIdN=0; WIdN<WIds; WIdN++){
          int WId=BowDocBs->GetDocWId(DId, WIdN);
          WinnowMd->PosExpertWgtV[WId]*=OldSum/NewSum;
          WinnowMd->NegExpertWgtV[WId]*=OldSum/NewSum;
        }
      }
      bool PredClassVal;
      if (PosWgt+NegWgt==0){PredClassVal=TBool::GetRnd();}
      else {PredClassVal=(PosWgt/(PosWgt+NegWgt))>WinnowMd->VoteTsh;}
      if (PredClassVal==ClassVal){
        if (PredClassVal){TruePos++;} else {TrueNeg++;}
      } else {
        if (PredClassVal){FalsePos++;} else {FalseNeg++;}
      }
    }
    // calculate temporary results
    if (TrainDocs==0){break;}
    double Acc=0; double Prec=0; double Rec=0; double F1=0;
    if (TrainDocs>0){
      Acc=100*(TruePos+TrueNeg)/double(TrainDocs);
      if (TruePos+FalsePos>0){
        Prec=(TruePos/double(TruePos+FalsePos));
        Rec=(TruePos/double(TruePos+FalseNeg));
        if (Prec+Rec>0){
          F1=(2*Prec*Rec/(Prec+Rec));
        }
      }
    }
    // check if the current iteration gave worse results then the previous
    if (((Acc-PrevAcc)<MxDiff)||((F1-PrevF1)<MxDiff)||(((Prec-PrevPrec)<MxDiff)&&
     ((Rec-PrevRec)<MxDiff))){WorseIters++;}
    else {WorseIters=0;}
    PrevAcc=Acc; PrevPrec=Prec; PrevRec=Rec; PrevF1=F1;
    printf("%d. Precision:%0.3f   Recall:%0.3f   F1:%0.3f   Accuracy:%0.3f%%\n",
     IterN, Prec, Rec, F1, Acc);
  }
  // return model
  return BowMd;
}
Example #29
0
/**
 * Used for benchmarking sorting by source algorithm.
 * Takes as input starting point of
 * a top cascade and outputs time taken for casacade detection. 
 * Input : Source, Dest, Start, Duration 
 * Output : Prints the time for cascade detection
 */
int main(int argc,char* argv[]) {
  TTableContext Context;
  Schema TimeS;
  TimeS.Add(TPair<TStr,TAttrType>("Source",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Dest",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Start",atInt));
  TimeS.Add(TPair<TStr,TAttrType>("Duration",atInt));
  PTable P1 = TTable::LoadSS(TimeS,"./../../../../datasets/temporal/yemen_call_201001.txt",&Context,' ');
  TIntV MapV;
  TStrV SortBy;
  SortBy.Add("Source");
  P1->Order(SortBy);
  TIntV Source; // Sorted vec of start time
  P1->ReadIntCol("Source",Source);
  for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) {
    MapV.Add(RI.GetRowIdx());
  }
  // Attribute to Int mapping
  TInt SIdx = P1->GetColIdx("Source");
  TInt DIdx = P1->GetColIdx("Dest");
  TInt StIdx = P1->GetColIdx("Start");
  TInt DuIdx = P1->GetColIdx("Duration");
  int W = atoi(argv[1]);
  int len = 0;
  // Find the starting point
  int TSource = atoi(argv[2]);
  int TDest = atoi(argv[3]);
  int TStart = atoi(argv[4]);
  int TDur = atoi(argv[5]);
  TInt RIdx;
  for (TRowIterator RI = P1->BegRI(); RI < P1->EndRI(); RI++) {
    RIdx = RI.GetRowIdx();
    int RSource = P1->GetIntValAtRowIdx(SIdx,RIdx).Val;
    int RDest = P1->GetIntValAtRowIdx(DIdx,RIdx).Val;
    int RStart = P1->GetIntValAtRowIdx(StIdx,RIdx).Val;
    int RDur = P1->GetIntValAtRowIdx(DuIdx,RIdx).Val;
    if (TSource == RSource && TDest == RDest && TStart == RStart && TDur == RDur) break;
  }
  // Start building the cascade from the start point
  clock_t st,et;
  st = clock();
  for (int i = 0; i < 1; i++) {
    THashSet<TInt> VisitedH;
    TSnapQueue<TInt> EventQ;
    EventQ.Push(RIdx);
    VisitedH.AddKey(RIdx);
    while (!EventQ.Empty()) {
      TInt CIdx = EventQ.Top();
      EventQ.Pop();
      int CDest = P1->GetIntValAtRowIdx(DIdx,CIdx).Val;
      int CStart = P1->GetIntValAtRowIdx(StIdx,CIdx).Val;
      int CDur = P1->GetIntValAtRowIdx(DuIdx,CIdx).Val;
      // In line binary search
      int val = CDest;
      int lo = 0;
      int hi = Source.Len() - 1;
      int index = -1;
      while (hi >= lo) {
        int mid = lo + (hi - lo)/2;
        if (Source.GetVal(mid) > val) { hi = mid - 1;}
        else if (Source.GetVal(mid) < val) { lo = mid + 1;}
        else { index = mid; hi = mid - 1;}
      } 
      // End of binary search
      int BIdx = index;
      for(int i = BIdx; i < Source.Len(); i++) {
        int PId = MapV.GetVal(i).Val;
        if (! VisitedH.IsKey(PId)) {
          int TSource = P1->GetIntValAtRowIdx(SIdx,PId).Val;
          int TStart = P1->GetIntValAtRowIdx(StIdx,PId).Val;
          if (TSource != CDest) {
            break;
          }
          if (TStart >= (CDur + CStart) && TStart - (CDur + CStart) <= W) {
            VisitedH.AddKey(PId);
            EventQ.Push(PId);
          }
        }
      }
    }
    len = VisitedH.Len();
  }
  et = clock();
  float diff = ((float) et - (float) st)/CLOCKS_PER_SEC;
  printf("Size %d,Time %f\n",len,diff);
  return 0;
}