double ave_path_length (PUNGraph p) {
  TVec<TInt> v;
  double tot_lengths = 0.0;
  for (TUNGraph::TNodeI n = p->BegNI(); n != p->EndNI(); n++) {
    v = v + n.GetId();
  }
  //  cerr << "vlen: " << v.Len() << endl;
  TBreathFS<PUNGraph> b(p);
  double tot_pairs = 0.0;
  while (v.Len () > 0) {
    TInt last = v[v.Len()-1];
    b.DoBfs (last, true, true);
    for (TVec<TInt>::TIter i = v.BegI(); (*i) != last; i++) {
      int length;
      length = b.GetHops (last, (*i));
      if (length == length) {
	tot_lengths += length;
	tot_pairs += 1;
      }
    }
    //    cerr << "tps: " << tot_pairs << ", last: " << last << ", beg: " << v[*(v.BegI())] << endl;
    v.Del(v.Len()-1);
  } 
  // cerr << "paths: " << tot_lengths << " " << tot_pairs << " " << (tot_lengths/tot_pairs) << endl;
  return tot_lengths / tot_pairs;
}
// I embarassingly don't know how templating works.
void QuoteGraph::CompareUsingMinHash(TVec<THash<TMd5Sig, TIntSet> >& BucketsVector) {
  THashSet<TIntPr> EdgeCache;
  int Count = 0;
  int RealCount = 0;

  Err("Beginning edge creation step...\n");
  for (int i = 0; i < BucketsVector.Len(); i++) {
    Err("Processing band signature %d of %d - %d signatures\n", i+1, BucketsVector.Len(), BucketsVector[i].Len());
    TVec<TMd5Sig> Buckets;
    BucketsVector[i].GetKeyV(Buckets);
    TVec<TMd5Sig>::TIter BucketEnd = Buckets.EndI();
    for (TVec<TMd5Sig>::TIter BucketSig = Buckets.BegI(); BucketSig < BucketEnd; BucketSig++) {
      TIntSet Bucket  = BucketsVector[i].GetDat(*BucketSig);
      Count += Bucket.Len() * (Bucket.Len() - 1) / 2;
      for (TIntSet::TIter Quote1 = Bucket.BegI(); Quote1 < Bucket.EndI(); Quote1++) {
        TIntSet::TIter Quote1Copy = Quote1;
        Quote1Copy++;
        for (TIntSet::TIter Quote2 = Quote1Copy; Quote2 < Bucket.EndI(); Quote2++) {
          if (!EdgeCache.IsKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())) && !EdgeCache.IsKey(TIntPr(Quote2.GetKey(), Quote1.GetKey()))) {
            EdgeCache.AddKey(TIntPr(Quote1.GetKey(), Quote2.GetKey()));
            EdgeCache.AddKey(TIntPr(Quote2.GetKey(), Quote1.GetKey()));
            RealCount++;
            AddEdgeIfSimilar(Quote1.GetKey(), Quote2.GetKey());
          }
        }
      }
    }
  }
  fprintf(stderr, "NUMBER OF COMPARES: %d\n", Count);
  fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount);
}
Exemple #3
0
int main() {
  TLSHash LSH(7, 7, DIM, TLSHash::EUCLIDEAN);
  LSH.Init();

  TRnd Gen;
  Gen.Randomize();

  TVec<TFltV> DataV;
  for (int i=0; i<1000000; i++) {
    TFltV Datum;
    for (int j=0; j<3; j++) {
      Datum.Add(Gen.GetUniDev()*2100);
    }
    DataV.Add(Datum);
  }
  LSH.AddV(DataV);
  
  TVec<TPair<TFltV, TFltV> > NeighborsV = LSH.GetAllCandidatePairs();
  printf("Number of Candidates: %d\n", NeighborsV.Len());

  NeighborsV = LSH.GetAllNearPairs();
  printf("Number of Close Pairs: %d\n", NeighborsV.Len());
  for (int i=0; i<NeighborsV.Len(); i++) {
    outputPoint(NeighborsV[i].GetVal1());
    printf(" ");
    outputPoint(NeighborsV[i].GetVal2());
    printf("\n");
  }
  return 0;
}
Exemple #4
0
///////////////////////////////////////////////////////////////////////////////
// Triad counting methods
void TempMotifCounter::Count3TEdgeTriadsNaive(double delta, Counter3D& counts) {
  TIntV Us, Vs, Ws;
  GetAllStaticTriangles(Us, Vs, Ws);
  counts = Counter3D(2, 2, 2);
  #pragma omp parallel for schedule(dynamic)
  for (int i = 0; i < Us.Len(); i++) {
    int u = Us[i];
    int v = Vs[i];
    int w = Ws[i];
    // Gather all edges in triangle (u, v, w)
    int uv = 0, vu = 1, uw = 2, wu = 3, vw = 4, wv = 5;
    TVec<TIntPair> combined;
    AddStarEdges(combined, u, v, uv);
    AddStarEdges(combined, v, u, vu);
    AddStarEdges(combined, u, w, uw);
    AddStarEdges(combined, w, u, wu);
    AddStarEdges(combined, v, w, vw);
    AddStarEdges(combined, w, v, wv);        
    // Get the counts for this triangle
    combined.Sort();
    ThreeTEdgeMotifCounter counter(6);
    TIntV edge_id(combined.Len());
    TIntV timestamps(combined.Len());
    for (int k = 0; k < combined.Len(); k++) {
      edge_id[k] = combined[k].Dat;
      timestamps[k] = combined[k].Key;
    }
    Counter3D local;
    counter.Count(edge_id, timestamps, delta, local);

    // Update the global counter with the various symmetries
    #pragma omp critical
    {
      // i --> j, k --> j, i --> k
      counts(0, 0, 0) += local(uv, wv, uw) + local(vu, wu, vw) + local(uw, vw, uv)
        + local(wu, vu, wv) + local(vw, uw, vu) + local(wv, uv, wu);
      // i --> j, k --> j, k --> i
      counts(0, 0, 1) += local(uv, wv, wu) + local(vu, wu, wv) + local(uw, vw, vu)
        + local(wu, vu, vw) + local(vw, uw, uv) + local(wv, uv, uw);
      // i --> j, j --> k, i --> k
      counts(0, 1, 0) += local(uv, vw, uw) + local(vu, uw, vw) + local(uw, wv, uv)
        + local(wu, uv, wv) + local(vw, wu, vu) + local(wv, vu, wu);
      // i --> j, j --> k, k --> i
      counts(0, 1, 1) += local(uv, vw, wu) + local(vu, uw, wv) + local(uw, wv, vu)
        + local(wu, uv, vw) + local(vw, wu, uv) + local(wv, vu, uw);
      // i --> j, k --> i, j --> k
      counts(1, 0, 0) += local(uv, wu, vw) + local(vu, wv, uw) + local(uw, vu, wv)
        + local(wu, vw, uv) + local(vw, uv, wu) + local(wv, uw, vu);
      // i --> j, k --> i, k --> j
      counts(1, 0, 1) += local(uv, wu, wv) + local(vu, wv, wu) + local(uw, vu, vw)
        + local(wu, vw, vu) + local(vw, uv, uw) + local(wv, uw, uv);
      // i --> j, i --> k, j --> k
      counts(1, 1, 0) += local(uv, uw, vw) + local(vu, vw, uw) + local(uw, uv, wv)
        + local(wu, wv, uv) + local(vw, vu, wu) + local(wv, wu, vu);      
      // i --> j, i --> k, k --> j
      counts(1, 1, 1) += local(uv, uw, wv) + local(vu, vw, wu) + local(uw, uv, vw)
        + local(wu, wv, vu) + local(vw, vu, uw) + local(wv, wu, uv);
    }
  }
}
Exemple #5
0
void TFfGGen::GenFFGraphs(const double& FProb, const double& BProb, const TStr& FNm) {
	const int NRuns = 10;
	const int NNodes = 10000;
	TGStat::NDiamRuns = 10;
	//const double FProb = 0.35, BProb = 0.20;  // ff1
	//const double FProb = 0.37, BProb = 0.32;  // ff2
	//const double FProb = 0.37, BProb = 0.325; // ff22
	//const double FProb = 0.37, BProb = 0.33;  // ff3
	//const double FProb = 0.37, BProb = 0.35;  // ff4
	//const double FProb = 0.38, BProb = 0.35;  // ff5
	TVec<PGStatVec> GAtTmV;
	TFfGGen FF(false, 1, FProb, BProb, 1.0, 0, 0);
	for (int r = 0; r < NRuns; r++) {
		PGStatVec GV = TGStatVec::New(tmuNodes, TGStat::AllStat());
		FF.GenGraph(NNodes, GV, true);
		for (int i = 0; i < GV->Len(); i++) {
			if (i == GAtTmV.Len()) {
				GAtTmV.Add(TGStatVec::New(tmuNodes, TGStat::AllStat()));
			}
			GAtTmV[i]->Add(GV->At(i));
		}
		IAssert(GAtTmV.Len() == GV->Len());
	}
	PGStatVec AvgStat = TGStatVec::New(tmuNodes, TGStat::AllStat());
	for (int i = 0; i < GAtTmV.Len(); i++) {
		AvgStat->Add(GAtTmV[i]->GetAvgGStat(false));
	}
	AvgStat->PlotAllVsX(gsvNodes, FNm, TStr::Fmt("Forest Fire: F:%g  B:%g (%d runs)", FProb, BProb, NRuns));
	AvgStat->Last()->PlotAll(FNm, TStr::Fmt("Forest Fire: F:%g  B:%g (%d runs)", FProb, BProb, NRuns));
}
///Generate graph using the AGM model. CProbV = vector of Pc
PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom) {
    PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1);
    printf("AGM begins\n");
    for (int i = 0; i < CmtyVV.Len(); i++) {
        TIntV& CmtyV = CmtyVV[i];
        for (int u = 0; u < CmtyV.Len(); u++) {
            if ( G->IsNode(CmtyV[u])) {
                continue;
            }
            G->AddNode(CmtyV[u]);
        }
        double Prob = CProbV[i];
        RndConnectInsideCommunity(G, CmtyV, Prob, Rnd);
    }
    if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community
        TIntSet NIDS;
        for (int c = 0; c < CmtyVV.Len(); c++) {
            for (int u = 0; u < CmtyVV[c].Len(); u++) {
                NIDS.AddKey(CmtyVV[c][u]);
            }
        }
        TIntV NIDV;
        NIDS.GetKeyV(NIDV);
        RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd);
    }
    printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges());
    G->Defrag();
    return G;
}
void QuoteGraph::CompareUsingShingles(THash<TMd5Sig, TIntSet>& Shingles) {
  int Count = 0;
  int RealCount = 0;
  TVec<TMd5Sig> ShingleKeys;
  Shingles.GetKeyV(ShingleKeys);
  THashSet<TIntPr> EdgeCache;

  for (int i = 0; i < ShingleKeys.Len(); i++) {
    if (i % 100 == 0) {
      Err("Processed %d out of %d shingles, count = %d\n", i, ShingleKeys.Len(), Count);
    }
    TIntSet Bucket;
    Shingles.IsKeyGetDat(ShingleKeys[i], Bucket);

    for (TIntSet::TIter Quote1 = Bucket.BegI(); Quote1 < Bucket.EndI(); Quote1++) {
      TIntSet::TIter Quote1Copy = Quote1;
      Quote1Copy++;
      for (TIntSet::TIter Quote2 = Quote1Copy; Quote2 < Bucket.EndI(); Quote2++) {
        if (!EdgeCache.IsKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())) && !EdgeCache.IsKey(TIntPr(Quote2.GetKey(), Quote1.GetKey()))) {
          EdgeCache.AddKey(TIntPr(Quote1.GetKey(), Quote2.GetKey()));
          EdgeCache.AddKey(TIntPr(Quote2.GetKey(), Quote1.GetKey()));
          RealCount++;
          AddEdgeIfSimilar(Quote1.GetKey(), Quote2.GetKey());
        }
      }
    }
    int Len = Bucket.Len() * (Bucket.Len() - 1) / 2;
    Count += Len;
  }
  fprintf(stderr, "NUMBER OF COMPARES: %d\n", Count);
  fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount);
}
void TStrFeatureSpace::ToStr(const TVec<TStrFSSize>& FeatureIds, TChA& ChA, char Sep) const {
	for (TStrFSSize i = 0; i < FeatureIds.Len(); i++) {
		ChA += ISpace.KeyFromOfs(Space[FeatureIds[i]]);
		if (i < FeatureIds.Len() - 1) {
			ChA += Sep;
		}
	}
}
/// save bipartite community affiliation into gexf file
void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) {
    /// Plot bipartite graph
    if (CmtyVV.Len() == 0) {
        return;
    }
    double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0;
    double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax;
    double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len();
    THash<TInt,TIntV> NIDComVH;
    TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV);

    FILE* F = fopen(OutFNm.CStr(), "wt");
    fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
    fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
    fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
    fprintf(F, "\t\t<nodes>\n");
    for (int c = 0; c < CmtyVV.Len(); c++) {
        int CID = c;
        double XPos = c * CStep + CXMin;
        TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120);
        fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID);
        fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
        fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz);
        fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
        fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
        fprintf(F, "\t\t\t</node>\n");
    }

    for (int u = 0; u < NIDV.Len(); u++) {
        int NID = NIDV[u];
        TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
        double Size = MinSz;
        double XPos = NXMin + u * NStep;
        TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
        double Alpha = 1.0;
        fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
        fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
        fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
        fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
        fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
        fprintf(F, "\t\t\t</node>\n");
    }
    fprintf(F, "\t\t</nodes>\n");
    fprintf(F, "\t\t<edges>\n");
    int EID = 0;
    for (int u = 0; u < NIDV.Len(); u++) {
        int NID = NIDV[u];
        if (NIDComVH.IsKey(NID)) {
            for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) {
                int CID = NIDComVH.GetDat(NID)[c];
                fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID);
            }
        }
    }
    fprintf(F, "\t\t</edges>\n");
    fprintf(F, "\t</graph>\n");
    fprintf(F, "</gexf>\n");
}
Exemple #10
0
int TStrUtil::CountWords(const TChA& ChA, const TStrHash<TInt>& StopWordH) {
  TChA Tmp;
  TVec<char *> WrdV;
  SplitWords(Tmp, WrdV);
  int SWordCnt = 0;
  for (int w = 0; w < WrdV.Len(); w++) {
    if (StopWordH.IsKey(WrdV[w])) { SWordCnt++; }
  }
  return WrdV.Len() - SWordCnt;
}
Exemple #11
0
///////////////////////////////////////////////////////////////////////
// BagOfWords-Column-Matrix
TBowMatrix::TBowMatrix(const TVec<PBowSpV>& BowSpV): TMatrix() {
    RowN = 0;
    ColSpVV.Gen(BowSpV.Len(), 0);
    for (int i = 0; i < BowSpV.Len(); i++) {
        ColSpVV.Add(BowSpV[i]);
        if (BowSpV[i]->Len() > 0) {
            RowN = TInt::GetMx(RowN, BowSpV[i]->GetWId(BowSpV[i]->GetWIds()-1)+1);
        }
    }
}
Exemple #12
0
void TIndex::TQmGixSumMerger<TQmGixItem>::Intrs(TVec<TQmGixItem>& MainV, const TVec<TQmGixItem>& JoinV) const {
    TVec<TQmGixItem> ResV; int ValN1 = 0; int ValN2 = 0;
    while ((ValN1 < MainV.Len()) && (ValN2 < JoinV.Len())) {
        const TQmGixItem& Val1 = MainV.GetVal(ValN1);
        const TQmGixItem& Val2 = JoinV.GetVal(ValN2);
        if (Val1 < Val2) { ValN1++; }
        else if (Val1 > Val2) { ValN2++; }
        else { ResV.Add(TQmGixItem(Val1.Key, Val1.Dat + Val2.Dat)); ValN1++; ValN2++; }
    }
    MainV = ResV;
}
Exemple #13
0
void TempMotifCounter::Count3TEdge3NodeStarsNaive(
        double delta, Counter3D& pre_counts, Counter3D& pos_counts,
        Counter3D& mid_counts) {
  TIntV centers;
  GetAllNodes(centers);
  pre_counts = Counter3D(2, 2, 2);
  pos_counts = Counter3D(2, 2, 2);
  mid_counts = Counter3D(2, 2, 2);
  // Get counts for each node as the center
  #pragma omp parallel for schedule(dynamic)
  for (int c = 0; c < centers.Len(); c++) {
    // Gather all adjacent events
    int center = centers[c];
    TIntV nbrs;
    GetAllNeighbors(center, nbrs);
    for (int i = 0; i < nbrs.Len(); i++) {
      for (int j = i + 1; j < nbrs.Len(); j++) {
        int nbr1 = nbrs[i];
        int nbr2 = nbrs[j];
        TVec<TIntPair> combined;
        AddStarEdges(combined, center, nbr1, 0);
        AddStarEdges(combined, nbr1, center, 1);
        AddStarEdges(combined, center, nbr2, 2);
        AddStarEdges(combined, nbr2, center, 3);
        combined.Sort();
        ThreeTEdgeMotifCounter counter(4);
        TIntV edge_id(combined.Len());
        TIntV timestamps(combined.Len());
        for (int k = 0; k < combined.Len(); k++) {
          edge_id[k] = combined[k].Dat;
          timestamps[k] = combined[k].Key;
        }
        Counter3D local;
        counter.Count(edge_id, timestamps, delta, local);

        #pragma omp critical
        {  // Update with local counts
          for (int dir1 = 0; dir1 < 2; ++dir1) {
            for (int dir2 = 0; dir2 < 2; ++dir2) {
              for (int dir3 = 0; dir3 < 2; ++dir3) {
                pre_counts(dir1, dir2, dir3) +=
                  local(dir1, dir2, dir3 + 2) + local(dir1 + 2, dir2 + 2, dir3);
                pos_counts(dir1, dir2, dir3) +=
                  local(dir1, dir2 + 2, dir3 + 2) + local(dir1 + 2, dir2, dir3);
                mid_counts(dir1, dir2, dir3) +=
                  local(dir1, dir2 + 2, dir3) + local(dir1 + 2, dir2, dir3 + 2);
              }
            }
          }
        }
      }
    }
  }
}
Exemple #14
0
void TBTreeIndex<TVal>::SearchRange(const TPair<TVal, TVal>& RangeMinMax, TUInt64V& RecIdV) const {

    TVec<TTreeVal> ResValRecIdV;
    // execute query
    BTree.RangeQuery(TTreeVal(RangeMinMax.Val1, 0), TTreeVal(RangeMinMax.Val2, TUInt64::Mx), ResValRecIdV);
    // parse out record ids
    RecIdV.Gen(ResValRecIdV.Len(), 0);
    for (int ResN = 0; ResN < ResValRecIdV.Len(); ResN++) {
        RecIdV.Add(ResValRecIdV[ResN].Val2);
    }
}
Exemple #15
0
int TNEANetMP::AddEdge(const int& SrcNId, const int& DstNId, int EId) {
    int i;

    if (EId == -1) {
        EId = MxEId;
        MxEId++;
    }
    else {
        MxEId = TMath::Mx(EId+1, MxEId());
    }
    IAssertR(!IsEdge(EId), TStr::Fmt("EdgeId %d already exists", EId));
    IAssertR(IsNode(SrcNId) && IsNode(DstNId), TStr::Fmt("%d or %d not a node.", SrcNId, DstNId).CStr());
    EdgeH.AddDat(EId, TEdge(EId, SrcNId, DstNId));
    GetNode(SrcNId).OutEIdV.AddSorted(EId);
    GetNode(DstNId).InEIdV.AddSorted(EId);

    // update attribute columns
    for (i = 0; i < VecOfIntVecsE.Len(); i++) {
        TVec<TInt>& IntVec = VecOfIntVecsE[i];
        IntVec.Ins(EdgeH.GetKeyId(EId), TInt::Mn);
    }
    TVec<TStr> DefIntVec = TVec<TStr>();
    IntDefaultsE.GetKeyV(DefIntVec);
    for (i = 0; i < DefIntVec.Len(); i++) {
        TStr attr = DefIntVec[i];
        TVec<TInt>& IntVec = VecOfIntVecsE[KeyToIndexTypeE.GetDat(DefIntVec[i]).Val2];
        IntVec[EdgeH.GetKeyId(EId)] = GetIntAttrDefaultE(attr);
    }

    for (i = 0; i < VecOfStrVecsE.Len(); i++) {
        TVec<TStr>& StrVec = VecOfStrVecsE[i];
        StrVec.Ins(EdgeH.GetKeyId(EId), TStr::GetNullStr());
    }
    TVec<TStr> DefStrVec = TVec<TStr>();
    IntDefaultsE.GetKeyV(DefStrVec);
    for (i = 0; i < DefStrVec.Len(); i++) {
        TStr attr = DefStrVec[i];
        TVec<TStr>& StrVec = VecOfStrVecsE[KeyToIndexTypeE.GetDat(DefStrVec[i]).Val2];
        StrVec[EdgeH.GetKeyId(EId)] = GetStrAttrDefaultE(attr);
    }

    for (i = 0; i < VecOfFltVecsE.Len(); i++) {
        TVec<TFlt>& FltVec = VecOfFltVecsE[i];
        FltVec.Ins(EdgeH.GetKeyId(EId), TFlt::Mn);
    }
    TVec<TStr> DefFltVec = TVec<TStr>();
    FltDefaultsE.GetKeyV(DefFltVec);
    for (i = 0; i < DefFltVec.Len(); i++) {
        TStr attr = DefFltVec[i];
        TVec<TFlt>& FltVec = VecOfFltVecsE[KeyToIndexTypeE.GetDat(DefFltVec[i]).Val2];
        FltVec[NodeH.GetKeyId(EId)] = GetFltAttrDefaultE(attr);
    }
    return EId;
}
Exemple #16
0
    // returns a set of clusters such that separate types containted in the input set of clusters
    static TVec<TCluster> ExpandClusters(const TVec<TCluster>& clusters, const THash<TInt,TVec<TInt> >& quotePages, THash<TInt,TWebpage> pageHash){
      TVec<TCluster> types = TVec<TCluster>::TVec<TCluster>();
      TVec<TCluster> curtypes;
      printf("[ExpandClustres]\texpanding clusters..\n");
      for (int i = 0; i < clusters.Len(); i++) {
	curtypes = TCluster::GetSingleTypeClusters(clusters[i], quotePages, pageHash);
	if (curtypes.Len() != clusters[i].NoTypes()) printf("clusters[%d] doesn't match (%d/%d) \n",i, curtypes.Len(), clusters[i].NoTypes());
	for (int j = 0; j < curtypes.Len(); j++) types.Add(curtypes[j]);
     }
     printf("[ExpandClustrs]\texpanded %d clusters into %d types\n",clusters.Len(),types.Len());
     return types;
      }
Exemple #17
0
void LSH::MinHash(THash<TMd5Sig, TShingleIdSet>& ShingleToQuoteIds,
    TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) {
  TRnd RandomGenerator; // TODO: make this "more random" by incorporating time
  for (int i = 0; i < NumBands; ++i) {
    THash<TShingleId, TIntV> Inverted; // (QuoteID, QuoteSignatureForBand)
    THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs)
    for (int j = 0; j < BandSize; ++j) {
      // Create new signature
      TVec < TMd5Sig > Signature;
      ShingleToQuoteIds.GetKeyV(Signature);
      Signature.Shuffle(RandomGenerator);

      // Place in bucket - not very efficient
      int SigLen = Signature.Len();
      for (int k = 0; k < SigLen; ++k) {
        TShingleIdSet CurSet = ShingleToQuoteIds.GetDat(Signature[k]);
        for (TShingleIdSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) {
          TShingleId Key = l.GetKey();
          if (Inverted.IsKey(Key)) {
            TIntV CurSignature = Inverted.GetDat(Key);
            if (CurSignature.Len() <= j) {
              CurSignature.Add(k);
              Inverted.AddDat(Key, CurSignature);
            }
          } else {
            TIntV NewSignature;
            NewSignature.Add(k);
            Inverted.AddDat(Key, NewSignature);
          }
        }
      }
    }

    TVec<TShingleId> InvertedKeys;
    Inverted.GetKeyV(InvertedKeys);
    TInt InvertedLen = InvertedKeys.Len();
    for (int k = 0; k < InvertedLen; ++k) {
      TIntSet Bucket;
      TIntV Signature = Inverted.GetDat(InvertedKeys[k]);
      if (BandBuckets.IsKey(Signature)) {
        Bucket = BandBuckets.GetDat(Signature);
      }
      Bucket.AddKey(InvertedKeys[k].Val1);
      BandBuckets.AddDat(Signature, Bucket);
    }

    SignatureBandBuckets.Add(BandBuckets);
    Err("%d out of %d band signatures computed\n", i + 1, NumBands);
  }
  Err("Minhash step complete!\n");
}
Exemple #18
0
uint64 TGraphEnumUtils::GetMinAndGraphIds(const TVec<PNGraph> &isoG, TVec<uint64> &graphIds) {
	IAssert(isoG.Len() > 0);
	//
	uint64 minGraphId = GraphId(isoG[0]);
	graphIds.Add(minGraphId);
	//
	for(int i=1; i<isoG.Len(); i++) {
		uint64 curGraphId = GraphId(isoG[i]);
		if(minGraphId > curGraphId) minGraphId=curGraphId;
		//
		graphIds.Add(curGraphId);
	}
	//
	return minGraphId;
}
int main(int argc, char *argv[]) {
  TStr BaseString = "/lfs/1/tmp/curis/week/QBDB.bin";
  TFIn BaseFile(BaseString);
  TQuoteBase *QB = new TQuoteBase;
  TDocBase *DB = new TDocBase;
  QB->Load(BaseFile);
  DB->Load(BaseFile);

  TIntV QuoteIds;
  QB->GetAllQuoteIds(QuoteIds);

  int NumQuotes = QuoteIds.Len();
  THash<TInt, TStrSet> PeakCounts;
  for (int i = 0; i < NumQuotes; i++) {
    TQuote CurQuote;
    if (QB->GetQuote(QuoteIds[i], CurQuote)) {
      TVec<TSecTm> Peaks;
      CurQuote.GetPeaks(DB, Peaks);
      TStr QuoteString;
      CurQuote.GetParsedContentString(QuoteString);
      TStrSet StringSet;
      if (PeakCounts.IsKey(Peaks.Len())) {
        StringSet = PeakCounts.GetDat(Peaks.Len());
      }
      StringSet.AddKey(QuoteString);
      PeakCounts.AddDat(Peaks.Len(), StringSet);
    }
  }

  TIntV PeakCountKeys;
  PeakCounts.GetKeyV(PeakCountKeys);
  PeakCountKeys.Sort(true);
  for (int i = 0; i < PeakCountKeys.Len(); i++) {
    TStrSet CurSet = PeakCounts.GetDat(PeakCountKeys[i]);
    if (CurSet.Len() > 0) {
      printf("QUOTES WITH %d PEAKS\n", PeakCountKeys[i].Val);
      printf("#########################################\n");
      THashSet<TStr> StringSet = PeakCounts.GetDat(PeakCountKeys[i]);
      for (THashSet<TStr>::TIter l = StringSet.BegI(); l < StringSet.EndI(); l++) {
        printf("%s\n", l.GetKey().CStr());
      }
      printf("\n");
    }
  }
  delete QB;
  delete DB;
  return 0;
}
Exemple #20
0
// parse:
//   10:16, 16 Sep 2004
//   10:20, 2004 Sep 16
//   2005-07-07 20:30:35
//   23:24:07, 2005-07-10
//   9 July 2005 14:38
//   21:16, July 9, 2005
//   06:02, 10 July 2005
bool TStrUtil::GetTmFromStr(const char* TmStr, TSecTm& Tm) {
  static TStrV MonthV1, MonthV2;
  if (MonthV1.Empty()) {
    TStr("january|february|march|april|may|june|july|august|september|october|november|december").SplitOnAllCh('|', MonthV1);
    TStr("jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec").SplitOnAllCh('|', MonthV2);
  }
  TChA Tmp(TmStr);
  Tmp.ToLc();
  TVec<char *> WrdV;
  const char* End = Tmp.CStr()+Tmp.Len();
  int Col = -1, Cols=0;
  for (char *b = Tmp.CStr(); b <End; ) {
    WrdV.Add(b);
    while (*b && ! (*b==' ' || *b=='-' || *b==':' || *b==',')) { b++; }
    if (*b==':') { if(Col==-1) { Col=WrdV.Len(); } Cols++;  }
    *b=0; b++;
    while (*b && (*b==' ' || *b=='-' || *b==':' || *b==',')) { b++; }
  }
  if (Cols == 2) {
    if (Col+1 >= WrdV.Len()) { return false; }
    WrdV.Del(Col+1);
  }
  if (Col<1) { return false; }
  const int Hr = atoi(WrdV[Col-1]);
  const int Min = atoi(WrdV[Col]);
  WrdV.Del(Col);  WrdV.Del(Col-1);
  if (WrdV.Len() != 3) { return false; }
  int y=0,m=1,d=2, Mon=-1;
  if (TCh::IsAlpha(WrdV[0][0])) {
    y=2; m=0; d=1;
  } else if (TCh::IsAlpha(WrdV[1][0])) {
    y=2; m=1; d=0;
  } else if (TCh::IsAlpha(WrdV[2][0])) {
    y=0; m=2; d=1;
  } else {
    y=0; m=1; d=2;
    Mon = atoi(WrdV[m]);
  }
  int Day = atoi(WrdV[d]);
  if (Mon <= 0) { Mon = MonthV1.SearchForw(WrdV[m])+1; }
  if (Mon <= 0) { Mon = MonthV2.SearchForw(WrdV[m])+1; }
  if (Mon == 0) { return false; }
  int Year = atoi(WrdV[y]);
  if (Day > Year) { ::Swap(Day, Year); }
  //printf("%d-%02d-%02d  %02d:%02d\n", Year, Mon, Day, Hr, Min);
  Tm = TSecTm(Year, Mon, Day, Hr, Min, 0);
  return true;
}
Exemple #21
0
void TAGM::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV>& CmtyVV) {
	THash<TInt,TIntV> CmtyVH;
	for(int i=0;i<CmtyVV.Len();i++) {
		CmtyVH.AddDat(i,CmtyVV[i]);
	}
	GetNodeMembership(NIDComVH,CmtyVH);
}
Exemple #22
0
void StarTriad3TEdgeCounter<EdgeData>::Count(const TVec<EdgeData>& events,
                                             const TIntV& timestamps, double delta) {
  InitializeCounters();
  if (events.Len() != timestamps.Len()) {
    TExcept::Throw("Number of events must match number of timestamps.");
  }
  int start = 0;
  int end = 0;
  int L = timestamps.Len();
  for (int j = 0; j < L; j++) {
    double tj = double(timestamps[j]);
    // Adjust counts in pre-window [tj - delta, tj)
    while (start < L && double(timestamps[start]) < tj - delta) {
      PopPre(events[start]);
      start++;
    }
    // Adjust counts in post-window (tj, tj + delta]
    while (end < L && double(timestamps[end]) <= tj + delta) {
      PushPos(events[end]);
      end++;
    }
    // Move current event off post-window
    PopPos(events[j]);
    ProcessCurrent(events[j]);
    PushPre(events[j]);
  }
}
Exemple #23
0
void TBackupProfile::Restore(const TStr& BackupFolderName, const ERestoringMode& RestoringMode, const bool& ReportP) const
{
    for (int N = 0; N < LogV.Len(); N++) {
        // find the folder that matches the BackupFolderName
        if (LogV[N].GetFolderName() == BackupFolderName) {
            const TVec<TBackupFolderInfo> Folders = GetFolders();
            for (int N = 0; N < Folders.Len(); N++) {
                const TStr TargetFolder = Folders[N].Folder;
                TStrV PartV; TDir::SplitPath(TargetFolder, PartV);
                const TStr LastFolderNamePart = PartV[PartV.Len() - 1];

                // do we want to first remove any existing data in the target folder?
                if (RestoringMode == RemoveExistingFirst)
                    TDir::DelNonEmptyDir(TargetFolder);

                // copy data from backup to the destination folder
                const TStr SourceFolder = Destination + ProfileName + "/" + BackupFolderName + "/" + LastFolderNamePart;
                if (ReportP)
                    TNotify::StdNotify->OnStatusFmt("Copying folder: %s", SourceFolder.CStr());
                if (TDir::Exists(SourceFolder))
                    TDir::CopyDir(SourceFolder, TargetFolder, RestoringMode == OverwriteIfExisting);
                else
                    TNotify::StdNotify->OnStatusFmt("WARNING: Unable to create a restore of the folder %s. The folder does not exist.", SourceFolder.CStr());
            }
        }
    }
}
void LogOutput::PrintClusterInformationToText(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB, TIntV& ClusterIds, TSecTm PresentTime) {
  if (!ShouldLog) return;

  TStr CurDateString = PresentTime.GetDtYmdStr();
  TStr TopFileName = Directory + "/text/top/topclusters_" + CurDateString + ".txt";
  FILE *T = fopen(TopFileName.CStr(), "w");

  for (int i = 0; i < ClusterIds.Len(); i++) {
    TCluster C;
    CB->GetCluster(ClusterIds[i], C);
    TStr CRepQuote;
    C.GetRepresentativeQuoteString(CRepQuote, QB);

    TIntV CQuoteIds;
	TVec<TUInt> CUniqueSources;
    C.GetQuoteIds(CQuoteIds);
    TCluster::GetUniqueSources(CUniqueSources, CQuoteIds, QB);

    fprintf(T, "%d\t%d\t%s\n", CUniqueSources.Len(), CQuoteIds.Len(), CRepQuote.CStr());

    for (int j = 0; j < CQuoteIds.Len(); j++) {
      TQuote Q;
      if (QB->GetQuote(CQuoteIds[j], Q)) {
        TStr QuoteStr;
        Q.GetContentString(QuoteStr);
        fprintf(T, "\t%d\t%s\n", Q.GetNumSources().Val, QuoteStr.CStr());
      }
    }
  }
  fclose(T);
}
void TSQLCommand::Bind(const TVec<PSQLParameter> &ParamV) const
{
	for(int i = 0; i < ParamV.Len(); i++)
	{
		Bind(ParamV[i], i);
	}
}
Exemple #26
0
TVec<TPair<TFltV, TFltV> > TLSHash::GetAllCandidatePairs() {
  THashSet<TPair<TInt, TInt> > CandidateIdPairs;
  for (int i=0; i<Bands; i++) {
    TVec<TIntV> BucketVV;
    SigBucketVHV[i].GetDatV(BucketVV);
    for (int j=0; j<BucketVV.Len(); j++) {
      TIntV BucketV = BucketVV[j];

      for (int k=0; k<BucketV.Len(); k++) {
        for (int l=k+1; l<BucketV.Len(); l++) {
          int First = BucketV[k], Second = BucketV[l];
          if (First > Second) { 
            int Temp = First;
            First = Second;
            Second = Temp;
          }
          CandidateIdPairs.AddKey(TPair<TInt, TInt> (First, Second));
        }
      }
    }
  }

  TVec<TPair<TFltV, TFltV> > CandidatePairs;
  int Ind = CandidateIdPairs.FFirstKeyId();
  while (CandidateIdPairs.FNextKeyId(Ind)) {
    TPair<TInt, TInt> IdPair = CandidateIdPairs[Ind];
    TPair<TFltV, TFltV> Pair(DataV[IdPair.GetVal1()], DataV[IdPair.GetVal2()]);
    CandidatePairs.Add(Pair);
  }
  return CandidatePairs;
}
void TestEigSvd() {
  PNGraph G = TSnap::GenRndGnm<PNGraph>(100,1000, true);
  PUNGraph UG = TSnap::ConvertGraph<PUNGraph>(G);

  TSnap::SaveMatlabSparseMtx(G, "test1.mtx");
  TSnap::SaveMatlabSparseMtx(UG, "test2.mtx");

  TFltV SngValV; TVec<TFltV> LeftV, RightV;
  TSnap::GetSngVec(G, 20, SngValV, LeftV, RightV);
  printf("Singular Values:\n");
  for (int i =0; i < SngValV.Len(); i++) {
    printf("%d\t%f\n", i, SngValV[i]()); }
  printf("LEFT Singular Vectors:\n");
  for (int i=0; i < LeftV[0].Len(); i++) {
    printf("%d\t%f\t%f\t%f\t%f\t%f\n", i, LeftV[0][i](), LeftV[1][i](), LeftV[2][i](), LeftV[3][i](), LeftV[4][i]());
  }
  printf("RIGHT Singular Vectors:\n");
  for (int i=0; i < RightV[0].Len(); i++) {
    printf("%d\t%f\t%f\t%f\t%f\t%f\n", i, RightV[0][i](), RightV[1][i](), RightV[2][i](), RightV[3][i](), RightV[4][i]());
  }
  TFltV EigValV;
  TVec<TFltV> EigV;
  TSnap::GetEigVec(UG, 20, EigValV, EigV);
  printf("Eigen Values:\n");
  for (int i =0; i < EigValV.Len(); i++) {
    printf("%d\t%f\n", i, EigValV[i]()); }
  printf("Eigen Vectors %d:\n", EigV.Len());
  for (int i =0; i < EigV[0].Len(); i++) {
    printf("%d\t%f\t%f\t%f\t%f\t%f\n", i, EigV[0][i](), EigV[1][i](), EigV[2][i](), EigV[3][i](), EigV[4][i]());
  }

}
Exemple #28
0
bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV, int& IsoPermId) {
  const TIntPrV& EdgeV1 = Key1.EdgeV;
  const TIntPrV& EdgeV2 = Key2.EdgeV;
  //for (int i = 0; i < EdgeV1.Len(); i++) printf("\t%d - %d\n", EdgeV1[i].Val1, EdgeV1[i].Val2);  printf("\n");
  //for (int i = 0; i < EdgeV2.Len(); i++) printf("\t%d - %d\n", EdgeV2[i].Val1, EdgeV2[i].Val2);
  if (Key1.Nodes != Key2.Nodes || EdgeV1.Len() != EdgeV2.Len()) return false;
  const int Nodes = NodeIdMapV[0].Len();
  // fast adjecency matrix
  TIntV AdjMtx2(Nodes*Nodes);
  for (int i = 0; i < EdgeV2.Len(); i++) {
    AdjMtx2[EdgeV2[i].Val1*Nodes + EdgeV2[i].Val2] = 1;
  }
  for (int perm = 0; perm < NodeIdMapV.Len(); perm++) {
    const TIntV& NodeIdMap = NodeIdMapV[perm];
    bool IsIso = true;
    for (int e1 = 0; e1 < EdgeV1.Len(); e1++) {
      const int NId1 = NodeIdMap[EdgeV1[e1].Val1];
      const int NId2 = NodeIdMap[EdgeV1[e1].Val2];
      if (AdjMtx2[NId1*Nodes + NId2] != 1) {
        IsIso = false;  break; }
    }
    if (IsIso) {
      IsoPermId = perm;
      return true; }
  }
  IsoPermId = -1;
  return false;
}
Exemple #29
0
/// extract community affiliation from F_uc
void TAGMFast::GetCmtyVV(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) {
  CmtyVV.Gen(NumComs, 0);
  TIntFltH CIDSumFH(NumComs);
  for (int c = 0; c < SumFV.Len(); c++) {
    CIDSumFH.AddDat(c, SumFV[c]);
  }
  CIDSumFH.SortByDat(false);
  for (int c = 0; c < NumComs; c++) {
    int CID = CIDSumFH.GetKey(c);
    TIntFltH NIDFucH(F.Len() / 10);
    TIntV CmtyV;
    IAssert(SumFV[CID] == CIDSumFH.GetDat(CID));
    if (SumFV[CID] < Thres) { continue; }
    for (int u = 0; u < F.Len(); u++) {
      int NID = u;
      if (! NodesOk) { NID = NIDV[u]; }
      if (GetCom(u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(u, CID)); }
    }
    NIDFucH.SortByDat(false);
    NIDFucH.GetKeyV(CmtyV);
    if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
  }
  if ( NumComs != CmtyVV.Len()) {
    printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
  }
}
Exemple #30
0
void TIndex::TQmGixSumItemHandler<TQmGixItem>::Merge(TVec<TQmGixItem>& ItemV, const bool& IsLocal) const {
    if (ItemV.Empty()) { return; } // nothing to do in this case
    if (!ItemV.IsSorted()) { ItemV.Sort(); } // sort if not yet sorted
    // merge counts
    int LastItemN = 0; bool ZeroP = false;
    for (int ItemN = 1; ItemN < ItemV.Len(); ItemN++) {
        if (ItemV[ItemN].Key != ItemV[ItemN - 1].Key) {
            LastItemN++;
            ItemV[LastItemN] = ItemV[ItemN];
        } else {
            ItemV[LastItemN].Dat += ItemV[ItemN].Dat;
        }
        ZeroP = ZeroP || (ItemV[LastItemN].Dat <= 0);
    }
    // remove items with zero count
    if (ZeroP) {
        int LastIndN = 0;
        for (int ItemN = 0; ItemN < LastItemN + 1; ItemN++) {
            const TQmGixItem& Item = ItemV[ItemN];
            if (Item.Dat.Val > 0 || (IsLocal && Item.Dat.Val < 0)) {
                ItemV[LastIndN] = Item;
                LastIndN++;
            } else if (Item.Dat.Val < 0) {
                TEnv::Error->OnStatusFmt("Warning: negative item count %d:%d!", (int)Item.Key, (int)Item.Dat);
            }
        }
        ItemV.Reserve(ItemV.Reserved(), LastIndN);
    } else {
        ItemV.Reserve(ItemV.Reserved(), LastItemN + 1);
    }
}