/// Appends an integer column named CountColName whose entry for every row is the
/// number of rows that share that row's value in column Col.
/// Calls TExcept::Throw when Col is not a known column.
void TTable::Count(TStr CountColName, TStr Col){
  if(!ColTypeMap.IsKey(Col)){
    TExcept::Throw("no such column " + Col);
  }
  TIntV Counts(NumRows);
  const TYPE ColType = GetColType(Col);
  if(ColType == INT){
    // the number of groups is not known up front, so the hash gets no size hint
    THash<TInt,TIntV> Groups;
    TIntV& Vals = IntCols[GetColIdx(Col)];
    GroupByIntCol(Col, Groups, TIntV(0), true);
    for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++){
      const int RowIdx = RowI.GetRowIdx();
      Counts[RowIdx] = Groups.GetDat(Vals[RowIdx]).Len();
    }
  } else if(ColType == FLT){
    THash<TFlt,TIntV> Groups;
    TFltV& Vals = FltCols[GetColIdx(Col)];
    GroupByFltCol(Col, Groups, TIntV(0), true);
    for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++){
      const int RowIdx = RowI.GetRowIdx();
      Counts[RowIdx] = Groups.GetDat(Vals[RowIdx]).Len();
    }
  } else if(ColType == STR){
    THash<TStr,TIntV> Groups;
    GroupByStrCol(Col, Groups, TIntV(0), true);
    for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++){
      const int RowIdx = RowI.GetRowIdx();
      Counts[RowIdx] = Groups.GetDat(GetStrVal(Col, RowIdx)).Len();
    }
  }
  // register the new count column as the last integer column
  IntCols.Add(Counts);
  AddSchemaCol(CountColName, INT);
  ColTypeMap.AddDat(CountColName, TPair<TYPE,TInt>(INT, IntCols.Len()-1));
}
// IN-OUT edges are swapped (so that the prog runs faster) // Send message via IN edge proportional to the OUT edge weight void TWgtNet::ReinforceEdges(const int& NIters) { THash<TInt, TFlt> OutWgtSumH; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { double wgt = 0; for (int e = 0; e < NI.GetOutDeg(); e++) { wgt += NI.GetOutEDat(e); } OutWgtSumH.AddDat(NI.GetId(), wgt); } printf("Reinforcing edges for %d iterations\n", NIters); // iterate TExeTm ExeTm; for (int iter = 0; iter < NIters; iter++) { for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { const double X = TInt::Rnd.GetUniDev() * OutWgtSumH.GetDat(NI.GetId()); double x = 0; int e = 0; for ( ; x + NI.GetOutEDat(e) < X; e++) { x += NI.GetOutEDat(e); } IAssert(IsEdge(NI.GetOutNId(e), NI.GetId())); GetEDat(NI.GetOutNId(e), NI.GetId()) += 1; // reinforce the edge OutWgtSumH.GetDat(NI.GetOutNId(e)) += 1; } if (iter % (NIters/100) == 0) { printf("\r%d [%s]", iter, ExeTm.GetStr()); } } printf(" done.\n"); }
/// save bipartite community affiliation into gexf file void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) { /// Plot bipartite graph if (CmtyVV.Len() == 0) { return; } double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0; double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax; double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len(); THash<TInt,TIntV> NIDComVH; TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV); FILE* F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n"); fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n"); fprintf(F, "\t\t<nodes>\n"); for (int c = 0; c < CmtyVV.Len(); c++) { int CID = c; double XPos = c * CStep + CXMin; TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120); fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID); fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val); fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz); fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax); fprintf(F, "\t\t\t</node>\n"); } for (int u = 0; u < NIDV.Len(); u++) { int NID = NIDV[u]; TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): ""; double Size = MinSz; double XPos = NXMin + u * NStep; TIntTr Color = NIDColorH.IsKey(NID)? 
NIDColorH.GetDat(NID) : TIntTr(120, 120, 120); double Alpha = 1.0; fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr()); fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha); fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size); fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin); fprintf(F, "\t\t\t</node>\n"); } fprintf(F, "\t\t</nodes>\n"); fprintf(F, "\t\t<edges>\n"); int EID = 0; for (int u = 0; u < NIDV.Len(); u++) { int NID = NIDV[u]; if (NIDComVH.IsKey(NID)) { for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) { int CID = NIDComVH.GetDat(NID)[c]; fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID); } } } fprintf(F, "\t\t</edges>\n"); fprintf(F, "\t</graph>\n"); fprintf(F, "</gexf>\n"); }
/// Pops entries off MxQHeap until a usable one is found and returns it.
/// An entry is skipped when either of its two communities (Val2, Val3) is no longer
/// a key of CmtyQH, or when its value (Val1) differs from the GetMxQ() of both
/// communities. Returns (-1, -1, -1) once the heap is exhausted.
TFltIntIntTr FindMxQEdge() {
  while (! MxQHeap.Empty()) {
    const TFltIntIntTr Cand = MxQHeap.PopHeap();
    const bool BothPresent = CmtyQH.IsKey(Cand.Val2) && CmtyQH.IsKey(Cand.Val3);
    if (! BothPresent) {
      continue;
    }
    const bool Outdated = Cand.Val1!=CmtyQH.GetDat(Cand.Val2).GetMxQ() && Cand.Val1!=CmtyQH.GetDat(Cand.Val3).GetMxQ();
    if (Outdated) {
      continue;
    }
    return Cand;
  }
  return TFltIntIntTr(-1, -1, -1);
}
// YES I COPIED AND PASTED CODE my section leader would be so ashamed :D void LSH::MinHash(THash<TMd5Sig, TIntSet>& ShingleToQuoteIds, TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) { TRnd RandomGenerator; // TODO: make this "more random" by incorporating time for (int i = 0; i < NumBands; ++i) { THash < TInt, TIntV > Inverted; // (QuoteID, QuoteSignatureForBand) THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs) for (int j = 0; j < BandSize; ++j) { // Create new signature TVec < TMd5Sig > Signature; ShingleToQuoteIds.GetKeyV(Signature); Signature.Shuffle(RandomGenerator); // Place in bucket - not very efficient int SigLen = Signature.Len(); for (int k = 0; k < SigLen; ++k) { TIntSet CurSet = ShingleToQuoteIds.GetDat(Signature[k]); for (TIntSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) { TInt Key = l.GetKey(); if (Inverted.IsKey(Key)) { TIntV CurSignature = Inverted.GetDat(Key); if (CurSignature.Len() <= j) { CurSignature.Add(k); Inverted.AddDat(Key, CurSignature); } } else { TIntV NewSignature; NewSignature.Add(k); Inverted.AddDat(Key, NewSignature); } } } } TIntV InvertedKeys; Inverted.GetKeyV(InvertedKeys); TInt InvertedLen = InvertedKeys.Len(); for (int k = 0; k < InvertedLen; ++k) { TIntSet Bucket; TIntV Signature = Inverted.GetDat(InvertedKeys[k]); if (BandBuckets.IsKey(Signature)) { Bucket = BandBuckets.GetDat(Signature); } Bucket.AddKey(InvertedKeys[k]); BandBuckets.AddDat(Signature, Bucket); } SignatureBandBuckets.Add(BandBuckets); Err("%d out of %d band signatures computed\n", i + 1, NumBands); } Err("Minhash step complete!\n"); }
int main(int argc, char *argv[]) { TStr BaseString = "/lfs/1/tmp/curis/week/QBDB.bin"; TFIn BaseFile(BaseString); TQuoteBase *QB = new TQuoteBase; TDocBase *DB = new TDocBase; QB->Load(BaseFile); DB->Load(BaseFile); TIntV QuoteIds; QB->GetAllQuoteIds(QuoteIds); int NumQuotes = QuoteIds.Len(); THash<TInt, TStrSet> PeakCounts; for (int i = 0; i < NumQuotes; i++) { TQuote CurQuote; if (QB->GetQuote(QuoteIds[i], CurQuote)) { TVec<TSecTm> Peaks; CurQuote.GetPeaks(DB, Peaks); TStr QuoteString; CurQuote.GetParsedContentString(QuoteString); TStrSet StringSet; if (PeakCounts.IsKey(Peaks.Len())) { StringSet = PeakCounts.GetDat(Peaks.Len()); } StringSet.AddKey(QuoteString); PeakCounts.AddDat(Peaks.Len(), StringSet); } } TIntV PeakCountKeys; PeakCounts.GetKeyV(PeakCountKeys); PeakCountKeys.Sort(true); for (int i = 0; i < PeakCountKeys.Len(); i++) { TStrSet CurSet = PeakCounts.GetDat(PeakCountKeys[i]); if (CurSet.Len() > 0) { printf("QUOTES WITH %d PEAKS\n", PeakCountKeys[i].Val); printf("#########################################\n"); THashSet<TStr> StringSet = PeakCounts.GetDat(PeakCountKeys[i]); for (THashSet<TStr>::TIter l = StringSet.BegI(); l < StringSet.EndI(); l++) { printf("%s\n", l.GetKey().CStr()); } printf("\n"); } } delete QB; delete DB; return 0; }
void LSH::ElCheapoHashing(TQuoteBase *QuoteBase, TInt ShingleLen, THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) { fprintf(stderr, "Hashing shingles the el cheapo way...\n"); TIntV QuoteIds; QuoteBase->GetAllQuoteIds(QuoteIds); for (int qt = 0; qt < QuoteIds.Len(); qt++) { if (qt % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len()); } TQuote Q; QuoteBase->GetQuote(QuoteIds[qt], Q); // Put x-character (or x-word) shingles into hash table; x is specified by ShingleLen parameter TStr QContentStr; Q.GetParsedContentString(QContentStr); TChA QContentChA = TChA(QContentStr); for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) { TChA ShingleChA = TChA(); for (int j = 0; j < ShingleLen; j++) { ShingleChA.AddCh(QContentChA.GetCh(i + j)); } TStr Shingle = TStr(ShingleChA); const TMd5Sig ShingleMd5(Shingle); TIntSet ShingleQuoteIds; if (ShingleToQuoteIds.IsKey(ShingleMd5)) { ShingleQuoteIds = ShingleToQuoteIds.GetDat(ShingleMd5); } ShingleQuoteIds.AddKey(QuoteIds[qt]); ShingleToQuoteIds.AddDat(ShingleMd5, ShingleQuoteIds); } } Err("Done with el cheapo hashing!\n"); }
/// Shingles by words void LSH::HashShinglesOfClusters(TQuoteBase *QuoteBase, TClusterBase *ClusterBase, TIntV& ClusterIds, TInt ShingleLen, THash<TMd5Sig, TIntV>& ShingleToClusterIds) { Err("Hashing shingles of clusters...\n"); for (int i = 0; i < ClusterIds.Len(); i++) { if (i % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", i, ClusterIds.Len()); } TCluster C; ClusterBase->GetCluster(ClusterIds[i], C); //fprintf(stderr, "%d vs. %d\n", ClusterIds[i].Val, C.GetId().Val); // Put x-word shingles into hash table; x is specified by ShingleLen parameter THashSet < TMd5Sig > CHashedShingles; GetHashedShinglesOfCluster(QuoteBase, C, ShingleLen, CHashedShingles); for (THashSet<TMd5Sig>::TIter Hash = CHashedShingles.BegI(); Hash < CHashedShingles.EndI(); Hash++) { TIntV ShingleClusterIds; if (ShingleToClusterIds.IsKey(*Hash)) { ShingleClusterIds = ShingleToClusterIds.GetDat(*Hash); } ShingleClusterIds.Add(ClusterIds[i]); ShingleToClusterIds.AddDat(*Hash, ShingleClusterIds); } } Err("Done hashing!\n"); }
void TSockSys::Connect(const uint64& SockId, const PSockHost& SockHost, const int& PortN) { // make sure it's a valid socket IAssert(IsSock(SockId)); uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId); // make sure we got a valid socket host IAssert(SockHost->IsOk()); // get connection handle uv_connect_t* ConnectHnd = (uv_connect_t*)malloc(sizeof(uv_connect_t)); // special handling for v4 and v6 int ResCd = 0; if (SockHost->IsIpv4()) { // get address struct sockaddr_in Addr = uv_ip4_addr(SockHost->GetIpNum().CStr(), PortN); // establish connection ResCd = uv_tcp_connect(ConnectHnd, SockHnd, Addr, TSockSys::OnConnect); } else if (SockHost->IsIpv6()) { // get address struct sockaddr_in6 Addr = uv_ip6_addr(SockHost->GetIpNum().CStr(), PortN); // establish connection ResCd = uv_tcp_connect6(ConnectHnd, SockHnd, Addr, TSockSys::OnConnect); } // check for errors if (ResCd != 0) { // cleanup first free(SockHnd); // and throw exception throw TExcept::New("SockSys.Connect: Error establishing socket connection: " + SockSys.GetLastErr()); } }
int ComputeKCore(const PUNGraph& G) { int cnt = 0; for(TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) cnt = max(cnt, NI.GetOutDeg()); THashSet <TInt> D[cnt+1]; THash <TInt, TInt> deg; for(TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) { TInt tmp = NI.GetOutDeg() - G->IsEdge(NI.GetId(), NI.GetId() ); D[tmp.Val].AddKey(NI.GetId()); deg.AddDat(NI.GetId()) = tmp; } int max_k = 0; for(int num_iters = 0;num_iters < G->GetNodes(); num_iters++) for(int i = 0; i < cnt; i++) if(D[i].Empty() == 0) { max_k = max(max_k, i); TInt a = *(D[i].BegI()); D[i].DelKey(a); deg.AddDat(a.Val) = -1; // Hope overwriting works TUNGraph::TNodeI NI = G->GetNI(a.Val); for(int e = 0; e < NI.GetOutDeg(); e++) { TInt b = NI.GetOutNId(e); if(deg.GetDat(b) >= 0) { int Id = deg.GetKeyId(b); D[deg[Id].Val].DelKey(b); deg[Id] = deg[Id] - 1; //Hope the overwriting works D[deg[Id]].AddKey(b); } } break; } return max_k; }
// wrong reading of string attributes void TTable::BuildGraphTopology(PNEAGraph& Graph, THash<TFlt, TInt>& FSrNodeMap, THash<TFlt, TInt>& FDsNodeMap) { TYPE SrCT = GetColType(SrcCol); TInt SrIdx = GetColIdx(SrcCol); TYPE DsCT = GetColType(DstCol); TInt DsIdx = GetColIdx(DstCol); TInt SrcCnt = 0; TInt DstCnt = 0; for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) { if (SrCT == INT && DsCT == INT) { Graph->AddNode(IntCols[SrIdx][RowI.GetRowIdx()]); Graph->AddNode(IntCols[DsIdx][RowI.GetRowIdx()]); Graph->AddEdge(IntCols[SrIdx][RowI.GetRowIdx()], IntCols[DsIdx][RowI.GetRowIdx()], RowI.GetRowIdx()); } else if (SrCT == INT && DsCT == FLT) { Graph->AddNode(IntCols[SrIdx][RowI.GetRowIdx()]); TFlt val = FltCols[DsIdx][RowI.GetRowIdx()]; if (!FDsNodeMap.IsKey(val)) { FDsNodeMap.AddDat(val, DstCnt++); } Graph->AddNode(FDsNodeMap.GetDat(val)); Graph->AddEdge(IntCols[SrIdx][RowI.GetRowIdx()], FDsNodeMap.GetDat(val)); } else if (SrCT == INT && DsCT == STR) { Graph->AddNode(IntCols[SrIdx][RowI.GetRowIdx()]); Graph->AddNode(StrColMaps[DsIdx][RowI.GetRowIdx()]); Graph->AddEdge(IntCols[SrIdx][RowI.GetRowIdx()], StrColMaps[DsIdx][RowI.GetRowIdx()], RowI.GetRowIdx()); } else if (SrCT == FLT && DsCT == INT) { Graph->AddNode(IntCols[DsIdx][RowI.GetRowIdx()]); TFlt val = FltCols[SrIdx][RowI.GetRowIdx()]; if (!FSrNodeMap.IsKey(val)) { FSrNodeMap.AddDat(val, SrcCnt++); } Graph->AddNode(FSrNodeMap.GetDat(val)); Graph->AddEdge(FSrNodeMap.GetDat(val), IntCols[SrIdx][RowI.GetRowIdx()], RowI.GetRowIdx()); } else if (SrCT == FLT && DsCT == STR) { Graph->AddNode(StrColMaps[DsIdx][RowI.GetRowIdx()]); TFlt val = FltCols[SrIdx][RowI.GetRowIdx()]; if (!FSrNodeMap.IsKey(val)) { FSrNodeMap.AddDat(val, SrcCnt++); } Graph->AddNode(FSrNodeMap.GetDat(val)); Graph->AddEdge(FSrNodeMap.GetDat(val), IntCols[SrIdx][RowI.GetRowIdx()], RowI.GetRowIdx()); } else if (SrCT == FLT && DsCT == FLT) { TFlt val = FltCols[SrIdx][RowI.GetRowIdx()]; if (!FSrNodeMap.IsKey(val)) { FSrNodeMap.AddDat(val, SrcCnt++); } 
Graph->AddNode(FSrNodeMap.GetDat(val)); val = FltCols[DsIdx][RowI.GetRowIdx()]; if (!FDsNodeMap.IsKey(val)) { FDsNodeMap.AddDat(val, DstCnt++); } Graph->AddNode(FDsNodeMap.GetDat(val)); Graph->AddEdge(FSrNodeMap.GetDat(val), FDsNodeMap.GetDat(val), RowI.GetRowIdx()); } } }
/// Returns the graph node id that the value in column Col of row RowIdx maps to.
/// INT columns return the stored integer, STR columns return the integer stored in
/// StrColMaps, and FLT columns are looked up in FSrNodeMap (when Col is the source column)
/// or FDsNodeMap (when Col is the destination column).
/// Calls TExcept::Throw for a FLT column that is neither SrcCol nor DstCol.
TInt TTable::GetNId(TStr Col, TInt RowIdx, THash<TFlt, TInt>& FSrNodeMap, THash<TFlt, TInt>& FDsNodeMap) {
  TYPE CT = GetColType(Col);
  TInt Idx = GetColIdx(Col);
  if (CT == INT) {
    // columns are indexed [column][row], as for FltCols below and everywhere else in
    // TTable; the original had the two indices swapped
    return IntCols[Idx][RowIdx];
  } else if (CT == FLT) {
    if (Col == SrcCol) {
      return FSrNodeMap.GetDat(FltCols[Idx][RowIdx]);
    } else if (Col == DstCol) {
      return FDsNodeMap.GetDat(FltCols[Idx][RowIdx]);
    } else {
      TExcept::Throw("Column " + Col + " is not source node or destination column");
    }
  } else {
    // same [column][row] order as for the other column kinds (was swapped as well)
    return StrColMaps[Idx][RowIdx]();
  }
  return 0;
}
void TSockSys::DelSock(const uint64& SockId) { if (Active) { uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId); // close the handle uv_close((uv_handle_t*)SockHnd, TSockSys::OnClose); // mark it's under closing so we don't handle it again in the destructor ClosedSockIdSet.AddKey(SockId); } }
void TSockSys::DelIfSockTimer(const uint64& SockId) { if (SockIdToTimerHndH.IsKey(SockId)) { // get timer handle uv_timer_t* TimerHnd = SockIdToTimerHndH.GetDat(SockId); // stop the timer uv_timer_stop(TimerHnd); // remove shortcuts SockIdToTimerHndH.DelKey(SockId); TimerHndToSockIdH.DelKey((uint64)TimerHnd); // remove shortcuts } }
/// Returns the display name of a graph flag.
/// The flag -> name table is built on the first call and kept in a function-local static.
/// Only the six flags listed below are registered.
TStr GetFlagStr(const TGraphFlag& GraphFlag) {
  static THash<TInt, TStr> FlagNmH;
  if (FlagNmH.Empty()) {
    const int FlagV[] = {
      (int) gfUndef, (int) gfDirected, (int) gfMultiGraph,
      (int) gfNodeDat, (int) gfEdgeDat, (int) gfSources};
    const char* NmV[] = {
      "Undef", "Directed", "MultiGraph",
      "NodeDat", "EdgeDat", "Sources"};
    const int Flags = (int) (sizeof(FlagV) / sizeof(FlagV[0]));
    for (int FlagN = 0; FlagN < Flags; FlagN++) {
      FlagNmH.AddDat(FlagV[FlagN], NmV[FlagN]);
    }
  }
  return FlagNmH.GetDat((int) GraphFlag);
}
void TNetInfBs::Init() { THash<TInt, TIntV> CascPN; Graph = TNGraph::New(); // reset vectors EdgeGainV.Clr(); CascPerEdge.Clr(); PrecisionRecall.Clr(); for (int c = 0; c < CascV.Len(); c++) { for (int i = 0; i < CascV[c].Len(); i++) { if (!Graph->IsNode(CascV[c].GetNode(i))) Graph->AddNode(CascV[c].GetNode(i)); if (!CascPN.IsKey(CascV[c].GetNode(i))) CascPN.AddDat(CascV[c].GetNode(i)) = TIntV(); CascPN.GetDat(CascV[c].GetNode(i)).Add(c); } CascV[c].InitProb(); } // only add edges that make sense (i.e., at least once coherent in time) for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { TIntV &Cascs = CascPN.GetDat(NI.GetId()); for (int c = 0; c < Cascs.Len(); c++) { for (int i=0; i < CascV[Cascs[c]].Len(); i++) { if (CascV[Cascs[c]].GetNode(i)==NI.GetId()) continue; if (CascV[Cascs[c]].GetTm(CascV[Cascs[c]].GetNode(i)) < CascV[Cascs[c]].GetTm(NI.GetId()) ) { if (!CascPerEdge.IsKey(TIntPr(CascV[Cascs[c]].GetNode(i), NI.GetId()))) { EdgeGainV.Add(TPair<TFlt, TIntPr>(TFlt::Mx, TIntPr(CascV[Cascs[c]].GetNode(i), NI.GetId()))); CascPerEdge.AddDat(TIntPr(CascV[Cascs[c]].GetNode(i), NI.GetId())) = TIntV(); } // Add cascade to hash of cascades per edge (to implement localized update) CascPerEdge.GetDat(TIntPr(CascV[Cascs[c]].GetNode(i), NI.GetId())).Add(Cascs[c]); } } } } }
/// Merges the pair of communities returned by FindMxQEdge().
/// Community I (TopQ.Val3) is merged into community J (TopQ.Val2): the two are joined in
/// CmtyIdUF, Q grows by TopQ.Val1, J's link values are recomputed for every neighbor K of
/// I or J, the new values are pushed onto MxQHeap, and I is removed from CmtyQH.
/// Returns false, without changing anything, when TopQ.Val1 <= 0.0; true otherwise.
bool MergeBestQ() {
  const TFltIntIntTr TopQ = FindMxQEdge();
  // stop when the best available value is not positive
  if (TopQ.Val1 <= 0.0) { return false; }
  // joint communities
  const int I = TopQ.Val3;
  const int J = TopQ.Val2;
  CmtyIdUF.Union(I, J); // join
  Q += TopQ.Val1;
  TCmtyDat& DatJ = CmtyQH.GetDat(J);
  { // DatI is scoped so that it is not used after I is deleted below
    TCmtyDat& DatI = CmtyQH.GetDat(I);
    // I and J are about to become one community, so drop the link between them
    DatI.DelLink(J);
    DatJ.DelLink(I);
    // pass 1: every neighbor K of J
    for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) {
      const int K = DatJ.NIdQH.GetKey(i);
      TCmtyDat& DatK = CmtyQH.GetDat(K);
      double NewQ = DatJ.NIdQH[i];
      if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ+DatI.NIdQH.GetDat(K); DatK.DelLink(I); } // K connected to I and J
      else { NewQ = NewQ-2*DatI.DegFrac*DatK.DegFrac; } // K connected to J not I
      DatJ.AddQ(K, NewQ);
      DatK.AddQ(J, NewQ);
      // heap entries keep the smaller id in Val2 and the larger id in Val3
      MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
    }
    // pass 2: neighbors of I that J did not already have
    for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) {
      const int K = DatI.NIdQH.GetKey(i);
      if (! DatJ.NIdQH.IsKey(K)) { // K connected to I not J
        TCmtyDat& DatK = CmtyQH.GetDat(K);
        const double NewQ = DatI.NIdQH[i]-2*DatJ.DegFrac*DatK.DegFrac;
        DatJ.AddQ(K, NewQ);
        DatK.DelLink(I);
        DatK.AddQ(J, NewQ);
        MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
      }
    }
    // the merged community carries the degree fraction of both
    DatJ.DegFrac += DatI.DegFrac;
  }
  if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done)
  CmtyQH.DelKey(I);
  return true;
}
/// JS binding: sets one or several sensor values over the radio.
/// The first argument is either a single object {sensorId, value} or an array of such
/// objects. Sensor names are resolved to (node id, value id) pairs through
/// ValNmNodeIdValIdPrH; array entries are grouped by node id and sent with one
/// Radio.Set call per node. The JS return value is true only if every call succeeded.
/// Returns without setting a return value when called with no arguments.
void TNodeJsRf24Radio::set(const v8::FunctionCallbackInfo<v8::Value>& Args) {
  v8::Isolate* Isolate = v8::Isolate::GetCurrent();
  v8::HandleScope HandleScope(Isolate);

  TNodeJsRf24Radio* JsRadio = ObjectWrap::Unwrap<TNodeJsRf24Radio>(Args.Holder());

  if (Args.Length() == 0) { return; }

  const PJsonVal ArgVal = TNodeJsUtil::GetArgJson(Args, 0);

  bool Success = true;
  if (!ArgVal->IsArr()) {
    // single {sensorId, value} object
    const TStr& SensorNm = ArgVal->GetObjStr("sensorId");
    const int SensorVal = ArgVal->GetObjInt("value");

    const TIntPr& NodeIdValIdPr = JsRadio->ValNmNodeIdValIdPrH.GetDat(SensorNm);
    const uint16 NodeId = (uint16) NodeIdValIdPr.Val1;
    const int ValId = NodeIdValIdPr.Val2;

    Success = JsRadio->Radio.Set(NodeId, ValId, SensorVal);
  } else {
    // array of objects: collect the (value id, value) pairs per node first
    THash<TInt, TIntPrV> NodeIdValIdValPrVH;
    const int Entries = ArgVal->GetArrVals();
    for (int EntryN = 0; EntryN < Entries; EntryN++) {
      const PJsonVal& EntryJson = ArgVal->GetArrVal(EntryN);

      const TStr& SensorNm = EntryJson->GetObjStr("sensorId");
      const int& SensorVal = EntryJson->GetObjInt("value");

      const TIntPr& NodeIdValIdPr = JsRadio->ValNmNodeIdValIdPrH.GetDat(SensorNm);
      const uint16 NodeId = NodeIdValIdPr.Val1;
      const int ValId = NodeIdValIdPr.Val2;

      // AddDat(Key) returns the node's existing list or creates an empty one
      NodeIdValIdValPrVH.AddDat(NodeId).Add(TIntPr(ValId, SensorVal));
    }

    // one radio call per node
    for (int KeyId = NodeIdValIdValPrVH.FFirstKeyId(); NodeIdValIdValPrVH.FNextKeyId(KeyId); ) {
      const uint16 NodeId = NodeIdValIdValPrVH.GetKey(KeyId);
      const TIntPrV& ValIdValPrV = NodeIdValIdValPrVH[KeyId];
      Success &= JsRadio->Radio.Set(NodeId, ValIdValPrV);
    }
  }

  Args.GetReturnValue().Set(v8::Boolean::New(Isolate, Success));
}
/// For every quote, add it to corresponding bucket for each hashed x-character shingle of the quote // (Shingles by characters) void LSH::HashShingles(TQuoteBase *QuoteBase, TClusterBase *CB, TInt ShingleLen, THash<TMd5Sig, TShingleIdSet>& ShingleToQuoteIds) { Err("Hashing shingles...\n"); TIntV QuoteIds; QuoteBase->GetAllQuoteIds(QuoteIds); for (int qt = 0; qt < QuoteIds.Len(); qt++) { if (qt % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len()); } if (CB->IsQuoteInArchivedCluster(QuoteIds[qt])) continue; TQuote Q; QuoteBase->GetQuote(QuoteIds[qt], Q); // Put x-character (or x-word) shingles into hash table; x is specified by ShingleLen parameter TStr QContentStr; Q.GetParsedContentString(QContentStr); TChA QContentChA = TChA(QContentStr); int CurWord = 0; for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) { TChA ShingleChA = TChA(); for (int j = 0; j < ShingleLen; j++) { ShingleChA.AddCh(QContentChA.GetCh(i + j)); } TStr Shingle = TStr(ShingleChA); const TMd5Sig ShingleMd5(Shingle); TShingleIdSet ShingleQuoteIds; if (ShingleToQuoteIds.IsKey(ShingleMd5)) { ShingleQuoteIds = ShingleToQuoteIds.GetDat(ShingleMd5); } for (int j = CurWord; j > CurWord - WordWindow && j >= 0; j--) { ShingleQuoteIds.AddKey(TShingleId(QuoteIds[qt], j)); } ShingleToQuoteIds.AddDat(ShingleMd5, ShingleQuoteIds); // up the current word index if we see a space if (QContentChA.GetCh(i + ShingleLen - 1) == ' ') { CurWord++; } } } Err("Done hashing!\n"); }
void TGraphEnumUtils::GetNormalizedGraph(const PNGraph &G, PNGraph &nG) { //Get bijective map from original node ids to normalized node ids(0,1,2,...) THash<TInt,TInt> map; GetNormalizedMap(G, map); //Add nodes for(int i=0; i<G->GetNodes(); i++) nG->AddNode(i); //Add edges for(TNGraph::TEdgeI eIt=G->BegEI(); eIt<G->EndEI(); eIt++) { int srcId = eIt.GetSrcNId(); int dstId = eIt.GetDstNId(); // int mSrcId = map.GetDat(srcId); int mDstId = map.GetDat(dstId); // nG->AddEdge(mSrcId, mDstId); } }
/// For every quote, hashes each word of its parsed content and records the quote id under
/// that word hash. As a debugging aid, the (up to) 100 first words in TCmpSetByLen order
/// are printed together with the number of quotes they occur in.
/// @param QuoteBase         Source of quotes.
/// @param ShingleToQuoteIds Output: hashed word -> ids of quotes containing it.
void LSH::WordHashing(TQuoteBase *QuoteBase, THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) {
  fprintf(stderr, "Hashing shingles using words...\n");
  TIntV QuoteIds;
  QuoteBase->GetAllQuoteIds(QuoteIds);
  THash<TStr, TIntSet> Temp; // word -> quote ids; only feeds the debug dump below
  for (int qt = 0; qt < QuoteIds.Len(); qt++) {
    if (qt % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len());
    }
    TQuote Q;
    QuoteBase->GetQuote(QuoteIds[qt], Q);

    TStrV Content;
    Q.GetParsedContent(Content);

    int ContentLen = Content.Len();
    for (int i = 0; i < ContentLen; i++) {
      const TMd5Sig ShingleMd5(Content[i]);
      // AddDat(Key) returns the existing set or a new empty one; inserting in place
      // avoids copying the whole set out of the hash and back for every word
      ShingleToQuoteIds.AddDat(ShingleMd5).AddKey(QuoteIds[qt]);

      ///// COMMENT OUT LATER
      Temp.AddDat(Content[i]).AddKey(QuoteIds[qt]);
    }
  }

  TVec<TStr> ShingleKeys;
  Temp.GetKeyV(ShingleKeys);
  ShingleKeys.SortCmp(TCmpSetByLen(false, &Temp));
  // The original always read ShingleKeys[0..99], which is out of bounds whenever fewer
  // than 100 distinct words were seen.
  const int NumToShow = (ShingleKeys.Len() < 100) ? ShingleKeys.Len() : 100;
  for (int i = 0; i < NumToShow; i++) {
    const TIntSet& TempSet = Temp.GetDat(ShingleKeys[i]);
    Err("%d: %s - %d \n", i, ShingleKeys[i].CStr(), TempSet.Len());
  }

  Err("Done with word hashing!\n");
}
void TSockSys::AddSockTimer(const uint64& SockId, const int& MSecs) { if (SockIdToTimerHndH.IsKey(SockId)) { // socket already has timer, stop and start with MSecs uv_timer_t* TimerHnd = SockIdToTimerHndH.GetDat(SockId); // stop existing count uv_timer_stop(TimerHnd); // start new one uv_timer_start(TimerHnd, OnTimeOut, MSecs, 0); } else { // create new timer uv_timer_t* TimerHnd = (uv_timer_t*)malloc(sizeof(uv_timer_t)); // initialize uv_timer_init(SockSys.Loop, TimerHnd); // start the timer uv_timer_start(TimerHnd, OnTimeOut, MSecs, 0); // remember handle SockIdToTimerHndH.AddDat(SockId, TimerHnd); TimerHndToSockIdH.AddDat((uint64)TimerHnd, SockId); } }
/// Returns the textual IP address that socket SockId is bound to locally.
/// Asserts that SockId is a known socket. Throws when the socket name cannot be obtained
/// or when its address family is neither AF_INET nor AF_INET6.
TStr TSockSys::GetLocalIpNum(const uint64& SockId) {
  // make sure it's a valid socket
  IAssert(IsSock(SockId));
  uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId);
  // get local address. sockaddr_storage is large enough for every address family;
  // the original used a plain struct sockaddr, which is too small for an IPv6 address,
  // so the IPv6 branch below read a truncated address.
  struct sockaddr_storage SockName;
  int NameLen = sizeof(SockName);
  const int ResCd = uv_tcp_getsockname(SockHnd, (struct sockaddr*)&SockName, &NameLen);
  EAssertR(ResCd == 0, "SockSys.GetLocalIpNum: " + SockSys.GetLastErr());
  // decode IP
  char SockIpNum[64];
  if (SockName.ss_family == AF_INET) {
    uv_ip4_name((sockaddr_in*)&SockName, SockIpNum, sizeof(SockIpNum));
  } else if (SockName.ss_family == AF_INET6) {
    uv_ip6_name((sockaddr_in6*)&SockName, SockIpNum, sizeof(SockIpNum));
  } else {
    throw TExcept::New("SockSys.GetLocalIpNum: unkown address family");
  }
  // return
  return TStr(SockIpNum);
}
void TSockSys::Send(const uint64& SockId, const PSIn& SIn) { // make sure it's a valid socket IAssert(IsSock(SockId)); uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId); // create write request uv_write_req_t* WriteHnd = (uv_write_req_t*)malloc(sizeof(uv_write_req_t)); // copy the data in the buffer WriteHnd->Buffer.len = SIn->Len(); //TODO: handle cases when SIn doesn't have known Len() WriteHnd->Buffer.base = (char*)malloc(WriteHnd->Buffer.len); SIn->GetBf(WriteHnd->Buffer.base, WriteHnd->Buffer.len); // execute the request int ResCd = uv_write((uv_write_t*)WriteHnd, (uv_stream_t*)SockHnd, &WriteHnd->Buffer, 1, OnWrite); // check for errors if (ResCd != 0) { // cleanup first free(WriteHnd->Buffer.base); free(WriteHnd); // and throw exception throw TExcept::New("SockSys.Send: Error sending data: " + SockSys.GetLastErr()); } }
void TSockSys::Listen(const uint64& SockId, const int& PortN, const bool& IPv6P) { // make sure it's a valid socket IAssert(IsSock(SockId)); uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId); // special handling for v4 and v6 when binding if (!IPv6P) { // get address struct sockaddr_in Addr = uv_ip4_addr("0.0.0.0", PortN); // bind socket to port const int BindResCd = uv_tcp_bind(SockHnd, Addr); EAssertR(BindResCd == 0, "SockSys.Listen: Error bidning socket to port: " + SockSys.GetLastErr()); } else { // get address struct sockaddr_in6 Addr = uv_ip6_addr("::", PortN); // bind socket to port const int BindResCd = uv_tcp_bind6(SockHnd, Addr); EAssertR(BindResCd == 0, "SockSys.Listen: Error bidning socket to port: " + SockSys.GetLastErr()); } // make sure we have backlog of at least 128 const int BacklogQueue = (SOMAXCONN < 128) ? 128 : SOMAXCONN; // enable callbacks const int ListenResCd = uv_listen((uv_stream_t*)SockHnd, BacklogQueue, TSockSys::OnAccept); EAssertR(ListenResCd == 0, "SockSys.Listen: Error setting listener on socket: " + SockSys.GetLastErr()); }
/////////////////////////////////////////////////
// Best-Paths
// Finds shortest link paths between two named objects and shows them in the UI.
// Two named objects count as linked when their document-id vectors intersect.
// The search proceeds level by level from the source until the destination is reached
// or no new object can be reached. Every path found is written, indented by level, to
// ContexterF->NmObjLinkageREd, and the objects and links on the paths are turned into a
// graph that is laid out and drawn through ContexterF->State.
//   SrcNmObjStr - name of the start object
//   DstNmObjStr - name of the target object
//   NmObjBs     - named-object base that resolves names/ids and document ids
void GetBestPaths(
 const TStr& SrcNmObjStr, const TStr& DstNmObjStr, const PNmObjBs& NmObjBs){
  int SrcNmObjId=NmObjBs->GetNmObjId(SrcNmObjStr);
  int DstNmObjId=NmObjBs->GetNmObjId(DstNmObjStr);
  int NmObjs=NmObjBs->GetNmObjs();
  // ParLevPrV[id] = (parent id, level); (-1, -1) marks an object not reached yet
  // DstParLevPrV collects every (parent, level) pair through which the target was reached
  TIntPrV ParLevPrV(NmObjs); TIntPrV DstParLevPrV;
  ParLevPrV.PutAll(TIntPr(-1, -1));
  int CurLev=0;
  // the source is its own parent at level 0
  ParLevPrV[SrcNmObjId]=TIntPr(SrcNmObjId, CurLev);
  forever{
    CurLev++; int NewEdges=0;
    // expand every object that was reached on the previous level
    for (int NmObjId1=0; NmObjId1<NmObjs; NmObjId1++){
      if (ParLevPrV[NmObjId1].Val2==CurLev-1){
        TIntV DocIdV1; NmObjBs->GetNmObjDocIdV(NmObjId1, DocIdV1);
        for (int NmObjId2=0; NmObjId2<NmObjs; NmObjId2++){
          // the target is re-examined even when already reached, so that all of its
          // parents on this level are collected
          if ((NmObjId2==DstNmObjId)||(ParLevPrV[NmObjId2].Val2==-1)){
            TIntV DocIdV2; NmObjBs->GetNmObjDocIdV(NmObjId2, DocIdV2);
            TIntV IntrsDocIdV; DocIdV1.Intrs(DocIdV2, IntrsDocIdV);
            // a shared document links the two objects
            if (!IntrsDocIdV.Empty()){
              ParLevPrV[NmObjId2]=TIntPr(NmObjId1, CurLev);
              NewEdges++;
              if (NmObjId2==DstNmObjId){
                DstParLevPrV.Add(TIntPr(NmObjId1, CurLev));
              }
            }
          }
        }
      }
    }
    // stop when nothing new was reached or the target has been found
    if ((NewEdges==0)||(ParLevPrV[DstNmObjId].Val2!=-1)){
      break;
    }
  }
  // prepare graph
  // vertices are keyed by name (data filled in later); edges are kept as name pairs
  THash<TStr, PVrtx> VrtxNmToVrtxH; TStrPrV VrtxNmPrV;
  VrtxNmToVrtxH.AddKey(SrcNmObjStr);
  VrtxNmToVrtxH.AddKey(DstNmObjStr);
  // write path
  ContexterF->NmObjLinkageREd->Clear();
  // one path per recorded parent of the target
  for (int DstParLevPrN=0; DstParLevPrN<DstParLevPrV.Len(); DstParLevPrN++){
    // select this parent for the target, then follow the parent links back to the source
    ParLevPrV[DstNmObjId]=DstParLevPrV[DstParLevPrN];
    int DstParLev=ParLevPrV[DstNmObjId].Val2;
    // note: this local shadows the DstNmObjStr parameter
    TStr DstNmObjStr=NmObjBs->GetNmObjStr(DstNmObjId);
    ContexterF->NmObjLinkageREd->Lines->Add(DstNmObjStr.CStr());
    int ParNmObjId=DstNmObjId;
    TStr PrevNmObjStr=DstNmObjStr;
    forever {
      if (ParNmObjId==SrcNmObjId){break;}
      ParNmObjId=ParLevPrV[ParNmObjId].Val1;
      int ParLev=ParLevPrV[ParNmObjId].Val2;
      TStr CurNmObjStr=NmObjBs->GetNmObjStr(ParNmObjId);
      // indent by four spaces per level of distance from the target
      TStr ParNmObjStr=TStr::GetSpaceStr((DstParLev-ParLev)*4)+CurNmObjStr;
      ContexterF->NmObjLinkageREd->Lines->Add(ParNmObjStr.CStr());
      // create vertex & edge
      VrtxNmToVrtxH.AddKey(CurNmObjStr);
      if (!PrevNmObjStr.Empty()){
        // store each link once, with the smaller name first; equal names add no link
        if (PrevNmObjStr<CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(PrevNmObjStr, CurNmObjStr));
        } else
        if (PrevNmObjStr>CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(CurNmObjStr, PrevNmObjStr));
        }
      }
      // save current named-object
      PrevNmObjStr=CurNmObjStr;
    }
  }
  // generate graph
  // create graph
  PGraph Graph=TGGraph::New();
  // create vertices
  for (int VrtxN=0; VrtxN<VrtxNmToVrtxH.Len(); VrtxN++){
    TStr VrtxNm=VrtxNmToVrtxH.GetKey(VrtxN);
    PVrtx Vrtx=TGVrtx::New(VrtxNm);
    VrtxNmToVrtxH.GetDat(VrtxNm)=Vrtx;
    Graph->AddVrtx(Vrtx);
  }
  // create edges
  for (int EdgeN=0; EdgeN<VrtxNmPrV.Len(); EdgeN++){
    PVrtx Vrtx1=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val1);
    PVrtx Vrtx2=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val2);
    // edges are named "_<index>"
    PEdge Edge=new TGEdge(Vrtx1, Vrtx2, TStr::Fmt("_%d", EdgeN), false);
    Graph->AddEdge(Edge);
  }
  // place graph
  ContexterF->State->ElGraph=Graph;
  // fixed seed for the layout
  TRnd Rnd(1);
  ContexterF->State->ElGraph->PlaceSimAnnXY(Rnd, ContexterF->State->ElGks);
  // draw graph
  ContexterF->State->ElGks->Clr();
  ContexterF->ElPbPaint(NULL);
}
int main(int argc, char* argv[]) { TExeTm ExeTm; PGconn *conn; PGresult *res; int id,start,rec_count,row,indx,end; unsigned int q; int total_number_tweets = 0; double tweet_date = 0; TStr TweetStr(""); TStr TweetStrLc(""); if(argc > 1) { start = atoi(argv[1]); } else { printf("YOU SHOULD SET THE INDICES...\n\n"); return 1; } indx = start * LENGTH; end = indx + LENGTH; printf(":::::::: Find Cascades of Quotes In Twitter Separately ::::::::\n"); const TStr StartDate = Env.GetIfArgPrefixStr("-sd:", "2008-08-01 00:00:00", "Starting date"); const TStr EndDate = Env.GetIfArgPrefixStr("-ed:", "2009-10-01 00:00:00", "Ending date"); Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nFinding the cascades of the desired quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); try { while(indx < end) { TStr qContentFname = TStr::Fmt("QuotesData/Q%d.rar",indx); TStr resultFname = TStr::Fmt("QuotesCascResult/R%d.rar",indx++); if(fileExists(resultFname)) { if(fileExists(qContentFname)) { // removing the quotes' content file system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr()); } } else { if(fileExists(qContentFname)) { THash<TStr,TInt> quotesContent; THash<TInt,TSecTmV> CascadesOnTwitter; TZipIn ZquotesIn(qContentFname); quotesContent.Load(ZquotesIn); printf("Q%d loading done, it contains %d quotes.\n",indx-1,quotesContent.Len()); conn = PQconnectdb("dbname=twitter host=postgresql01.mpi-sws.org user=twitter password=tweet@84"); if (PQstatus(conn) == CONNECTION_BAD) { printf("We were unable to connect to the database"); return 1; } // we use cursors/fetch to speed up the process; batch of 10000 tweets PQexec(conn, "begin work"); PQexec(conn,TStr::Fmt("declare mycursor cursor for select tweettext, extract(epoch from tweettime) from tweets where tweettime >= timestamp '%s' and tweettime < timestamp '%s'", StartDate.CStr(), EndDate.CStr()).CStr()); do { res = PQexec(conn, "FETCH 1000000 IN mycursor"); // all of them are: 1675401026 if 
(PQresultStatus(res) == PGRES_TUPLES_OK) { rec_count = PQntuples(res); total_number_tweets += rec_count; printf("Adding %d tweets... (total: %d)\n", rec_count, total_number_tweets); for (row=0; row<rec_count; row++) { TweetStr = PQgetvalue(res, row, 0); tweet_date = TStr(PQgetvalue(res, row, 1)).GetFlt(); TweetStrLc = TweetStr.ToLc(); for(q=0;q<quotesContent.Len();q++) { if (TweetStrLc.SearchStr(quotesContent.GetKey(q)) > -1) { TSecTm td(tweet_date); id = CascadesOnTwitter.GetKeyId(quotesContent[q]); if(id == -1) { CascadesOnTwitter.AddDat(quotesContent[q]).Add(td); } else { CascadesOnTwitter.GetDat(quotesContent[q]).AddSorted(td); } } } } PQclear(res); } else { rec_count = 0; } } while (rec_count); PQexec(conn, "close mycursor"); PQexec(conn, "commit work"); PQfinish(conn); // Save the results TZipOut zout(resultFname); CascadesOnTwitter.Save(zout); // Remove the qoutes' content file system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr()); } } } printf("\n\nD O N E\n\n"); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { int i,quoteIndex,j,k; TExeTm ExeTm; printf("Starting The SAVE CODE For Matlab Processing ...\n"); try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nCreating the volumes of the quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TZipIn ZquotesIn("RESULTS/QuotesPreprocessedData_NIFTY.rar"); quotes.Load(ZquotesIn); printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len()); TZipIn ZcascadesOnTwitterIn("RESULTS/CascadesFullUrlsOnTwitterData.rar"); cascadesOnTwitterUrls.Load(ZcascadesOnTwitterIn); printf("Loaded CascadesFullUrlsOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterUrls.Len()); TZipIn ZIn("RESULTS/CascadesOnTwitterData.rar"); cascadesOnTwitterContents.Load(ZIn); printf("Loaded CascadesOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterContents.Len()); // Quote's Cascades over Memes ofstream quotesContent1("MEMES_QuotesContent.csv",ios::out|ios::app); ofstream memeWebs1("MEMES_MemesCascadesWebs.csv",ios::out|ios::app); ofstream memeTimes1("MEMES_MemesCascadesTimes.csv",ios::out|ios::app); ofstream externalLinks1("MEMES_MemesExternalLinks.csv",ios::out|ios::app); for(i=0;i<quotes.Len();i++) { quotesContent1 << quotes.GetKey(i).CStr() << "\r\n"; for(j=0;j<quotes[i].Len();j++) { for(k=0;k<quotes[i][j].explicit_links.Len();k++) { externalLinks1 << quotes[i][j].explicit_links[k].Val << "," << quotes[i][j].post.Val<<"\r\n"; } memeTimes1 << quotes[i][j].time.GetAbsSecs() << ","; memeWebs1 << quotes[i][j].post.Val << ","; } memeTimes1 << "\r\n"; memeWebs1 << "\r\n"; externalLinks1 << "-1\r\n"; // this means that the external links for this quote is finished } quotesContent1.close(); memeWebs1.close(); memeTimes1.close(); externalLinks1.close(); // TEXTS Cascades Over Memes and Twitter ofstream quotesContent2("MEMES_TWITTER_TXT_QuotesContent.csv",ios::out|ios::app); ofstream memeWebs2("MEMES_TWITTER_TXT_MemesCascadesWebs.csv",ios::out|ios::app); 
ofstream memeTimes2("MEMES_TWITTER_TXT_MemesCascadesTimes.csv",ios::out|ios::app); ofstream externalLinks2("MEMES_TWITTER_TXT_MemesExternalLinks.csv",ios::out|ios::app); ofstream twitterContent2("MEMES_TWITTER_TXT_TwitterTextCascades.csv",ios::out|ios::app); for(i=0;i<cascadesOnTwitterContents.Len();i++) { quoteIndex = cascadesOnTwitterContents.GetKey(i); quotesContent2 << quotes.GetKey(quoteIndex).CStr() << "\r\n"; for(j=0;j<quotes[quoteIndex].Len();j++) { for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) { externalLinks2 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE } memeTimes2 << quotes[quoteIndex][j].time.GetAbsSecs() << ","; memeWebs2 << quotes[quoteIndex][j].post.Val << ","; } memeTimes2 << "\r\n"; memeWebs2 << "\r\n"; externalLinks2 << "-1\r\n"; // this means that the external links for this quote is finished for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++) { twitterContent2 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ","; } twitterContent2 << "\r\n"; } quotesContent2.close(); memeWebs2.close(); memeTimes2.close(); externalLinks2.close(); twitterContent2.close(); // URLS Cascades Over Memes and Twitter ofstream quotesContent3("MEMES_TWITTER_URL_QuotesContent.csv",ios::out|ios::app); ofstream memeWebs3("MEMES_TWITTER_URL_MemesCascadesWebs.csv",ios::out|ios::app); ofstream memeTimes3("MEMES_TWITTER_URL_MemesCascadesTimes.csv",ios::out|ios::app); ofstream externalLinks3("MEMES_TWITTER_URL_MemesExternalLinks.csv",ios::out|ios::app); ofstream twitter3("MEMES_TWITTER_URL_TwitterUrlCascades.csv",ios::out|ios::app); for(i=0;i<cascadesOnTwitterUrls.Len();i++) { quoteIndex = cascadesOnTwitterUrls.GetKey(i); quotesContent3 << quotes.GetKey(quoteIndex).CStr() << "\r\n"; for(j=0;j<quotes[quoteIndex].Len();j++) { for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) { externalLinks3 << quotes[quoteIndex][j].explicit_links[k].Val << "," << 
quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE } memeTimes3 << quotes[quoteIndex][j].time.GetAbsSecs() << ","; memeWebs3 << quotes[quoteIndex][j].post.Val << ","; } memeTimes3 << "\r\n"; memeWebs3 << "\r\n"; externalLinks3 << "-1\r\n"; // this means that the external links for this quote is finished for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++) { twitter3 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ","; } twitter3 << "\r\n"; } quotesContent3.close(); memeWebs3.close(); memeTimes3.close(); externalLinks3.close(); twitter3.close(); // INTERSECT OF URLS OF TEXTS Cascades Over Memes and Twitter ofstream quotesContent4("TRIPLE_QuotesContent.csv",ios::out|ios::app); ofstream memeWebs4("TRIPLE_MemesCascadesWebs.csv",ios::out|ios::app); ofstream memeTimes4("TRIPLE_MemesCascadesTimes.csv",ios::out|ios::app); ofstream externalLinks4("TRIPLE_MemesExternalLinks.csv",ios::out|ios::app); ofstream twitter4("TRIPLE_TwitterUrlCascades.csv",ios::out|ios::app); ofstream twitterContent4("TRIPLE_TwitterTextCascades.csv",ios::out|ios::app); for(i=0;i<cascadesOnTwitterUrls.Len();i++) { quoteIndex = cascadesOnTwitterUrls.GetKey(i); if(cascadesOnTwitterContents.GetKeyId(quoteIndex) == -1) { continue; } quotesContent4 << quotes.GetKey(quoteIndex).CStr() << "\r\n"; for(j=0;j<quotes[quoteIndex].Len();j++) { for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) { externalLinks4 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE } memeTimes4 << quotes[quoteIndex][j].time.GetAbsSecs() << ","; memeWebs4 << quotes[quoteIndex][j].post.Val << ","; } memeTimes4 << "\r\n"; memeWebs4 << "\r\n"; externalLinks4 << "-1\r\n"; // this means that the external links for this quote is finished for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++) { twitterContent4 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ","; } 
for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++) { twitter4 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ","; } twitter4 << "\r\n"; twitterContent4 << "\r\n"; } quotesContent4.close(); memeWebs4.close(); memeTimes4.close(); externalLinks4.close(); twitter4.close(); twitterContent4.close(); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
// Q: Do we want to have any gurantees in terms of order of the 0t rows - i.e. // ordered by "this" table row idx as primary key and "Table" row idx as secondary key // This means only keeping joint row indices (pairs of original row indices), sorting them // and adding all rows in the end. Sorting can be expensive, but we would be able to pre-allocate // memory for the joint table.. PTable TTable::Join(TStr Col1, const TTable& Table, TStr Col2) { if(!ColTypeMap.IsKey(Col1)){ TExcept::Throw("no such column " + Col1); } if(!ColTypeMap.IsKey(Col2)){ TExcept::Throw("no such column " + Col2); } if (GetColType(Col1) != GetColType(Col2)) { TExcept::Throw("Trying to Join on columns of different type"); } // initialize result table PTable JointTable = InitializeJointTable(Table); // hash smaller table (group by column) TYPE ColType = GetColType(Col1); TBool ThisIsSmaller = (NumValidRows <= Table.NumValidRows); const TTable& TS = ThisIsSmaller ? *this : Table; const TTable& TB = ThisIsSmaller ? Table : *this; TStr ColS = ThisIsSmaller ? Col1 : Col2; TStr ColB = ThisIsSmaller ? Col2 : Col1; // iterate over the rows of the bigger table and check for "collisions" // with the group keys for the small table. 
switch(ColType){ case INT:{ THash<TInt, TIntV> T; TS.GroupByIntCol(Col1, T, TIntV(), true); for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){ TInt K = RowI.GetIntAttr(ColB); if(T.IsKey(K)){ TIntV& Group = T.GetDat(K); for(TInt i = 0; i < Group.Len(); i++){ if(ThisIsSmaller){ JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx()); } else{ JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]); } } } } break; } case FLT:{ THash<TFlt, TIntV> T; TS.GroupByFltCol(Col1, T, TIntV(), true); for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){ TFlt K = RowI.GetFltAttr(ColB); if(T.IsKey(K)){ TIntV& Group = T.GetDat(K); for(TInt i = 0; i < Group.Len(); i++){ if(ThisIsSmaller){ JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx()); } else{ JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]); } } } } break; } case STR:{ THash<TStr, TIntV> T; TS.GroupByStrCol(Col1, T, TIntV(), true); for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){ TStr K = RowI.GetStrAttr(ColB); if(T.IsKey(K)){ TIntV& Group = T.GetDat(K); for(TInt i = 0; i < Group.Len(); i++){ if(ThisIsSmaller){ JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx()); } else{ JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]); } } } } } break; } return JointTable; }
// Looks up the socket event registered under SockEventId in IdToSockEventH and
// returns it. What happens for an id that is not present is decided by
// IdToSockEventH.GetDat (not visible here) - presumably an assertion; verify.
PSockEvent TSockSys::GetSockEvent(const uint64& SockEventId) const {
  const PSockEvent& SockEvent = IdToSockEventH.GetDat(SockEventId);
  return SockEvent;
}