Пример #1
0
/// Appends an integer column named CountColName in which every row holds the
/// number of rows that share that row's value in column Col.
/// Throws (via TExcept::Throw) when Col is not a known column.
void TTable::Count(TStr CountColName, TStr Col){
  if(!ColTypeMap.IsKey(Col)){TExcept::Throw("no such column " + Col);}
  TIntV Counts(NumRows);
  const TYPE ColType = GetColType(Col);
  if(ColType == INT){
    // value -> group of rows; size is unknown up front so no constructor hint
    THash<TInt,TIntV> Groups;
    TIntV& Vals = IntCols[GetColIdx(Col)];
    GroupByIntCol(Col, Groups, TIntV(0), true);
    for(TRowIterator RI = BegRI(); RI < EndRI(); RI++){
      Counts[RI.GetRowIdx()] = Groups.GetDat(Vals[RI.GetRowIdx()]).Len();
    }
  } else if(ColType == FLT){
    THash<TFlt,TIntV> Groups;
    TFltV& Vals = FltCols[GetColIdx(Col)];
    GroupByFltCol(Col, Groups, TIntV(0), true);
    for(TRowIterator RI = BegRI(); RI < EndRI(); RI++){
      Counts[RI.GetRowIdx()] = Groups.GetDat(Vals[RI.GetRowIdx()]).Len();
    }
  } else if(ColType == STR){
    THash<TStr,TIntV> Groups;
    GroupByStrCol(Col, Groups, TIntV(0), true);
    for(TRowIterator RI = BegRI(); RI < EndRI(); RI++){
      Counts[RI.GetRowIdx()] = Groups.GetDat(GetStrVal(Col, RI.GetRowIdx())).Len();
    }
  }
  // register the new count column in the storage, the schema and the type map
  IntCols.Add(Counts);
  AddSchemaCol(CountColName, INT);
  ColTypeMap.AddDat(CountColName, TPair<TYPE,TInt>(INT, IntCols.Len()-1));
}
Пример #2
0
/// Reinforces edge weights by repeated weighted random sampling.
/// IN-OUT edges are swapped (so that the prog runs faster): each node sends a
/// message via an IN edge with probability proportional to the OUT edge weight.
/// In every iteration each node picks one of its out-edges at random,
/// proportionally to the edge data, and the reverse edge (neighbor -> node)
/// has its data incremented by one.
/// @param NIters number of passes over all nodes.
void TWgtNet::ReinforceEdges(const int& NIters) {
  // per-node sum of out-edge data; kept up to date as edges are reinforced
  THash<TInt, TFlt> OutWgtSumH;
  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
    double wgt = 0;
    for (int e = 0; e < NI.GetOutDeg(); e++) {
      wgt += NI.GetOutEDat(e); }
    OutWgtSumH.AddDat(NI.GetId(), wgt);
  }
  printf("Reinforcing edges for %d iterations\n", NIters);
  // progress is reported about every 1% of the run; the step is clamped to 1
  // because NIters/100 is 0 for NIters < 100 and "iter % 0" is undefined
  const int ReportStep = (NIters >= 100) ? NIters / 100 : 1;
  // iterate
  TExeTm ExeTm;
  for (int iter = 0; iter < NIters; iter++) {
    for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
      const int OutDeg = NI.GetOutDeg();
      if (OutDeg == 0) { continue; } // no out-edge to sample from
      const double X = TInt::Rnd.GetUniDev() * OutWgtSumH.GetDat(NI.GetId());
      double x = 0;  int e = 0;
      // walk the cumulative weights; never step past the last out-edge even
      // if floating point rounding makes the running sum fall short of X
      for ( ; e + 1 < OutDeg && x + NI.GetOutEDat(e) < X; e++) {
        x += NI.GetOutEDat(e); }
      IAssert(IsEdge(NI.GetOutNId(e), NI.GetId()));
      GetEDat(NI.GetOutNId(e), NI.GetId()) += 1; // reinforce the edge
      OutWgtSumH.GetDat(NI.GetOutNId(e)) += 1;
    }
    if (iter % ReportStep == 0) {
      printf("\r%d [%s]", iter, ExeTm.GetStr());
    }
  }
  printf(" done.\n");
}
Пример #3
0
/// save bipartite community affiliation into gexf file
void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) {
    /// Plot bipartite graph
    if (CmtyVV.Len() == 0) {
        return;
    }
    double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0;
    double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax;
    double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len();
    THash<TInt,TIntV> NIDComVH;
    TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV);

    FILE* F = fopen(OutFNm.CStr(), "wt");
    fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
    fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
    fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
    fprintf(F, "\t\t<nodes>\n");
    for (int c = 0; c < CmtyVV.Len(); c++) {
        int CID = c;
        double XPos = c * CStep + CXMin;
        TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120);
        fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID);
        fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
        fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz);
        fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
        fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
        fprintf(F, "\t\t\t</node>\n");
    }

    for (int u = 0; u < NIDV.Len(); u++) {
        int NID = NIDV[u];
        TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
        double Size = MinSz;
        double XPos = NXMin + u * NStep;
        TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
        double Alpha = 1.0;
        fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
        fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
        fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size);
        fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
        fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
        fprintf(F, "\t\t\t</node>\n");
    }
    fprintf(F, "\t\t</nodes>\n");
    fprintf(F, "\t\t<edges>\n");
    int EID = 0;
    for (int u = 0; u < NIDV.Len(); u++) {
        int NID = NIDV[u];
        if (NIDComVH.IsKey(NID)) {
            for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) {
                int CID = NIDComVH.GetDat(NID)[c];
                fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID);
            }
        }
    }
    fprintf(F, "\t\t</edges>\n");
    fprintf(F, "\t</graph>\n");
    fprintf(F, "</gexf>\n");
}
Пример #4
0
 /// Pops entries off MxQHeap until one is found whose two community ids
 /// (Val2, Val3) are both still keys of CmtyQH and whose value (Val1) equals
 /// the GetMxQ() of at least one of them; that entry is returned.
 /// Returns (-1, -1, -1) when the heap runs empty first.
 TFltIntIntTr FindMxQEdge() {
   while (! MxQHeap.Empty()) {
     const TFltIntIntTr Cand = MxQHeap.PopHeap();
     // skip entries that refer to a community no longer present
     const bool BothPresent = CmtyQH.IsKey(Cand.Val2) && CmtyQH.IsKey(Cand.Val3);
     if (! BothPresent) { continue; }
     // skip entries that match neither endpoint's current maximum
     const bool DiffersFromVal2 = (Cand.Val1!=CmtyQH.GetDat(Cand.Val2).GetMxQ());
     if (DiffersFromVal2 && Cand.Val1!=CmtyQH.GetDat(Cand.Val3).GetMxQ()) { continue; }
     return Cand;
   }
   return TFltIntIntTr(-1, -1, -1);
 }
Пример #5
0
/// Computes banded min-hash buckets for the quotes referenced by ShingleToQuoteIds.
/// For each of NumBands bands, BandSize random permutations of the shingle keys
/// are drawn; a quote's signature entry for a permutation is the position of the
/// first shingle (in permuted order) that contains the quote. Quotes with equal
/// band signatures end up in the same bucket.
/// @param ShingleToQuoteIds    shingle hash -> ids of quotes containing it
/// @param SignatureBandBuckets output; one (band signature -> quote ids) table is appended per band
void LSH::MinHash(THash<TMd5Sig, TIntSet>& ShingleToQuoteIds,
    TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) {
  TRnd RandomGenerator; // TODO: make this "more random" by incorporating time
  for (int i = 0; i < NumBands; ++i) {
    THash < TInt, TIntV > Inverted; // (QuoteID, QuoteSignatureForBand)
    THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs)
    for (int j = 0; j < BandSize; ++j) {
      // Create new signature
      TVec < TMd5Sig > Signature;
      ShingleToQuoteIds.GetKeyV(Signature);
      Signature.Shuffle(RandomGenerator);

      // Place in bucket - not very efficient
      const int SigLen = Signature.Len();
      for (int k = 0; k < SigLen; ++k) {
        // bind by reference: the set is only read, copying it per shingle is wasted work
        TIntSet& CurSet = ShingleToQuoteIds.GetDat(Signature[k]);
        for (TIntSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) {
          // AddDat(key) returns the existing signature or a fresh empty one,
          // so the vector is extended in place instead of copy/modify/re-insert
          TIntV& CurSignature = Inverted.AddDat(l.GetKey());
          // only the first hit in this permutation (the minimum position) counts
          if (CurSignature.Len() <= j) {
            CurSignature.Add(k);
          }
        }
      }
    }

    TIntV InvertedKeys;
    Inverted.GetKeyV(InvertedKeys);
    const int InvertedLen = InvertedKeys.Len();
    for (int k = 0; k < InvertedLen; ++k) {
      const TIntV& Signature = Inverted.GetDat(InvertedKeys[k]);
      // insert the quote id straight into the (possibly new) bucket
      BandBuckets.AddDat(Signature).AddKey(InvertedKeys[k]);
    }

    SignatureBandBuckets.Add(BandBuckets);
    Err("%d out of %d band signatures computed\n", i + 1, NumBands);
  }
  Err("Minhash step complete!\n");
}
Пример #6
0
/// Loads the quote base and document base from a fixed file, groups the parsed
/// content strings of all quotes by their number of peaks, and prints the
/// groups in ascending order of peak count.
int main(int argc, char *argv[]) {
  TStr BaseString = "/lfs/1/tmp/curis/week/QBDB.bin";
  TFIn BaseFile(BaseString);
  TQuoteBase *QB = new TQuoteBase;
  TDocBase *DB = new TDocBase;
  QB->Load(BaseFile);
  DB->Load(BaseFile);

  TIntV QuoteIds;
  QB->GetAllQuoteIds(QuoteIds);

  const int NumQuotes = QuoteIds.Len();
  THash<TInt, TStrSet> PeakCounts; // number of peaks -> quote strings
  for (int i = 0; i < NumQuotes; i++) {
    TQuote CurQuote;
    if (QB->GetQuote(QuoteIds[i], CurQuote)) {
      TVec<TSecTm> Peaks;
      CurQuote.GetPeaks(DB, Peaks);
      TStr QuoteString;
      CurQuote.GetParsedContentString(QuoteString);
      // AddDat(key) returns the existing set (or a new empty one), so the
      // string is inserted in place instead of copying the whole set twice
      PeakCounts.AddDat(Peaks.Len()).AddKey(QuoteString);
    }
  }

  TIntV PeakCountKeys;
  PeakCounts.GetKeyV(PeakCountKeys);
  PeakCountKeys.Sort(true);
  for (int i = 0; i < PeakCountKeys.Len(); i++) {
    // iterate the stored set directly; no per-group copies are needed for printing
    THashSet<TStr>& StringSet = PeakCounts.GetDat(PeakCountKeys[i]);
    if (StringSet.Len() > 0) {
      printf("QUOTES WITH %d PEAKS\n", PeakCountKeys[i].Val);
      printf("#########################################\n");
      for (THashSet<TStr>::TIter l = StringSet.BegI(); l < StringSet.EndI(); l++) {
        printf("%s\n", l.GetKey().CStr());
      }
      printf("\n");
    }
  }
  delete QB;
  delete DB;
  return 0;
}
Пример #7
0
/// For every quote in QuoteBase, hashes each ShingleLen-character shingle of
/// its parsed content string and records the quote id under that hash.
/// @param QuoteBase         source of quotes
/// @param ShingleLen        number of characters per shingle
/// @param ShingleToQuoteIds output; shingle hash -> ids of quotes containing it
void LSH::ElCheapoHashing(TQuoteBase *QuoteBase, TInt ShingleLen,
    THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) {
  fprintf(stderr, "Hashing shingles the el cheapo way...\n");
  TIntV QuoteIds;
  QuoteBase->GetAllQuoteIds(QuoteIds);
  for (int qt = 0; qt < QuoteIds.Len(); qt++) {
    if (qt % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len());
    }
    TQuote Q;
    QuoteBase->GetQuote(QuoteIds[qt], Q);

    // Put x-character (or x-word) shingles into hash table; x is specified by ShingleLen parameter
    TStr QContentStr;
    Q.GetParsedContentString(QContentStr);
    TChA QContentChA = TChA(QContentStr);

    for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) {
      TChA ShingleChA = TChA();
      for (int j = 0; j < ShingleLen; j++) {
        ShingleChA.AddCh(QContentChA.GetCh(i + j));
      }
      TStr Shingle = TStr(ShingleChA);
      const TMd5Sig ShingleMd5(Shingle);
      // AddDat(key) yields the existing id set or a new empty one; inserting in
      // place avoids copying the whole set out and back in for every shingle
      ShingleToQuoteIds.AddDat(ShingleMd5).AddKey(QuoteIds[qt]);
    }
  }
  Err("Done with el cheapo hashing!\n");
}
Пример #8
0
/// Shingles by words.
/// For every cluster id in ClusterIds, obtains the cluster's hashed shingles via
/// GetHashedShinglesOfCluster and appends the cluster id to each shingle's list.
/// @param QuoteBase           quote base passed through to GetHashedShinglesOfCluster
/// @param ClusterBase         source of clusters
/// @param ClusterIds          ids of the clusters to process
/// @param ShingleLen          shingle length passed through to GetHashedShinglesOfCluster
/// @param ShingleToClusterIds output; shingle hash -> ids of clusters containing it
void LSH::HashShinglesOfClusters(TQuoteBase *QuoteBase,
    TClusterBase *ClusterBase, TIntV& ClusterIds, TInt ShingleLen,
    THash<TMd5Sig, TIntV>& ShingleToClusterIds) {
  Err("Hashing shingles of clusters...\n");
  for (int i = 0; i < ClusterIds.Len(); i++) {
    if (i % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", i, ClusterIds.Len());
    }
    TCluster C;
    ClusterBase->GetCluster(ClusterIds[i], C);

    // Put x-word shingles into hash table; x is specified by ShingleLen parameter
    THashSet < TMd5Sig > CHashedShingles;
    GetHashedShinglesOfCluster(QuoteBase, C, ShingleLen, CHashedShingles);
    for (THashSet<TMd5Sig>::TIter Hash = CHashedShingles.BegI();
        Hash < CHashedShingles.EndI(); Hash++) {
      // append in place; AddDat(key) returns the existing vector or a new empty
      // one, which avoids copying the whole id vector per shingle
      ShingleToClusterIds.AddDat(*Hash).Add(ClusterIds[i]);
    }
  }
  Err("Done hashing!\n");
}
Пример #9
0
/// Starts an asynchronous TCP connect of socket SockId to SockHost:PortN.
/// TSockSys::OnConnect is registered as the completion callback.
/// Asserts that SockId is a known socket and that SockHost is ok.
/// Throws when the connect request cannot be issued.
void TSockSys::Connect(const uint64& SockId, const PSockHost& SockHost, const int& PortN) {
	// make sure it's a valid socket
	IAssert(IsSock(SockId));
	uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId);
	// make sure we got a valid socket host
	IAssert(SockHost->IsOk());
	// get connection handle
	uv_connect_t* ConnectHnd = (uv_connect_t*)malloc(sizeof(uv_connect_t));
	// special handling for v4 and v6
	int ResCd = 0;
	if (SockHost->IsIpv4()) {
		// get address
		struct sockaddr_in Addr = uv_ip4_addr(SockHost->GetIpNum().CStr(), PortN);
		// establish connection
		ResCd = uv_tcp_connect(ConnectHnd, SockHnd, Addr, TSockSys::OnConnect);
	} else if (SockHost->IsIpv6()) {
		// get address
		struct sockaddr_in6 Addr = uv_ip6_addr(SockHost->GetIpNum().CStr(), PortN);
		// establish connection
		ResCd = uv_tcp_connect6(ConnectHnd, SockHnd, Addr, TSockSys::OnConnect);
	} else {
		// no request was issued, so nobody else will ever release the request
		// NOTE(review): this case still returns silently as before — confirm
		// whether it should raise an error instead
		free(ConnectHnd);
		return;
	}
	// check for errors
	if (ResCd != 0) {
		// release the request allocated above; the socket handle itself stays
		// registered in SockIdToHndH and must not be freed here
		free(ConnectHnd);
		// and throw exception
		throw TExcept::New("SockSys.Connect: Error establishing socket connection: " + SockSys.GetLastErr());
	}
}
Пример #10
0
int ComputeKCore(const PUNGraph& G) {
  int cnt = 0;
  for(TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++)
    cnt = max(cnt, NI.GetOutDeg());
  THashSet <TInt> D[cnt+1];
  THash <TInt, TInt> deg;
  for(TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) { 
    TInt tmp = NI.GetOutDeg() - G->IsEdge(NI.GetId(), NI.GetId() );
    D[tmp.Val].AddKey(NI.GetId());
    deg.AddDat(NI.GetId()) = tmp;
  }
  int max_k = 0;
  for(int num_iters = 0;num_iters < G->GetNodes(); num_iters++)
    for(int i = 0; i < cnt; i++)
      if(D[i].Empty() == 0) {
        max_k = max(max_k, i);
        TInt a = *(D[i].BegI());
        D[i].DelKey(a);
        deg.AddDat(a.Val) = -1; // Hope overwriting works
        TUNGraph::TNodeI NI = G->GetNI(a.Val);
        for(int e = 0; e < NI.GetOutDeg(); e++) {
          TInt b = NI.GetOutNId(e);
          if(deg.GetDat(b) >= 0) {
            int Id = deg.GetKeyId(b); 
            D[deg[Id].Val].DelKey(b);
            deg[Id] = deg[Id] - 1;  //Hope the overwriting works
            D[deg[Id]].AddKey(b);
          }
        }
        break;
      }
  return max_k;
}
Пример #11
0
/// Adds one edge per table row to Graph, from the row's SrcCol value to its
/// DstCol value; the row index is used as the edge id.
/// INT values are used directly as node ids, STR values use the entry stored in
/// StrColMaps, and FLT values are mapped to consecutive ids (starting at 0)
/// that are recorded in FSrNodeMap (source side) and FDsNodeMap (destination side).
/// Rows whose source column is STR are not handled by any branch and add nothing.
/// NOTE (from the original author): "wrong reading of string attributes".
/// @param Graph      graph receiving the nodes and edges
/// @param FSrNodeMap output; float source value -> assigned node id
/// @param FDsNodeMap output; float destination value -> assigned node id
void TTable::BuildGraphTopology(PNEAGraph& Graph, THash<TFlt, TInt>& FSrNodeMap, THash<TFlt, TInt>& FDsNodeMap) {
  const TYPE SrCT = GetColType(SrcCol);
  const TInt SrIdx = GetColIdx(SrcCol);
  const TYPE DsCT = GetColType(DstCol);
  const TInt DsIdx = GetColIdx(DstCol);
  TInt SrcCnt = 0;
  TInt DstCnt = 0;

  for(TRowIterator RowI = BegRI(); RowI < EndRI(); RowI++) {
    const int RowIdx = RowI.GetRowIdx();
    if (SrCT == INT && DsCT == INT) {
      Graph->AddNode(IntCols[SrIdx][RowIdx]);
      Graph->AddNode(IntCols[DsIdx][RowIdx]);
      Graph->AddEdge(IntCols[SrIdx][RowIdx], IntCols[DsIdx][RowIdx], RowIdx);
    } else if (SrCT == INT && DsCT == FLT) {
      Graph->AddNode(IntCols[SrIdx][RowIdx]);
      const TFlt DsVal = FltCols[DsIdx][RowIdx];
      if (!FDsNodeMap.IsKey(DsVal)) {
        FDsNodeMap.AddDat(DsVal, DstCnt++);
      }
      Graph->AddNode(FDsNodeMap.GetDat(DsVal));
      // pass the row index as edge id, like every other branch does
      Graph->AddEdge(IntCols[SrIdx][RowIdx], FDsNodeMap.GetDat(DsVal), RowIdx);
    } else if (SrCT == INT && DsCT == STR) {
      Graph->AddNode(IntCols[SrIdx][RowIdx]);
      Graph->AddNode(StrColMaps[DsIdx][RowIdx]);
      Graph->AddEdge(IntCols[SrIdx][RowIdx], StrColMaps[DsIdx][RowIdx], RowIdx);
    } else if (SrCT == FLT && DsCT == INT) {
      Graph->AddNode(IntCols[DsIdx][RowIdx]);
      const TFlt SrVal = FltCols[SrIdx][RowIdx];
      if (!FSrNodeMap.IsKey(SrVal)) {
        FSrNodeMap.AddDat(SrVal, SrcCnt++);
      }
      Graph->AddNode(FSrNodeMap.GetDat(SrVal));
      // destination is the INT destination column (was read via the source index)
      Graph->AddEdge(FSrNodeMap.GetDat(SrVal), IntCols[DsIdx][RowIdx], RowIdx);
    } else if (SrCT == FLT && DsCT == STR) {
      Graph->AddNode(StrColMaps[DsIdx][RowIdx]);
      const TFlt SrVal = FltCols[SrIdx][RowIdx];
      if (!FSrNodeMap.IsKey(SrVal)) {
        FSrNodeMap.AddDat(SrVal, SrcCnt++);
      }
      Graph->AddNode(FSrNodeMap.GetDat(SrVal));
      // destination is the node added above (was read from IntCols via the source index)
      Graph->AddEdge(FSrNodeMap.GetDat(SrVal), StrColMaps[DsIdx][RowIdx], RowIdx);
    } else if (SrCT == FLT && DsCT == FLT) {
      // keep source and destination values apart: reusing one variable made
      // the source lookup use the destination value
      const TFlt SrVal = FltCols[SrIdx][RowIdx];
      if (!FSrNodeMap.IsKey(SrVal)) {
        FSrNodeMap.AddDat(SrVal, SrcCnt++);
      }
      Graph->AddNode(FSrNodeMap.GetDat(SrVal));
      const TFlt DsVal = FltCols[DsIdx][RowIdx];
      if (!FDsNodeMap.IsKey(DsVal)) {
        FDsNodeMap.AddDat(DsVal, DstCnt++);
      }
      Graph->AddNode(FDsNodeMap.GetDat(DsVal));
      Graph->AddEdge(FSrNodeMap.GetDat(SrVal), FDsNodeMap.GetDat(DsVal), RowIdx);
    }
  }
}
Пример #12
0
/// Returns the graph node id that corresponds to the value of column Col in row RowIdx.
/// INT columns yield the stored integer, STR columns yield the StrColMaps entry,
/// and FLT columns are looked up in FSrNodeMap (when Col is SrcCol) or
/// FDsNodeMap (when Col is DstCol).
/// Throws (via TExcept::Throw) when a FLT column is neither SrcCol nor DstCol.
/// @param Col        column name
/// @param RowIdx     row index
/// @param FSrNodeMap float source value -> node id
/// @param FDsNodeMap float destination value -> node id
TInt TTable::GetNId(TStr Col, TInt RowIdx, THash<TFlt, TInt>& FSrNodeMap, THash<TFlt, TInt>& FDsNodeMap) {
  const TYPE CT = GetColType(Col);
  const TInt Idx = GetColIdx(Col);
  if (CT == INT) {
    // storage is indexed [column][row], same as FltCols below
    return IntCols[Idx][RowIdx];
  } else if (CT == FLT) {
    if (Col == SrcCol) {
      return FSrNodeMap.GetDat(FltCols[Idx][RowIdx]);
    } else if (Col == DstCol) {
      return FDsNodeMap.GetDat(FltCols[Idx][RowIdx]);
    } else {
      TExcept::Throw("Column " + Col + " is not source node or destination column");
    }
  } else {
    // storage is indexed [column][row]
    return StrColMaps[Idx][RowIdx]();
  }
  return 0;
}
Пример #13
0
/// Requests closing of the handle registered for SockId and records the id in
/// ClosedSockIdSet. Does nothing when the socket system is not Active.
void TSockSys::DelSock(const uint64& SockId) {
	if (!Active) { return; }
	// ask libuv to close the handle; TSockSys::OnClose is passed as the callback
	uv_close((uv_handle_t*)SockIdToHndH.GetDat(SockId), TSockSys::OnClose);
	// remember that closing was requested so the destructor does not handle it again
	ClosedSockIdSet.AddKey(SockId);
}
Пример #14
0
void TSockSys::DelIfSockTimer(const uint64& SockId) {
	if (SockIdToTimerHndH.IsKey(SockId)) {
		// get timer handle
		uv_timer_t* TimerHnd = SockIdToTimerHndH.GetDat(SockId);
		// stop the timer
		uv_timer_stop(TimerHnd);
		// remove shortcuts
		SockIdToTimerHndH.DelKey(SockId);
		TimerHndToSockIdH.DelKey((uint64)TimerHnd);
		// remove shortcuts
	}
}
Пример #15
0
/// Returns the display name of a graph flag.
/// The flag -> name table is a function-local static filled on the first call.
TStr GetFlagStr(const TGraphFlag& GraphFlag) {
  static THash<TInt, TStr> FlagNmH;
  const bool NeedsInit = FlagNmH.Empty();
  if (NeedsInit) {
    FlagNmH.AddDat((int) gfUndef, "Undef");
    FlagNmH.AddDat((int) gfDirected, "Directed");
    FlagNmH.AddDat((int) gfMultiGraph, "MultiGraph");
    FlagNmH.AddDat((int) gfNodeDat, "NodeDat");
    FlagNmH.AddDat((int) gfEdgeDat, "EdgeDat");
    FlagNmH.AddDat((int) gfSources, "Sources");
  }
  const int FlagId = (int) GraphFlag;
  return FlagNmH.GetDat(FlagId);
}
Пример #16
0
/// Resets the inference state and prepares the candidate edges.
/// Creates a fresh Graph containing every node that occurs in some cascade of
/// CascV, calls InitProb() on every cascade, and fills EdgeGainV / CascPerEdge
/// with every ordered node pair (u, v) for which at least one cascade contains
/// both nodes with GetTm(u) < GetTm(v).
void TNetInfBs::Init() {
	// node id -> indices (into CascV) of the cascades that contain the node
	THash<TInt, TIntV> CascPN;
    Graph = TNGraph::New();

    // reset vectors
    EdgeGainV.Clr();
    CascPerEdge.Clr();
    PrecisionRecall.Clr();

    // add every cascade node to the graph and record the cascades per node
    for (int c = 0; c < CascV.Len(); c++) {
      for (int i = 0; i < CascV[c].Len(); i++) {
        if (!Graph->IsNode(CascV[c].GetNode(i))) Graph->AddNode(CascV[c].GetNode(i));
        if (!CascPN.IsKey(CascV[c].GetNode(i))) CascPN.AddDat(CascV[c].GetNode(i)) = TIntV();
        CascPN.GetDat(CascV[c].GetNode(i)).Add(c);
      }
      CascV[c].InitProb();
    }

    // only add edges that make sense (i.e., at least once coherent in time)
    for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    	// cascades that contain the current (destination) node
    	TIntV &Cascs = CascPN.GetDat(NI.GetId());
    	for (int c = 0; c < Cascs.Len(); c++) {
    		for (int i=0; i < CascV[Cascs[c]].Len(); i++) {
    			// no self edges
    			if (CascV[Cascs[c]].GetNode(i)==NI.GetId())
    				continue;

    			// candidate source must have a smaller time than the current node in this cascade
    			if (CascV[Cascs[c]].GetTm(CascV[Cascs[c]].GetNode(i)) < CascV[Cascs[c]].GetTm(NI.GetId()) ) {
    				// first time this pair is seen: register it with gain TFlt::Mx
    				if (!CascPerEdge.IsKey(TIntPr(CascV[Cascs[c]].GetNode(i), NI.GetId()))) {
    					EdgeGainV.Add(TPair<TFlt, TIntPr>(TFlt::Mx, TIntPr(CascV[Cascs[c]].GetNode(i), NI.GetId())));
    					CascPerEdge.AddDat(TIntPr(CascV[Cascs[c]].GetNode(i), NI.GetId())) = TIntV();
    				}
    				// Add cascade to hash of cascades per edge (to implement localized update)
    				CascPerEdge.GetDat(TIntPr(CascV[Cascs[c]].GetNode(i), NI.GetId())).Add(Cascs[c]);
    			}
    		}
    	}
    }
}
Пример #17
0
 /// Merges the pair of communities returned by FindMxQEdge().
 /// Community I (TopQ.Val3) is merged into community J (TopQ.Val2): Q is
 /// increased by the pair's value, J's link values to every neighbor K of
 /// I or J are recomputed and pushed onto MxQHeap, and I is deleted from CmtyQH.
 /// Returns false, changing nothing, when the returned value is <= 0
 /// (this includes the (-1,-1,-1) result of an exhausted heap).
 bool MergeBestQ() {
   const TFltIntIntTr TopQ = FindMxQEdge();
   if (TopQ.Val1 <= 0.0) { return false; }
   // joint communities
   const int I = TopQ.Val3;
   const int J = TopQ.Val2;
   CmtyIdUF.Union(I, J); // join
   Q += TopQ.Val1;
   TCmtyDat& DatJ = CmtyQH.GetDat(J);
   // inner scope: DatI is not referenced after this block; I is deleted from CmtyQH below
   { TCmtyDat& DatI = CmtyQH.GetDat(I);
   // the link between I and J itself disappears
   DatI.DelLink(J);  DatJ.DelLink(I);
   // pass 1: every neighbor K of J
   for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) {
     const int K = DatJ.NIdQH.GetKey(i);
     TCmtyDat& DatK = CmtyQH.GetDat(K);
     double NewQ = DatJ.NIdQH[i];
     if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ+DatI.NIdQH.GetDat(K);  DatK.DelLink(I); }     // K connected to I and J
     else { NewQ = NewQ-2*DatI.DegFrac*DatK.DegFrac; }  // K connected to J not I
     DatJ.AddQ(K, NewQ);
     DatK.AddQ(J, NewQ);
     // heap entries carry the pair as (smaller id, larger id)
     MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
   }
   // pass 2: neighbors K of I that were not already handled as neighbors of J
   for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) {
     const int K = DatI.NIdQH.GetKey(i);
     if (! DatJ.NIdQH.IsKey(K)) { // K connected to I not J
       TCmtyDat& DatK = CmtyQH.GetDat(K);
       const double NewQ = DatI.NIdQH[i]-2*DatJ.DegFrac*DatK.DegFrac;
       DatJ.AddQ(K, NewQ);
       DatK.DelLink(I);
       DatK.AddQ(J, NewQ);
       MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
     }
   }
   // J absorbs I's degree fraction
   DatJ.DegFrac += DatI.DegFrac; }
   if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done)
   CmtyQH.DelKey(I);
   return true;
 }
Пример #18
0
/// JavaScript binding: sets one or several sensor values through the radio.
/// The first argument is either a single object {sensorId, value} or an array
/// of such objects. Array entries are grouped per radio node and sent with one
/// Radio.Set call per node. The JS return value is the boolean success flag.
/// Returns without setting a return value when called with no arguments.
void TNodeJsRf24Radio::set(const v8::FunctionCallbackInfo<v8::Value>& Args) {
	v8::Isolate* Isolate = v8::Isolate::GetCurrent();
	v8::HandleScope HandleScope(Isolate);

	TNodeJsRf24Radio* JsRadio = ObjectWrap::Unwrap<TNodeJsRf24Radio>(Args.Holder());

	if (Args.Length() == 0) { return; }

	const PJsonVal ArgVal = TNodeJsUtil::GetArgJson(Args, 0);
	bool Success = true;

	if (!ArgVal->IsArr()) {
		// single {sensorId, value} object
		const TStr SensorNm = ArgVal->GetObjStr("sensorId");
		const int SensorVal = ArgVal->GetObjInt("value");

		// sensor name -> (node id, value id)
		const TIntPr& NodeValIdPr = JsRadio->ValNmNodeIdValIdPrH.GetDat(SensorNm);
		const uint16 NodeId = (uint16) NodeValIdPr.Val1;
		const int ValId = NodeValIdPr.Val2;

		Success = JsRadio->Radio.Set(NodeId, ValId, SensorVal);
	} else {
		// node id -> list of (value id, value) pairs destined for that node
		THash<TInt, TIntPrV> NodeIdToValPrVH;

		const int Items = ArgVal->GetArrVals();
		for (int ItemN = 0; ItemN < Items; ItemN++) {
			const PJsonVal ItemJson = ArgVal->GetArrVal(ItemN);

			const TStr SensorNm = ItemJson->GetObjStr("sensorId");
			const int SensorVal = ItemJson->GetObjInt("value");

			const TIntPr& NodeValIdPr = JsRadio->ValNmNodeIdValIdPrH.GetDat(SensorNm);
			const uint16 NodeId = NodeValIdPr.Val1;
			const int ValId = NodeValIdPr.Val2;

			// AddDat(key) returns the existing list or creates an empty one
			NodeIdToValPrVH.AddDat(NodeId).Add(TIntPr(ValId, SensorVal));
		}

		// one radio call per node; every call is made even after a failure
		for (int KeyId = NodeIdToValPrVH.FFirstKeyId(); NodeIdToValPrVH.FNextKeyId(KeyId); ) {
			const uint16 NodeId = NodeIdToValPrVH.GetKey(KeyId);
			const TIntPrV& ValPrV = NodeIdToValPrVH[KeyId];
			Success &= JsRadio->Radio.Set(NodeId, ValPrV);
		}
	}

	Args.GetReturnValue().Set(v8::Boolean::New(Isolate, Success));
}
Пример #19
0
/// For every quote, add it to corresponding bucket for each hashed x-character shingle of the quote
/// (Shingles by characters).
/// Quotes for which CB->IsQuoteInArchivedCluster() is true are skipped. Each
/// shingle is recorded together with word indices: the current word and up to
/// WordWindow-1 preceding words (never below word 0).
/// @param QuoteBase         source of quotes
/// @param CB                cluster base used for the archived-cluster check
/// @param ShingleLen        number of characters per shingle
/// @param ShingleToQuoteIds output; shingle hash -> set of (quote id, word index)
void LSH::HashShingles(TQuoteBase *QuoteBase, TClusterBase *CB, TInt ShingleLen,
    THash<TMd5Sig, TShingleIdSet>& ShingleToQuoteIds) {
  Err("Hashing shingles...\n");
  TIntV QuoteIds;
  QuoteBase->GetAllQuoteIds(QuoteIds);
  for (int qt = 0; qt < QuoteIds.Len(); qt++) {
    if (qt % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len());
    }

    if (CB->IsQuoteInArchivedCluster(QuoteIds[qt]))
      continue;
    TQuote Q;
    QuoteBase->GetQuote(QuoteIds[qt], Q);

    // Put x-character (or x-word) shingles into hash table; x is specified by ShingleLen parameter
    TStr QContentStr;
    Q.GetParsedContentString(QContentStr);
    TChA QContentChA = TChA(QContentStr);

    int CurWord = 0;

    for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) {
      TChA ShingleChA = TChA();
      for (int j = 0; j < ShingleLen; j++) {
        ShingleChA.AddCh(QContentChA.GetCh(i + j));
      }
      TStr Shingle = TStr(ShingleChA);
      const TMd5Sig ShingleMd5(Shingle);
      // AddDat(key) returns the existing set or a new empty one; updating it in
      // place avoids copying the whole set out and back in for every shingle
      TShingleIdSet& ShingleQuoteIds = ShingleToQuoteIds.AddDat(ShingleMd5);

      for (int j = CurWord; j > CurWord - WordWindow && j >= 0; j--) {
        ShingleQuoteIds.AddKey(TShingleId(QuoteIds[qt], j));
      }

      // up the current word index if we see a space
      if (QContentChA.GetCh(i + ShingleLen - 1) == ' ') {
        CurWord++;
      }
    }
  }
  Err("Done hashing!\n");
}
Пример #20
0
void TGraphEnumUtils::GetNormalizedGraph(const PNGraph &G, PNGraph &nG) {
	//Get bijective map from original node ids to normalized node ids(0,1,2,...)
	THash<TInt,TInt> map;
	GetNormalizedMap(G, map);
	//Add nodes
	for(int i=0; i<G->GetNodes(); i++) nG->AddNode(i);
	//Add edges
	for(TNGraph::TEdgeI eIt=G->BegEI(); eIt<G->EndEI(); eIt++) {
		int srcId = eIt.GetSrcNId();
		int dstId = eIt.GetDstNId();
		//
		int mSrcId = map.GetDat(srcId);
		int mDstId = map.GetDat(dstId);
		//
		nG->AddEdge(mSrcId, mDstId);
	}
}
Пример #21
0
/// For every quote in QuoteBase, hashes each element of its parsed content
/// (Q.GetParsedContent) and records the quote id under that hash.
/// As a debugging aid, the first (up to) 100 words in TCmpSetByLen order are
/// printed together with the number of quotes that contain them.
/// @param QuoteBase         source of quotes
/// @param ShingleToQuoteIds output; word hash -> ids of quotes containing the word
void LSH::WordHashing(TQuoteBase *QuoteBase,
    THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) {
  fprintf(stderr, "Hashing shingles using words...\n");
  TIntV QuoteIds;
  QuoteBase->GetAllQuoteIds(QuoteIds);

  // word -> quote ids, kept only for the debug listing below
  THash<TStr, TIntSet> Temp;

  for (int qt = 0; qt < QuoteIds.Len(); qt++) {
    if (qt % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len());
    }
    TQuote Q;
    QuoteBase->GetQuote(QuoteIds[qt], Q);

    TStrV Content;
    Q.GetParsedContent(Content);

    const int ContentLen = Content.Len();
    for (int i = 0; i < ContentLen; i++) {
      const TMd5Sig ShingleMd5(Content[i]);
      // AddDat(key) returns the existing set or a new empty one; inserting in
      // place avoids copying the whole set out and back in for every word
      ShingleToQuoteIds.AddDat(ShingleMd5).AddKey(QuoteIds[qt]);

      ///// COMMENT OUT LATER
      Temp.AddDat(Content[i]).AddKey(QuoteIds[qt]);
    }
  }

  TVec<TStr> ShingleKeys;
  Temp.GetKeyV(ShingleKeys);
  ShingleKeys.SortCmp(TCmpSetByLen(false, &Temp));
  // there may be fewer than 100 distinct words; never index past the end
  const int NumToShow = (ShingleKeys.Len() < 100) ? ShingleKeys.Len() : 100;
  for (int i = 0; i < NumToShow; i++) {
    Err("%d: %s - %d \n", i, ShingleKeys[i].CStr(), Temp.GetDat(ShingleKeys[i]).Len());
  }

  Err("Done with word hashing!\n");
}
Пример #22
0
void TSockSys::AddSockTimer(const uint64& SockId, const int& MSecs) {
	if (SockIdToTimerHndH.IsKey(SockId)) {
		// socket already has timer, stop and start with MSecs
		uv_timer_t* TimerHnd = SockIdToTimerHndH.GetDat(SockId);
		// stop existing count
		uv_timer_stop(TimerHnd);
		// start new one
		uv_timer_start(TimerHnd, OnTimeOut, MSecs, 0);
	} else {
		// create new timer
		uv_timer_t* TimerHnd = (uv_timer_t*)malloc(sizeof(uv_timer_t));
		// initialize
		uv_timer_init(SockSys.Loop, TimerHnd);
		// start the timer
		uv_timer_start(TimerHnd, OnTimeOut, MSecs, 0);
		// remember handle
		SockIdToTimerHndH.AddDat(SockId, TimerHnd);
		TimerHndToSockIdH.AddDat((uint64)TimerHnd, SockId);	
	}
}
Пример #23
0
/// Returns the textual IP address of the local end of socket SockId.
/// Asserts that SockId is a known socket; throws when the socket name cannot
/// be retrieved or when its address family is neither AF_INET nor AF_INET6.
TStr TSockSys::GetLocalIpNum(const uint64& SockId) {
	// make sure it's a valid socket
	IAssert(IsSock(SockId));
	uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId);
	// get local address; sockaddr_storage is large enough for any address
	// family, whereas a plain sockaddr cannot hold a full IPv6 address
	struct sockaddr_storage SockName;
	int NameLen = sizeof(SockName);
	const int ResCd = uv_tcp_getsockname(SockHnd, (struct sockaddr*)&SockName, &NameLen);
	EAssertR(ResCd == 0, "SockSys.GetLocalIpNum: " + SockSys.GetLastErr());
	// decode IP
	char SockIpNum[64];
	if (SockName.ss_family == AF_INET) {
		uv_ip4_name((sockaddr_in*)&SockName, SockIpNum, sizeof(SockIpNum));
	} else if (SockName.ss_family == AF_INET6) {
		uv_ip6_name((sockaddr_in6*)&SockName, SockIpNum, sizeof(SockIpNum));
	} else {
		throw TExcept::New("SockSys.GetLocalIpNum: unkown address family");
	}
	// return
	return TStr(SockIpNum);
}
Пример #24
0
void TSockSys::Send(const uint64& SockId, const PSIn& SIn) {
	// make sure it's a valid socket
	IAssert(IsSock(SockId));
	uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId);
	// create write request
	uv_write_req_t* WriteHnd = (uv_write_req_t*)malloc(sizeof(uv_write_req_t));
	// copy the data in the buffer
	WriteHnd->Buffer.len = SIn->Len(); //TODO: handle cases when SIn doesn't have known Len()
	WriteHnd->Buffer.base = (char*)malloc(WriteHnd->Buffer.len);
	SIn->GetBf(WriteHnd->Buffer.base, WriteHnd->Buffer.len);
	// execute the request
	int ResCd = uv_write((uv_write_t*)WriteHnd, (uv_stream_t*)SockHnd, &WriteHnd->Buffer, 1, OnWrite);
	// check for errors
	if (ResCd != 0) {
		// cleanup first
		free(WriteHnd->Buffer.base);
		free(WriteHnd);
		// and throw exception
		throw TExcept::New("SockSys.Send: Error sending data: " + SockSys.GetLastErr());
	}
}
Пример #25
0
/// Binds socket SockId to port PortN on the wildcard address ("0.0.0.0", or
/// "::" when IPv6P is set) and starts listening; TSockSys::OnAccept is passed
/// as the connection callback.
/// Asserts that SockId is a known socket; raises via EAssertR when binding or
/// listening fails.
void TSockSys::Listen(const uint64& SockId, const int& PortN, const bool& IPv6P) {
	// make sure it's a valid socket
	IAssert(IsSock(SockId));
	uv_tcp_t* SockHnd = SockIdToHndH.GetDat(SockId);
	// bind to the wildcard address of the requested family
	int BindResCd = 0;
	if (IPv6P) {
		struct sockaddr_in6 Addr = uv_ip6_addr("::", PortN);
		BindResCd = uv_tcp_bind6(SockHnd, Addr);
	} else {
		struct sockaddr_in Addr = uv_ip4_addr("0.0.0.0", PortN);
		BindResCd = uv_tcp_bind(SockHnd, Addr);
	}
	EAssertR(BindResCd == 0, "SockSys.Listen: Error bidning socket to port: " + SockSys.GetLastErr());
	// use a backlog of at least 128 pending connections
	const int BacklogQueue = (SOMAXCONN < 128) ? 128 : SOMAXCONN;
	// start listening
	const int ListenResCd = uv_listen((uv_stream_t*)SockHnd, BacklogQueue, TSockSys::OnAccept);
	EAssertR(ListenResCd == 0, "SockSys.Listen: Error setting listener on socket: " + SockSys.GetLastErr());
}
Пример #26
0
/////////////////////////////////////////////////
// Best-Paths
/// Finds the shortest linkage paths between two named objects and displays them.
/// Two named objects are treated as adjacent when their document-id vectors
/// (GetNmObjDocIdV) intersect. A level-by-level search from the source assigns
/// each reached object a (parent, level) pair and collects every parent through
/// which the destination was reached. Each such path is written, indented by
/// distance from the destination, into ContexterF->NmObjLinkageREd, and the
/// union of the paths is built into a graph that is placed and painted through
/// ContexterF.
/// @param SrcNmObjStr name of the source named object
/// @param DstNmObjStr name of the destination named object
/// @param NmObjBs     named-object base used for ids, names and document ids
void GetBestPaths(
 const TStr& SrcNmObjStr, const TStr& DstNmObjStr, const PNmObjBs& NmObjBs){
  int SrcNmObjId=NmObjBs->GetNmObjId(SrcNmObjStr);
  int DstNmObjId=NmObjBs->GetNmObjId(DstNmObjStr);
  int NmObjs=NmObjBs->GetNmObjs();
  // ParLevPrV[id] = (parent object id, level); (-1, -1) marks "not reached yet"
  // DstParLevPrV collects every (parent, level) through which the destination was reached
  TIntPrV ParLevPrV(NmObjs); TIntPrV DstParLevPrV;
  ParLevPrV.PutAll(TIntPr(-1, -1));
  int CurLev=0;
  // the source is its own parent at level 0
  ParLevPrV[SrcNmObjId]=TIntPr(SrcNmObjId, CurLev);
  // expand one level per iteration
  forever{
    CurLev++; int NewEdges=0;
    for (int NmObjId1=0; NmObjId1<NmObjs; NmObjId1++){
      // only objects reached in the previous level are expanded
      if (ParLevPrV[NmObjId1].Val2==CurLev-1){
        TIntV DocIdV1; NmObjBs->GetNmObjDocIdV(NmObjId1, DocIdV1);
        for (int NmObjId2=0; NmObjId2<NmObjs; NmObjId2++){
          // candidates: objects not reached yet; the destination is always
          // re-examined so that all of its parents get collected
          if ((NmObjId2==DstNmObjId)||(ParLevPrV[NmObjId2].Val2==-1)){
            TIntV DocIdV2; NmObjBs->GetNmObjDocIdV(NmObjId2, DocIdV2);
            TIntV IntrsDocIdV; DocIdV1.Intrs(DocIdV2, IntrsDocIdV);
            // adjacent when the two objects share at least one document
            if (!IntrsDocIdV.Empty()){
              ParLevPrV[NmObjId2]=TIntPr(NmObjId1, CurLev); NewEdges++;
              if (NmObjId2==DstNmObjId){
                DstParLevPrV.Add(TIntPr(NmObjId1, CurLev));
              }
            }
          }
        }
      }
    }
    // stop when nothing new was reached or the destination has been reached
    if ((NewEdges==0)||(ParLevPrV[DstNmObjId].Val2!=-1)){
      break;
    }
  }
  // prepare graph
  // vertex names (values are filled in later) and undirected name pairs for edges
  THash<TStr, PVrtx> VrtxNmToVrtxH; TStrPrV VrtxNmPrV;
  VrtxNmToVrtxH.AddKey(SrcNmObjStr);
  VrtxNmToVrtxH.AddKey(DstNmObjStr);
  // write path
  ContexterF->NmObjLinkageREd->Clear();
  // one path per recorded parent of the destination
  for (int DstParLevPrN=0; DstParLevPrN<DstParLevPrV.Len(); DstParLevPrN++){
    // point the destination at this parent, then walk the parent chain back to the source
    ParLevPrV[DstNmObjId]=DstParLevPrV[DstParLevPrN];
    int DstParLev=ParLevPrV[DstNmObjId].Val2;
    // note: this local shadows the DstNmObjStr parameter
    TStr DstNmObjStr=NmObjBs->GetNmObjStr(DstNmObjId);
    ContexterF->NmObjLinkageREd->Lines->Add(DstNmObjStr.CStr());
    int ParNmObjId=DstNmObjId;
    TStr PrevNmObjStr=DstNmObjStr;
    forever {
      if (ParNmObjId==SrcNmObjId){break;}
      ParNmObjId=ParLevPrV[ParNmObjId].Val1;
      int ParLev=ParLevPrV[ParNmObjId].Val2;
      TStr CurNmObjStr=NmObjBs->GetNmObjStr(ParNmObjId);
      // indent by 4 spaces per level of distance from the destination
      TStr ParNmObjStr=TStr::GetSpaceStr((DstParLev-ParLev)*4)+CurNmObjStr;
      ContexterF->NmObjLinkageREd->Lines->Add(ParNmObjStr.CStr());
      // create vertex & edge
      VrtxNmToVrtxH.AddKey(CurNmObjStr);
      if (!PrevNmObjStr.Empty()){
        // store each pair in sorted order so AddUnique treats both directions
        // as the same edge; equal names produce no edge
        if (PrevNmObjStr<CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(PrevNmObjStr, CurNmObjStr));
        } else
        if (PrevNmObjStr>CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(CurNmObjStr, PrevNmObjStr));
        }
      }
      // save curent named-object
      PrevNmObjStr=CurNmObjStr;
    }
  }
  // generate graph
  // create graph
  PGraph Graph=TGGraph::New();
  // create vertices
  for (int VrtxN=0; VrtxN<VrtxNmToVrtxH.Len(); VrtxN++){
    TStr VrtxNm=VrtxNmToVrtxH.GetKey(VrtxN);
    PVrtx Vrtx=TGVrtx::New(VrtxNm);
    VrtxNmToVrtxH.GetDat(VrtxNm)=Vrtx;
    Graph->AddVrtx(Vrtx);
  }
  // create edges
  for (int EdgeN=0; EdgeN<VrtxNmPrV.Len(); EdgeN++){
    PVrtx Vrtx1=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val1);
    PVrtx Vrtx2=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val2);
    // edges are named "_<index>"
    PEdge Edge=new TGEdge(Vrtx1, Vrtx2, TStr::Fmt("_%d", EdgeN), false);
    Graph->AddEdge(Edge);
  }
  // place graph
  ContexterF->State->ElGraph=Graph;
  // fixed seed, so the same random sequence is used for placement on every call
  TRnd Rnd(1);
  ContexterF->State->ElGraph->PlaceSimAnnXY(Rnd, ContexterF->State->ElGks);
  // draw graph
  ContexterF->State->ElGks->Clr();
  ContexterF->ElPbPaint(NULL);
}
int main(int argc, char* argv[])
{
	TExeTm ExeTm;
	PGconn *conn;
	PGresult *res;
	int id,start,rec_count,row,indx,end;
	unsigned int q;
	int total_number_tweets = 0;
	double tweet_date = 0;
	TStr TweetStr("");
	TStr TweetStrLc("");

	if(argc > 1)
	{
		start = atoi(argv[1]);
	}
	else
	{
		printf("YOU SHOULD SET THE INDICES...\n\n");
		return 1;
	}
	indx = start * LENGTH;
	end = indx + LENGTH;

	printf(":::::::: Find Cascades of Quotes In Twitter Separately ::::::::\n");
	const TStr StartDate = Env.GetIfArgPrefixStr("-sd:", "2008-08-01 00:00:00", "Starting date");
	const TStr EndDate = Env.GetIfArgPrefixStr("-ed:", "2009-10-01 00:00:00", "Ending date");

	Env = TEnv(argc, argv, TNotify::StdNotify);
	Env.PrepArgs(TStr::Fmt("\nFinding the cascades of the desired quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
	try
	{
		while(indx < end)
		{
			TStr qContentFname = TStr::Fmt("QuotesData/Q%d.rar",indx);
			TStr resultFname = TStr::Fmt("QuotesCascResult/R%d.rar",indx++);

			if(fileExists(resultFname))
			{
				if(fileExists(qContentFname))
				{
					// removing the quotes' content file
					system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr());
				}
			}
			else
			{
				if(fileExists(qContentFname))
				{
					THash<TStr,TInt> quotesContent;
					THash<TInt,TSecTmV> CascadesOnTwitter;

					TZipIn ZquotesIn(qContentFname);
					quotesContent.Load(ZquotesIn);
					printf("Q%d loading done, it contains %d quotes.\n",indx-1,quotesContent.Len());

					conn = PQconnectdb("dbname=twitter host=postgresql01.mpi-sws.org user=twitter password=tweet@84");

					if (PQstatus(conn) == CONNECTION_BAD)
					{
						printf("We were unable to connect to the database");
						return 1;
					}

					// we use cursors/fetch to speed up the process; batch of 10000 tweets
					PQexec(conn, "begin work");
					PQexec(conn,TStr::Fmt("declare mycursor cursor for select tweettext, extract(epoch from tweettime) from tweets where tweettime >= timestamp '%s' and tweettime < timestamp '%s'", StartDate.CStr(), EndDate.CStr()).CStr());

					do
					{
						res = PQexec(conn, "FETCH 1000000 IN mycursor");    // all of them are: 1675401026
						if (PQresultStatus(res) == PGRES_TUPLES_OK)
						{
							rec_count = PQntuples(res);
							total_number_tweets += rec_count;
							printf("Adding %d tweets... (total: %d)\n", rec_count, total_number_tweets);

							for (row=0; row<rec_count; row++)
							{
								TweetStr = PQgetvalue(res, row, 0);
								tweet_date = TStr(PQgetvalue(res, row, 1)).GetFlt();
								TweetStrLc = TweetStr.ToLc();
								for(q=0;q<quotesContent.Len();q++)
								{
									if (TweetStrLc.SearchStr(quotesContent.GetKey(q)) > -1)
									{
										TSecTm td(tweet_date);
										id = CascadesOnTwitter.GetKeyId(quotesContent[q]);
										if(id == -1)
										{
											CascadesOnTwitter.AddDat(quotesContent[q]).Add(td);
										}
										else
										{
											CascadesOnTwitter.GetDat(quotesContent[q]).AddSorted(td);
										}
									}
								}
							}

							PQclear(res);
						}
						else
						{
							rec_count = 0;
						}
					}
					while (rec_count);

					PQexec(conn, "close mycursor");
					PQexec(conn, "commit work");
					PQfinish(conn);


					// Save the results
					TZipOut zout(resultFname);
					CascadesOnTwitter.Save(zout);

					// Remove the qoutes' content file
					system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr());
				}
			}
		}

		printf("\n\nD O N E\n\n");
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
int main(int argc, char* argv[])
{
	int i,quoteIndex,j,k;
	TExeTm ExeTm;

	printf("Starting The SAVE CODE For Matlab Processing ...\n");
	try
	{
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nCreating the volumes of the quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

		TZipIn ZquotesIn("RESULTS/QuotesPreprocessedData_NIFTY.rar");
		quotes.Load(ZquotesIn);
		printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len());

		TZipIn ZcascadesOnTwitterIn("RESULTS/CascadesFullUrlsOnTwitterData.rar");
		cascadesOnTwitterUrls.Load(ZcascadesOnTwitterIn);
		printf("Loaded CascadesFullUrlsOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterUrls.Len());

		TZipIn ZIn("RESULTS/CascadesOnTwitterData.rar");
		cascadesOnTwitterContents.Load(ZIn);
		printf("Loaded CascadesOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterContents.Len());


		// Quote's Cascades over Memes
		ofstream quotesContent1("MEMES_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs1("MEMES_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes1("MEMES_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks1("MEMES_MemesExternalLinks.csv",ios::out|ios::app);
		for(i=0;i<quotes.Len();i++)
		{
			quotesContent1 << quotes.GetKey(i).CStr() << "\r\n";
			for(j=0;j<quotes[i].Len();j++)
			{
				for(k=0;k<quotes[i][j].explicit_links.Len();k++)
				{
					externalLinks1 << quotes[i][j].explicit_links[k].Val << "," << quotes[i][j].post.Val<<"\r\n";
				}
				memeTimes1 << quotes[i][j].time.GetAbsSecs() << ",";
				memeWebs1 << quotes[i][j].post.Val << ",";
			}
			memeTimes1 << "\r\n";
			memeWebs1 << "\r\n";
			externalLinks1 << "-1\r\n";  // this means that the external links for this quote is finished
		}
		quotesContent1.close();
		memeWebs1.close();
		memeTimes1.close();
		externalLinks1.close();


		// TEXTS Cascades Over Memes and Twitter
		ofstream quotesContent2("MEMES_TWITTER_TXT_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs2("MEMES_TWITTER_TXT_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes2("MEMES_TWITTER_TXT_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks2("MEMES_TWITTER_TXT_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitterContent2("MEMES_TWITTER_TXT_TwitterTextCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterContents.Len();i++)
		{
			quoteIndex = cascadesOnTwitterContents.GetKey(i);
			quotesContent2 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks2 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes2 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs2 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes2 << "\r\n";
			memeWebs2 << "\r\n";
			externalLinks2 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++)
			{
				twitterContent2 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ",";
			}
			twitterContent2 << "\r\n";
		}
		quotesContent2.close();
		memeWebs2.close();
		memeTimes2.close();
		externalLinks2.close();
		twitterContent2.close();


		// URLS Cascades Over Memes and Twitter
		ofstream quotesContent3("MEMES_TWITTER_URL_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs3("MEMES_TWITTER_URL_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes3("MEMES_TWITTER_URL_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks3("MEMES_TWITTER_URL_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitter3("MEMES_TWITTER_URL_TwitterUrlCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterUrls.Len();i++)
		{
			quoteIndex = cascadesOnTwitterUrls.GetKey(i);
			quotesContent3 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks3 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes3 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs3 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes3 << "\r\n";
			memeWebs3 << "\r\n";
			externalLinks3 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++)
			{
				twitter3 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ",";
			}
			twitter3 << "\r\n";
		}
		quotesContent3.close();
		memeWebs3.close();
		memeTimes3.close();
		externalLinks3.close();
		twitter3.close();


		// INTERSECT OF URLS OF TEXTS Cascades Over Memes and Twitter
		ofstream quotesContent4("TRIPLE_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs4("TRIPLE_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes4("TRIPLE_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks4("TRIPLE_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitter4("TRIPLE_TwitterUrlCascades.csv",ios::out|ios::app);
		ofstream twitterContent4("TRIPLE_TwitterTextCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterUrls.Len();i++)
		{
			quoteIndex = cascadesOnTwitterUrls.GetKey(i);
			if(cascadesOnTwitterContents.GetKeyId(quoteIndex) == -1)
			{
				continue;
			}
			quotesContent4 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks4 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes4 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs4 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes4 << "\r\n";
			memeWebs4 << "\r\n";
			externalLinks4 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++)
			{
				twitterContent4 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ",";
			}

			for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++)
			{
				twitter4 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ",";
			}
			twitter4 << "\r\n";
			twitterContent4 << "\r\n";
		}
		quotesContent4.close();
		memeWebs4.close();
		memeTimes4.close();
		externalLinks4.close();
		twitter4.close();
		twitterContent4.close();
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
Пример #29
0
// Equi-join of this table with Table on this->Col1 == Table.Col2.
//
// The table with fewer valid rows is grouped (hashed) by its join column; the other
// table is then scanned row by row and one joint row is added for every row of the
// hashed table that carries the same key.
// Throws (TExcept::Throw) when Col1 is not a column of this table, when Col2 is not a
// column of Table, or when the two columns have different types.
//
// Q: Do we want to have any guarantees in terms of order of the joint rows - i.e.
// ordered by "this" table row idx as primary key and "Table" row idx as secondary key
// This means only keeping joint row indices (pairs of original row indices), sorting them
// and adding all rows in the end. Sorting can be expensive, but we would be able to pre-allocate
// memory for the joint table..
PTable TTable::Join(TStr Col1, const TTable& Table, TStr Col2) {
  if(!ColTypeMap.IsKey(Col1)){
    TExcept::Throw("no such column " + Col1);
  }
  // Col2 names a column of the OTHER table, so it is looked up in that table's schema
  if(!Table.ColTypeMap.IsKey(Col2)){
    TExcept::Throw("no such column " + Col2);
  }
  const TYPE ColType = GetColType(Col1);
  // the type of Col2 is read straight from Table's column map (Val1 of the map entry)
  if (ColType != Table.ColTypeMap.GetDat(Col2).Val1) {
    TExcept::Throw("Trying to Join on columns of different type");
  }
  // initialize result table
  PTable JointTable = InitializeJointTable(Table);
  // hash smaller table (group by column)
  TBool ThisIsSmaller = (NumValidRows <= Table.NumValidRows);
  const TTable& TS = ThisIsSmaller ? *this : Table;
  const TTable& TB = ThisIsSmaller ?  Table : *this;
  // join column of the smaller (hashed) and of the bigger (scanned) table
  TStr ColS = ThisIsSmaller ? Col1 : Col2;
  TStr ColB = ThisIsSmaller ? Col2 : Col1;
  // iterate over the rows of the bigger table and check for "collisions"
  // with the group keys for the small table.
  switch(ColType){
    case INT:{
      THash<TInt, TIntV> T;
      // group the smaller table by ITS join column (ColS, not unconditionally Col1)
      TS.GroupByIntCol(ColS, T, TIntV(), true);
      for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){
        TInt K = RowI.GetIntAttr(ColB);
        if(T.IsKey(K)){
          TIntV& Group = T.GetDat(K);
          for(TInt i = 0; i < Group.Len(); i++){
            // AddJointRow always takes (this-row, Table-row) in that order
            if(ThisIsSmaller){
              JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
            } else{
              JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
            }
          }
        }
      }
      break;
    }
    case FLT:{
      THash<TFlt, TIntV> T;
      TS.GroupByFltCol(ColS, T, TIntV(), true);
      for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){
        TFlt K = RowI.GetFltAttr(ColB);
        if(T.IsKey(K)){
          TIntV& Group = T.GetDat(K);
          for(TInt i = 0; i < Group.Len(); i++){
            if(ThisIsSmaller){
              JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
            } else{
              JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
            }
          }
        }
      }
      break;
    }
    case STR:{
      THash<TStr, TIntV> T;
      TS.GroupByStrCol(ColS, T, TIntV(), true);
      for(TRowIterator RowI = TB.BegRI(); RowI < TB.EndRI(); RowI++){
        TStr K = RowI.GetStrAttr(ColB);
        if(T.IsKey(K)){
          TIntV& Group = T.GetDat(K);
          for(TInt i = 0; i < Group.Len(); i++){
            if(ThisIsSmaller){
              JointTable->AddJointRow(*this, Table, Group[i], RowI.GetRowIdx());
            } else{
              JointTable->AddJointRow(*this, Table, RowI.GetRowIdx(), Group[i]);
            }
          }
        }
      }
      break;
    }
  }
  return JointTable;
}
Пример #30
0
// Returns the socket event stored in IdToSockEventH under the given event id.
PSockEvent TSockSys::GetSockEvent(const uint64& SockEventId) const {
	PSockEvent SockEvent = IdToSockEventH.GetDat(SockEventId);
	return SockEvent;
}