Example #1
0
void TSAppSrvFun::GetFldValSet(const TStrKdV& FldNmValPrV, const TStr& FldNm, TStrSet& FldValSet) {
	FldValSet.Clr();
	int ValN = FldNmValPrV.SearchForw(TStrKd(FldNm, ""));
	while (ValN != -1) {
		FldValSet.AddKey(FldNmValPrV[ValN].Dat);
		ValN = FldNmValPrV.SearchForw(TStrKd(FldNm, ""), ValN + 1);
	}
}
Example #2
0
// Arxiv co-authorship network
// Example input: "W:\\Data\\Arxiv\\Arxiv-CoAuth\\gr-qc.lis"
//
// Reads papers from FNm one at a time. For every ordered pair of distinct
// author positions within a paper, the edge between the two authors gets its
// weight increased by 1 (nodes and the edge are created on first sight).
// Both orderings of each pair are visited, so every co-authorship is recorded
// in both directions with equal weight. Node ids are the key ids handed out
// by a local string set; node data is the author name.
// Prints summary info and the total edge weight before returning the network.
PWgtNet TWgtNet::LoadArxivCoAuth(const TStr& FNm) {
  TArxivPaperList Arxiv(FNm);
  PWgtNet Net = TWgtNet::New();
  TStrSet AuthorSet;
  while (Arxiv.Next()) {
    const int AuthorCnt = Arxiv.AuthorV.Len();
    for (int SrcN = 0; SrcN < AuthorCnt; SrcN++) {
      const int SrcId = AuthorSet.AddKey(Arxiv.AuthorV[SrcN]);
      for (int DstN = 0; DstN < AuthorCnt; DstN++) {
        if (SrcN != DstN) {
          const int DstId = AuthorSet.AddKey(Arxiv.AuthorV[DstN]);
          // Nodes are only created once an author takes part in a pair.
          if (! Net->IsNode(SrcId)) { Net->AddNode(SrcId, Arxiv.AuthorV[SrcN]); }
          if (! Net->IsNode(DstId)) { Net->AddNode(DstId, Arxiv.AuthorV[DstN]); }
          // First co-authorship creates the edge with weight 1, later ones bump it.
          if (! Net->IsEdge(SrcId, DstId)) { Net->AddEdge(SrcId, DstId, 1); }
          else { Net->GetEDat(SrcId, DstId) += 1; }
        }
      }
    }
  }
  TGBase::PrintInfo(Net);
  printf("  Edge weight: %f\n", Net->GetEdgeWgt());
  return Net;
}
Example #3
0
// CiteSeer co-authorship network
// Example input: "W:\\Data\\CiteSeer\\old\\citeseer-links.csv"
//
// Parses FNm as comma-separated lines. Fields from index 2 onward are used as
// author names; the first two fields are not read here. For every ordered
// pair of distinct author field positions on a line, the edge between the two
// authors gets its weight increased by 1 (nodes and the edge are created on
// first sight). Node ids are the key ids handed out by a local string set.
// Prints summary info and the total edge weight before returning the network.
PWgtNet TWgtNet::LoadCiteSeerCoAuth(const TStr& FNm) {
  PWgtNet Net = TWgtNet::New();
  TStrSet AuthorSet;
  TSsParser Ss(FNm, ssfCommaSep);
  while (Ss.Next()) {
    const int FldCnt = Ss.Len();
    for (int SrcFld = 2; SrcFld < FldCnt; SrcFld++) {
      const int SrcId = AuthorSet.AddKey(Ss[SrcFld]);
      for (int DstFld = 2; DstFld < FldCnt; DstFld++) {
        if (SrcFld != DstFld) {
          const int DstId = AuthorSet.AddKey(Ss[DstFld]);
          // Nodes are only created once an author takes part in a pair.
          if (! Net->IsNode(SrcId)) { Net->AddNode(SrcId, Ss[SrcFld]); }
          if (! Net->IsNode(DstId)) { Net->AddNode(DstId, Ss[DstFld]); }
          // First co-authorship creates the edge with weight 1, later ones bump it.
          if (! Net->IsEdge(SrcId, DstId)) { Net->AddEdge(SrcId, DstId, 1); }
          else { Net->GetEDat(SrcId, DstId) += 1; }
        }
      }
    }
  }
  TGBase::PrintInfo(Net);
  printf("  Edge weight: %f\n", Net->GetEdgeWgt());
  return Net;
}
Example #4
0
// DBLP co-authorship network
// Example input: "W:\\Data\\DBLP\\dblp.xml.gz"
//
// Reads records from FNm one at a time. For every ordered pair of distinct
// author positions within a record, the edge between the two authors gets its
// weight increased by 1 (nodes and the edge are created on first sight).
// Both orderings of each pair are visited, so every co-authorship is recorded
// in both directions with equal weight. Node ids are the key ids handed out
// by a local string set; node data is the author name.
// A progress counter is printed every 1000 records; summary info and the
// total edge weight are printed before the network is returned.
PWgtNet TWgtNet::LoadDblpCoAuth(const TStr& FNm) {
  TDblpLoader Dblp(FNm);
  TStrSet AuthorSet;
  PWgtNet Net = TWgtNet::New();
  int RecCnt = 0;  // zero-based index of the record currently being processed
  while (Dblp.Next()) {
    const int AuthorCnt = Dblp.AuthorV.Len();
    for (int SrcN = 0; SrcN < AuthorCnt; SrcN++) {
      const int SrcId = AuthorSet.AddKey(Dblp.AuthorV[SrcN]);
      for (int DstN = 0; DstN < AuthorCnt; DstN++) {
        if (SrcN != DstN) {
          const int DstId = AuthorSet.AddKey(Dblp.AuthorV[DstN]);
          // Nodes are only created once an author takes part in a pair.
          if (! Net->IsNode(SrcId)) { Net->AddNode(SrcId, Dblp.AuthorV[SrcN]); }
          if (! Net->IsNode(DstId)) { Net->AddNode(DstId, Dblp.AuthorV[DstN]); }
          // First co-authorship creates the edge with weight 1, later ones bump it.
          if (! Net->IsEdge(SrcId, DstId)) { Net->AddEdge(SrcId, DstId, 1); }
          else { Net->GetEDat(SrcId, DstId) += 1; }
        }
      }
    }
    if (RecCnt % 1000 == 0) { printf("\r%d", RecCnt); }
    RecCnt++;
  }
  printf("\n");
  TGBase::PrintInfo(Net);
  printf("  Edge weight: %f\n", Net->GetEdgeWgt());
  return Net;
}
int main(int argc, char *argv[]) {
  TStr BaseString = "/lfs/1/tmp/curis/week/QBDB.bin";
  TFIn BaseFile(BaseString);
  TQuoteBase *QB = new TQuoteBase;
  TDocBase *DB = new TDocBase;
  QB->Load(BaseFile);
  DB->Load(BaseFile);

  TIntV QuoteIds;
  QB->GetAllQuoteIds(QuoteIds);

  int NumQuotes = QuoteIds.Len();
  THash<TInt, TStrSet> PeakCounts;
  for (int i = 0; i < NumQuotes; i++) {
    TQuote CurQuote;
    if (QB->GetQuote(QuoteIds[i], CurQuote)) {
      TVec<TSecTm> Peaks;
      CurQuote.GetPeaks(DB, Peaks);
      TStr QuoteString;
      CurQuote.GetParsedContentString(QuoteString);
      TStrSet StringSet;
      if (PeakCounts.IsKey(Peaks.Len())) {
        StringSet = PeakCounts.GetDat(Peaks.Len());
      }
      StringSet.AddKey(QuoteString);
      PeakCounts.AddDat(Peaks.Len(), StringSet);
    }
  }

  TIntV PeakCountKeys;
  PeakCounts.GetKeyV(PeakCountKeys);
  PeakCountKeys.Sort(true);
  for (int i = 0; i < PeakCountKeys.Len(); i++) {
    TStrSet CurSet = PeakCounts.GetDat(PeakCountKeys[i]);
    if (CurSet.Len() > 0) {
      printf("QUOTES WITH %d PEAKS\n", PeakCountKeys[i].Val);
      printf("#########################################\n");
      THashSet<TStr> StringSet = PeakCounts.GetDat(PeakCountKeys[i]);
      for (THashSet<TStr>::TIter l = StringSet.BegI(); l < StringSet.EndI(); l++) {
        printf("%s\n", l.GetKey().CStr());
      }
      printf("\n");
    }
  }
  delete QB;
  delete DB;
  return 0;
}
Example #6
0
// Eve communication network
//
// Reads FNm line by line, splitting each line on ';'. The first two fields
// are used as the names of the two endpoints: the edge from the first to the
// second gets its weight increased by 1 (nodes and the edge are created on
// first sight). Node ids are the key ids handed out by a local string set;
// node data is the endpoint name.
// Lines with fewer than two fields (e.g. blank or malformed lines) are
// skipped instead of indexing past the end of the field vector.
// A progress counter is printed every 10k lines (skipped lines are counted);
// summary info and the total edge weight are printed before returning.
PWgtNet TWgtNet::LoadEveCommNet(const TStr& FNm) {
  PWgtNet Net = TWgtNet::New();
  TStrSet AuthorSet;
  TChA Ln;
  TVec<char*> WrdV;
  TFIn FIn(FNm);
  for (int c=0; FIn.GetNextLn(Ln); c++) {
    TStrUtil::SplitOnCh(Ln, WrdV, ';');
    // Guard: WrdV[1] only exists when the line contains at least one ';'.
    if (WrdV.Len() >= 2) {
      const int n1 = AuthorSet.AddKey(WrdV[0]);
      const int n2 = AuthorSet.AddKey(WrdV[1]);
      if (! Net->IsNode(n1)) { Net->AddNode(n1, WrdV[0]); }
      if (! Net->IsNode(n2)) { Net->AddNode(n2, WrdV[1]); }
      // First message creates the edge with weight 1, later ones bump it.
      if (Net->IsEdge(n1, n2)) { Net->GetEDat(n1, n2) += 1; }
      else { Net->AddEdge(n1, n2, 1); }
    }
    if (c % Kilo(10) == 0) { printf("\r%dk", c/1000); }
  }
  printf("\n");
  TGBase::PrintInfo(Net);
  printf("  Edge weight: %f\n", Net->GetEdgeWgt());
  return Net;
}