void TSAppSrvFun::GetFldValSet(const TStrKdV& FldNmValPrV, const TStr& FldNm, TStrSet& FldValSet) { FldValSet.Clr(); int ValN = FldNmValPrV.SearchForw(TStrKd(FldNm, "")); while (ValN != -1) { FldValSet.AddKey(FldNmValPrV[ValN].Dat); ValN = FldNmValPrV.SearchForw(TStrKd(FldNm, ""), ValN + 1); } }
// Arxiv co-authorship network // Network is undirected (edges of equal weight go both ways) // "W:\\Data\\Arxiv\\Arxiv-CoAuth\\gr-qc.lis" PWgtNet TWgtNet::LoadArxivCoAuth(const TStr& FNm) { TArxivPaperList Arxiv(FNm); PWgtNet Net = TWgtNet::New(); TStrSet AuthorSet; while (Arxiv.Next()) { for (int a1 = 0; a1 < Arxiv.AuthorV.Len(); a1++) { const int n1 = AuthorSet.AddKey(Arxiv.AuthorV[a1]); for (int a2 = 0; a2 < Arxiv.AuthorV.Len(); a2++) { if (a1 == a2) { continue; } const int n2 = AuthorSet.AddKey(Arxiv.AuthorV[a2]); if (! Net->IsNode(n1)) { Net->AddNode(n1, Arxiv.AuthorV[a1]); } if (! Net->IsNode(n2)) { Net->AddNode(n2, Arxiv.AuthorV[a2]); } if (Net->IsEdge(n1, n2)) { Net->GetEDat(n1, n2) += 1; } else { Net->AddEdge(n1, n2, 1); } } } } TGBase::PrintInfo(Net); printf(" Edge weight: %f\n", Net->GetEdgeWgt()); return Net; }
// "W:\\Data\\CiteSeer\\old\\citeseer-links.csv" PWgtNet TWgtNet::LoadCiteSeerCoAuth(const TStr& FNm) { PWgtNet Net = TWgtNet::New(); TStrSet AuthorSet; TSsParser Ss(FNm, ssfCommaSep); while (Ss.Next()) { for (int a1 = 2; a1 < Ss.Len(); a1++) { const int n1 = AuthorSet.AddKey(Ss[a1]); for (int a2 = 2; a2 < Ss.Len(); a2++) { if (a1 == a2) { continue; } const int n2 = AuthorSet.AddKey(Ss[a2]); if (! Net->IsNode(n1)) { Net->AddNode(n1, Ss[a1]); } if (! Net->IsNode(n2)) { Net->AddNode(n2, Ss[a2]); } if (Net->IsEdge(n1, n2)) { Net->GetEDat(n1, n2) += 1; } else { Net->AddEdge(n1, n2, 1); } } } } TGBase::PrintInfo(Net); printf(" Edge weight: %f\n", Net->GetEdgeWgt()); return Net; }
// Network is undirected (edges of equal weight go both ways) // "W:\\Data\\DBLP\\dblp.xml.gz" PWgtNet TWgtNet::LoadDblpCoAuth(const TStr& FNm) { TDblpLoader Dblp(FNm); TStrSet AuthorSet; PWgtNet Net = TWgtNet::New(); for (int c = 0; Dblp.Next(); c++) { for (int a1 = 0; a1 < Dblp.AuthorV.Len(); a1++) { const int n1 = AuthorSet.AddKey(Dblp.AuthorV[a1]); for (int a2 = 0; a2 < Dblp.AuthorV.Len(); a2++) { if (a1 == a2) { continue; } const int n2 = AuthorSet.AddKey(Dblp.AuthorV[a2]); if (! Net->IsNode(n1)) { Net->AddNode(n1, Dblp.AuthorV[a1]); } if (! Net->IsNode(n2)) { Net->AddNode(n2, Dblp.AuthorV[a2]); } if (Net->IsEdge(n1, n2)) { Net->GetEDat(n1, n2) += 1; } else { Net->AddEdge(n1, n2, 1); } } } if (c % 1000 == 0) { printf("\r%d", c); } } printf("\n"); TGBase::PrintInfo(Net); printf(" Edge weight: %f\n", Net->GetEdgeWgt()); return Net; }
int main(int argc, char *argv[]) { TStr BaseString = "/lfs/1/tmp/curis/week/QBDB.bin"; TFIn BaseFile(BaseString); TQuoteBase *QB = new TQuoteBase; TDocBase *DB = new TDocBase; QB->Load(BaseFile); DB->Load(BaseFile); TIntV QuoteIds; QB->GetAllQuoteIds(QuoteIds); int NumQuotes = QuoteIds.Len(); THash<TInt, TStrSet> PeakCounts; for (int i = 0; i < NumQuotes; i++) { TQuote CurQuote; if (QB->GetQuote(QuoteIds[i], CurQuote)) { TVec<TSecTm> Peaks; CurQuote.GetPeaks(DB, Peaks); TStr QuoteString; CurQuote.GetParsedContentString(QuoteString); TStrSet StringSet; if (PeakCounts.IsKey(Peaks.Len())) { StringSet = PeakCounts.GetDat(Peaks.Len()); } StringSet.AddKey(QuoteString); PeakCounts.AddDat(Peaks.Len(), StringSet); } } TIntV PeakCountKeys; PeakCounts.GetKeyV(PeakCountKeys); PeakCountKeys.Sort(true); for (int i = 0; i < PeakCountKeys.Len(); i++) { TStrSet CurSet = PeakCounts.GetDat(PeakCountKeys[i]); if (CurSet.Len() > 0) { printf("QUOTES WITH %d PEAKS\n", PeakCountKeys[i].Val); printf("#########################################\n"); THashSet<TStr> StringSet = PeakCounts.GetDat(PeakCountKeys[i]); for (THashSet<TStr>::TIter l = StringSet.BegI(); l < StringSet.EndI(); l++) { printf("%s\n", l.GetKey().CStr()); } printf("\n"); } } delete QB; delete DB; return 0; }
// Eve communication network PWgtNet TWgtNet::LoadEveCommNet(const TStr& FNm) { PWgtNet Net = TWgtNet::New(); TStrSet AuthorSet; TChA Ln; TVec<char*> WrdV; TFIn FIn(FNm); for (int c=0; FIn.GetNextLn(Ln); c++) { TStrUtil::SplitOnCh(Ln, WrdV, ';'); const int n1 = AuthorSet.AddKey(WrdV[0]); const int n2 = AuthorSet.AddKey(WrdV[1]); if (! Net->IsNode(n1)) { Net->AddNode(n1, WrdV[0]); } if (! Net->IsNode(n2)) { Net->AddNode(n2, WrdV[1]); } if (Net->IsEdge(n1, n2)) { Net->GetEDat(n1, n2) += 1; } else { Net->AddEdge(n1, n2, 1); } if (c % Kilo(10) == 0) { printf("\r%dk", c/1000); } } printf("\n"); TGBase::PrintInfo(Net); printf(" Edge weight: %f\n", Net->GetEdgeWgt()); return Net; }