TStr TSockSys::GetStatusStr(){
  // Render a CRLF-separated summary line for each bookkeeping table:
  // "<label>: <current entry count>\r\n".
  TChA StatusChA;
  StatusChA += "Sockets: ";
  StatusChA += TInt::GetStr(SockIdToHndH.Len());
  StatusChA += "\r\n";
  StatusChA += "Host-Resolutions: ";
  StatusChA += TInt::GetStr(HndToSockHostH.Len());
  StatusChA += "\r\n";
  StatusChA += "Socket-Events: ";
  StatusChA += TInt::GetStr(IdToSockEventH.Len());
  StatusChA += "\r\n";
  StatusChA += "Timers: ";
  StatusChA += TInt::GetStr(SockIdToTimerHndH.Len());
  StatusChA += "\r\n";
  return StatusChA;
}
static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) { TCNMQMatrix QMatrix(Graph); // maximize modularity while (QMatrix.MergeBestQ()) { } // reconstruct communities THash<TInt, TIntV> IdCmtyH; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId()); } CmtyV.Gen(IdCmtyH.Len()); for (int j = 0; j < IdCmtyH.Len(); j++) { CmtyV[j].NIdV.Swap(IdCmtyH[j]); } return QMatrix.Q; }
void TAGM::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) {
  // Invert a community->members map into a node->communities map.
  for (int CmtyN = 0; CmtyN < CmtyVH.Len(); CmtyN++) {
    const int CmtyId = CmtyVH.GetKey(CmtyN);
    const TIntV& MemberV = CmtyVH[CmtyN];
    for (int MemberN = 0; MemberN < MemberV.Len(); MemberN++) {
      // AddDat creates an empty vector for first-seen nodes.
      NIDComVH.AddDat(MemberV[MemberN]).Add(CmtyId);
    }
  }
}
void TTop2FriendNet::GetAvgSDevV(const THash<TFlt, TMom>& MomH, TFltTrV& ValAvgSDevV) { ValAvgSDevV.Clr(false); for (int i = 0; i < MomH.Len(); i++) { TMom Mom=MomH[i]; Mom.Def(); ValAvgSDevV.Add(TFltTr(MomH.GetKey(i), Mom.GetMean(), Mom.GetSDev())); } ValAvgSDevV.Sort(); }
void SaveAll() { printf("\n<<< SAVING STARTS (PLEASE BE PATIENT!!!) >>> .......\n"); TZipOut resOut("CascadesFullUrlsOnTwitterData.rar"); cascadesInUrlsOnTwitter.Save(resOut); printf("The size of CascadesFullUrlsOnTwitterData was: %d\n",cascadesInUrlsOnTwitter.Len()); printf("\n<<<<<<<< SAVING DONE >>>>>>>>\n\n"); }
/// rewire bipartite community affiliation graphs void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) { THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len()); TIntV NDegV; TIntV CDegV; for (int i = 0; i < CmtyVH.Len(); i++) { int CID = CmtyVH.GetKey(i); for (int j = 0; j < CmtyVH[i].Len(); j++) { int NID = CmtyVH[i][j]; NDegV.Add(NID); CDegV.Add(CID); } } TIntPrSet CNIDSet(CDegV.Len()); int c=0; while (c++ < 15 && CDegV.Len() > 1) { for (int i = 0; i < CDegV.Len(); i++) { int u = Rnd.GetUniDevInt(CDegV.Len()); int v = Rnd.GetUniDevInt(NDegV.Len()); if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; } CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v])); if (u == CDegV.Len() - 1) { CDegV.DelLast(); } else { CDegV[u] = CDegV.Last(); CDegV.DelLast(); } if ( v == NDegV.Len() - 1) { NDegV.DelLast(); } else { NDegV[v] = NDegV.Last(); NDegV.DelLast(); } } } for (int i = 0; i < CNIDSet.Len(); i++) { TIntPr CNIDPr = CNIDSet[i]; IAssert(CmtyVH.IsKey(CNIDPr.Val1)); NewCmtyVH.AddDat(CNIDPr.Val1); NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2); } CmtyVH = NewCmtyVH; }
// Driver: load meme (blog/news) cascades and their Twitter URL/content
// cascades, then emit three scatter plots of cascade lengths.
int main(int argc, char* argv[]) {
  // Disabled ad-hoc correlation sanity check:
  // TFltPrV v;
  // v.Add(TFltPr(1,4)); v.Add(TFltPr(5,5)); v.Add(TFltPr(9,11));
  // v.Add(TFltPr(20,8)); v.Add(TFltPr(21,30));
  // cout << "C: " << Tools::computeCorrelation(v,Pearson) << endl;
  // return 0;
  TExeTm ExeTm;
  try {
    Env = TEnv(argc, argv, TNotify::StdNotify);
    Env.PrepArgs(TStr::Fmt("\nPlotting Individually Memes-Twitter Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
    // URLS: meme cascades plus the Twitter cascades triggered via URLs.
    THash< TStr , CascadeElementV > quotes = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar"); // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4URLS
    THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesFullUrlsOnTwitterData_FINALFILTERED
    // CONTENTS: Twitter cascades matched by quote text.
    //THash< TStr , CascadeElementV > quotes2 = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar"); // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4Contents
    THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar"); // CascadesOnTwitterData_FINALFILTERED
    // Merge the two Twitter views per quote.
    // NOTE(review): this indexes twitterUrls by twitterContents' positional
    // index i and uses i itself as the new key — it assumes both hashes hold
    // the same quotes in the same insertion order; confirm against the loaders.
    THash< TUInt , TSecTmV > twitterTotal;
    for(int i=0;i<twitterContents.Len();i++) {
      TSecTmV tmp;
      tmp.AddV(twitterContents[i]);
      tmp.AddV(twitterUrls[i]);
      twitterTotal.AddDat(i,tmp);
    }
    plotScatterLengthOfEachCascade(quotes,twitterUrls,"Urls");
    plotScatterLengthOfEachCascade(quotes,twitterContents,"Contents");
    plotScatterLengthOfEachCascade(quotes,twitterTotal,"Full");
    printf("\nPlots had been drawn successfully.");
  } catch(exception& ex) {
    printf("\nError1 happened, it was: %s\n\n",ex.what());
  } catch(TPt<TExcept>& ex) {
    printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
  }
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
// Scatter-plot cascade length of c1 vs. c2, one point per cascade.
// NOTE(review): c2[q] is indexed by c1's positional index q, not by key —
// this assumes both hashes contain the same quotes in the same order;
// verify against the call site before reusing.
void plotScatterLengthOfEachCascade(THash<TUInt,TSecTmV>& c1, THash<TUInt,TSecTmV>& c2) {
  printf("\n\nPlotting ...\n");
  TFltPrV plotdata;
  for(int q=0;q<c1.Len();q++) {
    TFltPr elem;
    elem.Val1 = c1[q].Len();  // x: cascade size in c1
    elem.Val2 = c2[q].Len();  // y: cascade size in c2 at the same position
    plotdata.Add(elem);
  }
  Tools::plotScatter(plotdata, "TwitterUrlsOverContents", "Urls on Twitter", "Contents on Twitter");
}
// Scatter-plot meme cascade length (blogs/news) vs. Twitter cascade length;
// `name` labels the plot file and the y-axis.
// NOTE(review): twitter[q] is indexed by quotes' positional index q, not by
// key — this assumes the twitter hash holds one entry per quote in the same
// order; confirm with the loaders before reusing.
void plotScatterLengthOfEachCascade(THash<TStr,CascadeElementV>& quotes, THash<TUInt,TSecTmV>& twitter, char* name) {
  printf("\n\nPlotting ...\n");
  TFltPrV plotdata;
  for(int q=0;q<quotes.Len();q++) {
    TFltPr elem;
    elem.Val1 = quotes[q].Len();   // x: meme cascade size
    elem.Val2 = twitter[q].Len();  // y: Twitter cascade size at the same position
    plotdata.Add(elem);
  }
  Tools::plotScatter(plotdata, name, "Blogs/News", TStr::Fmt("%s on Twitter",name).CStr());
}
// Recursively group table rows by the columns GroupBy[GroupByStartIdx..],
// refining IndexSet (the candidate row ids) one column at a time.
// Leaf groups are appended to `grouping` under consecutive integer keys.
// Throws via TExcept if a grouping column does not exist.
void TTable::GroupAux(const TStrV& GroupBy, TInt GroupByStartIdx, THash<TInt,TIntV>& grouping, const TIntV& IndexSet, TBool All){
  /* recursion base - all grouping columns consumed: add IndexSet as one group
     (empty groups are dropped) */
  if(GroupByStartIdx == GroupBy.Len()){
    if(IndexSet.Len() == 0){return;}
    TInt key = grouping.Len();  // next free group id
    grouping.AddDat(key, IndexSet);
    return;
  }
  if(!ColTypeMap.IsKey(GroupBy[GroupByStartIdx])){TExcept::Throw("no such column " + GroupBy[GroupByStartIdx]);}
  // Dispatch on the column's type; each branch partitions IndexSet by the
  // column value and recurses on each partition with the next column.
  switch(GetColType(GroupBy[GroupByStartIdx])){
    case INT:{
      // Group by the current (integer) column.
      // Not sure how to estimate the size of T for constructor hinting: it is
      // bounded by the length of IndexSet (or of the grouping column when
      // IndexSet is empty), but that bound may be way too big.
      THash<TInt,TIntV> T;
      GroupByIntCol(GroupBy[GroupByStartIdx], T, IndexSet, All);
      for(THash<TInt,TIntV>::TIter it = T.BegI(); it < T.EndI(); it++){
        TIntV& CurrGroup = it->Dat;
        // Each group for the current column becomes the IndexSet for the next
        // column; All=false because the candidate set is now explicit.
        GroupAux(GroupBy, GroupByStartIdx+1, grouping, CurrGroup, false);
      }
      break;
    }
    case FLT:{
      // Same refinement, keyed by float values.
      THash<TFlt,TIntV> T;
      GroupByFltCol(GroupBy[GroupByStartIdx], T, IndexSet, All);
      for(THash<TFlt,TIntV>::TIter it = T.BegI(); it < T.EndI(); it++){
        TIntV& CurrGroup = it->Dat;
        GroupAux(GroupBy, GroupByStartIdx+1, grouping, CurrGroup, false);
      }
      break;
    }
    case STR:{
      // Same refinement, keyed by string values.
      THash<TStr,TIntV> T;
      GroupByStrCol(GroupBy[GroupByStartIdx], T, IndexSet, All);
      for(THash<TStr,TIntV>::TIter it = T.BegI(); it < T.EndI(); it++){
        TIntV& CurrGroup = it->Dat;
        GroupAux(GroupBy, GroupByStartIdx+1, grouping, CurrGroup, false);
      }
      break;
    }
  }
}
// Batch worker: for quote-content chunk files Q<indx>.rar in [start*LENGTH,
// start*LENGTH+LENGTH), scan all tweets in a date range (via a libpq cursor)
// for occurrences of each quote, and save the matched tweet-time cascades to
// R<indx>.rar. Already-processed chunks are skipped and their inputs removed.
int main(int argc, char* argv[]) {
  TExeTm ExeTm;
  PGconn *conn;
  PGresult *res;
  int id,start,rec_count,row,indx,end;
  unsigned int q;
  int total_number_tweets = 0;
  double tweet_date = 0;
  TStr TweetStr("");
  TStr TweetStrLc("");
  // argv[1] selects which LENGTH-sized chunk of quote files this worker handles.
  if(argc > 1) {
    start = atoi(argv[1]);
  } else {
    printf("YOU SHOULD SET THE INDICES...\n\n");
    return 1;
  }
  indx = start * LENGTH;
  end = indx + LENGTH;
  printf(":::::::: Find Cascades of Quotes In Twitter Separately ::::::::\n");
  // NOTE(review): these read Env before the TEnv assignment two lines below —
  // confirm Env is a pre-initialized global, otherwise the -sd:/-ed: flags are
  // parsed from a stale environment.
  const TStr StartDate = Env.GetIfArgPrefixStr("-sd:", "2008-08-01 00:00:00", "Starting date");
  const TStr EndDate = Env.GetIfArgPrefixStr("-ed:", "2009-10-01 00:00:00", "Ending date");
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("\nFinding the cascades of the desired quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  try {
    while(indx < end) {
      TStr qContentFname = TStr::Fmt("QuotesData/Q%d.rar",indx);
      TStr resultFname = TStr::Fmt("QuotesCascResult/R%d.rar",indx++);
      if(fileExists(resultFname)) {
        // Result already computed: just clean up the input chunk if it lingers.
        if(fileExists(qContentFname)) {
          // removing the quotes' content file
          system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr());
        }
      } else {
        if(fileExists(qContentFname)) {
          THash<TStr,TInt> quotesContent;          // quote text -> quote id
          THash<TInt,TSecTmV> CascadesOnTwitter;   // quote id -> sorted tweet times
          TZipIn ZquotesIn(qContentFname);
          quotesContent.Load(ZquotesIn);
          printf("Q%d loading done, it contains %d quotes.\n",indx-1,quotesContent.Len());
          // NOTE(review): database credentials are hard-coded in source;
          // move them to the environment/config.
          conn = PQconnectdb("dbname=twitter host=postgresql01.mpi-sws.org user=twitter password=tweet@84");
          if (PQstatus(conn) == CONNECTION_BAD) {
            printf("We were unable to connect to the database");
            return 1;
          }
          // We use cursors/fetch to speed up the process; tweets are streamed
          // in large batches instead of one huge result set.
          PQexec(conn, "begin work");
          PQexec(conn,TStr::Fmt("declare mycursor cursor for select tweettext, extract(epoch from tweettime) from tweets where tweettime >= timestamp '%s' and tweettime < timestamp '%s'", StartDate.CStr(), EndDate.CStr()).CStr());
          do {
            res = PQexec(conn, "FETCH 1000000 IN mycursor"); // all of them are: 1675401026
            if (PQresultStatus(res) == PGRES_TUPLES_OK) {
              rec_count = PQntuples(res);
              total_number_tweets += rec_count;
              printf("Adding %d tweets... (total: %d)\n", rec_count, total_number_tweets);
              for (row=0; row<rec_count; row++) {
                TweetStr = PQgetvalue(res, row, 0);
                tweet_date = TStr(PQgetvalue(res, row, 1)).GetFlt();
                TweetStrLc = TweetStr.ToLc();
                // Substring-match every quote against the lowercased tweet.
                for(q=0;q<quotesContent.Len();q++) {
                  if (TweetStrLc.SearchStr(quotesContent.GetKey(q)) > -1) {
                    TSecTm td(tweet_date);
                    id = CascadesOnTwitter.GetKeyId(quotesContent[q]);
                    if(id == -1) {
                      CascadesOnTwitter.AddDat(quotesContent[q]).Add(td);
                    } else {
                      CascadesOnTwitter.GetDat(quotesContent[q]).AddSorted(td);
                    }
                  }
                }
              }
              PQclear(res);
            } else {
              rec_count = 0;  // fetch failed or exhausted: leave the loop
            }
          } while (rec_count);
          PQexec(conn, "close mycursor");
          PQexec(conn, "commit work");
          PQfinish(conn);
          // Save the results
          TZipOut zout(resultFname);
          CascadesOnTwitter.Save(zout);
          // Remove the quotes' content file
          system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr());
        }
      }
    }
    printf("\n\nD O N E\n\n");
  } catch(exception& ex) {
    printf("\nError1 happened, it was: %s\n\n",ex.what());
  } catch(TPt<TExcept>& ex) {
    printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
  }
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
// Export meme/Twitter cascade data into CSV files for Matlab post-processing.
// Four sections: (1) all meme cascades, (2) quotes with Twitter text cascades,
// (3) quotes with Twitter URL cascades, (4) quotes present in both (TRIPLE).
// Reads the globals `quotes`, `cascadesOnTwitterUrls`,
// `cascadesOnTwitterContents` (declared elsewhere in this file).
int main(int argc, char* argv[]) {
  int i,quoteIndex,j,k;
  TExeTm ExeTm;
  printf("Starting The SAVE CODE For Matlab Processing ...\n");
  try {
    Env = TEnv(argc, argv, TNotify::StdNotify);
    Env.PrepArgs(TStr::Fmt("\nCreating the volumes of the quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
    TZipIn ZquotesIn("RESULTS/QuotesPreprocessedData_NIFTY.rar");
    quotes.Load(ZquotesIn);
    printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len());
    TZipIn ZcascadesOnTwitterIn("RESULTS/CascadesFullUrlsOnTwitterData.rar");
    cascadesOnTwitterUrls.Load(ZcascadesOnTwitterIn);
    printf("Loaded CascadesFullUrlsOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterUrls.Len());
    TZipIn ZIn("RESULTS/CascadesOnTwitterData.rar");
    cascadesOnTwitterContents.Load(ZIn);
    printf("Loaded CascadesOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterContents.Len());
    // --- Section 1: quotes' cascades over memes only ---
    // NOTE: streams are opened with ios::app, so re-running appends to any
    // existing CSVs rather than overwriting them.
    ofstream quotesContent1("MEMES_QuotesContent.csv",ios::out|ios::app);
    ofstream memeWebs1("MEMES_MemesCascadesWebs.csv",ios::out|ios::app);
    ofstream memeTimes1("MEMES_MemesCascadesTimes.csv",ios::out|ios::app);
    ofstream externalLinks1("MEMES_MemesExternalLinks.csv",ios::out|ios::app);
    for(i=0;i<quotes.Len();i++) {
      quotesContent1 << quotes.GetKey(i).CStr() << "\r\n";
      for(j=0;j<quotes[i].Len();j++) {
        for(k=0;k<quotes[i][j].explicit_links.Len();k++) {
          externalLinks1 << quotes[i][j].explicit_links[k].Val << "," << quotes[i][j].post.Val<<"\r\n";
        }
        memeTimes1 << quotes[i][j].time.GetAbsSecs() << ",";
        memeWebs1 << quotes[i][j].post.Val << ",";
      }
      memeTimes1 << "\r\n";
      memeWebs1 << "\r\n";
      externalLinks1 << "-1\r\n"; // sentinel: external links for this quote are finished
    }
    quotesContent1.close(); memeWebs1.close(); memeTimes1.close(); externalLinks1.close();
    // --- Section 2: TEXT cascades over memes and Twitter ---
    ofstream quotesContent2("MEMES_TWITTER_TXT_QuotesContent.csv",ios::out|ios::app);
    ofstream memeWebs2("MEMES_TWITTER_TXT_MemesCascadesWebs.csv",ios::out|ios::app);
    ofstream memeTimes2("MEMES_TWITTER_TXT_MemesCascadesTimes.csv",ios::out|ios::app);
    ofstream externalLinks2("MEMES_TWITTER_TXT_MemesExternalLinks.csv",ios::out|ios::app);
    ofstream twitterContent2("MEMES_TWITTER_TXT_TwitterTextCascades.csv",ios::out|ios::app);
    for(i=0;i<cascadesOnTwitterContents.Len();i++) {
      // The Twitter hash is keyed by the quote's positional index in `quotes`.
      quoteIndex = cascadesOnTwitterContents.GetKey(i);
      quotesContent2 << quotes.GetKey(quoteIndex).CStr() << "\r\n";
      for(j=0;j<quotes[quoteIndex].Len();j++) {
        for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) {
          externalLinks2 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE
        }
        memeTimes2 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
        memeWebs2 << quotes[quoteIndex][j].post.Val << ",";
      }
      memeTimes2 << "\r\n";
      memeWebs2 << "\r\n";
      externalLinks2 << "-1\r\n"; // sentinel: external links for this quote are finished
      for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++) {
        twitterContent2 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ",";
      }
      twitterContent2 << "\r\n";
    }
    quotesContent2.close(); memeWebs2.close(); memeTimes2.close(); externalLinks2.close(); twitterContent2.close();
    // --- Section 3: URL cascades over memes and Twitter ---
    ofstream quotesContent3("MEMES_TWITTER_URL_QuotesContent.csv",ios::out|ios::app);
    ofstream memeWebs3("MEMES_TWITTER_URL_MemesCascadesWebs.csv",ios::out|ios::app);
    ofstream memeTimes3("MEMES_TWITTER_URL_MemesCascadesTimes.csv",ios::out|ios::app);
    ofstream externalLinks3("MEMES_TWITTER_URL_MemesExternalLinks.csv",ios::out|ios::app);
    ofstream twitter3("MEMES_TWITTER_URL_TwitterUrlCascades.csv",ios::out|ios::app);
    for(i=0;i<cascadesOnTwitterUrls.Len();i++) {
      quoteIndex = cascadesOnTwitterUrls.GetKey(i);
      quotesContent3 << quotes.GetKey(quoteIndex).CStr() << "\r\n";
      for(j=0;j<quotes[quoteIndex].Len();j++) {
        for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) {
          externalLinks3 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE
        }
        memeTimes3 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
        memeWebs3 << quotes[quoteIndex][j].post.Val << ",";
      }
      memeTimes3 << "\r\n";
      memeWebs3 << "\r\n";
      externalLinks3 << "-1\r\n"; // sentinel: external links for this quote are finished
      for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++) {
        twitter3 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ",";
      }
      twitter3 << "\r\n";
    }
    quotesContent3.close(); memeWebs3.close(); memeTimes3.close(); externalLinks3.close(); twitter3.close();
    // --- Section 4: intersection (quotes having BOTH URL and text cascades) ---
    ofstream quotesContent4("TRIPLE_QuotesContent.csv",ios::out|ios::app);
    ofstream memeWebs4("TRIPLE_MemesCascadesWebs.csv",ios::out|ios::app);
    ofstream memeTimes4("TRIPLE_MemesCascadesTimes.csv",ios::out|ios::app);
    ofstream externalLinks4("TRIPLE_MemesExternalLinks.csv",ios::out|ios::app);
    ofstream twitter4("TRIPLE_TwitterUrlCascades.csv",ios::out|ios::app);
    ofstream twitterContent4("TRIPLE_TwitterTextCascades.csv",ios::out|ios::app);
    for(i=0;i<cascadesOnTwitterUrls.Len();i++) {
      quoteIndex = cascadesOnTwitterUrls.GetKey(i);
      // Skip quotes that have a URL cascade but no text cascade.
      if(cascadesOnTwitterContents.GetKeyId(quoteIndex) == -1) { continue; }
      quotesContent4 << quotes.GetKey(quoteIndex).CStr() << "\r\n";
      for(j=0;j<quotes[quoteIndex].Len();j++) {
        for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) {
          externalLinks4 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE
        }
        memeTimes4 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
        memeWebs4 << quotes[quoteIndex][j].post.Val << ",";
      }
      memeTimes4 << "\r\n";
      memeWebs4 << "\r\n";
      externalLinks4 << "-1\r\n"; // sentinel: external links for this quote are finished
      for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++) {
        twitterContent4 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ",";
      }
      for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++) {
        twitter4 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ",";
      }
      twitter4 << "\r\n";
      twitterContent4 << "\r\n";
    }
    quotesContent4.close(); memeWebs4.close(); memeTimes4.close(); externalLinks4.close(); twitter4.close(); twitterContent4.close();
  } catch(exception& ex) {
    printf("\nError1 happened, it was: %s\n\n",ex.what());
  } catch(TPt<TExcept>& ex) {
    printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
  }
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
/////////////////////////////////////////////////
// Best-Paths
// BFS over named objects, where two objects are adjacent when their document
// sets intersect. Finds shortest connection levels from Src to Dst, writes the
// discovered paths into the Contexter UI's text pane, and builds/draws a graph
// of the vertices and edges along those paths.
void GetBestPaths(
 const TStr& SrcNmObjStr, const TStr& DstNmObjStr, const PNmObjBs& NmObjBs){
  int SrcNmObjId=NmObjBs->GetNmObjId(SrcNmObjStr);
  int DstNmObjId=NmObjBs->GetNmObjId(DstNmObjStr);
  int NmObjs=NmObjBs->GetNmObjs();
  // ParLevPrV[id] = (parent id, BFS level); (-1,-1) means "not reached yet".
  TIntPrV ParLevPrV(NmObjs); TIntPrV DstParLevPrV;
  ParLevPrV.PutAll(TIntPr(-1, -1));
  int CurLev=0;
  ParLevPrV[SrcNmObjId]=TIntPr(SrcNmObjId, CurLev);  // source is its own parent at level 0
  forever{
    CurLev++;
    int NewEdges=0;
    // Expand every object discovered in the previous level.
    for (int NmObjId1=0; NmObjId1<NmObjs; NmObjId1++){
      if (ParLevPrV[NmObjId1].Val2==CurLev-1){
        TIntV DocIdV1; NmObjBs->GetNmObjDocIdV(NmObjId1, DocIdV1);
        for (int NmObjId2=0; NmObjId2<NmObjs; NmObjId2++){
          // Visit unreached objects; the destination is always re-checked so
          // every distinct parent of Dst at the final level is recorded.
          if ((NmObjId2==DstNmObjId)||(ParLevPrV[NmObjId2].Val2==-1)){
            TIntV DocIdV2; NmObjBs->GetNmObjDocIdV(NmObjId2, DocIdV2);
            TIntV IntrsDocIdV; DocIdV1.Intrs(DocIdV2, IntrsDocIdV);
            // Shared documents == adjacency.
            if (!IntrsDocIdV.Empty()){
              ParLevPrV[NmObjId2]=TIntPr(NmObjId1, CurLev);
              NewEdges++;
              if (NmObjId2==DstNmObjId){
                DstParLevPrV.Add(TIntPr(NmObjId1, CurLev));
              }
            }
          }
        }
      }
    }
    // Stop when the frontier is empty or the destination has been reached.
    if ((NewEdges==0)||(ParLevPrV[DstNmObjId].Val2!=-1)){
      break;
    }
  }
  // prepare graph
  THash<TStr, PVrtx> VrtxNmToVrtxH;
  TStrPrV VrtxNmPrV;
  VrtxNmToVrtxH.AddKey(SrcNmObjStr);
  VrtxNmToVrtxH.AddKey(DstNmObjStr);
  // write path: walk each recorded parent of Dst back to Src, printing an
  // indented line per hop and collecting vertices/edges for the graph.
  ContexterF->NmObjLinkageREd->Clear();
  for (int DstParLevPrN=0; DstParLevPrN<DstParLevPrV.Len(); DstParLevPrN++){
    ParLevPrV[DstNmObjId]=DstParLevPrV[DstParLevPrN];
    int DstParLev=ParLevPrV[DstNmObjId].Val2;
    TStr DstNmObjStr=NmObjBs->GetNmObjStr(DstNmObjId);
    ContexterF->NmObjLinkageREd->Lines->Add(DstNmObjStr.CStr());
    int ParNmObjId=DstNmObjId;
    TStr PrevNmObjStr=DstNmObjStr;
    forever {
      if (ParNmObjId==SrcNmObjId){break;}
      ParNmObjId=ParLevPrV[ParNmObjId].Val1;
      int ParLev=ParLevPrV[ParNmObjId].Val2;
      TStr CurNmObjStr=NmObjBs->GetNmObjStr(ParNmObjId);
      // Indent 4 spaces per level below the destination.
      TStr ParNmObjStr=TStr::GetSpaceStr((DstParLev-ParLev)*4)+CurNmObjStr;
      ContexterF->NmObjLinkageREd->Lines->Add(ParNmObjStr.CStr());
      // create vertex & edge (edge endpoints stored in lexicographic order so
      // AddUnique de-duplicates regardless of traversal direction)
      VrtxNmToVrtxH.AddKey(CurNmObjStr);
      if (!PrevNmObjStr.Empty()){
        if (PrevNmObjStr<CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(PrevNmObjStr, CurNmObjStr));
        } else
        if (PrevNmObjStr>CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(CurNmObjStr, PrevNmObjStr));
        }
      }
      // save current named-object
      PrevNmObjStr=CurNmObjStr;
    }
  }
  // generate graph
  // create graph
  PGraph Graph=TGGraph::New();
  // create vertices
  for (int VrtxN=0; VrtxN<VrtxNmToVrtxH.Len(); VrtxN++){
    TStr VrtxNm=VrtxNmToVrtxH.GetKey(VrtxN);
    PVrtx Vrtx=TGVrtx::New(VrtxNm);
    VrtxNmToVrtxH.GetDat(VrtxNm)=Vrtx;
    Graph->AddVrtx(Vrtx);
  }
  // create edges
  for (int EdgeN=0; EdgeN<VrtxNmPrV.Len(); EdgeN++){
    PVrtx Vrtx1=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val1);
    PVrtx Vrtx2=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val2);
    PEdge Edge=new TGEdge(Vrtx1, Vrtx2, TStr::Fmt("_%d", EdgeN), false);
    Graph->AddEdge(Edge);
  }
  // place graph (simulated-annealing layout with a fixed seed for repeatability)
  ContexterF->State->ElGraph=Graph;
  TRnd Rnd(1);
  ContexterF->State->ElGraph->PlaceSimAnnXY(Rnd, ContexterF->State->ElGks);
  // draw graph
  ContexterF->State->ElGks->Clr();
  ContexterF->ElPbPaint(NULL);
}
// load from allactors.zip that was prepared by Brad Malin in 2005 PImdbNet TImdbNet::LoadTxt(const TStr& ActorFNm) { PImdbNet Net = TImdbNet::New(); TStrV ColV; char line [2024]; int NLines=0, DupEdge=0, Year, Position, ActorNId, MovieNId; TIntH ActorNIdH; THash<TIntPr, TInt> MovieNIdH; FILE *F = fopen(ActorFNm.CStr(), "rt"); fgets(line, 2024, F); while (! feof(F)) { memset(line, 0, 2024); fgets(line, 2024, F); if (strlen(line) == 0) break; TStr(line).SplitOnAllCh('|', ColV, false); IAssert(ColV.Len() == 7); const int NameStrId = Net->AddStr(ColV[0].GetTrunc().GetLc()+" "+ColV[1].GetTrunc().GetLc()); const int MovieStrId = Net->AddStr(ColV[2].GetTrunc().GetLc()); TStr YearStr = ColV[3].GetTrunc(); if (YearStr.Len() > 4) YearStr = YearStr.GetSubStr(0, 3); Year = 1; YearStr.IsInt(Year); const TMovieTy MovieTy = TImdbNet::GetMovieTy(ColV[4]); Position = TInt::Mx; ColV[5].GetTrunc().IsInt(Position); IAssert(ColV[6].GetTrunc()[0] == 'M' || ColV[6].GetTrunc()[0]=='F'); const bool IsMale = ColV[6].GetTrunc()[0] == 'M'; // create nodes if (ActorNIdH.IsKey(NameStrId)) { ActorNId = ActorNIdH.GetDat(NameStrId); } else { ActorNId = Net->AddNode(-1, TImdbNode(NameStrId, Year, Position, IsMale)); ActorNIdH.AddDat(NameStrId, ActorNId); } if (MovieNIdH.IsKey(TIntPr(MovieStrId, Year))) { MovieNId = MovieNIdH.GetDat(TIntPr(MovieStrId, Year)); } else { MovieNId = Net->AddNode(-1, TImdbNode(NameStrId, Year, MovieTy)); MovieNIdH.AddDat(TIntPr(MovieStrId, Year), MovieNId); } if (! 
Net->IsEdge(ActorNId, MovieNId)) { Net->AddEdge(ActorNId, MovieNId); } else { DupEdge++; } if (++NLines % 100000 == 0) printf("\r %dk ", NLines/1000); } fclose(F); printf("duplicate edges: %d\n", DupEdge); printf("nodes: %d\n", Net->GetNodes()); printf("edges: %d\n", Net->GetEdges()); printf("actors: %d\n", ActorNIdH.Len()); printf("movies: %d\n", MovieNIdH.Len()); // set the actor year to the year of his first movie int NUpdates=0; for (TNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) { if (NI().IsActor()) { int MinYear = NI().GetYear(); for (int e = 0; e < NI.GetOutDeg(); e++) { const TImdbNode& NodeDat = Net->GetNDat(NI.GetOutNId(e)); if (NodeDat.IsMovie()) MinYear = TMath::Mn(MinYear, NodeDat.GetYear()); } if (NI().Year != MinYear) NUpdates++; NI().Year = MinYear; } } printf("updated actor times: %d\n", NUpdates); return Net; }
// Filter meme cascades using the NIFTY peak criterion: bin each quote's
// mention times, call a bin a "peak" when its volume exceeds mean + 1 sdev,
// and drop quotes with more than 5 peaks (considered not meme-like).
// Surviving quotes are saved to QuotesPreprocessedData_NIFTY_FINALFILTERED.rar.
int main(int argc, char* argv[]) {
  TExeTm ExeTm;
  THash< TStr , CascadeElementV > quotesFiltered;
  double* vol_me;
  // NOTE(review): the comment claims 9 days per the NIFTY paper, but
  // 9 * 3600 seconds is 9 HOURS; 9 days would be 9 * 24 * 3600. Confirm the
  // intended bin width before reusing this code.
  uint period = 9 * 3600; // 9 days because of NIFTY paper
  printf("((((( Starting The Filtering Cascades CODE )))))\n");
  try {
    Env = TEnv(argc, argv, TNotify::StdNotify);
    Env.PrepArgs(TStr::Fmt("\nFiltering Memes Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
    // ---== Loading Data ==---
    TZipIn ZquotesIn("QuotesPreprocessedData_NIFTY.rar"); ///("/agbs/cluster/oaskaris/Data_Preparing_Codes/RESULTS/QuotesPreprocessedData_NIFTY.rar");
    THash< TStr , CascadeElementV > quotes;
    quotes.Load(ZquotesIn);
    printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len());
    // NIFTY Method for Filtering by Peaks: fixed observation window, binned
    // into `bins` buckets of `period` seconds each.
    uint begin = TSecTm(2008,7,31,0,0,0).GetAbsSecs();
    uint end = TSecTm(2009,10,1,0,0,0).GetAbsSecs();
    TSecTmV memesTimes;
    int bins = (end - begin) / period;
    for(int c=0;c<quotes.Len();c++) {
      // Collect this quote's mention timestamps.
      memesTimes.Clr();
      for(int i=0;i<quotes[c].Len();i++) {
        memesTimes.Add(quotes[c][i].time);
      }
      // Caller owns the returned histogram array (freed below).
      vol_me = Tools::calculateHistOfCascade(memesTimes,begin,period,false);
      // calculating mean and standard deviation (sample sdev, n-1 denominator)
      double mean = 0;
      for(int i=0;i<bins;i++) { mean += vol_me[i]; }
      mean /= bins;
      // NOTE(review): this local is named `std`, shadowing namespace std in
      // this translation unit — consider renaming.
      double std = 0;
      for(int i=0;i<bins;i++) { std += pow(vol_me[i]-mean , 2); }
      std = sqrt(std / (bins-1));
      // peak definition by NIFTY: a point is a peak if its volume in the
      // binning is 1 standard deviation higher than the average frequency
      double maxVolume = mean + std;
      int peakCnt = 0;
      for(int i=0;i<bins;i++) {
        if(vol_me[i] > maxVolume) { peakCnt++; }
      }
      // if there are more than 5 peaks ignore this quote, since it is not a meme
      if(peakCnt > 5) {
        delete[] vol_me;
        continue;
      }
      quotesFiltered.AddDat(quotes.GetKey(c),quotes[c]);
      delete[] vol_me;
    }
    TZipOut mout("QuotesPreprocessedData_NIFTY_FINALFILTERED.rar");
    quotesFiltered.Save(mout);
    printf("Saved QuotesPreprocessedData_NIFTY_FINALFILTERED has instances: %d\n\n\n",quotesFiltered.Len());
    printf("\nThe Meme Filter for plotting had been done successfully.\n");
  } catch(exception& ex) {
    printf("\nError1 happened, it was: %s\n\n",ex.what());
  } catch(TPt<TExcept>& ex) {
    printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
  }
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
// Exploration/driver mixing several one-off experiments. Only the first
// section (printing the posts/timestamps of one hard-coded quote) executes;
// everything after the first `return 0` is dead code kept for reference,
// including the original URL-cascade construction pass at the bottom.
// Uses file-scope globals: cascadesInUrlsOnTwitter, preprocessedQuotes,
// postsPropagatedOverTw.
int main(int argc, char* argv[]) {
  cout << "START...\n";
  // --- Active section: dump one quote's posts and timestamps ---
  THash< TChA , TUInt > posts;
  TZipIn ZpostsIn("/NS/twitter-5/work/oaskaris/PostsData.rar");
  posts.Load(ZpostsIn);
  printf("PostsData loading done, it contains %d posts.\n",posts.Len());
  TZipIn z2("CascadesFullUrlsOnTwitterData.rar");
  cascadesInUrlsOnTwitter.Load(z2);
  printf("CascadesFullUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());
  TZipIn ZquotesIn("/NS/twitter-5/work/oaskaris/QuotesPreprocessedData_NIFTY.rar");
  preprocessedQuotes.Load(ZquotesIn);
  printf("QuotesPreprocessedData_NIFTY loading done, it contains %d quotes.\n",preprocessedQuotes.Len());
  int qoId = 1337058;  // hard-coded quote index under inspection
  cout << "\n\nQUOTE: " << preprocessedQuotes.GetKey(qoId).CStr() << endl << endl;
  for(int l=0;l<preprocessedQuotes[qoId].Len();l++) {
    cout << posts.GetKey(preprocessedQuotes[qoId][l].post).CStr() << " : " << preprocessedQuotes[qoId][l].time.GetYmdTmStr().CStr() << endl;
  }
  return 0;
  // --- DEAD CODE below this point (early return above) ---
  // Search for small cascades where Twitter precedes the meme cascade.
  int cnt = 3;
  bool printed = false;
  while(!printed) {
    cnt++;  // relax the size threshold until something prints
    for(int l=0;l<cascadesInUrlsOnTwitter.Len();l++) {
      int qID = cascadesInUrlsOnTwitter.GetKey(l);
      if(cascadesInUrlsOnTwitter[l][0].GetAbsSecs() < preprocessedQuotes[qID][0].time.GetAbsSecs()) {
        if(preprocessedQuotes[qID].Len() > cnt) { continue; }
        printed = true;
        cout << "Twitter time: " << cascadesInUrlsOnTwitter[l][0].GetYmdTmStr().CStr() << endl;
        cout << "Memes time: " << preprocessedQuotes[qID][0].time.GetYmdTmStr().CStr() << endl;
        cout << "QID: " << qID << endl;
        cout << "\n\n\n";
      }
    }
  }
  return 0;
  // Earlier diagnostics, kept commented out:
  // TZipIn z1("CascadesRawUrlsOnTwitterData.rar");
  // cascadesInUrlsOnTwitter.Load(z1);
  // printf("CascadesRawUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());
  // long long l1 = 0;
  // for(int i=0;i<cascadesInUrlsOnTwitter.Len();i++) { l1+=cascadesInUrlsOnTwitter[i].Len(); }
  // cascadesInUrlsOnTwitter.Clr();
  //
  // TZipIn z2("CascadesFullUrlsOnTwitterData.rar");
  // cascadesInUrlsOnTwitter.Load(z2);
  // printf("CascadesFullUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());
  // long long l2 = 0;
  // for(int i=0;i<cascadesInUrlsOnTwitter.Len();i++) { l2+=cascadesInUrlsOnTwitter[i].Len(); }
  //
  // cout << "\n\n\nRaw All Items: " << l1 << "\n" << "Full All Items: " << l2 << endl;
  // return 0;
  // TZipIn resIn("CascadesUrlsOnTwitterData.rar");
  // cascadesInUrlsOnTwitter.Load(resIn);
  // printf("The size of CascadesUrlsOnTwitter was: %d\n",cascadesInUrlsOnTwitter.Len());
  // for(int l=0;l<5;l++) {
  //  printf("\n\n\nQ%d:\n\n",l);
  //  for(int o=0;o<cascadesInUrlsOnTwitter[l].Len();o++) {
  //   printf("%d. %s\n",o,cascadesInUrlsOnTwitter[l][o].GetYmdTmStr().CStr());
  //  }
  // }
  // return 0;
  // Original pass: merge per-post Twitter URL propagation times into
  // per-quote sorted cascades, then persist via SaveAll().
  TExeTm ExeTm;
  unsigned int q,p;
  int id,i;
  TSecTmV* cascade;
  TSecTmV* res;
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("\n***(((Finding the cascades of the desired quotes in their urls)))***. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  try {
    // Shadows the ZquotesIn declared at function scope above.
    TZipIn ZquotesIn("/NS/twitter-5/work/oaskaris/QuotesPreprocessedData_NIFTY.rar");
    preprocessedQuotes.Load(ZquotesIn);
    printf("QuotesPreprocessed loading done, it contains %d quotes.\n",preprocessedQuotes.Len());
    TZipIn ZpostsPropIn("PostsPropagatedOverTwitterFullUrls.rar");
    postsPropagatedOverTw.Load(ZpostsPropIn);
    printf("PostsPropagatedOverTwitterFullUrls loading done, it contains %d items.\n",postsPropagatedOverTw.Len());
    // For each quote occurrence, look up the post's Twitter propagation times
    // and merge them (sorted) into the quote's cascade, keyed by quote index q.
    for(q=0;q<preprocessedQuotes.Len();q++) {
      for(p=0;p<preprocessedQuotes[q].Len();p++) {
        TUInt postId = preprocessedQuotes[q][p].post;
        if(postsPropagatedOverTw.GetKeyId(postId) != -1) {
          cascade = &postsPropagatedOverTw.GetDat(postId);
          id = cascadesInUrlsOnTwitter.GetKeyId(q);
          if(id == -1) {
            res = &cascadesInUrlsOnTwitter.AddDat(q);
            for(i=0;i<cascade->Len();i++) { res->AddSorted((*cascade)[i]); }
          } else {
            res = &cascadesInUrlsOnTwitter.GetDat(q);
            for(i=0;i<cascade->Len();i++) { res->AddSorted((*cascade)[i]); }
          }
        }
      }
    }
    printf("\n\nFINDING THE CASCADES OF QUOTES WITH URLS OVER TWITTER IS DONE\n\n");
    SaveAll();
  } catch(exception& ex) {
    SaveAll();  // best-effort save of partial results before reporting
    printf("\nError1 happened, it was: %s\n\n",ex.what());
  } catch(TPt<TExcept>& ex) {
    SaveAll();
    printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
  }
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}