int main(int argc, char* argv[]) { TExeTm ExeTm; PGconn *conn; PGresult *res; int id,start,rec_count,row,indx,end; unsigned int q; int total_number_tweets = 0; double tweet_date = 0; TStr TweetStr(""); TStr TweetStrLc(""); if(argc > 1) { start = atoi(argv[1]); } else { printf("YOU SHOULD SET THE INDICES...\n\n"); return 1; } indx = start * LENGTH; end = indx + LENGTH; printf(":::::::: Find Cascades of Quotes In Twitter Separately ::::::::\n"); const TStr StartDate = Env.GetIfArgPrefixStr("-sd:", "2008-08-01 00:00:00", "Starting date"); const TStr EndDate = Env.GetIfArgPrefixStr("-ed:", "2009-10-01 00:00:00", "Ending date"); Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nFinding the cascades of the desired quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); try { while(indx < end) { TStr qContentFname = TStr::Fmt("QuotesData/Q%d.rar",indx); TStr resultFname = TStr::Fmt("QuotesCascResult/R%d.rar",indx++); if(fileExists(resultFname)) { if(fileExists(qContentFname)) { // removing the quotes' content file system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr()); } } else { if(fileExists(qContentFname)) { THash<TStr,TInt> quotesContent; THash<TInt,TSecTmV> CascadesOnTwitter; TZipIn ZquotesIn(qContentFname); quotesContent.Load(ZquotesIn); printf("Q%d loading done, it contains %d quotes.\n",indx-1,quotesContent.Len()); conn = PQconnectdb("dbname=twitter host=postgresql01.mpi-sws.org user=twitter password=tweet@84"); if (PQstatus(conn) == CONNECTION_BAD) { printf("We were unable to connect to the database"); return 1; } // we use cursors/fetch to speed up the process; batch of 10000 tweets PQexec(conn, "begin work"); PQexec(conn,TStr::Fmt("declare mycursor cursor for select tweettext, extract(epoch from tweettime) from tweets where tweettime >= timestamp '%s' and tweettime < timestamp '%s'", StartDate.CStr(), EndDate.CStr()).CStr()); do { res = PQexec(conn, "FETCH 1000000 IN mycursor"); // all of them are: 1675401026 if 
(PQresultStatus(res) == PGRES_TUPLES_OK) { rec_count = PQntuples(res); total_number_tweets += rec_count; printf("Adding %d tweets... (total: %d)\n", rec_count, total_number_tweets); for (row=0; row<rec_count; row++) { TweetStr = PQgetvalue(res, row, 0); tweet_date = TStr(PQgetvalue(res, row, 1)).GetFlt(); TweetStrLc = TweetStr.ToLc(); for(q=0;q<quotesContent.Len();q++) { if (TweetStrLc.SearchStr(quotesContent.GetKey(q)) > -1) { TSecTm td(tweet_date); id = CascadesOnTwitter.GetKeyId(quotesContent[q]); if(id == -1) { CascadesOnTwitter.AddDat(quotesContent[q]).Add(td); } else { CascadesOnTwitter.GetDat(quotesContent[q]).AddSorted(td); } } } } PQclear(res); } else { rec_count = 0; } } while (rec_count); PQexec(conn, "close mycursor"); PQexec(conn, "commit work"); PQfinish(conn); // Save the results TZipOut zout(resultFname); CascadesOnTwitter.Save(zout); // Remove the qoutes' content file system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr()); } } } printf("\n\nD O N E\n\n"); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { TExeTm ExeTm; THash< TStr , CascadeElementV > quotesFiltered; double* vol_me; uint period = 9 * 3600; // 9 days because of NIFTY paper printf("((((( Starting The Filtering Cascades CODE )))))\n"); try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nFiltering Memes Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); // ---== Loading Data ==--- TZipIn ZquotesIn("QuotesPreprocessedData_NIFTY.rar"); ///("/agbs/cluster/oaskaris/Data_Preparing_Codes/RESULTS/QuotesPreprocessedData_NIFTY.rar"); THash< TStr , CascadeElementV > quotes; quotes.Load(ZquotesIn); printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len()); // NIFTY Method for Filtering by Peaks uint begin = TSecTm(2008,7,31,0,0,0).GetAbsSecs(); uint end = TSecTm(2009,10,1,0,0,0).GetAbsSecs(); TSecTmV memesTimes; int bins = (end - begin) / period; for(int c=0;c<quotes.Len();c++) { memesTimes.Clr(); for(int i=0;i<quotes[c].Len();i++) { memesTimes.Add(quotes[c][i].time); } vol_me = Tools::calculateHistOfCascade(memesTimes,begin,period,false); // calculating mean and standard deviation double mean = 0; for(int i=0;i<bins;i++) { mean += vol_me[i]; } mean /= bins; double std = 0; for(int i=0;i<bins;i++) { std += pow(vol_me[i]-mean , 2); } std = sqrt(std / (bins-1)); // peak definition by NIFTY: a point is a peak if its volume in 9 days binning is 1 standard deviation higher than the average frequency double maxVolume = mean + std; int peakCnt = 0; for(int i=0;i<bins;i++) { if(vol_me[i] > maxVolume) { peakCnt++; } } // if there is more than 5 peaks ignore this quote, since it is not a meme if(peakCnt > 5) { delete[] vol_me; continue; } quotesFiltered.AddDat(quotes.GetKey(c),quotes[c]); delete[] vol_me; } TZipOut mout("QuotesPreprocessedData_NIFTY_FINALFILTERED.rar"); quotesFiltered.Save(mout); printf("Saved QuotesPreprocessedData_NIFTY_FINALFILTERED has instances: %d\n\n\n",quotesFiltered.Len()); printf("\nThe 
Meme Filter for plotting had been done successfully.\n"); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { int i,quoteIndex,j,k; TExeTm ExeTm; printf("Starting The SAVE CODE For Matlab Processing ...\n"); try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nCreating the volumes of the quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); TZipIn ZquotesIn("RESULTS/QuotesPreprocessedData_NIFTY.rar"); quotes.Load(ZquotesIn); printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len()); TZipIn ZcascadesOnTwitterIn("RESULTS/CascadesFullUrlsOnTwitterData.rar"); cascadesOnTwitterUrls.Load(ZcascadesOnTwitterIn); printf("Loaded CascadesFullUrlsOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterUrls.Len()); TZipIn ZIn("RESULTS/CascadesOnTwitterData.rar"); cascadesOnTwitterContents.Load(ZIn); printf("Loaded CascadesOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterContents.Len()); // Quote's Cascades over Memes ofstream quotesContent1("MEMES_QuotesContent.csv",ios::out|ios::app); ofstream memeWebs1("MEMES_MemesCascadesWebs.csv",ios::out|ios::app); ofstream memeTimes1("MEMES_MemesCascadesTimes.csv",ios::out|ios::app); ofstream externalLinks1("MEMES_MemesExternalLinks.csv",ios::out|ios::app); for(i=0;i<quotes.Len();i++) { quotesContent1 << quotes.GetKey(i).CStr() << "\r\n"; for(j=0;j<quotes[i].Len();j++) { for(k=0;k<quotes[i][j].explicit_links.Len();k++) { externalLinks1 << quotes[i][j].explicit_links[k].Val << "," << quotes[i][j].post.Val<<"\r\n"; } memeTimes1 << quotes[i][j].time.GetAbsSecs() << ","; memeWebs1 << quotes[i][j].post.Val << ","; } memeTimes1 << "\r\n"; memeWebs1 << "\r\n"; externalLinks1 << "-1\r\n"; // this means that the external links for this quote is finished } quotesContent1.close(); memeWebs1.close(); memeTimes1.close(); externalLinks1.close(); // TEXTS Cascades Over Memes and Twitter ofstream quotesContent2("MEMES_TWITTER_TXT_QuotesContent.csv",ios::out|ios::app); ofstream memeWebs2("MEMES_TWITTER_TXT_MemesCascadesWebs.csv",ios::out|ios::app); 
ofstream memeTimes2("MEMES_TWITTER_TXT_MemesCascadesTimes.csv",ios::out|ios::app); ofstream externalLinks2("MEMES_TWITTER_TXT_MemesExternalLinks.csv",ios::out|ios::app); ofstream twitterContent2("MEMES_TWITTER_TXT_TwitterTextCascades.csv",ios::out|ios::app); for(i=0;i<cascadesOnTwitterContents.Len();i++) { quoteIndex = cascadesOnTwitterContents.GetKey(i); quotesContent2 << quotes.GetKey(quoteIndex).CStr() << "\r\n"; for(j=0;j<quotes[quoteIndex].Len();j++) { for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) { externalLinks2 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE } memeTimes2 << quotes[quoteIndex][j].time.GetAbsSecs() << ","; memeWebs2 << quotes[quoteIndex][j].post.Val << ","; } memeTimes2 << "\r\n"; memeWebs2 << "\r\n"; externalLinks2 << "-1\r\n"; // this means that the external links for this quote is finished for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++) { twitterContent2 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ","; } twitterContent2 << "\r\n"; } quotesContent2.close(); memeWebs2.close(); memeTimes2.close(); externalLinks2.close(); twitterContent2.close(); // URLS Cascades Over Memes and Twitter ofstream quotesContent3("MEMES_TWITTER_URL_QuotesContent.csv",ios::out|ios::app); ofstream memeWebs3("MEMES_TWITTER_URL_MemesCascadesWebs.csv",ios::out|ios::app); ofstream memeTimes3("MEMES_TWITTER_URL_MemesCascadesTimes.csv",ios::out|ios::app); ofstream externalLinks3("MEMES_TWITTER_URL_MemesExternalLinks.csv",ios::out|ios::app); ofstream twitter3("MEMES_TWITTER_URL_TwitterUrlCascades.csv",ios::out|ios::app); for(i=0;i<cascadesOnTwitterUrls.Len();i++) { quoteIndex = cascadesOnTwitterUrls.GetKey(i); quotesContent3 << quotes.GetKey(quoteIndex).CStr() << "\r\n"; for(j=0;j<quotes[quoteIndex].Len();j++) { for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) { externalLinks3 << quotes[quoteIndex][j].explicit_links[k].Val << "," << 
quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE } memeTimes3 << quotes[quoteIndex][j].time.GetAbsSecs() << ","; memeWebs3 << quotes[quoteIndex][j].post.Val << ","; } memeTimes3 << "\r\n"; memeWebs3 << "\r\n"; externalLinks3 << "-1\r\n"; // this means that the external links for this quote is finished for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++) { twitter3 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ","; } twitter3 << "\r\n"; } quotesContent3.close(); memeWebs3.close(); memeTimes3.close(); externalLinks3.close(); twitter3.close(); // INTERSECT OF URLS OF TEXTS Cascades Over Memes and Twitter ofstream quotesContent4("TRIPLE_QuotesContent.csv",ios::out|ios::app); ofstream memeWebs4("TRIPLE_MemesCascadesWebs.csv",ios::out|ios::app); ofstream memeTimes4("TRIPLE_MemesCascadesTimes.csv",ios::out|ios::app); ofstream externalLinks4("TRIPLE_MemesExternalLinks.csv",ios::out|ios::app); ofstream twitter4("TRIPLE_TwitterUrlCascades.csv",ios::out|ios::app); ofstream twitterContent4("TRIPLE_TwitterTextCascades.csv",ios::out|ios::app); for(i=0;i<cascadesOnTwitterUrls.Len();i++) { quoteIndex = cascadesOnTwitterUrls.GetKey(i); if(cascadesOnTwitterContents.GetKeyId(quoteIndex) == -1) { continue; } quotesContent4 << quotes.GetKey(quoteIndex).CStr() << "\r\n"; for(j=0;j<quotes[quoteIndex].Len();j++) { for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++) { externalLinks4 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n"; // << CHECK HERE >> CHANGE -> TO SPACE } memeTimes4 << quotes[quoteIndex][j].time.GetAbsSecs() << ","; memeWebs4 << quotes[quoteIndex][j].post.Val << ","; } memeTimes4 << "\r\n"; memeWebs4 << "\r\n"; externalLinks4 << "-1\r\n"; // this means that the external links for this quote is finished for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++) { twitterContent4 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ","; } 
for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++) { twitter4 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ","; } twitter4 << "\r\n"; twitterContent4 << "\r\n"; } quotesContent4.close(); memeWebs4.close(); memeTimes4.close(); externalLinks4.close(); twitter4.close(); twitterContent4.close(); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
// Debug/inspection entry point that, as written, only dumps one hard-coded quote.
//
// What actually runs: the posts table, the URL cascades and the preprocessed
// quotes are loaded, the quote with key id 1337058 is printed together with
// the post key and time of each of its cascade elements, and main returns 0.
//
// Everything after that first `return 0;` is unreachable: a second inspection
// loop, two commented-out experiments, and the section that builds the URL
// cascades and calls SaveAll().
// NOTE(review): confirm whether the early returns are leftover debugging; if
// the cascade-building section is meant to run they have to be removed.
int main(int argc, char* argv[]) {
  cout << "START...\n";

  // Load the three tables used by the dump below. `posts` is local; the other
  // two are loaded into objects declared outside this function.
  THash< TChA , TUInt > posts;
  TZipIn ZpostsIn("/NS/twitter-5/work/oaskaris/PostsData.rar");
  posts.Load(ZpostsIn);
  printf("PostsData loading done, it contains %d posts.\n",posts.Len());

  TZipIn z2("CascadesFullUrlsOnTwitterData.rar");
  cascadesInUrlsOnTwitter.Load(z2);
  printf("CascadesFullUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());

  TZipIn ZquotesIn("/NS/twitter-5/work/oaskaris/QuotesPreprocessedData_NIFTY.rar");
  preprocessedQuotes.Load(ZquotesIn);
  printf("QuotesPreprocessedData_NIFTY loading done, it contains %d quotes.\n",preprocessedQuotes.Len());

  // Dump one quote. The key id is hard-coded and is not checked against
  // preprocessedQuotes.Len() before it is used.
  int qoId = 1337058;
  cout << "\n\nQUOTE: " << preprocessedQuotes.GetKey(qoId).CStr() << endl << endl;
  for(int l=0;l<preprocessedQuotes[qoId].Len();l++)
  {
    // One line per cascade element: "<post key> : <time>".
    cout << posts.GetKey(preprocessedQuotes[qoId][l].post).CStr() << " : " << preprocessedQuotes[qoId][l].time.GetYmdTmStr().CStr() << endl;
  }
  return 0;

  // ---- UNREACHABLE from here on (see the return above) ----

  // Looks for quotes whose first Twitter time precedes their first meme time,
  // raising the allowed cascade length (cnt) by one per pass until at least
  // one such quote has been printed. If no quote ever qualifies, `printed`
  // stays false and the loop does not end.
  int cnt = 3;
  bool printed = false;
  while(!printed)
  {
    cnt++;
    for(int l=0;l<cascadesInUrlsOnTwitter.Len();l++)
    {
      int qID = cascadesInUrlsOnTwitter.GetKey(l);
      if(cascadesInUrlsOnTwitter[l][0].GetAbsSecs() < preprocessedQuotes[qID][0].time.GetAbsSecs())
      {
        // Skip quotes whose meme cascade is longer than the current limit.
        if(preprocessedQuotes[qID].Len() > cnt)
        {
          continue;
        }
        printed = true;
        cout << "Twitter time: " << cascadesInUrlsOnTwitter[l][0].GetYmdTmStr().CStr() << endl;
        cout << "Memes time: " << preprocessedQuotes[qID][0].time.GetYmdTmStr().CStr() << endl;
        cout << "QID: " << qID << endl;
        cout << "\n\n\n";
      }
    }
  }
  return 0;

  // Disabled experiment: compare the total item counts of the raw and the
  // full URL cascade files.
  // TZipIn z1("CascadesRawUrlsOnTwitterData.rar");
  // cascadesInUrlsOnTwitter.Load(z1);
  // printf("CascadesRawUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());
  // long long l1 = 0;
  // for(int i=0;i<cascadesInUrlsOnTwitter.Len();i++)
  // {
  // l1+=cascadesInUrlsOnTwitter[i].Len();
  // }
  // cascadesInUrlsOnTwitter.Clr();
  //
  // TZipIn z2("CascadesFullUrlsOnTwitterData.rar");
  // cascadesInUrlsOnTwitter.Load(z2);
  // printf("CascadesFullUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());
  // long long l2 = 0;
  // for(int i=0;i<cascadesInUrlsOnTwitter.Len();i++)
  // {
  // l2+=cascadesInUrlsOnTwitter[i].Len();
  // }
  //
  // cout << "\n\n\nRaw All Items: " << l1 << "\n" << "Full All Items: " << l2 << endl;
  // return 0;

  // Disabled experiment: print the first five URL cascades.
  // TZipIn resIn("CascadesUrlsOnTwitterData.rar");
  // cascadesInUrlsOnTwitter.Load(resIn);
  // printf("The size of CascadesUrlsOnTwitter was: %d\n",cascadesInUrlsOnTwitter.Len());
  // for(int l=0;l<5;l++)
  // {
  // printf("\n\n\nQ%d:\n\n",l);
  // for(int o=0;o<cascadesInUrlsOnTwitter[l].Len();o++)
  // {
  // printf("%d. %s\n",o,cascadesInUrlsOnTwitter[l][o].GetYmdTmStr().CStr());
  // }
  // }
  // return 0;

  // Cascade-building section (still unreachable). For every post of every
  // quote that has an entry in postsPropagatedOverTw, the Twitter times of
  // that post are merged, in sorted order, into the cascade stored under the
  // quote's index.
  // NOTE(review): cascadesInUrlsOnTwitter was already filled from a file at the
  // top of main, so if this section is re-enabled it adds to that loaded
  // content - confirm this is wanted.
  TExeTm ExeTm;
  unsigned int q,p;
  int id,i;
  TSecTmV* cascade;
  TSecTmV* res;
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("\n***(((Finding the cascades of the desired quotes in their urls)))***. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  try
  {
    // This ZquotesIn is a new variable that hides the one declared at the top of main.
    TZipIn ZquotesIn("/NS/twitter-5/work/oaskaris/QuotesPreprocessedData_NIFTY.rar");
    preprocessedQuotes.Load(ZquotesIn);
    printf("QuotesPreprocessed loading done, it contains %d quotes.\n",preprocessedQuotes.Len());

    TZipIn ZpostsPropIn("PostsPropagatedOverTwitterFullUrls.rar");
    postsPropagatedOverTw.Load(ZpostsPropIn);
    printf("PostsPropagatedOverTwitterFullUrls loading done, it contains %d items.\n",postsPropagatedOverTw.Len());

    for(q=0;q<preprocessedQuotes.Len();q++)
    {
      for(p=0;p<preprocessedQuotes[q].Len();p++)
      {
        TUInt postId = preprocessedQuotes[q][p].post;
        if(postsPropagatedOverTw.GetKeyId(postId) != -1)
        {
          cascade = &postsPropagatedOverTw.GetDat(postId);
          id = cascadesInUrlsOnTwitter.GetKeyId(q);
          // The two branches differ only in how the target cascade is obtained
          // (AddDat for a new quote, GetDat for a known one); the merge loop is
          // the same in both.
          if(id == -1)
          {
            res = &cascadesInUrlsOnTwitter.AddDat(q);
            for(i=0;i<cascade->Len();i++)
            {
              res->AddSorted((*cascade)[i]);
            }
          }
          else
          {
            res = &cascadesInUrlsOnTwitter.GetDat(q);
            for(i=0;i<cascade->Len();i++)
            {
              res->AddSorted((*cascade)[i]);
            }
          }
        }
      }
    }

    printf("\n\nFINDING THE CASCADES OF QUOTES WITH URLS OVER TWITTER IS DONE\n\n");
    // SaveAll() is defined outside this block; presumably it writes the
    // cascade table to disk - verify.
    SaveAll();
  }
  catch(exception& ex)
  {
    // SaveAll() is also called on failure, before the error is reported.
    SaveAll();
    printf("\nError1 happened, it was: %s\n\n",ex.what());
  }
  catch(TPt<TExcept>& ex)
  {
    SaveAll();
    printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
  }

  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}