void SaveAll() { printf("\n<<< SAVING STARTS (PLEASE BE PATIENT!!!) >>> .......\n"); TZipOut resOut("CascadesFullUrlsOnTwitterData.rar"); cascadesInUrlsOnTwitter.Save(resOut); printf("The size of CascadesFullUrlsOnTwitterData was: %d\n",cascadesInUrlsOnTwitter.Len()); printf("\n<<<<<<<< SAVING DONE >>>>>>>>\n\n"); }
int main(int argc, char* argv[]) { TExeTm ExeTm; PGconn *conn; PGresult *res; int id,start,rec_count,row,indx,end; unsigned int q; int total_number_tweets = 0; double tweet_date = 0; TStr TweetStr(""); TStr TweetStrLc(""); if(argc > 1) { start = atoi(argv[1]); } else { printf("YOU SHOULD SET THE INDICES...\n\n"); return 1; } indx = start * LENGTH; end = indx + LENGTH; printf(":::::::: Find Cascades of Quotes In Twitter Separately ::::::::\n"); const TStr StartDate = Env.GetIfArgPrefixStr("-sd:", "2008-08-01 00:00:00", "Starting date"); const TStr EndDate = Env.GetIfArgPrefixStr("-ed:", "2009-10-01 00:00:00", "Ending date"); Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nFinding the cascades of the desired quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); try { while(indx < end) { TStr qContentFname = TStr::Fmt("QuotesData/Q%d.rar",indx); TStr resultFname = TStr::Fmt("QuotesCascResult/R%d.rar",indx++); if(fileExists(resultFname)) { if(fileExists(qContentFname)) { // removing the quotes' content file system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr()); } } else { if(fileExists(qContentFname)) { THash<TStr,TInt> quotesContent; THash<TInt,TSecTmV> CascadesOnTwitter; TZipIn ZquotesIn(qContentFname); quotesContent.Load(ZquotesIn); printf("Q%d loading done, it contains %d quotes.\n",indx-1,quotesContent.Len()); conn = PQconnectdb("dbname=twitter host=postgresql01.mpi-sws.org user=twitter password=tweet@84"); if (PQstatus(conn) == CONNECTION_BAD) { printf("We were unable to connect to the database"); return 1; } // we use cursors/fetch to speed up the process; batch of 10000 tweets PQexec(conn, "begin work"); PQexec(conn,TStr::Fmt("declare mycursor cursor for select tweettext, extract(epoch from tweettime) from tweets where tweettime >= timestamp '%s' and tweettime < timestamp '%s'", StartDate.CStr(), EndDate.CStr()).CStr()); do { res = PQexec(conn, "FETCH 1000000 IN mycursor"); // all of them are: 1675401026 if (PQresultStatus(res) == PGRES_TUPLES_OK) { rec_count = PQntuples(res); total_number_tweets += rec_count; printf("Adding %d tweets... (total: %d)\n", rec_count, total_number_tweets); for (row=0; row<rec_count; row++) { TweetStr = PQgetvalue(res, row, 0); tweet_date = TStr(PQgetvalue(res, row, 1)).GetFlt(); TweetStrLc = TweetStr.ToLc(); for(q=0;q<quotesContent.Len();q++) { if (TweetStrLc.SearchStr(quotesContent.GetKey(q)) > -1) { TSecTm td(tweet_date); id = CascadesOnTwitter.GetKeyId(quotesContent[q]); if(id == -1) { CascadesOnTwitter.AddDat(quotesContent[q]).Add(td); } else { CascadesOnTwitter.GetDat(quotesContent[q]).AddSorted(td); } } } } PQclear(res); } else { rec_count = 0; } } while (rec_count); PQexec(conn, "close mycursor"); PQexec(conn, "commit work"); PQfinish(conn); // Save the results TZipOut zout(resultFname); CascadesOnTwitter.Save(zout); // Remove the qoutes' content file system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr()); } } } printf("\n\nD O N E\n\n"); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }
int main(int argc, char* argv[]) { TExeTm ExeTm; THash< TStr , CascadeElementV > quotesFiltered; double* vol_me; uint period = 9 * 3600; // 9 days because of NIFTY paper printf("((((( Starting The Filtering Cascades CODE )))))\n"); try { Env = TEnv(argc, argv, TNotify::StdNotify); Env.PrepArgs(TStr::Fmt("\nFiltering Memes Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm())); // ---== Loading Data ==--- TZipIn ZquotesIn("QuotesPreprocessedData_NIFTY.rar"); ///("/agbs/cluster/oaskaris/Data_Preparing_Codes/RESULTS/QuotesPreprocessedData_NIFTY.rar"); THash< TStr , CascadeElementV > quotes; quotes.Load(ZquotesIn); printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len()); // NIFTY Method for Filtering by Peaks uint begin = TSecTm(2008,7,31,0,0,0).GetAbsSecs(); uint end = TSecTm(2009,10,1,0,0,0).GetAbsSecs(); TSecTmV memesTimes; int bins = (end - begin) / period; for(int c=0;c<quotes.Len();c++) { memesTimes.Clr(); for(int i=0;i<quotes[c].Len();i++) { memesTimes.Add(quotes[c][i].time); } vol_me = Tools::calculateHistOfCascade(memesTimes,begin,period,false); // calculating mean and standard deviation double mean = 0; for(int i=0;i<bins;i++) { mean += vol_me[i]; } mean /= bins; double std = 0; for(int i=0;i<bins;i++) { std += pow(vol_me[i]-mean , 2); } std = sqrt(std / (bins-1)); // peak definition by NIFTY: a point is a peak if its volume in 9 days binning is 1 standard deviation higher than the average frequency double maxVolume = mean + std; int peakCnt = 0; for(int i=0;i<bins;i++) { if(vol_me[i] > maxVolume) { peakCnt++; } } // if there is more than 5 peaks ignore this quote, since it is not a meme if(peakCnt > 5) { delete[] vol_me; continue; } quotesFiltered.AddDat(quotes.GetKey(c),quotes[c]); delete[] vol_me; } TZipOut mout("QuotesPreprocessedData_NIFTY_FINALFILTERED.rar"); quotesFiltered.Save(mout); printf("Saved QuotesPreprocessedData_NIFTY_FINALFILTERED has instances: %d\n\n\n",quotesFiltered.Len()); printf("\nThe Meme Filter for plotting had been done successfully.\n"); } catch(exception& ex) { printf("\nError1 happened, it was: %s\n\n",ex.what()); } catch(TPt<TExcept>& ex) { printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr()); } printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr()); return 0; }