Example #1
TStr TSockSys::GetStatusStr(){
  TChA ChA;
  ChA+="Sockets: "; ChA+=TInt::GetStr(SockIdToHndH.Len()); ChA+="\r\n";
  ChA+="Host-Resolutions: "; ChA+=TInt::GetStr(HndToSockHostH.Len()); ChA+="\r\n";
  ChA+="Socket-Events: "; ChA+=TInt::GetStr(IdToSockEventH.Len()); ChA+="\r\n";
  ChA+="Timers: "; ChA+=TInt::GetStr(SockIdToTimerHndH.Len()); ChA+="\r\n";
  return ChA;
}
Example #2
 static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) {
   TCNMQMatrix QMatrix(Graph);
   // maximize modularity
   while (QMatrix.MergeBestQ()) { }
   // reconstruct communities
   THash<TInt, TIntV> IdCmtyH;
   for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
     IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId());
   }
   CmtyV.Gen(IdCmtyH.Len());
   for (int j = 0; j < IdCmtyH.Len(); j++) {
     CmtyV[j].NIdV.Swap(IdCmtyH[j]);
   }
   return QMatrix.Q;
 }
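A minimal usage sketch for the CNM helper above, assuming it is visible in the same translation unit and that SNAP's Snap.h (PUNGraph, TCnComV, TSnap::GenRndGnm) is available; the demo name and graph sizes are illustrative:

#include "Snap.h"

void CmtyCMNDemo() {
  // small random undirected graph: 100 nodes, 300 edges
  PUNGraph Graph = TSnap::GenRndGnm<PUNGraph>(100, 300, false);
  TCnComV CmtyV;
  const double Q = CmtyCMN(Graph, CmtyV);   // greedy modularity maximization, as defined above
  printf("modularity %f over %d communities\n", Q, CmtyV.Len());
  for (int c = 0; c < CmtyV.Len(); c++) {
    printf("  community %d: %d nodes\n", c, CmtyV[c].Len());
  }
}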
Example #3
void TAGM::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) {
	for(int i=0;i<CmtyVH.Len();i++){
		int CID = CmtyVH.GetKey(i);
		for(int j=0;j<CmtyVH[i].Len();j++) {
			int NID = CmtyVH[i][j];
			NIDComVH.AddDat(NID).Add(CID);
		}
	}
}
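A small, hypothetical usage sketch for GetNodeMembership, assuming it is a static member of TAGM as in SNAP's agm module; the toy communities below are illustrative:

void NodeMembershipDemo() {
  // community id -> member node ids
  THash<TInt, TIntV> CmtyVH;
  CmtyVH.AddDat(0).Add(1); CmtyVH.GetDat(0).Add(2); CmtyVH.GetDat(0).Add(3);
  CmtyVH.AddDat(1).Add(3); CmtyVH.GetDat(1).Add(4);
  // invert the map: node id -> ids of communities the node belongs to
  THash<TInt, TIntV> NIDComVH;
  TAGM::GetNodeMembership(NIDComVH, CmtyVH);
  for (int i = 0; i < NIDComVH.Len(); i++) {
    printf("node %d is in %d communities\n", NIDComVH.GetKey(i).Val, NIDComVH[i].Len());
  }
}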
Example #4
void TTop2FriendNet::GetAvgSDevV(const THash<TFlt, TMom>& MomH, TFltTrV& ValAvgSDevV) {
  ValAvgSDevV.Clr(false);
  for (int i = 0; i < MomH.Len(); i++) {
    TMom Mom=MomH[i];
    Mom.Def();
    ValAvgSDevV.Add(TFltTr(MomH.GetKey(i), Mom.GetMean(), Mom.GetSDev()));
  }
  ValAvgSDevV.Sort();
}
void SaveAll()
{
	printf("\n<<< SAVING STARTS (PLEASE BE PATIENT!!!) >>> .......\n");
	TZipOut resOut("CascadesFullUrlsOnTwitterData.rar");
	cascadesInUrlsOnTwitter.Save(resOut);
	printf("The size of CascadesFullUrlsOnTwitterData was: %d\n",cascadesInUrlsOnTwitter.Len());

	printf("\n<<<<<<<< SAVING DONE >>>>>>>>\n\n");
}
/// rewire bipartite community affiliation graphs
void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) {
    THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len());
    TIntV NDegV;
    TIntV CDegV;
    for (int i = 0; i < CmtyVH.Len(); i++) {
        int CID = CmtyVH.GetKey(i);
        for (int j = 0; j < CmtyVH[i].Len(); j++) {
            int NID = CmtyVH[i][j];
            NDegV.Add(NID);
            CDegV.Add(CID);
        }
    }
    TIntPrSet CNIDSet(CDegV.Len());
    int c=0;
    while (c++ < 15 && CDegV.Len() > 1) {
        for (int i = 0; i < CDegV.Len(); i++) {
            int u = Rnd.GetUniDevInt(CDegV.Len());
            int v = Rnd.GetUniDevInt(NDegV.Len());
            if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) {
                continue;
            }
            CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
            if (u == CDegV.Len() - 1) {
                CDegV.DelLast();
            }  else {
                CDegV[u] = CDegV.Last();
                CDegV.DelLast();
            }
            if ( v == NDegV.Len() - 1) {
                NDegV.DelLast();
            }  else {
                NDegV[v] = NDegV.Last();
                NDegV.DelLast();
            }
        }
    }
    for (int i = 0; i < CNIDSet.Len(); i++) {
        TIntPr CNIDPr = CNIDSet[i];
        IAssert(CmtyVH.IsKey(CNIDPr.Val1));
        NewCmtyVH.AddDat(CNIDPr.Val1);
        NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
    }
    CmtyVH = NewCmtyVH;
}
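A hypothetical usage sketch for RewireCmtyNID above, assuming it is a static member of TAGMUtil; the rewiring randomly re-pairs (community, node) membership stubs while approximately preserving each node's and each community's membership degree. The toy affiliations and seed below are illustrative:

void RewireDemo() {
  THash<TInt, TIntV> CmtyVH;                      // community id -> member node ids
  CmtyVH.AddDat(10).Add(1); CmtyVH.GetDat(10).Add(2);
  CmtyVH.AddDat(20).Add(2); CmtyVH.GetDat(20).Add(3); CmtyVH.GetDat(20).Add(4);
  TRnd Rnd(42);
  TAGMUtil::RewireCmtyNID(CmtyVH, Rnd);           // memberships are shuffled in place
  for (int i = 0; i < CmtyVH.Len(); i++) {
    printf("community %d: %d members\n", CmtyVH.GetKey(i).Val, CmtyVH[i].Len());
  }
}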
Example #7
int main(int argc, char* argv[])
{
//	TFltPrV v;
//	v.Add(TFltPr(1,4));
//	v.Add(TFltPr(5,5));
//	v.Add(TFltPr(9,11));
//	v.Add(TFltPr(20,8));
//	v.Add(TFltPr(21,30));
//	cout << "C: " << Tools::computeCorrelation(v,Pearson) << endl;
//	return 0;


	TExeTm ExeTm;
	try
	{
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nPlotting Memes-Twitter Cascades Individually. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

		// URLS
		THash< TStr , CascadeElementV > quotes = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar");    // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4URLS
		THash< TUInt , TSecTmV > twitterUrls = Tools::loadTwitter("DATA/CascadesFullUrlsOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");     // CascadesFullUrlsOnTwitterData_FINALFILTERED

		// CONTENTS
		//THash< TStr , CascadeElementV > quotes2 = Tools::loadQuotes("DATA/QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_HAVINGBOTH.rar");    // QuotesPreprocessedData_NIFTY_RANGEFIXED_FINALFILTERED_4Contents
		THash< TUInt , TSecTmV > twitterContents = Tools::loadTwitter("DATA/CascadesOnTwitterData_FINALFILTERED_HAVINGBOTH.rar");    // CascadesOnTwitterData_FINALFILTERED

		// Plotting
		THash< TUInt , TSecTmV > twitterTotal;
		for(int i=0;i<twitterContents.Len();i++)
		{
			TSecTmV tmp;
			tmp.AddV(twitterContents[i]);
			tmp.AddV(twitterUrls[i]);
			twitterTotal.AddDat(i,tmp);
		}

		plotScatterLengthOfEachCascade(quotes,twitterUrls,"Urls");
		plotScatterLengthOfEachCascade(quotes,twitterContents,"Contents");
		plotScatterLengthOfEachCascade(quotes,twitterTotal,"Full");

		printf("\nPlots had been drawn successfully.");
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
void plotScatterLengthOfEachCascade(THash<TUInt,TSecTmV>& c1, THash<TUInt,TSecTmV>& c2)
{
	printf("\n\nPlotting ...\n");
	TFltPrV plotdata;
	for(int q=0;q<c1.Len();q++)
	{
		TFltPr elem;
		elem.Val1 = c1[q].Len();
		elem.Val2 = c2[q].Len();
		plotdata.Add(elem);
	}
	Tools::plotScatter(plotdata, "TwitterUrlsOverContents", "Urls on Twitter", "Contents on Twitter");
}
Example #9
void plotScatterLengthOfEachCascade(THash<TStr,CascadeElementV>& quotes, THash<TUInt,TSecTmV>& twitter, char* name)
{
	printf("\n\nPlotting ...\n");
	TFltPrV plotdata;
	for(int q=0;q<quotes.Len();q++)
	{
		TFltPr elem;
		elem.Val1 = quotes[q].Len();
		elem.Val2 = twitter[q].Len();
		plotdata.Add(elem);
	}
	Tools::plotScatter(plotdata, name, "Blogs/News", TStr::Fmt("%s on Twitter",name).CStr());
}
Example #10
void TTable::GroupAux(const TStrV& GroupBy, TInt GroupByStartIdx, THash<TInt,TIntV>& grouping, const TIntV& IndexSet, TBool All){
  /* recursion base - add IndexSet as group */
  if(GroupByStartIdx == GroupBy.Len()){
    if(IndexSet.Len() == 0){return;}
    TInt key = grouping.Len();
    grouping.AddDat(key, IndexSet);
    return;
  }
  if(!ColTypeMap.IsKey(GroupBy[GroupByStartIdx])){TExcept::Throw("no such column " + GroupBy[GroupByStartIdx]);}
  switch(GetColType(GroupBy[GroupByStartIdx])){
    case INT:{
      // group by current column
      // not sure how to estimate the size of T for constructor-hinting purposes.
      // It is bounded by the length of the IndexSet, or by the length of the grouping
      // column if the IndexSet vector is empty, but this bound may be far too large.
      THash<TInt,TIntV> T;
      GroupByIntCol(GroupBy[GroupByStartIdx], T, IndexSet, All);
      for(THash<TInt,TIntV>::TIter it = T.BegI(); it < T.EndI(); it++){
        TIntV& CurrGroup = it->Dat;
        // each group according to the current column is used as an IndexSet
        // for grouping according to the next column
        GroupAux(GroupBy, GroupByStartIdx+1, grouping, CurrGroup, false);
      }
      break;
    }
    case FLT:{
      THash<TFlt,TIntV> T;
      GroupByFltCol(GroupBy[GroupByStartIdx], T, IndexSet, All);
      for(THash<TFlt,TIntV>::TIter it = T.BegI(); it < T.EndI(); it++){
        TIntV& CurrGroup = it->Dat;
        GroupAux(GroupBy, GroupByStartIdx+1, grouping, CurrGroup, false);
      }
      break;
    }
    case STR:{
      THash<TStr,TIntV> T;
      GroupByStrCol(GroupBy[GroupByStartIdx], T, IndexSet, All);
      for(THash<TStr,TIntV>::TIter it = T.BegI(); it < T.EndI(); it++){
        TIntV& CurrGroup = it->Dat;
        GroupAux(GroupBy, GroupByStartIdx+1, grouping, CurrGroup, false);
      }
      break;
    }
  }
}
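The recursion in GroupAux groups rows by the first remaining column, then recurses on each bucket with the next column, and when the column list is exhausted the surviving index set becomes one group. A self-contained sketch of that pattern (plain standard C++, not SNAP's TTable API; the row layout and names are illustrative):

#include <map>
#include <string>
#include <vector>

typedef std::vector<std::string> TRow;     // one row: one string value per column
typedef std::vector<int> TIdxSet;          // a set of row indices

// Group the rows in IdxSet by the columns ColIdxV[Start..]; append each final bucket to GroupV.
static void GroupRec(const std::vector<TRow>& RowV, const std::vector<int>& ColIdxV,
                     size_t Start, const TIdxSet& IdxSet, std::vector<TIdxSet>& GroupV) {
  if (Start == ColIdxV.size()) {           // recursion base: emit the current bucket as a group
    if (!IdxSet.empty()) { GroupV.push_back(IdxSet); }
    return;
  }
  std::map<std::string, TIdxSet> Buckets;  // value of the current column -> row indices
  for (size_t i = 0; i < IdxSet.size(); i++) {
    Buckets[RowV[IdxSet[i]][ColIdxV[Start]]].push_back(IdxSet[i]);
  }
  for (std::map<std::string, TIdxSet>::const_iterator It = Buckets.begin(); It != Buckets.end(); ++It) {
    GroupRec(RowV, ColIdxV, Start + 1, It->second, GroupV);  // each bucket becomes the next index set
  }
}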
int main(int argc, char* argv[])
{
	TExeTm ExeTm;
	PGconn *conn;
	PGresult *res;
	int id,start,rec_count,row,indx,end;
	unsigned int q;
	int total_number_tweets = 0;
	double tweet_date = 0;
	TStr TweetStr("");
	TStr TweetStrLc("");

	if(argc > 1)
	{
		start = atoi(argv[1]);
	}
	else
	{
		printf("YOU SHOULD SET THE INDICES...\n\n");
		return 1;
	}
	indx = start * LENGTH;
	end = indx + LENGTH;

	printf(":::::::: Find Cascades of Quotes In Twitter Separately ::::::::\n");
	const TStr StartDate = Env.GetIfArgPrefixStr("-sd:", "2008-08-01 00:00:00", "Starting date");
	const TStr EndDate = Env.GetIfArgPrefixStr("-ed:", "2009-10-01 00:00:00", "Ending date");

	Env = TEnv(argc, argv, TNotify::StdNotify);
	Env.PrepArgs(TStr::Fmt("\nFinding the cascades of the desired quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
	try
	{
		while(indx < end)
		{
			TStr qContentFname = TStr::Fmt("QuotesData/Q%d.rar",indx);
			TStr resultFname = TStr::Fmt("QuotesCascResult/R%d.rar",indx++);

			if(fileExists(resultFname))
			{
				if(fileExists(qContentFname))
				{
					// removing the quotes' content file
					system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr());
				}
			}
			else
			{
				if(fileExists(qContentFname))
				{
					THash<TStr,TInt> quotesContent;
					THash<TInt,TSecTmV> CascadesOnTwitter;

					TZipIn ZquotesIn(qContentFname);
					quotesContent.Load(ZquotesIn);
					printf("Q%d loading done, it contains %d quotes.\n",indx-1,quotesContent.Len());

					conn = PQconnectdb("dbname=twitter host=postgresql01.mpi-sws.org user=twitter password=tweet@84");

					if (PQstatus(conn) == CONNECTION_BAD)
					{
						printf("We were unable to connect to the database");
						return 1;
					}

					// we use cursors/fetch to speed up the process; batch of 10000 tweets
					PQexec(conn, "begin work");
					PQexec(conn,TStr::Fmt("declare mycursor cursor for select tweettext, extract(epoch from tweettime) from tweets where tweettime >= timestamp '%s' and tweettime < timestamp '%s'", StartDate.CStr(), EndDate.CStr()).CStr());

					do
					{
						res = PQexec(conn, "FETCH 1000000 IN mycursor");    // all of them are: 1675401026
						if (PQresultStatus(res) == PGRES_TUPLES_OK)
						{
							rec_count = PQntuples(res);
							total_number_tweets += rec_count;
							printf("Adding %d tweets... (total: %d)\n", rec_count, total_number_tweets);

							for (row=0; row<rec_count; row++)
							{
								TweetStr = PQgetvalue(res, row, 0);
								tweet_date = TStr(PQgetvalue(res, row, 1)).GetFlt();
								TweetStrLc = TweetStr.ToLc();
								for(q=0;q<quotesContent.Len();q++)
								{
									if (TweetStrLc.SearchStr(quotesContent.GetKey(q)) > -1)
									{
										TSecTm td(tweet_date);
										id = CascadesOnTwitter.GetKeyId(quotesContent[q]);
										if(id == -1)
										{
											CascadesOnTwitter.AddDat(quotesContent[q]).Add(td);
										}
										else
										{
											CascadesOnTwitter.GetDat(quotesContent[q]).AddSorted(td);
										}
									}
								}
							}

							PQclear(res);
						}
						else
						{
							rec_count = 0;
						}
					}
					while (rec_count);

					PQexec(conn, "close mycursor");
					PQexec(conn, "commit work");
					PQfinish(conn);


					// Save the results
					TZipOut zout(resultFname);
					CascadesOnTwitter.Save(zout);

					// Remove the quotes' content file
					system(TStr::Fmt("rm %s",qContentFname.CStr()).CStr());
				}
			}
		}

		printf("\n\nD O N E\n\n");
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
int main(int argc, char* argv[])
{
	int i,quoteIndex,j,k;
	TExeTm ExeTm;

	printf("Starting The SAVE CODE For Matlab Processing ...\n");
	try
	{
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nCreating the volumes of the quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

		TZipIn ZquotesIn("RESULTS/QuotesPreprocessedData_NIFTY.rar");
		quotes.Load(ZquotesIn);
		printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len());

		TZipIn ZcascadesOnTwitterIn("RESULTS/CascadesFullUrlsOnTwitterData.rar");
		cascadesOnTwitterUrls.Load(ZcascadesOnTwitterIn);
		printf("Loaded CascadesFullUrlsOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterUrls.Len());

		TZipIn ZIn("RESULTS/CascadesOnTwitterData.rar");
		cascadesOnTwitterContents.Load(ZIn);
		printf("Loaded CascadesOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterContents.Len());


		// Quote's Cascades over Memes
		ofstream quotesContent1("MEMES_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs1("MEMES_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes1("MEMES_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks1("MEMES_MemesExternalLinks.csv",ios::out|ios::app);
		for(i=0;i<quotes.Len();i++)
		{
			quotesContent1 << quotes.GetKey(i).CStr() << "\r\n";
			for(j=0;j<quotes[i].Len();j++)
			{
				for(k=0;k<quotes[i][j].explicit_links.Len();k++)
				{
					externalLinks1 << quotes[i][j].explicit_links[k].Val << "," << quotes[i][j].post.Val<<"\r\n";
				}
				memeTimes1 << quotes[i][j].time.GetAbsSecs() << ",";
				memeWebs1 << quotes[i][j].post.Val << ",";
			}
			memeTimes1 << "\r\n";
			memeWebs1 << "\r\n";
			externalLinks1 << "-1\r\n";  // this means that the external links for this quote is finished
		}
		quotesContent1.close();
		memeWebs1.close();
		memeTimes1.close();
		externalLinks1.close();


		// TEXTS Cascades Over Memes and Twitter
		ofstream quotesContent2("MEMES_TWITTER_TXT_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs2("MEMES_TWITTER_TXT_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes2("MEMES_TWITTER_TXT_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks2("MEMES_TWITTER_TXT_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitterContent2("MEMES_TWITTER_TXT_TwitterTextCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterContents.Len();i++)
		{
			quoteIndex = cascadesOnTwitterContents.GetKey(i);
			quotesContent2 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks2 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes2 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs2 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes2 << "\r\n";
			memeWebs2 << "\r\n";
			externalLinks2 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++)
			{
				twitterContent2 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ",";
			}
			twitterContent2 << "\r\n";
		}
		quotesContent2.close();
		memeWebs2.close();
		memeTimes2.close();
		externalLinks2.close();
		twitterContent2.close();


		// URLS Cascades Over Memes and Twitter
		ofstream quotesContent3("MEMES_TWITTER_URL_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs3("MEMES_TWITTER_URL_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes3("MEMES_TWITTER_URL_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks3("MEMES_TWITTER_URL_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitter3("MEMES_TWITTER_URL_TwitterUrlCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterUrls.Len();i++)
		{
			quoteIndex = cascadesOnTwitterUrls.GetKey(i);
			quotesContent3 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks3 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes3 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs3 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes3 << "\r\n";
			memeWebs3 << "\r\n";
			externalLinks3 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++)
			{
				twitter3 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ",";
			}
			twitter3 << "\r\n";
		}
		quotesContent3.close();
		memeWebs3.close();
		memeTimes3.close();
		externalLinks3.close();
		twitter3.close();


		// INTERSECT OF URLS OF TEXTS Cascades Over Memes and Twitter
		ofstream quotesContent4("TRIPLE_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs4("TRIPLE_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes4("TRIPLE_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks4("TRIPLE_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitter4("TRIPLE_TwitterUrlCascades.csv",ios::out|ios::app);
		ofstream twitterContent4("TRIPLE_TwitterTextCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterUrls.Len();i++)
		{
			quoteIndex = cascadesOnTwitterUrls.GetKey(i);
			if(cascadesOnTwitterContents.GetKeyId(quoteIndex) == -1)
			{
				continue;
			}
			quotesContent4 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks4 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes4 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs4 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes4 << "\r\n";
			memeWebs4 << "\r\n";
			externalLinks4 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++)
			{
				twitterContent4 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ",";
			}

			for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++)
			{
				twitter4 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ",";
			}
			twitter4 << "\r\n";
			twitterContent4 << "\r\n";
		}
		quotesContent4.close();
		memeWebs4.close();
		memeTimes4.close();
		externalLinks4.close();
		twitter4.close();
		twitterContent4.close();
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
Example #13
/////////////////////////////////////////////////
// Best-Paths
void GetBestPaths(
 const TStr& SrcNmObjStr, const TStr& DstNmObjStr, const PNmObjBs& NmObjBs){
  int SrcNmObjId=NmObjBs->GetNmObjId(SrcNmObjStr);
  int DstNmObjId=NmObjBs->GetNmObjId(DstNmObjStr);
  int NmObjs=NmObjBs->GetNmObjs();
  TIntPrV ParLevPrV(NmObjs); TIntPrV DstParLevPrV;
  ParLevPrV.PutAll(TIntPr(-1, -1));
  int CurLev=0;
  ParLevPrV[SrcNmObjId]=TIntPr(SrcNmObjId, CurLev);
  forever{
    CurLev++; int NewEdges=0;
    for (int NmObjId1=0; NmObjId1<NmObjs; NmObjId1++){
      if (ParLevPrV[NmObjId1].Val2==CurLev-1){
        TIntV DocIdV1; NmObjBs->GetNmObjDocIdV(NmObjId1, DocIdV1);
        for (int NmObjId2=0; NmObjId2<NmObjs; NmObjId2++){
          if ((NmObjId2==DstNmObjId)||(ParLevPrV[NmObjId2].Val2==-1)){
            TIntV DocIdV2; NmObjBs->GetNmObjDocIdV(NmObjId2, DocIdV2);
            TIntV IntrsDocIdV; DocIdV1.Intrs(DocIdV2, IntrsDocIdV);
            if (!IntrsDocIdV.Empty()){
              ParLevPrV[NmObjId2]=TIntPr(NmObjId1, CurLev); NewEdges++;
              if (NmObjId2==DstNmObjId){
                DstParLevPrV.Add(TIntPr(NmObjId1, CurLev));
              }
            }
          }
        }
      }
    }
    if ((NewEdges==0)||(ParLevPrV[DstNmObjId].Val2!=-1)){
      break;
    }
  }
  // prepare graph
  THash<TStr, PVrtx> VrtxNmToVrtxH; TStrPrV VrtxNmPrV;
  VrtxNmToVrtxH.AddKey(SrcNmObjStr);
  VrtxNmToVrtxH.AddKey(DstNmObjStr);
  // write path
  ContexterF->NmObjLinkageREd->Clear();
  for (int DstParLevPrN=0; DstParLevPrN<DstParLevPrV.Len(); DstParLevPrN++){
    ParLevPrV[DstNmObjId]=DstParLevPrV[DstParLevPrN];
    int DstParLev=ParLevPrV[DstNmObjId].Val2;
    TStr DstNmObjStr=NmObjBs->GetNmObjStr(DstNmObjId);
    ContexterF->NmObjLinkageREd->Lines->Add(DstNmObjStr.CStr());
    int ParNmObjId=DstNmObjId;
    TStr PrevNmObjStr=DstNmObjStr;
    forever {
      if (ParNmObjId==SrcNmObjId){break;}
      ParNmObjId=ParLevPrV[ParNmObjId].Val1;
      int ParLev=ParLevPrV[ParNmObjId].Val2;
      TStr CurNmObjStr=NmObjBs->GetNmObjStr(ParNmObjId);
      TStr ParNmObjStr=TStr::GetSpaceStr((DstParLev-ParLev)*4)+CurNmObjStr;
      ContexterF->NmObjLinkageREd->Lines->Add(ParNmObjStr.CStr());
      // create vertex & edge
      VrtxNmToVrtxH.AddKey(CurNmObjStr);
      if (!PrevNmObjStr.Empty()){
        if (PrevNmObjStr<CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(PrevNmObjStr, CurNmObjStr));
        } else
        if (PrevNmObjStr>CurNmObjStr){
          VrtxNmPrV.AddUnique(TStrPr(CurNmObjStr, PrevNmObjStr));
        }
      }
      // save current named-object
      PrevNmObjStr=CurNmObjStr;
    }
  }
  // generate graph
  // create graph
  PGraph Graph=TGGraph::New();
  // create vertices
  for (int VrtxN=0; VrtxN<VrtxNmToVrtxH.Len(); VrtxN++){
    TStr VrtxNm=VrtxNmToVrtxH.GetKey(VrtxN);
    PVrtx Vrtx=TGVrtx::New(VrtxNm);
    VrtxNmToVrtxH.GetDat(VrtxNm)=Vrtx;
    Graph->AddVrtx(Vrtx);
  }
  // create edges
  for (int EdgeN=0; EdgeN<VrtxNmPrV.Len(); EdgeN++){
    PVrtx Vrtx1=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val1);
    PVrtx Vrtx2=VrtxNmToVrtxH.GetDat(VrtxNmPrV[EdgeN].Val2);
    PEdge Edge=new TGEdge(Vrtx1, Vrtx2, TStr::Fmt("_%d", EdgeN), false);
    Graph->AddEdge(Edge);
  }
  // place graph
  ContexterF->State->ElGraph=Graph;
  TRnd Rnd(1);
  ContexterF->State->ElGraph->PlaceSimAnnXY(Rnd, ContexterF->State->ElGks);
  // draw graph
  ContexterF->State->ElGks->Clr();
  ContexterF->ElPbPaint(NULL);
}
Example #14
// load from allactors.zip that was prepared by Brad Malin in 2005
PImdbNet TImdbNet::LoadTxt(const TStr& ActorFNm) {
  PImdbNet Net = TImdbNet::New();
  TStrV ColV;
  char line [2024];
  int NLines=0, DupEdge=0, Year, Position, ActorNId, MovieNId;
  TIntH ActorNIdH;
  THash<TIntPr, TInt> MovieNIdH;
  FILE *F = fopen(ActorFNm.CStr(), "rt");  fgets(line, 2024, F);
  while (! feof(F)) {
    memset(line, 0, 2024);
    fgets(line, 2024, F);
    if (strlen(line) == 0) break;
    TStr(line).SplitOnAllCh('|', ColV, false);  IAssert(ColV.Len() == 7);
    const int NameStrId = Net->AddStr(ColV[0].GetTrunc().GetLc()+" "+ColV[1].GetTrunc().GetLc());
    const int MovieStrId = Net->AddStr(ColV[2].GetTrunc().GetLc());
    TStr YearStr = ColV[3].GetTrunc();
    if (YearStr.Len() > 4) YearStr = YearStr.GetSubStr(0, 3);
    Year = 1;  YearStr.IsInt(Year);
    const TMovieTy MovieTy = TImdbNet::GetMovieTy(ColV[4]);
    Position = TInt::Mx;  ColV[5].GetTrunc().IsInt(Position);
    IAssert(ColV[6].GetTrunc()[0] == 'M' || ColV[6].GetTrunc()[0]=='F');
    const bool IsMale = ColV[6].GetTrunc()[0] == 'M';
    // create nodes  
    if (ActorNIdH.IsKey(NameStrId)) { 
      ActorNId = ActorNIdH.GetDat(NameStrId); }
    else { 
      ActorNId = Net->AddNode(-1, TImdbNode(NameStrId, Year, Position, IsMale));
      ActorNIdH.AddDat(NameStrId, ActorNId);
    }
    if (MovieNIdH.IsKey(TIntPr(MovieStrId, Year))) {
      MovieNId = MovieNIdH.GetDat(TIntPr(MovieStrId, Year)); }
    else {
      MovieNId = Net->AddNode(-1, TImdbNode(NameStrId, Year, MovieTy)); 
      MovieNIdH.AddDat(TIntPr(MovieStrId, Year), MovieNId); 
    }
    if (! Net->IsEdge(ActorNId, MovieNId)) { 
      Net->AddEdge(ActorNId, MovieNId); }
    else { DupEdge++; }
    if (++NLines % 100000 == 0) printf("\r  %dk  ", NLines/1000);
  }
  fclose(F);
  printf("duplicate edges: %d\n", DupEdge);
  printf("nodes:  %d\n", Net->GetNodes());
  printf("edges:  %d\n", Net->GetEdges());
  printf("actors: %d\n", ActorNIdH.Len());
  printf("movies: %d\n", MovieNIdH.Len());
  // set the actor year to the year of his first movie
  int NUpdates=0;
  for (TNet::TNodeI NI = Net->BegNI(); NI < Net->EndNI(); NI++) {
    if (NI().IsActor()) {
      int MinYear = NI().GetYear();
      for (int e = 0; e < NI.GetOutDeg(); e++) {
        const TImdbNode& NodeDat = Net->GetNDat(NI.GetOutNId(e));
        if (NodeDat.IsMovie()) MinYear = TMath::Mn(MinYear, NodeDat.GetYear());
      }
      if (NI().Year != MinYear) NUpdates++;
      NI().Year = MinYear;
    }
  }
  printf("updated actor times: %d\n", NUpdates);
  return Net;
}
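A brief, hypothetical usage sketch for the loader above; the file name is illustrative and stands for the pipe-delimited actor dump described in the comment:

void ImdbLoadDemo() {
  PImdbNet Net = TImdbNet::LoadTxt("allactors.txt");   // actor-movie network
  printf("nodes: %d  edges: %d\n", Net->GetNodes(), Net->GetEdges());
}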
int main(int argc, char* argv[])
{
	TExeTm ExeTm;
	THash< TStr , CascadeElementV > quotesFiltered;
	double* vol_me;
	uint period = 9 * 24 * 3600;   // 9 days, following the NIFTY paper

	printf("((((( Starting The Filtering Cascades CODE )))))\n");
	try
	{
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nFiltering Memes Cascades. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

		// ---== Loading Data ==---
		TZipIn ZquotesIn("QuotesPreprocessedData_NIFTY.rar");		///("/agbs/cluster/oaskaris/Data_Preparing_Codes/RESULTS/QuotesPreprocessedData_NIFTY.rar");
		THash< TStr , CascadeElementV > quotes;
		quotes.Load(ZquotesIn);
		printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len());

		// NIFTY Method for Filtering by Peaks
		uint begin = TSecTm(2008,7,31,0,0,0).GetAbsSecs();
		uint end = TSecTm(2009,10,1,0,0,0).GetAbsSecs();
		TSecTmV memesTimes;
		int bins = (end - begin) / period;
		for(int c=0;c<quotes.Len();c++)
		{
			memesTimes.Clr();
			for(int i=0;i<quotes[c].Len();i++)
			{
				memesTimes.Add(quotes[c][i].time);
			}
			vol_me = Tools::calculateHistOfCascade(memesTimes,begin,period,false);

			// calculating mean and standard deviation
			double mean = 0;
			for(int i=0;i<bins;i++)
			{
				mean += vol_me[i];
			}
			mean /= bins;

			double std = 0;
			for(int i=0;i<bins;i++)
			{
				std += pow(vol_me[i]-mean , 2);
			}
			std = sqrt(std / (bins-1));

			// peak definition from NIFTY: a bin is a peak if its volume under the 9-day binning is more than 1 standard deviation above the average frequency
			double maxVolume = mean + std;
			int peakCnt = 0;
			for(int i=0;i<bins;i++)
			{
				if(vol_me[i] > maxVolume)
				{
					peakCnt++;
				}
			}
			// if there are more than 5 peaks, ignore this quote, since it is not a meme
			if(peakCnt > 5)
			{
				delete[] vol_me;
				continue;
			}

			quotesFiltered.AddDat(quotes.GetKey(c),quotes[c]);
			delete[] vol_me;
		}

		TZipOut mout("QuotesPreprocessedData_NIFTY_FINALFILTERED.rar");
		quotesFiltered.Save(mout);
		printf("Saved QuotesPreprocessedData_NIFTY_FINALFILTERED has instances: %d\n\n\n",quotesFiltered.Len());

		printf("\nThe Meme Filter for plotting had been done successfully.\n");
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
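The filtering loop above applies a NIFTY-style peak rule: bin each quote's timestamps, call a bin a peak when its volume exceeds the mean by more than one sample standard deviation, and discard quotes with more than 5 peaks. A stripped-down sketch of just that rule (the helper name CountPeaks is illustrative; the bin volumes can come from any histogram):

#include <cmath>

// count bins whose volume exceeds mean + 1 sample standard deviation
static int CountPeaks(const double* vol, int bins) {
  double mean = 0;
  for (int i = 0; i < bins; i++) { mean += vol[i]; }
  mean /= bins;
  double sd = 0;
  for (int i = 0; i < bins; i++) { sd += pow(vol[i] - mean, 2); }
  sd = sqrt(sd / (bins - 1));
  int peakCnt = 0;
  for (int i = 0; i < bins; i++) {
    if (vol[i] > mean + sd) { peakCnt++; }
  }
  return peakCnt;   // the caller keeps the quote only if peakCnt <= 5
}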
int main(int argc, char* argv[])
{
	cout << "START...\n";
	THash< TChA , TUInt > posts;
	TZipIn ZpostsIn("/NS/twitter-5/work/oaskaris/PostsData.rar");
	posts.Load(ZpostsIn);
	printf("PostsData loading done, it contains %d posts.\n",posts.Len());

	TZipIn z2("CascadesFullUrlsOnTwitterData.rar");
	cascadesInUrlsOnTwitter.Load(z2);
	printf("CascadesFullUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());

	TZipIn ZquotesIn("/NS/twitter-5/work/oaskaris/QuotesPreprocessedData_NIFTY.rar");
	preprocessedQuotes.Load(ZquotesIn);
	printf("QuotesPreprocessedData_NIFTY loading done, it contains %d quotes.\n",preprocessedQuotes.Len());

	int qoId = 1337058;
	cout << "\n\nQUOTE: " << preprocessedQuotes.GetKey(qoId).CStr() << endl << endl;
	for(int l=0;l<preprocessedQuotes[qoId].Len();l++)
	{
		cout << posts.GetKey(preprocessedQuotes[qoId][l].post).CStr() << " : " << preprocessedQuotes[qoId][l].time.GetYmdTmStr().CStr() << endl;
	}
	return 0;


	int cnt = 3;
	bool printed = false;
	while(!printed)
	{
		cnt++;
		for(int l=0;l<cascadesInUrlsOnTwitter.Len();l++)
		{
			int qID = cascadesInUrlsOnTwitter.GetKey(l);
			if(cascadesInUrlsOnTwitter[l][0].GetAbsSecs() < preprocessedQuotes[qID][0].time.GetAbsSecs())
			{
				if(preprocessedQuotes[qID].Len() > cnt)
				{
					continue;
				}
				printed = true;
				cout << "Twitter time: " << cascadesInUrlsOnTwitter[l][0].GetYmdTmStr().CStr() << endl;
				cout << "Memes time: " << preprocessedQuotes[qID][0].time.GetYmdTmStr().CStr() << endl;
				cout << "QID: " << qID << endl;
				cout << "\n\n\n";
			}
		}
	}
	return 0;



//	TZipIn z1("CascadesRawUrlsOnTwitterData.rar");
//	cascadesInUrlsOnTwitter.Load(z1);
//	printf("CascadesRawUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());
//	long long l1 = 0;
//	for(int i=0;i<cascadesInUrlsOnTwitter.Len();i++)
//	{
//		l1+=cascadesInUrlsOnTwitter[i].Len();
//	}
//	cascadesInUrlsOnTwitter.Clr();
//
//	TZipIn z2("CascadesFullUrlsOnTwitterData.rar");
//	cascadesInUrlsOnTwitter.Load(z2);
//	printf("CascadesFullUrlsOnTwitterData loading done, it contains %d items.\n",cascadesInUrlsOnTwitter.Len());
//	long long l2 = 0;
//	for(int i=0;i<cascadesInUrlsOnTwitter.Len();i++)
//	{
//		l2+=cascadesInUrlsOnTwitter[i].Len();
//	}
//
//	cout << "\n\n\nRaw All Items: " << l1 << "\n" << "Full All Items: " << l2 << endl;
//	return 0;


//	TZipIn resIn("CascadesUrlsOnTwitterData.rar");
//	cascadesInUrlsOnTwitter.Load(resIn);
//	printf("The size of CascadesUrlsOnTwitter was: %d\n",cascadesInUrlsOnTwitter.Len());
//	for(int l=0;l<5;l++)
//	{
//		printf("\n\n\nQ%d:\n\n",l);
//		for(int o=0;o<cascadesInUrlsOnTwitter[l].Len();o++)
//		{
//			printf("%d. %s\n",o,cascadesInUrlsOnTwitter[l][o].GetYmdTmStr().CStr());
//		}
//	}
//	return 0;


	TExeTm ExeTm;
	unsigned int q,p;
	int id,i;
	TSecTmV* cascade;
	TSecTmV* res;
	Env = TEnv(argc, argv, TNotify::StdNotify);
	Env.PrepArgs(TStr::Fmt("\n***(((Finding the cascades of the desired quotes in their urls)))***. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
	try
	{
		TZipIn ZquotesIn("/NS/twitter-5/work/oaskaris/QuotesPreprocessedData_NIFTY.rar");
		preprocessedQuotes.Load(ZquotesIn);
		printf("QuotesPreprocessed loading done, it contains %d quotes.\n",preprocessedQuotes.Len());

		TZipIn ZpostsPropIn("PostsPropagatedOverTwitterFullUrls.rar");
		postsPropagatedOverTw.Load(ZpostsPropIn);
		printf("PostsPropagatedOverTwitterFullUrls loading done, it contains %d items.\n",postsPropagatedOverTw.Len());


		for(q=0;q<preprocessedQuotes.Len();q++)
		{
			for(p=0;p<preprocessedQuotes[q].Len();p++)
			{
				TUInt postId = preprocessedQuotes[q][p].post;
				if(postsPropagatedOverTw.GetKeyId(postId) != -1)
				{
					cascade = &postsPropagatedOverTw.GetDat(postId);
					id = cascadesInUrlsOnTwitter.GetKeyId(q);
					if(id == -1)
					{
						res = &cascadesInUrlsOnTwitter.AddDat(q);
						for(i=0;i<cascade->Len();i++)
						{
							res->AddSorted((*cascade)[i]);
						}
					}
					else
					{
						res = &cascadesInUrlsOnTwitter.GetDat(q);
						for(i=0;i<cascade->Len();i++)
						{
							res->AddSorted((*cascade)[i]);
						}
					}
				}
			}
		}

		printf("\n\nFINDING THE CASCADES OF QUOTES WITH URLS OVER TWITTER IS DONE\n\n");

		SaveAll();
	}
	catch(exception& ex)
	{
		SaveAll();
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		SaveAll();
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}