int main(int argc, char* argv[])
{
	int i,quoteIndex,j,k;
	TExeTm ExeTm;

	printf("Starting The SAVE CODE For Matlab Processing ...\n");
	try
	{
		Env = TEnv(argc, argv, TNotify::StdNotify);
		Env.PrepArgs(TStr::Fmt("\nCreating the volumes of the quotes. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));

		TZipIn ZquotesIn("RESULTS/QuotesPreprocessedData_NIFTY.rar");
		quotes.Load(ZquotesIn);
		printf("Loaded QuotesPreprocessedData_NIFTY has instances: %d\n\n\n",quotes.Len());

		TZipIn ZcascadesOnTwitterIn("RESULTS/CascadesFullUrlsOnTwitterData.rar");
		cascadesOnTwitterUrls.Load(ZcascadesOnTwitterIn);
		printf("Loaded CascadesFullUrlsOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterUrls.Len());

		TZipIn ZIn("RESULTS/CascadesOnTwitterData.rar");
		cascadesOnTwitterContents.Load(ZIn);
		printf("Loaded CascadesOnTwitterData has instances: %d\n\n\n",cascadesOnTwitterContents.Len());


		// Quote's Cascades over Memes
		ofstream quotesContent1("MEMES_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs1("MEMES_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes1("MEMES_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks1("MEMES_MemesExternalLinks.csv",ios::out|ios::app);
		for(i=0;i<quotes.Len();i++)
		{
			quotesContent1 << quotes.GetKey(i).CStr() << "\r\n";
			for(j=0;j<quotes[i].Len();j++)
			{
				for(k=0;k<quotes[i][j].explicit_links.Len();k++)
				{
					externalLinks1 << quotes[i][j].explicit_links[k].Val << "," << quotes[i][j].post.Val<<"\r\n";
				}
				memeTimes1 << quotes[i][j].time.GetAbsSecs() << ",";
				memeWebs1 << quotes[i][j].post.Val << ",";
			}
			memeTimes1 << "\r\n";
			memeWebs1 << "\r\n";
			externalLinks1 << "-1\r\n";  // this means that the external links for this quote is finished
		}
		quotesContent1.close();
		memeWebs1.close();
		memeTimes1.close();
		externalLinks1.close();


		// TEXTS Cascades Over Memes and Twitter
		ofstream quotesContent2("MEMES_TWITTER_TXT_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs2("MEMES_TWITTER_TXT_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes2("MEMES_TWITTER_TXT_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks2("MEMES_TWITTER_TXT_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitterContent2("MEMES_TWITTER_TXT_TwitterTextCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterContents.Len();i++)
		{
			quoteIndex = cascadesOnTwitterContents.GetKey(i);
			quotesContent2 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks2 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes2 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs2 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes2 << "\r\n";
			memeWebs2 << "\r\n";
			externalLinks2 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++)
			{
				twitterContent2 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ",";
			}
			twitterContent2 << "\r\n";
		}
		quotesContent2.close();
		memeWebs2.close();
		memeTimes2.close();
		externalLinks2.close();
		twitterContent2.close();


		// URLS Cascades Over Memes and Twitter
		ofstream quotesContent3("MEMES_TWITTER_URL_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs3("MEMES_TWITTER_URL_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes3("MEMES_TWITTER_URL_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks3("MEMES_TWITTER_URL_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitter3("MEMES_TWITTER_URL_TwitterUrlCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterUrls.Len();i++)
		{
			quoteIndex = cascadesOnTwitterUrls.GetKey(i);
			quotesContent3 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks3 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes3 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs3 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes3 << "\r\n";
			memeWebs3 << "\r\n";
			externalLinks3 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++)
			{
				twitter3 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ",";
			}
			twitter3 << "\r\n";
		}
		quotesContent3.close();
		memeWebs3.close();
		memeTimes3.close();
		externalLinks3.close();
		twitter3.close();


		// INTERSECT OF URLS OF TEXTS Cascades Over Memes and Twitter
		ofstream quotesContent4("TRIPLE_QuotesContent.csv",ios::out|ios::app);
		ofstream memeWebs4("TRIPLE_MemesCascadesWebs.csv",ios::out|ios::app);
		ofstream memeTimes4("TRIPLE_MemesCascadesTimes.csv",ios::out|ios::app);
		ofstream externalLinks4("TRIPLE_MemesExternalLinks.csv",ios::out|ios::app);
		ofstream twitter4("TRIPLE_TwitterUrlCascades.csv",ios::out|ios::app);
		ofstream twitterContent4("TRIPLE_TwitterTextCascades.csv",ios::out|ios::app);
		for(i=0;i<cascadesOnTwitterUrls.Len();i++)
		{
			quoteIndex = cascadesOnTwitterUrls.GetKey(i);
			if(cascadesOnTwitterContents.GetKeyId(quoteIndex) == -1)
			{
				continue;
			}
			quotesContent4 << quotes.GetKey(quoteIndex).CStr() << "\r\n";

			for(j=0;j<quotes[quoteIndex].Len();j++)
			{
				for(k=0;k<quotes[quoteIndex][j].explicit_links.Len();k++)
				{
					externalLinks4 << quotes[quoteIndex][j].explicit_links[k].Val << "," << quotes[quoteIndex][j].post.Val<<"\r\n";   // << CHECK HERE >> CHANGE -> TO SPACE
				}
				memeTimes4 << quotes[quoteIndex][j].time.GetAbsSecs() << ",";
				memeWebs4 << quotes[quoteIndex][j].post.Val << ",";
			}
			memeTimes4 << "\r\n";
			memeWebs4 << "\r\n";
			externalLinks4 << "-1\r\n";  // this means that the external links for this quote is finished

			for(j=0;j<cascadesOnTwitterContents.GetDat(quoteIndex).Len();j++)
			{
				twitterContent4 << cascadesOnTwitterContents.GetDat(quoteIndex)[j] << ",";
			}

			for(j=0;j<cascadesOnTwitterUrls.GetDat(quoteIndex).Len();j++)
			{
				twitter4 << cascadesOnTwitterUrls.GetDat(quoteIndex)[j] << ",";
			}
			twitter4 << "\r\n";
			twitterContent4 << "\r\n";
		}
		quotesContent4.close();
		memeWebs4.close();
		memeTimes4.close();
		externalLinks4.close();
		twitter4.close();
		twitterContent4.close();
	}
	catch(exception& ex)
	{
		printf("\nError1 happened, it was: %s\n\n",ex.what());
	}
	catch(TPt<TExcept>& ex)
	{
		printf("\nError2 happened: %s\n\n",ex[0].GetStr().CStr());
	}

	printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	return 0;
}
// Example no. 2
// 0
// -------------------------------------------------------------------------------- //
// Worker behind GetUrlContent(). RedirectsLeft is the number of redirects this
// function may still follow by itself before giving up, which keeps a redirect
// loop from recursing without bound.
static wxString GetUrlContentLimited( const wxString &url, const wxString &referer, bool gzipped, int RedirectsLeft )
{
    wxCurlHTTP  http;
    wxString RetVal = wxEmptyString;

    http.AddHeader( wxT( "User-Agent: " ) guDEFAULT_BROWSER_USER_AGENT );
    http.AddHeader( wxT( "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" ) );
    if( gzipped )
    {
        // The header field name is "Accept-Encoding"; with a space instead of the
        // hyphen the header is malformed and compression is never negotiated.
        http.AddHeader( wxT( "Accept-Encoding: gzip,deflate" ) );
    }
    http.AddHeader( wxT( "Accept-Charset: utf-8" ) );
    if( !referer.IsEmpty() )
    {
        http.AddHeader( wxT( "Referer: " ) + referer );
    }

    //guLogMessage( wxT( "Getting content for %s" ), url.c_str() );

    wxMemoryOutputStream Buffer;
    http.SetOpt( CURLOPT_FOLLOWLOCATION, 1 );
    http.Get( Buffer, url );

    if( !Buffer.IsOk() )
    {
        guLogError( wxT( "Could not get '%s'" ), url.c_str() );
        return RetVal;
    }

    int ResponseCode = http.GetResponseCode();
    //guLogMessage( wxT( "ResponseCode: %i" ), ResponseCode );
    if( ResponseCode >= 300  && ResponseCode < 400 )
    {
        // A redirect was still reported: follow the Location header by hand.
        wxString Location = http.GetResponseHeader();
        int Pos = Location.Lower().Find( wxT( "location: " ) );
        if( Pos != wxNOT_FOUND )
        {
            Location = Location.Mid( Pos + 10 );    // 10 == length of "location: "
            int EndPos = Location.Find( wxT( "\r\n" ) );
            if( EndPos != wxNOT_FOUND )
            {
                Location.Truncate( EndPos );
            }
            if( Location.StartsWith( wxT( "/" ) ) )
            {
                // Host-relative redirect: rebuild an absolute url from the request url.
                wxURI Uri( url );
                wxString NewURL;
                if( Uri.HasScheme() )
                    NewURL = Uri.GetScheme() + wxT( "://" );
                NewURL += Uri.GetServer();
                NewURL += Location;
                Location = NewURL;
            }
            if( RedirectsLeft <= 0 )
            {
                guLogError( wxT( "Could not get '%s'" ), url.c_str() );
                return wxEmptyString;
            }
            return GetUrlContentLimited( Location, referer, gzipped, RedirectsLeft - 1 );
        }
        // No Location header found: fall through and return whatever body was received.
    }
    else if( ResponseCode >= 400 )
    {
        return wxEmptyString;
    }

    wxString ResponseHeaders = http.GetResponseHeader().Lower();
    //guLogMessage( wxT( "Response %u:\n%s\n%s" ), http.GetResponseCode(), http.GetResponseHeader().c_str(), http.GetResponseBody().c_str() );

    // Both encodings advertised in the request are decoded here; wxZlibInputStream
    // is constructed with its default flags, as before.
    bool Compressed = ( ResponseHeaders.Find( wxT( "content-encoding: gzip" ) ) != wxNOT_FOUND ) ||
                      ( ResponseHeaders.Find( wxT( "content-encoding: deflate" ) ) != wxNOT_FOUND );
    if( Compressed )
    {
        wxMemoryInputStream Ins( Buffer );
        wxZlibInputStream ZIn( Ins );
        wxStringOutputStream Outs( &RetVal );
        ZIn.Read( Outs );
    }
    else if( Buffer.GetLength() )
    {
        size_t Count = Buffer.GetLength();
        const char * pData = ( const char * ) Buffer.GetOutputStreamBuffer()->GetBufferStart();
        // Try UTF-8 first; if the conversion yields nothing, fall back to
        // ISO-8859-1 and finally to the C library's current conversion.
        RetVal = wxString( pData, wxConvUTF8, Count );
        if( RetVal.IsEmpty() )
        {
            RetVal = wxString( pData, wxConvISO8859_1, Count );
            if( RetVal.IsEmpty() )
            {
                RetVal = wxString( pData, wxConvLibc, Count );
            }
        }
    }
    //guLogMessage( wxT( "Response:\n%s\n###############" ), RetVal.c_str() );
    return RetVal;
}

// Downloads `url` over HTTP and returns the response body as a wxString.
//
//   url      - address to fetch.
//   referer  - sent as the Referer header when not empty.
//   gzipped  - when true, compressed transfer is requested and a compressed
//              response is inflated before being returned.
//
// Returns wxEmptyString when the response code is >= 400, when the output buffer
// is not usable, or when more than 10 redirects had to be followed manually.
wxString GetUrlContent( const wxString &url, const wxString &referer, bool gzipped )
{
    return GetUrlContentLimited( url, referer, gzipped, 10 );
}