Example #1
0
    /// Entry point of the runner: validates the command line and, when it is
    /// acceptable, builds the parameters and runs a crawler over them.
    ///
    /// @return 0 after a crawl, otherwise whatever show_usage() returns.
    int runner_t::run(int argc, char** argv) {
        const bool arguments_ok = is_right_parameters(argc, argv);

        if (arguments_ok) {
            parameters_t parameters(argc, argv);
            crawler_t crawler(parameters);
            crawler.run();

            return 0;
        }

        // Bad command line: the usage helper decides the exit code.
        return show_usage(argv);
    }
Example #2
0
void ExchangeRateNet::init(){
    // Quote source. Previously used alternative, kept for reference:
    //   http://quote.hexun.com/forex/forex.aspx?type=3
    url = QString("http://exquote.yjfx.jp/quote.js");

    // Objects owned by this instance (parented to `this`).
    timer = new QTimer(this);
    mgr = new QNetworkAccessManager(this);

    // Worker object and the thread it is moved to; neither gets a parent here.
    thread = new QThread;
    eaWorker = new ExchangeRateWorker;
    eaWorker->moveToThread(thread);

    // Timer ticks trigger crawler(); finished network replies go to query().
    connect(timer, SIGNAL(timeout()),
            this, SLOT(crawler()));
    connect(mgr, SIGNAL(finished(QNetworkReply*)),
            this, SLOT(query(QNetworkReply*)));

    // Work requests are queued to the worker; its messages are re-emitted
    // through this object's own showMessage signal.
    connect(this, SIGNAL(doWork(QString)),
            eaWorker, SLOT(work(QString)),
            Qt::QueuedConnection);
    connect(eaWorker, SIGNAL(showMessage(QString)),
            this, SIGNAL(showMessage(QString)));

    thread->start();
}
Example #3
0
void WebCrawler::crawler(const std::string &url, const size_t depth)
{
    std::string page;
    std::string newUrl;

    // Filter output
    if (matchOutputFilter(url))
    {
        m_filteredUrls.insert(url);
    }

    // End if reach depth
    if (depth > m_depth) return;

    // Check whether page already crawled
    if (m_searchedUrls.find(url) != m_searchedUrls.end())
    {
        std::cout << ">>> Omit duplicate URL: " << url << std::endl;
        return;
    }

    // Add paged to crawled pages
    m_searchedUrls.insert(url);

    std::cout << ">>> Searching [depth = " << depth << "]: " << url << std::endl;
    // Fetch page
    page = m_cw->fetch(url);
    // Return if page is empty
    if (page.size() == 0) return;

    // Iterate all hrefs on page
    auto hrefs = getHrefs(page);
    for (auto it = hrefs.rbegin(); it != hrefs.rend(); ++it)
    {
        // Omit invalid url
        if (it->size() == 0 || it->at(0) == '#') continue;

        // Process Url
        newUrl = processUrl(url, *it);
        // std::cout << '\t' << url << " >> " << *it << " >> " << newUrl << std::endl;

        crawler(newUrl, depth + 1);
    }
}
Example #4
0
// Lists the directory tree starting at the crawler's startPath and prints a
// summary of the collected file set. A first argument beginning with '1'
// enables skipping of decrypted entries.
int main (int argc, char *argv[])
{
    FileSet fileSet;
    PathCrawler crawler(&fileSet);

    static const char *const kRule = "---------------------------\n";
    printf("%s", kRule);
    printf("DMA recon v1.4\n");
    printf("%s", kRule);

    bool skip_decrypted = (argc >= 2) && (argv[1][0] == '1');
    if (skip_decrypted)
    {
        printf("Skipping decrypted!\n");
    }

    crawler.listDir(crawler.startPath, skip_decrypted);
    printf("listing finished...\n");
    fileSet.printSummary();

    // Keeps the console window open (relies on the shell's "pause" command).
    system("pause");
    return 0;
}
// Runs "git clean" for the selected paths (and optionally for submodules)
// using the options chosen in CCleanTypeDlg.
//
// Two execution modes:
//  * dry run, or deletion without the recycle bin: the commands are handed to
//    CProgressDlg, which can offer a retry button;
//  * otherwise: "git clean -n" is run, its "Would remove ..." lines are parsed
//    and the collected paths are deleted via CTGitPathList::DeleteAllFiles
//    (presumably into the recycle bin -- confirm against DeleteAllFiles).
//
// Returns TRUE when the progress dialog ends with IDOK; FALSE when the
// submodule lookup fails, git reports an error or the user cancels. All other
// paths return bRet, which is only ever set inside the disabled #if 0 block
// and is therefore false.
bool CleanupCommand::Execute()
{
	bool bRet = false;

	CCleanTypeDlg dlg;
	if( dlg.DoModal() == IDOK)
	{
		bool quotepath = g_Git.GetConfigValueBool(_T("core.quotepath"));

		CString cmd;
		cmd.Format(_T("git.exe clean"));
		// -n (dry run) is also added when the recycle bin is wanted: git then
		// only lists the files and the deletion itself happens further below.
		if (dlg.m_bDryRun || !dlg.m_bNoRecycleBin)
			cmd += _T(" -n ");
		if(dlg.m_bDir)
			cmd += _T(" -d ");
		// git clean flags: -x also removes ignored files, -X removes only
		// ignored files, plain -f leaves ignored files alone.
		switch(dlg.m_CleanType)
		{
		case 0:
			cmd += _T(" -fx");
			break;
		case 1:
			cmd += _T(" -f");
			break;
		case 2:
			cmd += _T(" -fX");
			break;
		}

		STRING_VECTOR submoduleList;
		SubmodulePayload payload(submoduleList);
		if (dlg.m_bSubmodules)
		{
			payload.basePath = CTGitPath(g_Git.m_CurrentDir).GetGitPathString();
			// && binds tighter than ||: the prefix list is left empty only when
			// pathList holds exactly one, empty, path.
			if (pathList.GetCount() != 1 || pathList.GetCount() == 1 && !pathList[0].IsEmpty())
			{
				for (int i = 0; i < pathList.GetCount(); ++i)
				{
					// NOTE(review): 'path' is never used in this loop.
					CString path;
					if (pathList[i].IsDirectory())
						payload.prefixList.push_back(pathList[i].GetGitPathString());
					else
						payload.prefixList.push_back(pathList[i].GetContainingDirectory().GetGitPathString());
				}
			}
			if (!GetSubmodulePathList(payload))
				return FALSE;
			std::sort(submoduleList.begin(), submoduleList.end());
		}

		if (dlg.m_bDryRun || dlg.m_bNoRecycleBin)
		{
			// Loop so that the command list can be rebuilt and re-run when the
			// user picks the retry button.
			while (true)
			{
				CProgressDlg progress;
				// One command per selected path, run in the main working tree;
				// files are represented by their containing directory.
				for (int i = 0; i < this->pathList.GetCount(); ++i)
				{
					CString path;
					if (this->pathList[i].IsDirectory())
						path = pathList[i].GetGitPathString();
					else
						path = pathList[i].GetContainingDirectory().GetGitPathString();

					progress.m_GitDirList.push_back(g_Git.m_CurrentDir);
					progress.m_GitCmdList.push_back(cmd + _T(" \"") + path + _T("\""));
				}

				// One additional, path-less command per submodule directory.
				if (dlg.m_bSubmodules)
				{
					for (CString dir : submoduleList)
					{
						progress.m_GitDirList.push_back(CTGitPath(dir).GetWinPathString());
						progress.m_GitCmdList.push_back(cmd);
					}
				}

				// The retry button is only offered for real (non dry-run) runs;
				// idRetry stays -1 otherwise.
				INT_PTR idRetry = -1;
				if (!dlg.m_bDryRun)
					idRetry = progress.m_PostFailCmdList.Add(CString(MAKEINTRESOURCE(IDS_MSGBOX_RETRY)));
				INT_PTR result = progress.DoModal();
				if (result == IDOK)
					return TRUE;
				// Retry only when m_GitStatus is non-zero (presumably a failed
				// git run -- confirm) and the retry button was pressed.
				if (progress.m_GitStatus && result == IDC_PROGRESS_BUTTON1 + idRetry)
					continue;
				break;
			}
		}
		else
		{
			CSysProgressDlg sysProgressDlg;
			sysProgressDlg.SetAnimation(IDR_CLEANUPANI);
			sysProgressDlg.SetTitle(CString(MAKEINTRESOURCE(IDS_APPNAME)));
			sysProgressDlg.SetLine(1, CString(MAKEINTRESOURCE(IDS_PROC_CLEANUP_INFO1)));
			sysProgressDlg.SetLine(2, CString(MAKEINTRESOURCE(IDS_PROGRESSWAIT)));
			sysProgressDlg.SetShowProgressBar(false);
			sysProgressDlg.ShowModeless((HWND)NULL, true);

			CTGitPathList delList;
			// Iteration 0 uses the main repository (g_Git); iteration i > 0 uses
			// a temporary CGit pointed at submoduleList[i - 1].
			for (size_t i = 0; i <= submoduleList.size(); ++i)
			{
				CGit git;
				CGit *pGit;
				if (i == 0)
					pGit = &g_Git;
				else
				{
					git.m_CurrentDir = submoduleList[i - 1];
					pGit = &git;
				}
				CString cmdout, cmdouterr;
				// NOTE(review): cmd is run without any pathList entry here, unlike
				// the CProgressDlg branch above.
				if (pGit->Run(cmd, &cmdout, &cmdouterr, CP_UTF8))
				{
					MessageBox(nullptr, cmdouterr, _T("TortoiseGit"), MB_ICONERROR);
					return FALSE;
				}

				if (sysProgressDlg.HasUserCancelled())
				{
					CMessageBox::Show(nullptr, IDS_SVN_USERCANCELLED, IDS_APPNAME, MB_OK);
					return FALSE;
				}

				// Parse the dry-run output line by line.
				int pos = 0;
				CString token = cmdout.Tokenize(_T("\n"), pos);
				while (!token.IsEmpty())
				{
					// 13 is the length of the "Would remove " prefix.
					if (token.Mid(0, 13) == _T("Would remove "))
					{
						CString tempPath = token.Mid(13).TrimRight();
						if (quotepath)
						{
							tempPath = UnescapeQuotePath(tempPath.Trim(_T('"')));
						}
						if (i == 0)
							delList.AddPath(CTGitPath(tempPath));
						else
							// Submodule paths are prefixed with the submodule directory.
							// NOTE(review): "/" is a narrow literal, unlike the _T()
							// literals used elsewhere in this function.
							delList.AddPath(CTGitPath(submoduleList[i - 1] + "/" + tempPath));
					}

					token = cmdout.Tokenize(_T("\n"), pos);
				}

				if (sysProgressDlg.HasUserCancelled())
				{
					CMessageBox::Show(nullptr, IDS_SVN_USERCANCELLED, IDS_APPNAME, MB_OK);
					return FALSE;
				}
			}

			delList.DeleteAllFiles(true, false);

			sysProgressDlg.Stop();
		}
	}
// Disabled code that works on the SVN classes (SVN, CTSVNPathList); it is the
// only place where bRet would be assigned.
#if 0
	CProgressDlg progress;
	progress.SetTitle(IDS_PROC_CLEANUP);
	progress.SetAnimation(IDR_CLEANUPANI);
	progress.SetShowProgressBar(false);
	progress.SetLine(1, CString(MAKEINTRESOURCE(IDS_PROC_CLEANUP_INFO1)));
	progress.SetLine(2, CString(MAKEINTRESOURCE(IDS_PROC_CLEANUP_INFO2)));
	progress.ShowModeless(hwndExplorer);

	CString strSuccessfullPaths, strFailedPaths;
	for (int i=0; i<pathList.GetCount(); ++i)
	{
		SVN svn;
		if (!svn.CleanUp(pathList[i]))
		{
			strFailedPaths += _T("- ") + pathList[i].GetWinPathString() + _T("\n");
			strFailedPaths += svn.GetLastErrorMessage() + _T("\n\n");
		}
		else
		{
			strSuccessfullPaths += _T("- ") + pathList[i].GetWinPathString() + _T("\n");

			// after the cleanup has finished, crawl the path downwards and send a change
			// notification for every directory to the shell. This will update the
			// overlays in the left tree view of the explorer.
			CDirFileEnum crawler(pathList[i].GetWinPathString());
			CString sPath;
			bool bDir = false;
			CTSVNPathList updateList;
			while (crawler.NextFile(sPath, &bDir))
			{
				if ((bDir) && (!g_SVNAdminDir.IsAdminDirPath(sPath)))
				{
					updateList.AddPath(CTSVNPath(sPath));
				}
			}
			updateList.AddPath(pathList[i]);
			CShellUpdater::Instance().AddPathsForUpdate(updateList);
			CShellUpdater::Instance().Flush();
			updateList.SortByPathname(true);
			for (INT_PTR i=0; i<updateList.GetCount(); ++i)
			{
				SHChangeNotify(SHCNE_UPDATEITEM, SHCNF_PATH, updateList[i].GetWinPath(), NULL);
				CTraceToOutputDebugString::Instance()(_T(__FUNCTION__) _T(": notify change for path %s\n"), updateList[i].GetWinPath());
			}
		}
	}
	progress.Stop();

	CString strMessage;
	if ( !strSuccessfullPaths.IsEmpty() )
	{
		CString tmp;
		tmp.Format(IDS_PROC_CLEANUPFINISHED, (LPCTSTR)strSuccessfullPaths);
		strMessage += tmp;
		bRet = true;
	}
	if ( !strFailedPaths.IsEmpty() )
	{
		if (!strMessage.IsEmpty())
			strMessage += _T("\n");
		CString tmp;
		tmp.Format(IDS_PROC_CLEANUPFINISHED_FAILED, (LPCTSTR)strFailedPaths);
		strMessage += tmp;
		bRet = false;
	}
	CMessageBox::Show(hwndExplorer, strMessage, _T("TortoiseGit"), MB_OK | (strFailedPaths.IsEmpty()?MB_ICONINFORMATION:MB_ICONERROR));
#endif
	CShellUpdater::Instance().Flush();
	return bRet;
}
Example #6
0
/// Starts the crawl at the configured root URL (m_url) with depth zero.
void WebCrawler::perform()
{
    const size_t initialDepth = 0;
    crawler(m_url, initialDepth);
}
Example #7
0
// Runs "git clean" for the selected paths using the options chosen in
// CCleanTypeDlg.
//
// Two execution modes:
//  * dry run, or deletion without the recycle bin: one command per selected
//    path is handed to CProgressDlg;
//  * otherwise: "git clean -n" is run through g_Git, its "Would remove ..."
//    lines are parsed and the collected paths are deleted via
//    CTGitPathList::DeleteAllFiles (presumably into the recycle bin --
//    confirm against DeleteAllFiles).
//
// Returns TRUE when the progress dialog ends with IDOK; FALSE when git
// reports an error or the user cancels. All other paths return bRet, which is
// only ever set inside the disabled #if 0 block and is therefore false.
bool CleanupCommand::Execute()
{
	bool bRet = false;

	CCleanTypeDlg dlg;
	if( dlg.DoModal() == IDOK)
	{
		bool quotepath = g_Git.GetConfigValueBool(_T("core.quotepath"));

		CString cmd;
		cmd.Format(_T("git clean"));
		// -n (dry run) is also added when the recycle bin is wanted: git then
		// only lists the files and the deletion itself happens further below.
		if (dlg.m_bDryRun || !dlg.m_bNoRecycleBin)
			cmd += _T(" -n ");
		if(dlg.m_bDir)
			cmd += _T(" -d ");
		// git clean flags: -x also removes ignored files, -X removes only
		// ignored files, plain -f leaves ignored files alone.
		switch(dlg.m_CleanType)
		{
		case 0:
			cmd += _T(" -fx");
			break;
		case 1:
			cmd += _T(" -f");
			break;
		case 2:
			cmd += _T(" -fX");
			break;
		}

		if (dlg.m_bDryRun || dlg.m_bNoRecycleBin)
		{
			CProgressDlg progress;
			// One command per selected path; files are represented by their
			// containing directory.
			for (int i = 0; i < this->pathList.GetCount(); ++i)
			{
				CString path;
				if (this->pathList[i].IsDirectory())
					path = pathList[i].GetGitPathString();
				else
					path = pathList[i].GetContainingDirectory().GetGitPathString();

				progress.m_GitCmdList.push_back(cmd + _T(" \"") + path + _T("\""));
			}
			// Any other dialog result falls through to the final "return bRet".
			if (progress.DoModal()==IDOK)
				return TRUE;
		}
		else
		{
			CSysProgressDlg sysProgressDlg;
			sysProgressDlg.SetAnimation(IDR_CLEANUPANI);
			sysProgressDlg.SetTitle(CString(MAKEINTRESOURCE(IDS_APPNAME)));
			sysProgressDlg.SetLine(1, CString(MAKEINTRESOURCE(IDS_PROC_CLEANUP_INFO1)));
			sysProgressDlg.SetLine(2, CString(MAKEINTRESOURCE(IDS_PROGRESSWAIT)));
			sysProgressDlg.SetShowProgressBar(false);
			sysProgressDlg.ShowModeless((HWND)NULL, true);

			CString cmdout, cmdouterr;
			// NOTE(review): cmd is run without any pathList entry here, unlike
			// the CProgressDlg branch above.
			if (g_Git.Run(cmd, &cmdout, &cmdouterr, CP_UTF8)) {
				MessageBox(NULL, cmdouterr, _T("TortoiseGit"), MB_ICONERROR);
				return FALSE;
			}

			if (sysProgressDlg.HasUserCancelled())
			{
				CMessageBox::Show(NULL, IDS_SVN_USERCANCELLED, IDS_APPNAME, MB_OK);
				return FALSE;
			}

			// Parse the dry-run output line by line.
			int pos = 0;
			CString token = cmdout.Tokenize(_T("\n"), pos);
			CTGitPathList delList;
			while (!token.IsEmpty())
			{
				// 13 is the length of the "Would remove " prefix.
				if (token.Mid(0, 13) == _T("Would remove "))
				{
					CString tempPath = token.Mid(13).TrimRight();
					if (quotepath)
					{
						tempPath = UnescapeQuotePath(tempPath.Trim(_T('"')));
					}
					delList.AddPath(CTGitPath(tempPath));
				}

				token = cmdout.Tokenize(_T("\n"), pos);
			}

			if (sysProgressDlg.HasUserCancelled())
			{
				CMessageBox::Show(NULL, IDS_SVN_USERCANCELLED, IDS_APPNAME, MB_OK);
				return FALSE;
			}

			delList.DeleteAllFiles(true, false);

			sysProgressDlg.Stop();
		}
	}
// Disabled code that works on the SVN classes (SVN, CTSVNPathList); it is the
// only place where bRet would be assigned.
#if 0
	CProgressDlg progress;
	progress.SetTitle(IDS_PROC_CLEANUP);
	progress.SetAnimation(IDR_CLEANUPANI);
	progress.SetShowProgressBar(false);
	progress.SetLine(1, CString(MAKEINTRESOURCE(IDS_PROC_CLEANUP_INFO1)));
	progress.SetLine(2, CString(MAKEINTRESOURCE(IDS_PROC_CLEANUP_INFO2)));
	progress.ShowModeless(hwndExplorer);

	CString strSuccessfullPaths, strFailedPaths;
	for (int i=0; i<pathList.GetCount(); ++i)
	{
		SVN svn;
		if (!svn.CleanUp(pathList[i]))
		{
			strFailedPaths += _T("- ") + pathList[i].GetWinPathString() + _T("\n");
			strFailedPaths += svn.GetLastErrorMessage() + _T("\n\n");
		}
		else
		{
			strSuccessfullPaths += _T("- ") + pathList[i].GetWinPathString() + _T("\n");

			// after the cleanup has finished, crawl the path downwards and send a change
			// notification for every directory to the shell. This will update the
			// overlays in the left tree view of the explorer.
			CDirFileEnum crawler(pathList[i].GetWinPathString());
			CString sPath;
			bool bDir = false;
			CTSVNPathList updateList;
			while (crawler.NextFile(sPath, &bDir))
			{
				if ((bDir) && (!g_SVNAdminDir.IsAdminDirPath(sPath)))
				{
					updateList.AddPath(CTSVNPath(sPath));
				}
			}
			updateList.AddPath(pathList[i]);
			CShellUpdater::Instance().AddPathsForUpdate(updateList);
			CShellUpdater::Instance().Flush();
			updateList.SortByPathname(true);
			for (INT_PTR i=0; i<updateList.GetCount(); ++i)
			{
				SHChangeNotify(SHCNE_UPDATEITEM, SHCNF_PATH, updateList[i].GetWinPath(), NULL);
				ATLTRACE(_T("notify change for path %s\n"), updateList[i].GetWinPath());
			}
		}
	}
	progress.Stop();

	CString strMessage;
	if ( !strSuccessfullPaths.IsEmpty() )
	{
		CString tmp;
		tmp.Format(IDS_PROC_CLEANUPFINISHED, (LPCTSTR)strSuccessfullPaths);
		strMessage += tmp;
		bRet = true;
	}
	if ( !strFailedPaths.IsEmpty() )
	{
		if (!strMessage.IsEmpty())
			strMessage += _T("\n");
		CString tmp;
		tmp.Format(IDS_PROC_CLEANUPFINISHED_FAILED, (LPCTSTR)strFailedPaths);
		strMessage += tmp;
		bRet = false;
	}
	CMessageBox::Show(hwndExplorer, strMessage, _T("TortoiseGit"), MB_OK | (strFailedPaths.IsEmpty()?MB_ICONINFORMATION:MB_ICONERROR));
#endif
	CShellUpdater::Instance().Flush();
	return bRet;
}
Example #8
0
  //----------------------------------------------------------------
  // IElement::load
  //
  // Attempts to load this element (EBML ID, payload size, payload,
  // trailing Void and CRC-32 elements) from the given storage.
  //
  // storage     -- source to read from.
  // bytesToRead -- only tested for zero in this function; it does
  //                not otherwise limit how much is read here.
  // loader      -- optional delegate (tested for NULL before use);
  //                it gets the first chance to load payload data,
  //                is forwarded to the nested load calls, and its
  //                loaded() hook is called once loading ends.
  //
  // Returns 0 when bytesToRead is 0, when the EBML ID read from
  // storage differs from getId(), or when the delegate returns
  // uintMax[8]; otherwise returns receipt_->numBytes().
  //
  uint64
  IElement::load(IStorage & storage,
                 uint64 bytesToRead,
                 IDelegateLoad * loader)
  {
    if (!bytesToRead)
    {
      return 0;
    }

    // save a storage receipt so that element position references
    // can be resolved later:
    IStorage::IReceiptPtr storageReceipt = storage.receipt();

    // save current seek position, so it can be restored if necessary:
    IStorage::TSeek storageStart(storage);

    uint64 eltId = loadEbmlId(storage);
    if (eltId != getId())
    {
      // element id wrong for my type:
      return 0;
    }

#if 0 // !defined(NDEBUG) && (defined(DEBUG) || defined(_DEBUG))
    Indent::More indentMore(Indent::depth_);
    {
      IStorage::TSeek restore(storage);
      uint64 vsizeSize = 0;
      uint64 vsize = vsizeDecode(storage, vsizeSize);
      std::cout << indent()
                << std::setw(8) << uintEncode(getId()) << " @ "
                << std::hex
                << "0x" << storageStart.absolutePosition()
                << std::dec
                << " -- " << getName()
                << ", payload " << vsize << " bytes" << std::endl;
    }
#endif

    // this appears to be a good payload:
    storageStart.doNotRestore();

    // store the storage receipt:
    receipt_ = storageReceipt;

    // read payload size:
    uint64 vsizeSize = 0;
    uint64 payloadSize = vsizeDecode(storage, vsizeSize);
    // a decoded size of uintMax[8] is treated as "payload size unknown":
    const bool payloadSizeUnknown = (payloadSize == uintMax[8]);

    // keep track of the number of bytes read successfully:
    receipt_->add(uintNumBytes(eltId));
    receipt_->add(vsizeSize);

    // clear the payload checksum:
    setCrc32(false);
    storedCrc32_ = 0;
    computedCrc32_ = 0;

    // save the payload storage receipt so that element position references
    // can be resolved later:
    IStorage::IReceiptPtr receiptPayload = storage.receipt();
    offsetToPayload_ = receiptPayload->position() - receipt_->position();
    offsetToCrc32_ = kUndefinedOffset;

    // shortcut:
    IPayload & payload = getPayload();

    // container elements may be present in any order, therefore
    // not every load will succeed -- keep trying until all
    // load attempts fail:
    uint64 payloadBytesToRead = payloadSize;
    uint64 payloadBytesReadTotal = 0;
    while (payloadBytesToRead)
    {
      uint64 prevPayloadBytesToRead = payloadBytesToRead;

      // try to load some part of the payload:
      uint64 partialPayloadSize = 0;
      if (loader)
      {
        // the delegate gets the first chance to consume payload data:
        uint64 bytesRead = loader->load(storage,
                                        payloadBytesToRead,
                                        eltId,
                                        payload);
        if (bytesRead == uintMax[8])
        {
          // special case, indicating that the loader doesn't
          // want to read any more data:
          storageStart.doRestore();
          loader->loaded(*this);
          return 0;
        }

        // NOTE(review): unsigned arithmetic -- this would wrap if the
        // delegate ever reported more bytes than payloadBytesToRead.
        partialPayloadSize += bytesRead;
        payloadBytesToRead -= bytesRead;
      }

      // fall back to the payload's own loader when the delegate
      // read nothing (or there is no delegate):
      if (!partialPayloadSize)
      {
        uint64 bytesRead = payload.load(storage,
                                        payloadBytesToRead,
                                        loader);
        partialPayloadSize += bytesRead;
        payloadBytesToRead -= bytesRead;
      }

      // consume any void elements that may exist:
      IPayload::TVoid eltVoid;
      uint64 voidPayloadSize = eltVoid.load(storage,
                                            payloadBytesToRead,
                                            loader);
      if (voidPayloadSize)
      {
        payloadBytesToRead -= voidPayloadSize;

        // find an element to store the Void element, so that
        // the relative element order would be preserved:
        IStorage::IReceiptPtr voidReceipt = eltVoid.storageReceipt();
        // NOTE(review): the "- 2" offset is not explained here;
        // presumably it targets a position inside the element that
        // precedes the Void element -- TODO confirm.
        FindElement crawler(voidReceipt->position() - 2);

        if (partialPayloadSize)
        {
          crawler.evalPayload(payload);
          assert(crawler.eltFound_);
        }

        if (crawler.eltFound_)
        {
          IPayload & dstPayload = crawler.eltFound_->getPayload();
          dstPayload.voids_.push_back(eltVoid);
        }
        else
        {
          payload.voids_.push_back(eltVoid);
        }
      }

      // consume the CRC-32 element if it exists:
      payloadBytesToRead -= loadCrc32(storage,
                                      payloadBytesToRead);

      uint64 payloadBytesRead = prevPayloadBytesToRead - payloadBytesToRead;
      payloadBytesReadTotal += payloadBytesRead;

      // stop once an iteration makes no progress:
      if (payloadBytesRead == 0)
      {
        break;
      }
    }

    if (payloadBytesReadTotal < payloadSize && !payloadSizeUnknown)
    {
      // skip unrecognized alien data:
      uint64 alienDataSize = payloadSize - payloadBytesReadTotal;

#if !defined(NDEBUG) && (defined(DEBUG) || defined(_DEBUG))
      std::cerr << indent() << "WARNING: " << getName()
                << " 0x" << uintEncode(getId())
                << " -- skipping " << alienDataSize
                << " bytes of unrecognized alien data @ 0x"
                << std::hex
                << storage.receipt()->position()
                << std::dec
                << std::endl;
#endif

      storage.skip(alienDataSize);
      payloadBytesReadTotal = payloadSize;
    }

    // account for the payload bytes in the element receipt:
    receiptPayload->add(payloadBytesReadTotal);
    *receipt_ += receiptPayload;

    // verify stored payload CRC-32 checksum:
    if (shouldComputeCrc32())
    {
      IStorage::IReceiptPtr receiptCrc32 = crc32Receipt();

      Crc32 crc32;
      receiptPayload->calcCrc32(crc32, receiptCrc32);
      computedCrc32_ = crc32.checksum();

      // a mismatch is only reported; loading continues regardless:
      if (computedCrc32_ != storedCrc32_)
      {
#if 1 // !defined(NDEBUG) && (defined(DEBUG) || defined(_DEBUG))
        std::cerr << indent() << "WARNING: " << getName()
                  << " 0x" << uintEncode(getId())
                  << " -- checksum mismatch, loaded "
                  << std::hex << storedCrc32_
                  << ", computed " << computedCrc32_
                  << ", CRC-32 @ 0x" << receiptCrc32->position()
                  << ", payload @ 0x" << receiptPayload->position()
                  << ":" << receiptPayload->numBytes()
                  << std::dec
                  << std::endl;

        // NOTE(review): doOverCrc32 is computed but never read in this
        // function; presumably a hook for stepping through the
        // calculation in a debugger -- confirm before removing.
        Crc32 doOverCrc32;
        receiptPayload->calcCrc32(doOverCrc32, receiptCrc32);
#endif
      }
    }

    if (loader && receipt_->numBytes())
    {
      // allow the delegate to perform post-processing on the loaded element:
      loader->loaded(*this);
    }

    return receipt_->numBytes();
  }
Example #9
0
int main(int argc, char **argv) {
    
    if (argc != 3) {
        printUsage(argv[0]);
        return 0;
    }

    if (strcmp(argv[1], "--crawl") == 0) {
        Logger::setOutputFile("log-crawl_test.txt");

        WebCrawler crawler(120000);
        crawler.setDownloadInterval(5);
        crawler.setPagesDir("data/pages_test");
        if (!crawler.startCrawl(argv[2]))
            return 0;
        crawler.saveToDisk("data/pagesData_total.txt");

    } else  if (strcmp(argv[1], "--resume-crawl") == 0) {
        Logger::setOutputFile("log-crawl.txt");

        WebCrawler crawler(120000);
        crawler.setDownloadInterval(5);
        crawler.setPagesDir("pages");
        if (!crawler.resumeCrawl(argv[2]))
            return 0;
        crawler.saveToDisk("data/pagesData_total.txt");

    } else if (strcmp(argv[1], "--stat") == 0) {
        Logger::setOutputFile("log-stat.txt");
        PagesStatist statist(argv[2]);

        const std::vector<PR>& pageRank = statist.getPageRank();
        TMK_LOG_ALL("Saving PR to file \n");
        tmk::saveToDisk(pageRank, "data/PR.txt");
        
        std::vector<std::pair<Url, PR> > top20;
        statist.getTopPages(20, top20);
        TMK_LOG_ALL("Top 20 pages by PR:\n");
        for (int i = 0; i < top20.size(); ++i) {
           TMK_LOG_ALL("PR: %f\t%s\n", top20[i].second, top20[i].first.c_str()); 
        }
        
        const std::vector<size_t>& pagesSize = statist.getPageSizesInBytes();
        TMK_LOG_ALL("Saving pages Sizes to file \n");
        tmk::saveToDisk(pagesSize, "data/pageSize.txt");
        
        const std::vector<size_t>& pagesOutgoingLinksCount = statist.getPagesOutgoingLinksCount();
        TMK_LOG_ALL("Saving pages outgoing links count to file\n");
        tmk::saveToDisk(pagesOutgoingLinksCount, "data/pageOutLinks.txt");
        
        const std::vector<size_t>& pagesIncomingLinksCount = statist.getPagesIncomingLinksCount();
        TMK_LOG_ALL("Saving pages incoming links count to file\n");
        tmk::saveToDisk(pagesIncomingLinksCount, "data/pageInLinks.txt");
        
        const std::vector<size_t>& pageDistancesFromMain = statist.getPageDistancesFromMain();
        TMK_LOG_ALL("Saving pages distances from main page to file\n");
        tmk::saveToDisk(pageDistancesFromMain, "data/pageDistances.txt");
        size_t maxPageDist = 0;
        for (auto it = pageDistancesFromMain.begin(); it != pageDistancesFromMain.end(); ++it) {
            maxPageDist = std::max(maxPageDist, *it);
        }
        TMK_LOG_ALL("Maximal distance from main page: %zu\n", maxPageDist);
        
    } else {
        printUsage(argv[0]);
    }
    
    return 0;
}