int main(int argc, char* argv[]) { std::string searchFile(argv[2]); uint topN(std::stoi(argv[3])); std::ifstream corpus(argv[1]); std::istream_iterator<std::string> corpus_it(corpus), eof; std::vector<std::string> fileList(corpus_it, eof); strIntMap corpusMap, documentMap; std::cout << "Loading corpus using files listed in " << argv[1] << std::endl; loadCorpusAndSearchFiles(corpusMap, documentMap, searchFile, fileList); std::cout << "Loaded corpus of " << corpusMap.size() << " words from " << fileList.size() << " file(s)" << std::endl << "------[ Starting analysis ]------" << std::endl << "Top " << topN << " significant words..." << std::endl; std::set<tfidfPair> result; getTopN(topN, fileList.size(), result, documentMap, corpusMap); printTopN(result); std::cout << "Lines with 1 or more significant words:" << std::endl; countSigWords(searchFile, result); return 0; }
//no except void __cdecl _tmain(int argc, TCHAR *argv[]) noexcept { if (argc != 2 && argc != 3) { printf("Usage Error: Incorrect number of arguments\n\n"); _tprintf("Usage:\n\t%s <mode> <map_file_name>\n", argv[0]); return; } uint64 total_read = 0; uint64 total_write = 0; clock_t begin, end; std::chrono::high_resolution_clock::time_point bStart = std::chrono::high_resolution_clock::now(); if (strcmp(argv[1], "r") == 0){ printf("Starting 1st Inversion\n"); char* outputFile; char* nodeHash; std::chrono::high_resolution_clock::time_point b1 = std::chrono::high_resolution_clock::now(); { InvertAndRelabelNodes<uint64> graph(argv[2], BUFFERSIZE * _1_MB, true); graph.execute(); outputFile = graph.output_files[0]; nodeHash = graph.nodesHash; printf("Total IO: read - %.2f GB; write - %.2f GB\n", (float)graph.total_read / _1_GB, (float)graph.total_write / _1_GB); total_read += graph.total_read; total_write += graph.total_write; } std::chrono::high_resolution_clock::time_point e1 = std::chrono::high_resolution_clock::now(); printf("Took %lld seconds\n", std::chrono::duration_cast<std::chrono::seconds>(e1 - b1).count()); printf("Ending 1st Inversion\n"); printf("\nStarting 2nd Inversion\n"); std::chrono::high_resolution_clock::time_point b2 = std::chrono::high_resolution_clock::now(); { InvertAndRelabelNodes<uint32> graph(outputFile, BUFFERSIZE * _1_MB, false); graph.nodesHash = nodeHash; graph.execute(); printf("Total IO: read - %.2f GB; write - %.2f GB\n", (float)graph.total_read / _1_GB, (float)graph.total_write / _1_GB); total_read += graph.total_read; total_write += graph.total_write; } std::chrono::high_resolution_clock::time_point e2 = std::chrono::high_resolution_clock::now(); printf("Took %lld seconds\n", std::chrono::duration_cast<std::chrono::seconds>(e2 - b2).count()); printf("Ending 2nd Inversion\n"); } else if (strcmp(argv[1], "c") == 0) { printf("\nStarting Top 10\n"); std::chrono::high_resolution_clock::time_point b3 = std::chrono::high_resolution_clock::now(); { TopN topN("PLD-out-relabeled.dat"); topN.execute(); printf("Total IO: read - %.2f GB; write - %.2f GB\n", (float)topN.total_read / _1_GB, (float)topN.total_write / _1_GB); total_read += topN.total_read; total_write += topN.total_write; } std::chrono::high_resolution_clock::time_point e3 = std::chrono::high_resolution_clock::now(); printf("Took %lld seconds\n", std::chrono::duration_cast<std::chrono::seconds>(e3 - b3).count()); printf("\Ending Top 10\n"); }