void FileScanner::Run(int thread_index) { // Set the name of the thread. std::stringstream temp_ss; temp_ss << "FILESCAN_"; temp_ss << thread_index; set_thread_name(temp_ss.str()); if(m_manually_assign_cores) { // Spread the scanner threads across cores. Linux at least doesn't seem to want to do that by default. AssignToNextCore(); } // Create a reusable, resizable buffer for the File() reads. auto file_data_storage = std::make_shared<ResizableArray<char>>(); // Pull new filenames off the input queue until it's closed. std::string next_string; while(m_in_queue.wait_pull(std::move(next_string)) != queue_op_status::closed) { MatchList ml(next_string); try { // Try to open and read the file. This could throw. LOG(INFO) << "Attempting to scan file \'" << next_string << "\'"; File f(next_string, file_data_storage); if(f.size() == 0) { LOG(INFO) << "WARNING: Filesize of \'" << next_string << "\' is 0, skipping."; continue; } const char *file_data = f.data(); size_t file_size = f.size(); // Scan the file data for occurrences of the regex, sending matches to the MatchList ml. ScanFile(file_data, file_size, ml); if(!ml.empty()) { // Force move semantics here. m_output_queue.wait_push(std::move(ml)); } } catch(const FileException &error) { // The File constructor threw an exception. ERROR() << error.what(); } catch(const std::system_error& error) { // A system error. Currently should only be errors from File. ERROR() << error.code() << " - " << error.code().message(); } catch(...) { // Rethrow whatever it was. throw; } } }
void FileScanner::Run() { if(m_manually_assign_cores) { // Spread the scanner threads across cores. Linux at least doesn't seem to want to do that by default. AssignToNextCore(); } #ifndef HAVE_LIBPCRE // Create the std::regex we're looking for, possibly ignoring case, possibly with match-whole-word. auto stack_regex = m_regex; if(m_word_regexp) { // Surround the regex with \b (word boundary) assertions. stack_regex = "\\b(?:" + m_regex + ")\\b"; } std::regex expression(stack_regex, std::regex_constants::ECMAScript | std::regex_constants::optimize | m_ignore_case ? std::regex_constants::icase : static_cast<typeof(std::regex_constants::icase)>(0)); #endif // Pull new filenames off the input queue until it's closed. std::string next_string; while(m_in_queue.wait_pull(std::move(next_string)) != queue_op_status::closed) { MatchList ml(next_string); try { // Try to open and read the file. This could throw. File f(next_string); if(f.size() == 0) { //std::clog << "WARNING: Filesize of \"" << next_string << "\" is 0" << std::endl; continue; } const char *file_data = f.data(); size_t file_size = f.size(); // Scan the file data for the regex. #if HAVE_LIBPCRE ScanFileLibPCRE(file_data, file_size, ml); #else ScanFileCpp11(expression, file_data, file_size, ml); #endif if(!ml.empty()) { // Force move semantics here. m_output_queue.wait_push(std::move(ml)); } } catch(const FileException &error) { // The File constructor threw an exception. std::cerr << "ucg: ERROR: " << error.what() << std::endl; } catch(const std::system_error& error) { // A system error. Currently should only be errors from File. std::cerr << "ucg: ERROR: " << error.code() << " - " << error.code().message() << std::endl; } catch(...) { // Rethrow whatever it was. throw; } } }