Exemplo n.º 1
0
void FileScanner::Run(int thread_index)
{
	// Set the name of the thread.
	std::stringstream temp_ss;
	temp_ss << "FILESCAN_";
	temp_ss << thread_index;
	set_thread_name(temp_ss.str());

	if(m_manually_assign_cores)
	{
		// Spread the scanner threads across cores.  Linux at least doesn't seem to want to do that by default.
		AssignToNextCore();
	}

	// Create a reusable, resizable buffer for the File() reads.
	auto file_data_storage = std::make_shared<ResizableArray<char>>();

	// Pull new filenames off the input queue until it's closed.
	std::string next_string;
	while(m_in_queue.wait_pull(std::move(next_string)) != queue_op_status::closed)
	{
		MatchList ml(next_string);

		try
		{
			// Try to open and read the file.  This could throw.
			LOG(INFO) << "Attempting to scan file \'" << next_string << "\'";
			File f(next_string, file_data_storage);

			if(f.size() == 0)
			{
				LOG(INFO) << "WARNING: Filesize of \'" << next_string << "\' is 0, skipping.";
				continue;
			}

			const char *file_data = f.data();
			size_t file_size = f.size();

			// Scan the file data for occurrences of the regex, sending matches to the MatchList ml.
			ScanFile(file_data, file_size, ml);

			if(!ml.empty())
			{
				// Force move semantics here.
				m_output_queue.wait_push(std::move(ml));
			}
		}
		catch(const FileException &error)
		{
			// The File constructor threw an exception.
			ERROR() << error.what();
		}
		catch(const std::system_error& error)
		{
			// A system error.  Currently should only be errors from File.
			ERROR() << error.code() << " - " << error.code().message();
		}
		catch(...)
		{
			// Rethrow whatever it was.
			throw;
		}
	}
}
Exemplo n.º 2
0
/**
 * Scanner thread body: optionally assigns the thread to a core, then repeatedly
 * pulls filenames from the input queue, reads each file, scans it for the regex,
 * and pushes any non-empty MatchList to the output queue.
 *
 * When HAVE_LIBPCRE is not set, a std::regex is built once up front from m_regex
 * (wrapped in word-boundary assertions if m_word_regexp is set, and made
 * case-insensitive if m_ignore_case is set) and used for every file.
 *
 * FileException and std::system_error raised while handling a file are reported
 * on std::cerr and the loop moves on to the next filename; any other exception
 * is rethrown to the caller.
 */
void FileScanner::Run()
{
	if(m_manually_assign_cores)
	{
		// Spread the scanner threads across cores.  Linux at least doesn't seem to want to do that by default.
		AssignToNextCore();
	}

	// Note: this guard tests the macro's value, the same way the guard around the
	// scan call below does, so the two can never disagree about whether
	// 'expression' exists (e.g. if HAVE_LIBPCRE is defined as 0).
#if !HAVE_LIBPCRE
	// Create the std::regex we're looking for, possibly ignoring case, possibly with match-whole-word.
	auto stack_regex = m_regex;
	if(m_word_regexp)
	{
		// Surround the regex with \b (word boundary) assertions.
		stack_regex = "\\b(?:" + m_regex + ")\\b";
	}

	// Build the flags in a separate variable.  Writing "A | B | cond ? X : Y" inline
	// parses as "(A | B | cond) ? X : Y" because '|' binds tighter than '?:', which
	// would always select icase alone and drop the other flags.
	std::regex_constants::syntax_option_type regex_flags =
			std::regex_constants::ECMAScript | std::regex_constants::optimize;
	if(m_ignore_case)
	{
		regex_flags |= std::regex_constants::icase;
	}
	std::regex expression(stack_regex, regex_flags);
#endif

	// Pull new filenames off the input queue until it's closed.
	std::string next_string;
	while(m_in_queue.wait_pull(std::move(next_string)) != queue_op_status::closed)
	{
		MatchList ml(next_string);

		try
		{
			// Try to open and read the file.  This could throw.
			File f(next_string);

			if(f.size() == 0)
			{
				// Nothing to scan in a zero-length file; skip it without reporting anything.
				continue;
			}

			const char *file_data = f.data();
			size_t file_size = f.size();

			// Scan the file data for the regex.
#if HAVE_LIBPCRE
			ScanFileLibPCRE(file_data, file_size, ml);
#else
			ScanFileCpp11(expression, file_data, file_size, ml);
#endif

			if(!ml.empty())
			{
				// Force move semantics here.
				m_output_queue.wait_push(std::move(ml));
			}
		}
		catch(const FileException &error)
		{
			// The File constructor threw an exception.
			std::cerr << "ucg: ERROR: " << error.what() << std::endl;
		}
		catch(const std::system_error& error)
		{
			// A system error.  Currently should only be errors from File.
			std::cerr << "ucg: ERROR: " << error.code() << " - " << error.code().message() << std::endl;
		}
		catch(...)
		{
			// Rethrow whatever it was.
			throw;
		}
	}
}