示例#1
0
// Import block hashes from tab-delimited text read from `in`.
//
// Each data line is expected to hold three tab-separated fields:
//   <file hash hexdigest> TAB <block hash hexdigest> TAB <sector index>
// Empty lines and lines whose first character is '#' are skipped silently.
// Malformed lines (missing tab, hexdigest that converts to an empty string,
// or a sector index of 0) are reported on std::cerr together with their
// line number and are skipped.
//
// A file hash that the manager already reports as a source when it is first
// encountered is not imported.  File hashes accepted during this call are
// remembered so that all of their lines are imported.
//
// Parameters:
//   manager           - receives the source data, source name and hash
//                       records for every accepted line.
//   repository_name   - repository name stored with each accepted source.
//   filename          - filename stored with each accepted source.
//   whitelist_manager - optional, may be NULL.  When given, a block hash for
//                       which find_hash_count() is greater than 0 is
//                       inserted with the flag "w" instead of "".
//   progress_tracker  - track() is called once per inserted block hash.
//   in                - stream the lines are read from.
void import_tab(hashdb::import_manager_t& manager,
                const std::string& repository_name,
                const std::string& filename,
                const hashdb::scan_manager_t* const whitelist_manager,
                progress_tracker_t& progress_tracker,
                std::istream& in) {

  // file hashes first seen during this call that were not already known to
  // the manager; only these are imported
  std::set<std::string> importable_sources;

  std::string line;
  size_t line_number = 0;
  while (std::getline(in, line)) {
    ++line_number;

    // skip empty lines and comment lines; emptiness is tested first so that
    // line[0] is never evaluated on an empty string
    if (line.empty() || line[0] == '#') {
      continue;
    }

    // find tabs
    const size_t tab_index1 = line.find('\t');
    if (tab_index1 == std::string::npos) {
      std::cerr << "Tab not found on line " << line_number << ": '" << line << "'\n";
      continue;
    }
    const size_t tab_index2 = line.find('\t', tab_index1 + 1);
    if (tab_index2 == std::string::npos) {
      std::cerr << "Second tab not found on line " << line_number << ": '" << line << "'\n";
      continue;
    }

    // get file hash; an empty conversion result is treated as invalid
    const std::string file_hash_string = line.substr(0, tab_index1);
    const std::string file_binary_hash = hashdb::hex_to_bin(file_hash_string);
    if (file_binary_hash.empty()) {
      std::cerr << "file hexdigest is invalid on line " << line_number
                << ": '" << line << "', '" << file_hash_string << "'\n";
      continue;
    }

    // skip the file hash if it was preexisting else identify it as importable
    if (importable_sources.find(file_binary_hash) == importable_sources.end()) {
      // the file hash has not been accepted yet so see if it is preexisting
      if (manager.has_source(file_binary_hash)) {
        // the file is preexisting so skip it
        continue;
      }
      // the file hash is new so identify it as importable
      importable_sources.insert(file_binary_hash);
    }

    // get block hash; an empty conversion result is treated as invalid
    const std::string block_hashdigest_string = line.substr(
                                  tab_index1 + 1, tab_index2 - tab_index1 - 1);
    const std::string block_binary_hash =
                                  hashdb::hex_to_bin(block_hashdigest_string);
    if (block_binary_hash.empty()) {
      std::cerr << "Invalid block hash on line " << line_number
                << ": '" << line << "', '" << block_hashdigest_string << "'\n";
      continue;
    }

    // get file offset; everything after the second tab is the sector index.
    // The value is kept in 64 bits so that it is not narrowed to size_t on
    // platforms where size_t is 32 bits wide.
    const std::string sector_index_string = line.substr(tab_index2 + 1);
    const uint64_t sector_index = s_to_uint64(sector_index_string);
    if (sector_index == 0) {
      // index starts at 1 so 0 is invalid
      std::cerr << "Invalid sector index on line " << line_number
                << ": '" << line << "', '"
                << sector_index_string << "'\n";
      continue;
    }
    // sector indexes are 1-based, so sector 1 maps to offset 0
    const uint64_t file_offset = (sector_index - 1) * sector_size;

    // mark with "w" if in whitelist
    std::string whitelist_flag = "";
    if (whitelist_manager != NULL &&
        whitelist_manager->find_hash_count(block_binary_hash) > 0) {
      whitelist_flag = "w";
    }

    // add source data
    manager.insert_source_data(file_binary_hash, 0, "", 0, 0);

    // add name pair
    manager.insert_source_name(file_binary_hash, repository_name, filename);

    // add block hash
    manager.insert_hash(block_binary_hash, 0.0, whitelist_flag,
                        file_binary_hash, file_offset);

    // update progress tracker
    progress_tracker.track();
  }
}